diff --git a/.asf.yaml b/.asf.yaml index 08837a974e5..ebc42cac609 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -33,12 +33,16 @@ github: - php - python - ruby + - rust enabled_merge_buttons: merge: false rebase: true squash: true + collaborators: + - jbonofre + notifications: commits: commits@avro.apache.org issues: issues@avro.apache.org diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000000..bb261cfd8c1 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,36 @@ +{ + "name": "Avro Development", + "build": { + "dockerfile": "../share/docker/Dockerfile", + "context": ".." + }, + "customizations": { + "vscode": { + "settings": { + }, + "extensions": [ + // Python + "ms-python.python", + "ms-python.vscode-pylance", + // C/C++ + "ms-vscode.cpptools", + // C# + "ms-dotnettools.csharp", + // Rust + "vadimcn.vscode-lldb", + "mutantdino.resourcemonitor", + "matklad.rust-analyzer", + "tamasfe.even-better-toml", + "serayuzgur.crates", + // Java + "vscjava.vscode-java-pack", + // Shell script + "timonwong.shellcheck", + // YAML + "redhat.vscode-yaml", + // Git + "eamodio.gitlens" + ] + } + } +} \ No newline at end of file diff --git a/.editorconfig b/.editorconfig index b2d8a7c5fc9..b96e2b9c6e8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -19,13 +19,30 @@ root = true charset = utf-8 end_of_line = lf insert_final_newline = true +ij_any_block_comment_at_first_column = false +ij_any_line_comment_at_first_column = false [*.{java,xml,sh}] indent_style = space indent_size = 2 trim_trailing_whitespace=true -[*.{cs,ps1}] +ij_continuation_indent_size = 4 +ij_java_wrap_comments = true +ij_any_indent_case_from_switch = false + +[*.{avsc,avpr,avdl}] +indent_style = space +indent_size = 2 +trim_trailing_whitespace=true + +ij_continuation_indent_size = 4 +ij_json_space_after_colon = true +ij_json_space_before_colon = true +ij_json_spaces_within_brackets = true +ij_any_array_initializer_wrap = off + +[*.{ps1}] 
indent_style = space indent_size = 4 trim_trailing_whitespace=true @@ -37,3 +54,174 @@ trim_trailing_whitespace=true [*.py] indent_style = space indent_size = 4 + +# Generated code +[*{_AssemblyInfo.cs,.notsupported.cs,AsmOffsets.cs}] +generated_code = true + +# C# files +[*.cs] +indent_style = space +indent_size = 4 +trim_trailing_whitespace=true + +# New line preferences +csharp_new_line_before_open_brace = all +csharp_new_line_before_else = true +csharp_new_line_before_catch = true +csharp_new_line_before_finally = true +csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_anonymous_types = true +csharp_new_line_between_query_expression_clauses = true + +# Indentation preferences +csharp_indent_block_contents = true +csharp_indent_braces = false +csharp_indent_case_contents = true +csharp_indent_case_contents_when_block = true +csharp_indent_switch_labels = true +csharp_indent_labels = one_less_than_current + +# Modifier preferences +csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion + +# avoid this. 
unless absolutely necessary +dotnet_style_qualification_for_field = false:suggestion +dotnet_style_qualification_for_property = false:suggestion +dotnet_style_qualification_for_method = false:suggestion +dotnet_style_qualification_for_event = false:suggestion + +# Types: use keywords instead of BCL types, and permit var only when the type is clear +csharp_style_var_for_built_in_types = false:suggestion +csharp_style_var_when_type_is_apparent = false:none +csharp_style_var_elsewhere = false:suggestion +dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion +dotnet_style_predefined_type_for_member_access = true:suggestion + +# Non-private static fields are PascalCase +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.symbols = non_private_static_fields +dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.style = non_private_static_field_style +dotnet_naming_symbols.non_private_static_fields.applicable_kinds = field +dotnet_naming_symbols.non_private_static_fields.applicable_accessibilities = public, protected, internal, protected_internal, private_protected +dotnet_naming_symbols.non_private_static_fields.required_modifiers = static +dotnet_naming_style.non_private_static_field_style.capitalization = pascal_case + +# Constants are PascalCase +dotnet_naming_rule.constants_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.constants_should_be_pascal_case.symbols = constants +dotnet_naming_rule.constants_should_be_pascal_case.style = constant_style +dotnet_naming_symbols.constants.applicable_kinds = field, local +dotnet_naming_symbols.constants.required_modifiers = const +dotnet_naming_style.constant_style.capitalization = pascal_case + +# Static fields are camelCase and start with s_ +dotnet_naming_rule.static_fields_should_be_camel_case.severity = suggestion 
+dotnet_naming_rule.static_fields_should_be_camel_case.symbols = static_fields +dotnet_naming_rule.static_fields_should_be_camel_case.style = static_field_style +dotnet_naming_symbols.static_fields.applicable_kinds = field +dotnet_naming_symbols.static_fields.required_modifiers = static +dotnet_naming_style.static_field_style.capitalization = camel_case +dotnet_naming_style.static_field_style.required_prefix = s_ + +# Instance fields are camelCase and start with _ +dotnet_naming_rule.instance_fields_should_be_camel_case.severity = suggestion +dotnet_naming_rule.instance_fields_should_be_camel_case.symbols = instance_fields +dotnet_naming_rule.instance_fields_should_be_camel_case.style = instance_field_style +dotnet_naming_symbols.instance_fields.applicable_kinds = field +dotnet_naming_style.instance_field_style.capitalization = camel_case +dotnet_naming_style.instance_field_style.required_prefix = _ + +# Locals and parameters are camelCase +dotnet_naming_rule.locals_should_be_camel_case.severity = suggestion +dotnet_naming_rule.locals_should_be_camel_case.symbols = locals_and_parameters +dotnet_naming_rule.locals_should_be_camel_case.style = camel_case_style +dotnet_naming_symbols.locals_and_parameters.applicable_kinds = parameter, local +dotnet_naming_style.camel_case_style.capitalization = camel_case + +# Local functions are PascalCase +dotnet_naming_rule.local_functions_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.local_functions_should_be_pascal_case.symbols = local_functions +dotnet_naming_rule.local_functions_should_be_pascal_case.style = local_function_style +dotnet_naming_symbols.local_functions.applicable_kinds = local_function +dotnet_naming_style.local_function_style.capitalization = pascal_case + +# By default, name items with PascalCase +dotnet_naming_rule.members_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.members_should_be_pascal_case.symbols = all_members +dotnet_naming_rule.members_should_be_pascal_case.style 
= pascal_case_style +dotnet_naming_symbols.all_members.applicable_kinds = * +dotnet_naming_style.pascal_case_style.capitalization = pascal_case + +# Code style defaults +csharp_using_directive_placement = outside_namespace:suggestion +dotnet_sort_system_directives_first = true +csharp_prefer_braces = true:silent +csharp_preserve_single_line_blocks = true:none +csharp_preserve_single_line_statements = false:none +csharp_prefer_static_local_function = true:suggestion +csharp_prefer_simple_using_statement = false:none +csharp_style_prefer_switch_expression = false:none +dotnet_style_readonly_field = true:suggestion + +# Expression-level preferences +dotnet_style_object_initializer = true:suggestion +dotnet_style_collection_initializer = true:suggestion +dotnet_style_explicit_tuple_names = true:suggestion +dotnet_style_coalesce_expression = true:suggestion +dotnet_style_null_propagation = true:suggestion +dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion +dotnet_style_prefer_inferred_tuple_names = true:suggestion +dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion +dotnet_style_prefer_auto_properties = true:suggestion +dotnet_style_prefer_conditional_expression_over_assignment = true:silent +dotnet_style_prefer_conditional_expression_over_return = true:silent +csharp_prefer_simple_default_expression = true:suggestion + +# Expression-bodied members +csharp_style_expression_bodied_methods = true:silent +csharp_style_expression_bodied_constructors = true:silent +csharp_style_expression_bodied_operators = true:silent +csharp_style_expression_bodied_properties = true:silent +csharp_style_expression_bodied_indexers = true:silent +csharp_style_expression_bodied_accessors = true:silent +csharp_style_expression_bodied_lambdas = true:silent +csharp_style_expression_bodied_local_functions = true:silent + +# Pattern matching +csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 
+csharp_style_pattern_matching_over_as_with_null_check = true:suggestion +csharp_style_inlined_variable_declaration = true:suggestion + +# Null checking preferences +csharp_style_throw_expression = true:suggestion +csharp_style_conditional_delegate_call = true:suggestion + +# Other features +csharp_style_prefer_index_operator = false:none +csharp_style_prefer_range_operator = false:none +csharp_style_pattern_local_over_anonymous_function = false:none + +# Space preferences +csharp_space_after_cast = false +csharp_space_after_colon_in_inheritance_clause = true +csharp_space_after_comma = true +csharp_space_after_dot = false +csharp_space_after_keywords_in_control_flow_statements = true +csharp_space_after_semicolon_in_for_statement = true +csharp_space_around_binary_operators = before_and_after +csharp_space_around_declaration_statements = false +csharp_space_before_colon_in_inheritance_clause = true +csharp_space_before_comma = false +csharp_space_before_dot = false +csharp_space_before_open_square_brackets = false +csharp_space_before_semicolon_in_for_statement = false +csharp_space_between_empty_square_brackets = false +csharp_space_between_method_call_empty_parameter_list_parentheses = false +csharp_space_between_method_call_name_and_opening_parenthesis = false +csharp_space_between_method_call_parameter_list_parentheses = false +csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +csharp_space_between_method_declaration_name_and_open_parenthesis = false +csharp_space_between_method_declaration_parameter_list_parentheses = false +csharp_space_between_parentheses = false +csharp_space_between_square_brackets = false diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000000..b12292b62e4 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,11 @@ +# Set default behavior to automatically normalize line endings. 
+* text=auto + +# Force bash scripts to always use lf line endings so that if a repo is accessed +# in Unix via a file share from Windows, the scripts will work. +*.sh text eol=lf + +# Force batch scripts to always use crlf line endings so that if a repo is accessed +# in Windows via a file share from Unix, the scripts will work. +*.cmd text eol=crlf +*.bat text eol=crlf diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f8efdbd7f8e..2823e406003 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,26 +1,60 @@ -Make sure you have checked _all_ steps below. + + +## What is the purpose of the change + +*(For example: This pull request improves file read performance by buffering data, fixing AVRO-XXXX.)* + + +## Verifying this change + +*(Please pick one of the following options)* + +This change is a trivial rework / code cleanup without any test coverage. + +*(or)* + +This change is already covered by existing tests, such as *(please describe tests)*. + +*(or)* + +This change added tests and can be verified as follows: + +*(example:)* +- *Extended interop tests to verify consistent valid schema names between SDKs* +- *Added test that validates that Java throws an AvroRuntimeException on invalid binary data* +- *Manually verified the change by building the website and checking the new redirect* + + +## Documentation + +- Does this pull request introduce a new feature? (yes / no) +- If yes, how is the feature documented? 
(not applicable / docs / JavaDocs / not documented) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 439a4dfcb33..14d2f29b768 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -51,6 +51,13 @@ updates: day: "sunday" open-pull-requests-limit: 20 + - package-ecosystem: "npm" + directory: "/doc" + schedule: + interval: "weekly" + day: "sunday" + open-pull-requests-limit: 10 + - package-ecosystem: "pip" directory: "/lang/py/" schedule: @@ -65,10 +72,21 @@ updates: day: "sunday" open-pull-requests-limit: 20 - - package-ecosystem: "cargo" - directory: "/lang/rust/" + - package-ecosystem: "bundler" + directory: "/doc/" schedule: interval: "weekly" day: "sunday" open-pull-requests-limit: 20 + - package-ecosystem: "cargo" + directory: "/lang/rust/" + schedule: + interval: "daily" + open-pull-requests-limit: 20 + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" diff --git a/.github/labeler.yml b/.github/labeler.yml index ae59a356e43..d694c7e6d65 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -18,15 +18,39 @@ # # Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler -C: ["lang/c/**/*"] -C++: ["lang/c++/**/*"] -C#: ["lang/csharp/**/*"] -Java: ["lang/java/**/*"] -Js: ["lang/js/**/*"] -Perl: ["lang/perl/**/*"] -Php: ["lang/php/**/*"] -Python: ["lang/py/**/*"] -Ruby: ["lang/ruby/**/*"] -Rust: ["lang/rust/**/*"] -build: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"] -website: ["doc/**/*"] +C: + - changed-files: + - any-glob-to-any-file: "lang/c/**/*" +C++: + - changed-files: + - any-glob-to-any-file: "lang/c++/**/*" +C#: + - changed-files: + - any-glob-to-any-file: "lang/csharp/**/*" +Java: + - changed-files: + - any-glob-to-any-file: "lang/java/**/*" +Js: + - changed-files: + - any-glob-to-any-file: "lang/js/**/*" +Perl: + - changed-files: + - any-glob-to-any-file: "lang/perl/**/*" +Php: + - changed-files: + - 
any-glob-to-any-file: "lang/php/**/*" +Python: + - changed-files: + - any-glob-to-any-file: "lang/py/**/*" +Ruby: + - changed-files: + - any-glob-to-any-file: "lang/ruby/**/*" +Rust: + - changed-files: + - any-glob-to-any-file: "lang/rust/**/*" +build: + - changed-files: + - any-glob-to-any-file: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"] +website: + - changed-files: + - any-glob-to-any-file: "doc/**/*" diff --git a/.github/workflows/codeql-csharp-analysis.yml b/.github/workflows/codeql-csharp-analysis.yml index 3cbb0fdd245..b6153490740 100644 --- a/.github/workflows/codeql-csharp-analysis.yml +++ b/.github/workflows/codeql-csharp-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL C#" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-csharp-analysis.yml - lang/csharp/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,15 +53,26 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. fetch-depth: 2 + # Install .NET SDKs + - name: Install .NET SDKs + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x + # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -69,7 +84,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +94,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/codeql-java-analysis.yml b/.github/workflows/codeql-java-analysis.yml index 1b4933fcf60..7e42f8120fb 100644 --- a/.github/workflows/codeql-java-analysis.yml +++ b/.github/workflows/codeql-java-analysis.yml @@ -23,15 +23,19 @@ on: workflow_dispatch: push: branches: - - master + - main pull_request: branches: - - master + - main paths: - .github/workflows/codeql-java-analysis.yml - lang/java/** - pom.xml +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -66,10 +70,25 @@ jobs: # queries: ./path/to/local/query, your-org/your-repo/queries@main queries: +security-and-quality + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v1 +# - name: Autobuild +# uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -77,6 +96,8 @@ jobs: # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines # and modify them (or add more) to build your code if your project # uses a compiled language + - name: 'Java Test' + run: mvn clean test - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/codeql-js-analysis.yml b/.github/workflows/codeql-js-analysis.yml index 58d2a0a6296..68b9aa85fe7 100644 --- a/.github/workflows/codeql-js-analysis.yml +++ b/.github/workflows/codeql-js-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL JavaScript" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-js-analysis.yml - lang/js/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request 
then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -69,7 +73,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +83,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/codeql-py-analysis.yml b/.github/workflows/codeql-py-analysis.yml index 048b2ed1a9b..60a47472fec 100644 --- a/.github/workflows/codeql-py-analysis.yml +++ b/.github/workflows/codeql-py-analysis.yml @@ -23,15 +23,19 @@ name: "CodeQL Python" on: push: branches: - - master + - main pull_request: # The branches below must be a subset of the branches above branches: - - master + - main paths: - .github/workflows/codeql-py-analysis.yml - lang/py/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: analyze: name: Analyze @@ -49,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -57,7 +61,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -69,7 +73,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 https://git.io/JvXDl @@ -79,4 +83,4 @@ jobs: # uses a compiled language - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/java-publish-snapshot.yml b/.github/workflows/java-publish-snapshot.yml new file mode 100644 index 00000000000..89d8759a61b --- /dev/null +++ b/.github/workflows/java-publish-snapshot.yml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. 
+ +name: "Publish Snapshot to Maven" +on: + workflow_dispatch: + push: + branches: [ main ] + paths: + - .github/workflows/java-publish-snapshot.yml + - lang/java/** + - pom.xml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + working-directory: lang/java + +jobs: + publish-snapshot: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Local Maven Repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + + - name: 'Deploy Maven snapshots' + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + run: | + echo "<settings><servers><server><id>apache.snapshots.https</id><username>$ASF_USERNAME</username><password>$ASF_PASSWORD</password></server></servers></settings>" > settings.xml + mvn --settings settings.xml -U -B -e -fae -ntp -PskipQuality deploy diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index c786eb6440e..815d5d02a5c 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -23,7 +23,7 @@ jobs: triage: runs-on: ubuntu-latest steps: - - uses: actions/labeler@v2 + - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" sync-labels: true diff --git a/.github/workflows/maven4.yml b/.github/workflows/maven4.yml new file mode 100644 index 00000000000..5376488b3e3 --- /dev/null +++ b/.github/workflows/maven4.yml @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'Maven 4' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/maven4.yml + - lang/java/** + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + maven4: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Local Maven Repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Cache Maven 4 Build Cache + uses: actions/cache@v4 + with: + path: ~/.m2/build-cache + key: ${{ runner.os }}-maven-build-cache-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven-build-cache + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: Setup Maven 4 + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 4.0.0-alpha-10 + + - name: Test + run: mvn clean verify diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml index d3fa1868a46..c38d808f8c3 100644 --- a/.github/workflows/rat.yml +++ 
b/.github/workflows/rat.yml @@ -17,29 +17,42 @@ name: 'Rat' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: '11' + maven-version: 3.9.6 - name: Run Rat - run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -P rat -pl :avro-toplevel + run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -Dinvoker.skip=true -P rat -pl :avro-toplevel diff --git a/.github/workflows/spotless.yml b/.github/workflows/spotless.yml index 45c7e9de4d7..f69108badab 100644 --- a/.github/workflows/spotless.yml +++ b/.github/workflows/spotless.yml @@ -17,32 +17,45 @@ name: 'Spotless' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/spotless.yml - lang/java/** +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: spotless: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository 
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: '11' + maven-version: 3.9.6 - name: Run Spotless Check run: mvn spotless:check diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml new file mode 100644 index 00000000000..5c9b6aa3d3a --- /dev/null +++ b/.github/workflows/test-docker.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Docker tests' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - 'share/docker/*' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run Docker tests + shell: bash + run: ./build.sh docker-test diff --git a/.github/workflows/test-lang-c++-ARM.yml b/.github/workflows/test-lang-c++-ARM.yml new file mode 100644 index 00000000000..f101eaeb2b5 --- /dev/null +++ b/.github/workflows/test-lang-c++-ARM.yml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test C++ on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - '.github/workflows/test-lang-c\+\+.yml' + - 'lang/c\+\+/**' + +defaults: + run: + working-directory: lang/c++ + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: C++ on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y gcc g++ libboost-all-dev libfmt-dev cmake + + - name: Build + run: | + set -x + ./build.sh clean test diff --git a/.github/workflows/test-lang-c++.yml b/.github/workflows/test-lang-c++.yml index c7db3804fec..61afa7ff61c 100644 --- a/.github/workflows/test-lang-c++.yml +++ b/.github/workflows/test-lang-c++.yml @@ -17,9 +17,9 @@ name: Test C++ on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - '.github/workflows/test-lang-c\+\+.yml' - 'lang/c\+\+/**' @@ -28,14 +28,24 @@ defaults: run: working-directory: lang/c++ +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies - run: sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev cmake + run: sudo apt update && sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev libfmt-dev cmake + + - name: Print Versions + run: | + gcc --version + cmake --version + cppcheck --version - name: Clean run: ./build.sh clean @@ -45,3 +55,9 @@ jobs: - name: Test run: ./build.sh test + + - name: Release build + run: | + mkdir -p build + cd build + cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. 
diff --git a/.github/workflows/test-lang-c-ARM.yml b/.github/workflows/test-lang-c-ARM.yml new file mode 100644 index 00000000000..ffb31be2b39 --- /dev/null +++ b/.github/workflows/test-lang-c-ARM.yml @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test C on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-c.yml + - lang/c/** + +defaults: + run: + working-directory: lang/c + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: C on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y cmake liblzma-dev libsnappy-dev libjansson-dev zlib1g-dev pkg-config + + - name: Build + run: | + set -x + ./build.sh clean test diff --git a/.github/workflows/test-lang-c.yml b/.github/workflows/test-lang-c.yml index 764a29364d6..9f31078bf1c 100644 --- a/.github/workflows/test-lang-c.yml +++ b/.github/workflows/test-lang-c.yml @@ -17,9 +17,9 @@ name: Test C on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-c.yml - lang/c/** @@ -28,11 +28,15 @@ defaults: run: working-directory: lang/c +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt-get install -qqy libjansson-dev libsnappy-dev @@ -43,17 +47,40 @@ jobs: - name: Test run: ./build.sh test + - name: Check pkg-config + run: | + mkdir -p build + cd build + cmake .. 
+ export PKG_CONFIG_PATH=./src + pkg-config --libs avro-c + - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . @@ -72,7 +99,7 @@ jobs: interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies run: | @@ -83,16 +110,31 @@ jobs: libzstd-dev - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . 
diff --git a/.github/workflows/test-lang-csharp-ARM.yml b/.github/workflows/test-lang-csharp-ARM.yml new file mode 100644 index 00000000000..4c3eacb0521 --- /dev/null +++ b/.github/workflows/test-lang-csharp-ARM.yml @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test C# on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-csharp.yml + - lang/csharp/** + +defaults: + run: + working-directory: lang/csharp + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x + + - uses: actions/cache@v4 + with: + path: ~/.nuget/packages + key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }} + restore-keys: | + ${{ runner.os }}-nuget- + + - name: Lint + run: ./build.sh lint + + - name: Test + run: ./build.sh test + + interop: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x + + - name: Cache Local Maven Repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + + - name: Install Java Avro for Interop Test + working-directory: . + run: mvn -B install -PskipQuality + + - name: Create Interop Data Directory + working-directory: . 
+ run: mkdir -p build/interop/data + + - name: Generate Interop Resources + working-directory: lang/java/avro + run: mvn -B -P interop-data-generate generate-resources + + - name: Generate Interop Data + run: ./build.sh interop-data-generate + + - name: Run Interop Tests + run: ./build.sh interop-data-test + + arm64: + name: C# on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache Nuget + uses: actions/cache@v4 + with: + path: ~/.nuget/packages + key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }} + restore-keys: | + ${{ runner.os }}-nuget- + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y wget libzstd-dev libicu-dev + wget https://dot.net/v1/dotnet-install.sh + bash ./dotnet-install.sh --channel "3.1" --install-dir "$HOME/.dotnet" # 3.1 + bash ./dotnet-install.sh --channel "5.0" --install-dir "$HOME/.dotnet" # 5.0 + bash ./dotnet-install.sh --channel "6.0" --install-dir "$HOME/.dotnet" # 6.0 + bash ./dotnet-install.sh --channel "7.0" --install-dir "$HOME/.dotnet" # 7.0 + bash ./dotnet-install.sh --channel "8.0" --install-dir "$HOME/.dotnet" # 8.0 + + - name: Build + run: | + set -x + export PATH=$HOME/.dotnet:$PATH + dotnet --list-sdks + ./build.sh clean test diff --git a/.github/workflows/test-lang-csharp.yml b/.github/workflows/test-lang-csharp.yml index b1959009e64..c81628213c0 100644 --- a/.github/workflows/test-lang-csharp.yml +++ b/.github/workflows/test-lang-csharp.yml @@ -17,9 +17,9 @@ name: 'Test C#' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-csharp.yml - lang/csharp/** @@ -28,13 +28,31 @@ defaults: run: working-directory: lang/csharp +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 
+ - uses: actions/checkout@v4 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ~/.nuget/packages key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }} @@ -50,19 +68,48 @@ jobs: interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 + + - name: Add libzstd + shell: bash + run: sudo apt-get install -y libzstd-dev + + - name: Install .NET SDKs + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-java-ARM.yml b/.github/workflows/test-lang-java-ARM.yml new file mode 100644 index 00000000000..ba48cf194f5 --- /dev/null +++ b/.github/workflows/test-lang-java-ARM.yml @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'Test Java on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-java.yml + - lang/java/** + - pom.xml + +defaults: + run: + working-directory: lang/java + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: Java on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: 'Checkout sourcecode' + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Cache Local Maven Repository' + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + + - name: Build + run: ./build.sh clean test +# set -x +# export MAVEN_VERSION="3.9.6" +# wget https://archive.apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz +# tar zxvf apache-maven-$MAVEN_VERSION-bin.tar.gz +# export 
M2_HOME=$PWD/apache-maven-$MAVEN_VERSION +# export PATH="$M2_HOME/bin:$PATH" +# java -version +# mvn -version +# #MAVEN_OPTS="-Dsurefire.excludes=*TestCustomCodec*,*TestAllCodecs*,*TestNettyServer*" ./build.sh clean test +# ./build.sh clean test diff --git a/.github/workflows/test-lang-java.yml b/.github/workflows/test-lang-java.yml index b76b9c3a5b8..bca1f0b7ae8 100644 --- a/.github/workflows/test-lang-java.yml +++ b/.github/workflows/test-lang-java.yml @@ -17,9 +17,9 @@ name: 'Test Java' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-java.yml - lang/java/** @@ -29,94 +29,128 @@ defaults: run: working-directory: lang/java +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - name: Java ${{ matrix.java }} Test + name: 'Java Test' runs-on: ubuntu-latest - strategy: - matrix: - java: - - '8' - - '11' steps: - - uses: actions/checkout@v2 + - name: 'Checkout sourcecode' + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - name: Cache Local Maven Repository - uses: actions/cache@v2 + - name: 'Cache Local Maven Repository' + uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 with: - distribution: 'adopt' - java-version: ${{ matrix.java }} + maven-version: 3.9.6 - - name: Lint + - name: 'Java Lint' run: ./build.sh lint - - name: Test + - name: 'Java Test' run: ./build.sh test + - name: 
'Install Java Avro for reproducibility test' + working-directory: . + run: mvn -B clean install -PskipQuality + + - name: 'Test Reproducible Build' + working-directory: . + run: mvn clean verify -PskipQuality artifact:compare + interop: - name: Java ${{ matrix.java }} Interop + name: 'Java Interop' runs-on: ubuntu-latest - strategy: - matrix: - java: - - '8' - - '11' + steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - name: Cache Local Maven Repository - uses: actions/cache@v2 + - name: 'Cache Local Maven Repository' + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- - - name: Setup Java - uses: actions/setup-java@v2 + - name: 'Setup Temurin JDK 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 with: - distribution: 'adopt' - java-version: ${{ matrix.java }} + distribution: 'temurin' + java-version: | + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 - - name: Setup Python for Generating Input Data - uses: actions/setup-python@v2 + - name: 'Setup Python for Generating Input Data' + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 - - name: Apt Install Compression Libs Required by Python + - name: 'Apt Install Compression Libs Required by Python' run: | sudo apt-get install -qqy --no-install-recommends libbz2-dev \ liblzma-dev \ libsnappy-dev \ libzstd-dev - - name: Install Python Dependencies + - name: 'Install Python Dependencies' run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox python3 -m pip install python-snappy zstandard - - name: Install Java Avro for Interop Test - working-directory: . 
- run: mvn -B install -DskipTests - - - name: Create Interop Data Directory + - name: 'Setup C# for Generating Interop Data' + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 3.1.x + 5.0.x + 6.0.x + 7.0.x + 8.0.x + + - name: 'Create Interop Data Directory' working-directory: . run: mkdir -p build/interop/data - - name: Generate Interop Resources - working-directory: lang/java/avro - run: mvn -B -P interop-data-generate generate-resources - - - name: Generate Interop Data using Python + - name: 'Generate Interop Data using Python' working-directory: lang/py run: ./build.sh interop-data-generate - - name: Run Interop Tests - working-directory: lang/java/ipc - run: mvn -B test -P interop-data-test + - name: 'Generate Interop Data using C#' + working-directory: lang/csharp + run: ./build.sh interop-data-generate + + - name: 'Install Java Avro for other tests' + working-directory: . + run: mvn -B install -PskipQuality + + - name: 'Generate Interop Data using Java 11, 17 & 21' + working-directory: lang/java/interop-data-test + run: mvn -B verify -Pgenerate-test-data + + - name: 'Run Interop Tests using Java 11, 17 & 21' + working-directory: lang/java/interop-data-test + run: mvn -B verify -Pcheck-test-data diff --git a/.github/workflows/test-lang-js-ARM.yml b/.github/workflows/test-lang-js-ARM.yml new file mode 100644 index 00000000000..54e1216eaca --- /dev/null +++ b/.github/workflows/test-lang-js-ARM.yml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'Test JavaScript on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-js.yml + - lang/js/** + +defaults: + run: + working-directory: lang/js + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: JavaScript on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache Npm + uses: actions/cache@v4 + with: + path: ~/.npm + key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-node- + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 18 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y wget tar xz-utils + + - name: Build + run: | + set -x + ./build.sh clean test diff --git a/.github/workflows/test-lang-js.yml b/.github/workflows/test-lang-js.yml index 1f5bebce252..20f2cc09f4c 100644 --- a/.github/workflows/test-lang-js.yml +++ b/.github/workflows/test-lang-js.yml @@ -17,9 +17,9 @@ name: 'Test JavaScript' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-js.yml - lang/js/** @@ -28,6 +28,10 @@ defaults: run: working-directory: lang/js +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Node ${{ matrix.node }} @@ -37,14 +41,15 @@ jobs: node: - 12 - 
14 + - 16 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node }} - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} @@ -65,39 +70,42 @@ jobs: node: - 12 - 14 + - 16 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Node - uses: actions/setup-node@v2 + uses: actions/setup-node@v4 with: node-version: ${{ matrix.node }} - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} restore-keys: | ${{ runner.os }}-node- - - name: Cache Local Maven Repository - uses: actions/cache@v2 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- + - name: Setup Python for Generating Input Data + uses: actions/setup-python@v5 - - name: Install Java Avro for Interop Test - working-directory: . - run: mvn -B install -DskipTests + - name: Apt Install Compression Libs Required by Python + run: | + sudo apt-get install -qqy --no-install-recommends libbz2-dev \ + liblzma-dev \ + libsnappy-dev \ + libzstd-dev + - name: Install Python Dependencies + run: | + python3 -m pip install --upgrade pip setuptools tox + python3 -m pip install python-snappy zstandard - name: Create Interop Data Directory working-directory: . 
run: mkdir -p build/interop/data - - name: Generate Interop Resources - working-directory: lang/java/avro - run: mvn -B -P interop-data-generate generate-resources + - name: Generate Interop Data using Python + working-directory: lang/py + run: ./build.sh interop-data-generate - name: Generate Interop Data run: ./build.sh interop-data-generate diff --git a/.github/workflows/test-lang-perl-ARM.yml b/.github/workflows/test-lang-perl-ARM.yml new file mode 100644 index 00000000000..a7d3576cbdf --- /dev/null +++ b/.github/workflows/test-lang-perl-ARM.yml @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test Perl on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-perl.yml + - lang/perl/** + +defaults: + run: + working-directory: lang/perl + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test: + name: Perl ${{ matrix.perl }} Tests + runs-on: ubuntu-latest + strategy: + matrix: + perl: + - '5.32' + steps: + - uses: actions/checkout@v4 + + - uses: shogo82148/actions-setup-perl@v1 + with: + perl-version: ${{ matrix.perl }} + + - name: Install Dependencies + run: | + cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + Encode \ + Error::Simple \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ + Object::Tiny \ + Perl::Critic \ + Regexp::Common \ + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny + + - name: Lint + run: ./build.sh lint + + - name: Test + run: ./build.sh test + + interop: + name: Perl ${{ matrix.perl }} Interop + runs-on: ubuntu-latest + strategy: + matrix: + perl: + - '5.32' + steps: + - uses: actions/checkout@v4 + + - uses: shogo82148/actions-setup-perl@v1 + with: + perl-version: ${{ matrix.perl }} + + - name: Install Dependencies + run: | + sudo apt-get -qqy install --no-install-recommends libjansson-dev \ + libsnappy-dev + cpanm --mirror https://www.cpan.org/ install CPAN::Uploader \ + Compress::Zstd \ + Encode \ + Error::Simple \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ + Object::Tiny \ + Perl::Critic \ + Regexp::Common \ + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny + + - name: Cache Local Maven Repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + 
distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + + - name: Install Java Avro for Interop Test + working-directory: . + run: mvn -B install -PskipQuality + + - name: Create Interop Data Directory + working-directory: . + run: mkdir -p build/interop/data + + - name: Generate Interop Resources + working-directory: lang/java/avro + run: mvn -B -P interop-data-generate generate-resources + + - name: Generate Interop Data + run: ./build.sh interop-data-generate + + - name: Run Interop Tests + run: ./build.sh interop-data-test diff --git a/.github/workflows/test-lang-perl.yml b/.github/workflows/test-lang-perl.yml index bed6c367023..44e4105a2a5 100644 --- a/.github/workflows/test-lang-perl.yml +++ b/.github/workflows/test-lang-perl.yml @@ -17,9 +17,9 @@ name: 'Test Perl' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-perl.yml - lang/perl/** @@ -28,6 +28,10 @@ defaults: run: working-directory: lang/perl +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Perl ${{ matrix.perl }} Tests @@ -37,7 +41,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: @@ -45,25 +49,19 @@ jobs: - name: Install Dependencies run: | - sudo apt-get -qqy install --no-install-recommends libjansson-dev \ - libcompress-raw-zlib-perl \ - libcpan-uploader-perl \ - libencode-perl \ - libio-string-perl \ - libjson-xs-perl \ - libmodule-install-perl \ - libmodule-install-readmefrompod-perl \ - libobject-tiny-perl \ - libperl-critic-perl \ - libsnappy-dev \ - libtest-exception-perl \ - libtest-pod-perl cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + Encode \ Error::Simple \ - 
Module::Install::Repository \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ + Object::Tiny \ + Perl::Critic \ Regexp::Common \ - Try::Tiny \ - inc::Module::Install + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny - name: Lint run: ./build.sh lint @@ -79,7 +77,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: @@ -87,37 +85,48 @@ jobs: - name: Install Dependencies run: | - sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \ - libcpan-uploader-perl \ - libencode-perl \ - libio-string-perl \ - libjansson-dev \ - libjson-xs-perl \ - libmodule-install-perl \ - libmodule-install-readmefrompod-perl \ - libobject-tiny-perl \ - libsnappy-dev \ - libtest-exception-perl \ - libtest-pod-perl - cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + sudo apt-get -qqy install --no-install-recommends libjansson-dev \ + libsnappy-dev + cpanm --mirror https://www.cpan.org/ install CPAN::Uploader \ + Compress::Zstd \ + Encode \ Error::Simple \ - Module::Install::Repository \ + JSON::MaybeXS \ + Module::Install \ + Module::Install::ReadmeFromPod \ Object::Tiny \ Regexp::Common \ - Try::Tiny \ - inc::Module::Install + Test::Exception \ + Test::More \ + Test::Pod \ + Try::Tiny - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . 
- run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . diff --git a/.github/workflows/test-lang-php-ARM.yml b/.github/workflows/test-lang-php-ARM.yml new file mode 100644 index 00000000000..85c2aa349bf --- /dev/null +++ b/.github/workflows/test-lang-php-ARM.yml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test PHP on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-php.yml + - lang/php/** + +defaults: + run: + working-directory: lang/php + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: PHP on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y wget php php-xml php-mbstring php-curl php-gmp php-bz2 unzip libtidy-dev libpq5 + php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" + php -r "if (hash_file('sha384', 'composer-setup.php') === file_get_contents('https://composer.github.io/installer.sig')) { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" + php composer-setup.php --version=2.2.5 + php -r "unlink('composer-setup.php');" + sudo mv composer.phar /usr/local/bin/composer + + - name: Build + run: | + set -x + composer --version + ./build.sh clean test diff --git a/.github/workflows/test-lang-php.yml b/.github/workflows/test-lang-php.yml index 1fc227f7f6c..b6329f04d06 100644 --- a/.github/workflows/test-lang-php.yml +++ b/.github/workflows/test-lang-php.yml @@ -17,9 +17,9 @@ name: 'Test PHP' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-php.yml - lang/php/** @@ -28,6 +28,10 @@ defaults: run: working-directory: lang/php +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: PHP ${{ matrix.php }} Test @@ -40,18 +44,19 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} + tools: 
composer:2.2.5 - name: Get Composer Cache Directory id: composer-cache - run: echo "::set-output name=dir::$(composer config cache-files-dir)" + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: ${{ steps.composer-cache.outputs.dir }} key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }} @@ -75,24 +80,40 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} + tools: composer:2.2.5 - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . 
@@ -102,7 +123,7 @@ jobs: working-directory: lang/java/avro run: mvn -B -P interop-data-generate generate-resources - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-zstd path: lang/php/php-ext-zstd @@ -118,7 +139,7 @@ jobs: echo "extension=zstd.so" | sudo tee -a /etc/php/${{ matrix.php }}/cli/conf.d/10-zstd.ini php -m - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-snappy path: lang/php/php-ext-snappy diff --git a/.github/workflows/test-lang-py-ARM.yml b/.github/workflows/test-lang-py-ARM.yml new file mode 100644 index 00000000000..874c7c35f9b --- /dev/null +++ b/.github/workflows/test-lang-py-ARM.yml @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test Python on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-py.yml + - lang/py/** + +defaults: + run: + working-directory: lang/py + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: Python on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y python3 python3-dev python3-pip git libbz2-dev libjansson-dev liblzma-dev libsnappy-dev libzstd-dev + python3 -m pip install --upgrade pip setuptools tox + + - name: Build + run: | + set -x + ./build.sh clean test diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml index 19522c01b7e..83bd1f83f15 100644 --- a/.github/workflows/test-lang-py.yml +++ b/.github/workflows/test-lang-py.yml @@ -17,9 +17,9 @@ name: 'Test Python' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-py.yml - lang/py/** @@ -28,26 +28,32 @@ defaults: run: working-directory: lang/py +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Python ${{ matrix.python }} Tests - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: python: + - '3.12' + - '3.11' + - '3.10' - '3.9' - '3.8' - '3.7' - - '3.6' - - 'pypy-3.7' - - 'pypy-3.6' + - 'pypy-3.9' + - 'pypy-3.10' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} @@ -61,14 +67,14 @@ jobs: - name: Install Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip 
setuptools tox - name: Lint - if: ${{ matrix.python == '3.9' }} + if: ${{ matrix.python == '3.10' }} run: python3 -m tox -e lint - name: Typechecks - if: ${{ matrix.python == '3.9' }} + if: ${{ matrix.python == '3.10' }} run: python3 -m tox -e typechecks - name: Test @@ -76,23 +82,24 @@ jobs: interop: name: Python ${{ matrix.python }} Interop - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: python: + - '3.11' + - '3.10' - '3.9' - '3.8' - '3.7' - - '3.6' - - 'pypy-3.7' - - 'pypy-3.6' + - 'pypy-3.9' + - 'pypy-3.10' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} @@ -106,20 +113,35 @@ jobs: - name: Install Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox python3 -m pip install python-snappy zstandard - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . 
diff --git a/.github/workflows/test-lang-ruby-ARM.yml b/.github/workflows/test-lang-ruby-ARM.yml new file mode 100644 index 00000000000..c1ce73878bf --- /dev/null +++ b/.github/workflows/test-lang-ruby-ARM.yml @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Test Ruby on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-ruby.yml + - lang/ruby/** + +defaults: + run: + working-directory: lang/ruby + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: Ruby on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache gems + uses: actions/cache@v4 + with: + path: .gem + key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} + restore-keys: | + ${{ runner.os }}-gems- + + - name: Install dependencies + run: | + sudo apt-get update -q + sudo apt-get install -q -y ruby-dev bundler libsnappy-dev libyaml-dev + + - name: Build + run: | + set -x + ./build.sh clean test diff --git a/.github/workflows/test-lang-ruby.yml b/.github/workflows/test-lang-ruby.yml index 8f4f5076b89..ac85605f8e1 100644 --- a/.github/workflows/test-lang-ruby.yml +++ b/.github/workflows/test-lang-ruby.yml @@ -17,9 +17,9 @@ name: 'Test Ruby' on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-ruby.yml - lang/ruby/** @@ -28,6 +28,10 @@ defaults: run: working-directory: lang/ruby +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Ruby ${{ matrix.ruby }} Tests @@ -35,20 +39,22 @@ jobs: strategy: matrix: ruby: - - '2.6' - '2.7' - '3.0' + - '3.1' + - '3.2' + - '3.3' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get install -qqy libsnappy-dev - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: .gem key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} 
@@ -74,20 +80,22 @@ jobs: strategy: matrix: ruby: - - '2.6' - '2.7' - '3.0' + - '3.1' + - '3.2' + - '3.3' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get install -qqy libsnappy-dev - - uses: actions/cache@v2 + - uses: actions/cache@v4 with: path: .gem key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }} @@ -98,16 +106,31 @@ jobs: run: bundle config path .gem - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 + with: + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + - name: Install Java Avro for Interop Test working-directory: . - run: mvn -B install -DskipTests + run: mvn -B install -PskipQuality - name: Create Interop Data Directory working-directory: . 
diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml index efb3f1eaff7..d4bbd4b6931 100644 --- a/.github/workflows/test-lang-rust-audit.yml +++ b/.github/workflows/test-lang-rust-audit.yml @@ -19,31 +19,55 @@ name: Rust Security Audit on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] + paths: + - .github/workflows/test-lang-rust-audit.yml + - lang/rust/**/Cargo.toml + - lang/rust/Cargo.lock + - lang/rust/deny.toml pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-rust-audit.yml - - lang/rust/Cargo.toml + - lang/rust/**/Cargo.toml - lang/rust/Cargo.lock + - lang/rust/deny.toml + +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings defaults: run: working-directory: lang/rust +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: audit: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 - # Currently does not work. 
See https://github.com/actions-rs/audit-check/issues/194 - #- name: Rust Audit - # uses: actions-rs/audit-check@v1 - # with: - # token: ${{ secrets.GITHUB_TOKEN }} - # Install it manually + uses: actions/checkout@v4 + - name: Dependency Review + if: github.event_name == 'pull_request' + uses: actions/dependency-review-action@v4 - name: Install Cargo Audit run: cargo install cargo-audit - name: Audit run: cargo audit + + deny: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install Cargo Deny + run: cargo install cargo-deny + - name: Check + run: cargo deny check + \ No newline at end of file diff --git a/.github/workflows/test-lang-rust-ci-ARM.yml b/.github/workflows/test-lang-rust-ci-ARM.yml new file mode 100644 index 00000000000..3ac8e0bfdd8 --- /dev/null +++ b/.github/workflows/test-lang-rust-ci-ARM.yml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: 'Rust Continuous Integration on ARM' +on: + workflow_dispatch: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - .github/workflows/test-lang-rust-ci.yml + - lang/rust/** + +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + +defaults: + run: + working-directory: lang/rust + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + arm64: + name: Rust on Linux ARM64 + runs-on: ["self-hosted", "asf-arm"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Cache Cargo + uses: actions/cache@v4 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + path: ~/.cargo + key: ${{ runner.os }}-target-arm64-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + # these represent compiled steps of both dependencies and avro + # and thus are specific for a particular OS, arch and rust version. 
+ path: lang/rust/target + key: ${{ runner.os }}-target-cache1-stable- + + - name: Rust Toolchain + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: stable + components: rustfmt + targets: x86_64-unknown-linux-gnu + + - name: Build + run: | + set -x + ./build.sh test diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml index 977ea110731..025dca1a836 100644 --- a/.github/workflows/test-lang-rust-ci.yml +++ b/.github/workflows/test-lang-rust-ci.yml @@ -19,61 +19,236 @@ name: Rust Continuous Integration on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-rust-ci.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: ci: runs-on: ubuntu-latest strategy: matrix: rust: - - stable - - beta - - nightly - - 1.48.0 # MSRV + - 'stable' + - 'beta' + - 'nightly' + - '1.73.0' # MSRV + target: + - x86_64-unknown-linux-gnu + - wasm32-unknown-unknown steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 + + - name: Cache Cargo + uses: actions/cache@v4 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + path: ~/.cargo + key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + # these represent compiled steps of both dependencies and avro + # and thus are specific for a particular OS, arch and rust version. 
+ path: lang/rust/target + key: ${{ runner.os }}-target-cache1-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }} - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: ${{ matrix.rust }} - override: true components: rustfmt + targets: ${{ matrix.target }} - - name: Rust Format - uses: actions-rs/cargo@v1 + - name: Cache cargo-rdme + if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + uses: actions/cache@v4 with: - command: fmt - args: --manifest-path lang/rust/Cargo.toml --all -- --check + path: ~/.cargo-${{ matrix.rust }}/cargo-rdme + key: cargo-rdme- + + # Check if the doc comment in avro/src/lib.rs and avro/README.md are in sync. + - name: Run cargo-rdme + # The result is environment independent so one test pattern is enough. + if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + run: | + cargo install --root ~/.cargo-${{ matrix.rust }}/cargo-rdme --locked cargo-rdme + export PATH=$PATH:~/.cargo-${{ matrix.rust }}/cargo-rdme/bin + cargo rdme --check + + - name: Rust Format + if: matrix.target != 'wasm32-unknown-unknown' + run: cargo fmt --all -- --check - name: Rust Build - uses: actions-rs/cargo@v1 - with: - command: build - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets + run: cargo build --all-features --all-targets - name: Rust Test - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets + if: matrix.target != 'wasm32-unknown-unknown' + run: cargo test --all-features --target ${{ matrix.target }} + + - name: Rust Test AVRO-3549 + if: matrix.target != 'wasm32-unknown-unknown' + run: cargo test --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec # because of https://github.com/rust-lang/cargo/issues/6669 - name: Rust Test docs - uses: actions-rs/cargo@v1 + if: matrix.target != 'wasm32-unknown-unknown' + run: cargo test --doc + 
+ interop: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Rust Toolchain + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: stable + + - name: Cache Cargo + uses: actions/cache@v4 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + path: ~/.cargo + key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + # these represent compiled steps of both dependencies and avro + # and thus are specific for a particular OS, arch and rust version. + path: lang/rust/target + key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache Local Maven Repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: 'Setup Temurin JDK 8, 11, 17 & 21' + uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1 with: - command: test - args: --manifest-path lang/rust/Cargo.toml --doc + distribution: 'temurin' + java-version: | + 8 + 11 + 17 + 21 + + - name: 'Setup Maven 3.9.6' + uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 + with: + maven-version: 3.9.6 + + - name: Install Java Avro for Interop Test + working-directory: . + run: mvn -B install -PskipQuality + + - name: Create Interop Data Directory + working-directory: . 
+ run: mkdir -p build/interop/data + + - name: Generate Interop Resources + working-directory: lang/java/avro + run: mvn -B -P interop-data-generate generate-resources + + - name: Generate interop data + run: ./build.sh interop-data-generate + + - name: Rust reads interop files created by Java and Rust + run: ./build.sh interop-data-test + + - uses: shogo82148/actions-setup-perl@v1 + with: + perl-version: 5.32 + + - name: Install Dependencies + run: | + sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \ + libcpan-uploader-perl \ + libencode-perl \ + libio-string-perl \ + libjansson-dev \ + libjson-xs-perl \ + libmodule-install-perl \ + libmodule-install-readmefrompod-perl \ + libobject-tiny-perl \ + libsnappy-dev \ + libtest-exception-perl \ + libtest-pod-perl + cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ + Error::Simple \ + Module::Install::Repository \ + Object::Tiny \ + Regexp::Common \ + Try::Tiny \ + inc::Module::Install + + + - name: Perl reads interop files created by Java and Rust + working-directory: lang/perl + run: ./build.sh interop-data-test + + web-assembly: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Rust Toolchain + uses: dtolnay/rust-toolchain@nightly + with: + toolchain: stable + targets: wasm32-unknown-unknown + + - name: Cache Cargo + uses: actions/cache@v4 + with: + # these represent dependencies downloaded by cargo + # and thus do not depend on the OS, arch nor rust version. + path: ~/.cargo + key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + # these represent compiled steps of both dependencies and avro + # and thus are specific for a particular OS, arch and rust version. 
+ path: lang/rust/target + key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }} + + - name: Install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - name: Build the Web Assembly demo app + run: wasm-pack build wasm-demo + + - name: Test the Web Assembly demo app + run: RUST_BACKTRACE=1 wasm-pack test --headless --firefox wasm-demo diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml index cedc5f5f042..e7e0ba85448 100644 --- a/.github/workflows/test-lang-rust-clippy.yml +++ b/.github/workflows/test-lang-rust-clippy.yml @@ -19,28 +19,39 @@ name: Rust Clippy Check on: workflow_dispatch: push: - branches: [ master ] + branches: [ main ] pull_request: - branches: [ master ] + branches: [ main ] paths: - .github/workflows/test-lang-rust-clippy.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: clippy_check: runs-on: ubuntu-latest + strategy: + matrix: + rust: + - 'stable' + - '1.73.0' # MSRV steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly with: - toolchain: stable + toolchain: ${{ matrix.rust }} components: clippy - override: true - - uses: actions-rs/clippy-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets -- -Dclippy::all -Dunused_imports + - run: cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports diff --git a/.gitignore b/.gitignore index fd46be4f46f..437a035f0b3 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ test-output vendor composer.lock .phpunit.result.cache +.mvn/jvm.config # Maven JVM settings +**/*.run.xml # Intellij IDEA Run configurations diff --git a/.gitmodules 
b/.gitmodules new file mode 100644 index 00000000000..d96e7ce1437 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "doc/themes/docsy"] + path = doc/themes/docsy + url = https://github.com/google/docsy diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml new file mode 100644 index 00000000000..e2e84018d96 --- /dev/null +++ b/.mvn/extensions.xml @@ -0,0 +1,25 @@ + + + + + org.apache.maven.extensions + maven-build-cache-extension + 1.0.1 + + diff --git a/.travis/before_install.sh b/.travis/before_install.sh deleted file mode 100755 index db76c129165..00000000000 --- a/.travis/before_install.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e - -case "$TRAVIS_OS_NAME" in -"linux") - sudo apt-get -q update - sudo apt-get -q install --no-install-recommends -y curl git gnupg-agent locales pinentry-curses pkg-config rsync software-properties-common - sudo apt-get -q clean - sudo rm -rf /var/lib/apt/lists/* - - # Only Yetus 0.9.0+ supports `ADD` and `COPY` commands in Dockerfile - curl -L https://www-us.apache.org/dist/yetus/0.10.0/apache-yetus-0.10.0-bin.tar.gz | tar xvz -C /tmp/ - # A dirty workaround to disable the Yetus robot for TravisCI, - # since it'll cancel the changes that .travis/script.sh will do, - # even if the `--dirty-workspace` option is specified. - rm /tmp/apache-yetus-0.10.0/lib/precommit/robots.d/travisci.sh - ;; -"windows") - # Install all (latest) SDKs which are used by multi framework projects - choco install dotnetcore-2.1-sdk # .NET Core 2.1 - choco install dotnetcore-sdk # .NET Core 3.1 - choco install dotnet-sdk # .NET 5.0 - ;; -*) - echo "Invalid PLATFORM" - exit 1 - ;; -esac diff --git a/BUILD.md b/BUILD.md index c09994e67fb..d3059a2e3ba 100644 --- a/BUILD.md +++ b/BUILD.md @@ -4,21 +4,21 @@ The following packages must be installed before Avro can be built: - - Java: JDK 1.8, Maven 3 or better, protobuf-compile - - PHP: php7, phpunit, php7-gmp - - Python 3: 3.5 or greater + - Java: 11, 17 and 21 with the appropriate toolchain config, Maven 3.9.6 or better, protobuf-compile + - PHP: php8, phpunit, php8-gmp + - Python 3: 3.7 or greater, tox (tox will install other dependencies as needed) - C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config - C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev - C#: .NET Core 2.2 SDK - JavaScript: Node 12.x+, nodejs, npm - - Ruby: Ruby 2.6 or greater, ruby-dev, gem, bundler, snappy + - Ruby: Ruby 2.7 or greater, ruby-dev, gem, bundler, snappy - Perl: Perl 5.24.1 or greater, gmake, Module::Install, Module::Install::ReadmeFromPod, Module::Install::Repository, - Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode, - 
IO::String, Object::Tiny, Compress::ZLib, Error::Simple, - Test::More, Test::Exception, Test::Pod + Math::BigInt, JSON::MaybeXS, Try::Tiny, Regexp::Common, Encode, + Object::Tiny, Compress::ZLib, Error::Simple, Test::More, + Test::Exception, Test::Pod + - Rust: rustc and Cargo 1.65.0 or greater - Apache Ant 1.7 - - Apache Forrest 0.9 (for documentation) - md5sum, sha1sum, used by top-level dist target ## Using docker @@ -59,6 +59,20 @@ DOCKER_IMAGE_NAME=avro-build:1.10.1-rc1 ./build.sh docker DOCKER_RUN_ENTRYPOINT="mvn --version" ./build.sh docker ``` +## Developing inside a Container (Visual Studio Code Devcontainer) + +Requirements: + - [Visual Studio Code](https://code.visualstudio.com/) + - [Remote Development extension pack](https://aka.ms/vscode-remote/download/extension) + - Docker + - Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop) + - macOS: [Docker Desktop](https://www.docker.com/products/docker-desktop) + - Linux: [Docker CE/EE](https://docs.docker.com/install/#supported-platforms) and [Docker Compose](https://docs.docker.com/compose/install) + +Useful links: + - [Developing inside a Container](https://code.visualstudio.com/docs/remote/containers) + - [Going further with Dev Containers](https://microsoft.github.io/code-with-engineering-playbook/developer-experience/going-further/) + ## Building Once the requirements are installed (or from the Docker container), diff --git a/DIST_README.txt b/DIST_README.txt index 003751c75b5..9c68790ee86 100644 --- a/DIST_README.txt +++ b/DIST_README.txt @@ -9,6 +9,6 @@ This distribution contains the following files: - avro-doc-x.y.z.tar.gz contains Avro's pre-built documentation. - - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, and ruby/ + - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, rust/ and ruby/ subdirectories contain pre-built, language-specific binaries, bundles, etc. as conveniences. 
diff --git a/LICENSE.txt b/LICENSE.txt index 7e159a69bc2..42761f6f813 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -295,16 +295,6 @@ Copyright (C) 2006 Toni Ronkko | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- -License for ivy-2.2.0.jar used in the python implementation: - -Apache License version 2.0 (see above) - ----------------------------------------------------------------------- -License for pyAntTasks-1.3.jar used in the python implementation: - -Apache License version 2.0 (see above) - ---------------------------------------------------------------------- License for NUnit binary included with the C# implementation: File: nunit.framework.dll diff --git a/NOTICE.txt b/NOTICE.txt index 737629b09ba..41fa8b76b62 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -37,25 +37,6 @@ The Odiago NOTICE at the time of the contribution: | This product includes software developed by Odiago, Inc. | (https://www.wibidata.com). -Apache Ivy includes the following in its NOTICE file: - -| Apache Ivy -| Copyright 2007-2010 The Apache Software Foundation -| -| This product includes software developed by -| The Apache Software Foundation (https://www.apache.org/). -| -| Portions of Ivy were originally developed by -| Jayasoft SARL (http://www.jayasoft.fr/) -| and are licensed to the Apache Software Foundation under the -| "Software Grant License Agreement" -| -| SSH and SFTP support is provided by the JCraft JSch package, -| which is open source software, available under -| the terms of a BSD style license. -| The original software and related information is available -| at http://www.jcraft.com/jsch/. 
- Apache Log4Net includes the following in its NOTICE file: | Apache log4net diff --git a/README.md b/README.md index 472656a3eb4..8167ed4ca75 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ -# Apache Avro™ +Apache Avro™Avro Logo +============ +### Current CI status (Github servers) [![test c][test c img]][test c] [![test c#][test c# img]][test c#] [![test c++][test c++ img]][test c++] @@ -14,6 +16,19 @@ [![rust clippy check][rust clippy check img]][rust clippy check] [![rust security audit][rust security audit img]][rust security audit] +### Current CI status (ARM based servers) +[![test c ARM][test c ARM img]][test c ARM] +[![test c# ARM][test c# ARM img]][test c# ARM] +[![test c++ ARM][test c++ ARM img]][test c++ ARM] +[![test java ARM][test java ARM img]][test java ARM] +[![test javascript ARM][test javascript ARM img]][test javascript ARM] +[![test perl ARM][test perl ARM img]][test perl ARM] +[![test ruby ARM][test ruby ARM img]][test ruby ARM] +[![test python ARM][test python ARM img]][test python ARM] +[![test php ARM][test php ARM img]][test php ARM] +[![rust continuous integration ARM][rust continuous integration ARM img]][rust continuous integration ARM] + +### Current CodeQL status [![codeql c#][codeql c# img]][codeql c#] [![codeql java][codeql java img]][codeql java] [![codeql javascript][codeql javascript img]][codeql javascript] @@ -43,7 +58,18 @@ To contribute to Avro, please read: [test python]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml [test php]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml +[test c ARM]: https://github.com/apache/avro/actions/workflows/test-lang-c-ARM.yml +[test c# ARM]: https://github.com/apache/avro/actions/workflows/test-lang-csharp-ARM.yml +[test c++ ARM]: https://github.com/apache/avro/actions/workflows/test-lang-c++-ARM.yml +[test java ARM]: https://github.com/apache/avro/actions/workflows/test-lang-java-ARM.yml +[test javascript ARM]: 
https://github.com/apache/avro/actions/workflows/test-lang-js-ARM.yml +[test perl ARM]: https://github.com/apache/avro/actions/workflows/test-lang-perl-ARM.yml +[test ruby ARM]: https://github.com/apache/avro/actions/workflows/test-lang-ruby-ARM.yml +[test python ARM]: https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml +[test php ARM]: https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml + [rust continuous integration]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml +[rust continuous integration ARM]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml [rust clippy check]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml [rust security audit]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml @@ -62,11 +88,34 @@ To contribute to Avro, please read: [test python img]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml/badge.svg [test php img]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml/badge.svg +[test c ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-c-ARM.yml/badge.svg +[test c# ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-csharp-ARM.yml/badge.svg +[test c++ ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-c++-ARM.yml/badge.svg +[test java ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-java-ARM.yml/badge.svg +[test javascript ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-js-ARM.yml/badge.svg +[test perl ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-perl-ARM.yml/badge.svg +[test ruby ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-ruby-ARM.yml/badge.svg +[test python ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml/badge.svg +[test php ARM img]: 
https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml/badge.svg + [rust continuous integration img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg [rust clippy check img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml/badge.svg [rust security audit img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml/badge.svg +[rust continuous integration ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml/badge.svg + [codeql c# img]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml/badge.svg [codeql java img]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml/badge.svg [codeql javascript img]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml/badge.svg [codeql python img]: https://github.com/apache/avro/actions/workflows/codeql-py-analysis.yml/badge.svg + +You can use devcontainers to develop Avro: + +* [![Open in Visual Studio Code](https://img.shields.io/static/v1?label=&message=Open%20in%20Visual%20Studio%20Code&color=blue&logo=visualstudiocode&style=flat)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/apache/avro) +* [![Open in Github Codespaces](https://img.shields.io/static/v1?label=&message=Open%20in%20Github%20Codespaces&color=2f362d&logo=github)](https://codespaces.new/apache/avro?quickstart=1&hide_repo_select=true) + + +### Trademarks & logos +Apache®, Apache Avro and the Apache Avro airplane logo are trademarks of The Apache Software Foundation. + +The Apache Avro airplane logo on this page was designed by [Emma Kellam](https://github.com/emmak3l) for use by this project. 
diff --git a/build.sh b/build.sh index 231a20e7e2f..2598148e024 100755 --- a/build.sh +++ b/build.sh @@ -39,6 +39,9 @@ change_java_version() { # =========================================================================== +# This might not have been sourced if the entrypoint is not bash +[[ -f "$HOME/.cargo/env" ]] && . "$HOME/.cargo/env" + set -xe cd "${0%/*}" @@ -53,6 +56,9 @@ DOCKER_BUILD_XTRA_ARGS=${DOCKER_BUILD_XTRA_ARGS-} # Override the docker image name used. DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME-} +# When building a docker container, these are the files that will be sent and made available. +DOCKER_EXTRA_CONTEXT="lang/ruby/Gemfile lang/ruby/avro.gemspec lang/ruby/Manifest share/VERSION.txt" + usage() { echo "Usage: $0 {lint|test|dist|sign|clean|veryclean|docker [--args \"docker-args\"]|rat|githooks|docker-test}" exit 1 @@ -174,7 +180,15 @@ do cp "lang/perl/Avro-$VERSION.tar.gz" dist/perl/ # build docs - (cd doc; ant) + cp -r doc/ build/staging-web/ + find build/staging-web/ -type f -print0 | xargs -0 sed -r -i "s#\+\+version\+\+#${VERSION,,}#g" + mkdir -p build/staging-web/public/docs/ + mv build/staging-web/doc/content/en/docs/++version++ build/staging-web/public/docs/"${VERSION,,}" + read -n 1 -s -r -p "Build build/staging-web/ manually now. Press a key to continue..." + # If it was a SNAPSHOT, it was lowercased during the build. + cp -R build/staging-web/public/docs/"${VERSION,,}"/* "build/$DOC_DIR/" + cp -R "build/$DOC_DIR/api" build/staging-web/public/docs/"${VERSION,,}"/ + ( cd build/staging-web/public/docs/; ln -s "${VERSION,,}" current ) # add LICENSE and NOTICE for docs mkdir -p "build/$DOC_DIR" cp doc/LICENSE "build/$DOC_DIR" @@ -198,7 +212,13 @@ do \! -name '*.asc' \! 
-name '*.txt' ); do (cd "${f%/*}" && shasum -a 512 "${f##*/}") > "$f.sha512" - gpg --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + + if [ -z "$GPG_LOCAL_USER" ]; then + gpg --pinentry-mode loopback --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + else + gpg --pinentry-mode loopback --local-user="$GPG_LOCAL_USER" --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + fi + done set -x @@ -206,7 +226,7 @@ do clean) rm -rf build dist - (cd doc; ant clean) + rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock (mvn -B clean) rm -rf lang/java/*/userlogs/ @@ -234,7 +254,7 @@ do veryclean) rm -rf build dist - (cd doc; ant clean) + rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock (mvn -B clean) rm -rf lang/java/*/userlogs/ @@ -264,7 +284,6 @@ do rm -rf lang/perl/inc/ rm -rf lang/ruby/.gem/ rm -rf lang/ruby/Gemfile.lock - rm -rf lang/py/lib/ivy-2.2.0.jar rm -rf lang/csharp/src/apache/ipc.test/bin/ rm -rf lang/csharp/src/apache/ipc.test/obj ;; @@ -286,15 +305,20 @@ do DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"avro-build-$USER_NAME:latest"} { cat share/docker/Dockerfile - grep -vF 'FROM avro-build-ci' share/docker/DockerfileLocal echo "ENV HOME /home/$USER_NAME" echo "RUN getent group $GROUP_ID || groupadd -g $GROUP_ID $USER_NAME" echo "RUN getent passwd $USER_ID || useradd -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME" + echo "RUN mkdir -p /home/$USER_NAME/.m2/repository" } > Dockerfile + + if [ -z "$BUILDPLATFORM" ]; then + export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}") + fi + # Include the ruby gemspec for preinstallation. 
# shellcheck disable=SC2086 - tar -cf- lang/ruby/Gemfile Dockerfile | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - + tar -cf- Dockerfile $DOCKER_EXTRA_CONTEXT | DOCKER_BUILDKIT=1 docker build $DOCKER_BUILD_XTRA_ARGS --build-arg="BUILDPLATFORM=${BUILDPLATFORM}" -t "$DOCKER_IMAGE_NAME" - rm Dockerfile - # By mapping the .m2 directory you can do an mvn install from + # By mapping the .m2/repository directory you can do an mvn install from # within the container and use the result on your normal # system. And this also is a significant speedup in subsequent # builds because the dependencies are downloaded only once. @@ -306,10 +330,13 @@ do # extra second before the changes are available within the docker container. # shellcheck disable=SC2086 docker run --rm -t -i \ - --env "JAVA=${JAVA:-8}" \ + --env "JAVA=${JAVA:-21}" \ --user "${USER_NAME}" \ --volume "${HOME}/.gnupg:/home/${USER_NAME}/.gnupg" \ - --volume "${HOME}/.m2:/home/${USER_NAME}/.m2${DOCKER_MOUNT_FLAG}" \ + --volume "${PWD}/share/docker/m2:/home/${USER_NAME}/.m2/" \ + --volume "${PWD}/share/docker/m2/toolchains.xml:/home/${USER_NAME}/.m2/toolchains.xml" \ + --volume "${HOME}/.m2/repository:/home/${USER_NAME}/.m2/repository${DOCKER_MOUNT_FLAG}" \ + --volume "${HOME}/.m2/build-cache:/home/${USER_NAME}/.m2/build-cache${DOCKER_MOUNT_FLAG}" \ --volume "${PWD}:/home/${USER_NAME}/avro${DOCKER_MOUNT_FLAG}" \ --workdir "/home/${USER_NAME}/avro" \ ${DOCKER_RUN_XTRA_ARGS} "$DOCKER_IMAGE_NAME" ${DOCKER_RUN_ENTRYPOINT} @@ -327,9 +354,15 @@ do ;; docker-test) - tar -cf- share/docker/Dockerfile lang/ruby/Gemfile | - docker build -t avro-test -f share/docker/Dockerfile - - docker run --rm -v "${PWD}:/avro${DOCKER_MOUNT_FLAG}" --env "JAVA=${JAVA:-8}" avro-test /avro/share/docker/run-tests.sh + if [ -z "$BUILDPLATFORM" ]; then + export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}") + fi + tar -cf- share/docker/Dockerfile $DOCKER_EXTRA_CONTEXT | + DOCKER_BUILDKIT=1 docker build -t 
avro-test --build-arg BUILDPLATFORM="${BUILDPLATFORM}" -f share/docker/Dockerfile - + docker run --rm \ + --volume "${PWD}:/avro${DOCKER_MOUNT_FLAG}" \ + --volume "${PWD}/share/docker/m2/:/root/.m2/" \ + --env "JAVA=${JAVA:-11}" avro-test /avro/share/docker/run-tests.sh ;; *) diff --git a/composer.json b/composer.json index e5f1313aeba..b39f209ff6a 100644 --- a/composer.json +++ b/composer.json @@ -9,5 +9,10 @@ "require-dev": { "phpunit/phpunit": "^9.1", "squizlabs/php_codesniffer": "^3.5" + }, + "config": { + "allow-plugins": { + "beberlei/composer-monorepo-plugin": true + } } } diff --git a/doc/.gitignore b/doc/.gitignore index 567609b1234..b56c8f8a701 100644 --- a/doc/.gitignore +++ b/doc/.gitignore @@ -1 +1,5 @@ -build/ +public/ +resources/ +node_modules/ +package-lock.json +.hugo_build.lock diff --git a/doc/forrest.properties b/doc/Dockerfile similarity index 92% rename from doc/forrest.properties rename to doc/Dockerfile index 32df46f523d..1a671067c65 100644 --- a/doc/forrest.properties +++ b/doc/Dockerfile @@ -17,6 +17,6 @@ # under the License. # -# Make Forrest work with Java6 -forrest.validate.sitemap=false +FROM klakegg/hugo:ext-alpine +RUN apk add git diff --git a/doc/LICENSE b/doc/LICENSE index af6b6731242..e0f8f08e158 100644 --- a/doc/LICENSE +++ b/doc/LICENSE @@ -306,12 +306,6 @@ Prototype JavaScript framework, version 1.4.0_pre4 For a copy of the MIT license text, see above. 
----------------------------------------------------------------------- -License for Apache Forrest (skin), included in the Avro documentation: - -Copyright: 2009-2015 The Apache Software Foundation -License: https://www.apache.org/licenses/LICENSE-2.0 (see above) - ---------------------------------------------------------------------- License for Doxygen-generated documentation for the C++ and C# implementations: diff --git a/doc/NOTICE b/doc/NOTICE index 8b7999217fd..7320bb0adfc 100644 --- a/doc/NOTICE +++ b/doc/NOTICE @@ -1,5 +1,5 @@ Apache Avro -Copyright 2010-2015 The Apache Software Foundation +Copyright 2010-2022 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (https://www.apache.org/). @@ -26,16 +26,9 @@ is: | implied. See the License for the specific language governing | permissions and limitations under the License. -The Odiago NOTICE at the time of the contribution: - -| This product includes software developed by Odiago, Inc. -| (https://www.wibidata.com). - -The documentation contains the default Apache Forrest skin. -Apache Forrest includes the following in its NOTICE file: - -| Apache Forrest -| Copyright 2002-2007 The Apache Software Foundation. +|-------------------------------------------------------------------------- +| This product includes software developed by The Docsy Authors. +| (https://www.docsy.dev/). | | This product includes software developed at | The Apache Software Foundation (https://www.apache.org/). @@ -49,35 +42,3 @@ Apache Forrest includes the following in its NOTICE file: | Other accompanying products do not require attribution, so are not listed. 
| | ------------------------------------------------------------------------ -| This product includes software developed by the OpenSymphony Group -| http://www.opensymphony.com/ -| -| This product includes software developed for project Krysalis -| http://www.krysalis.org/ -| -| This product includes software developed by Andy Clark. -| https://people.apache.org/~andyc/neko/ -| -| This product includes software developed by the ExoLab Project -| https://www.exolab.org/ -| -| This product includes software developed by TouchGraph LLC -| https://www.touchgraph.com/ -| -| This product includes software developed by Marc De Scheemaecker -| http://nanoxml.cyberelf.be/ -| -| This product includes software developed by the ANTLR project -| https://www.antlr.org/ -| -| This product includes software developed by Chaperon -| http://chaperon.sourceforge.net/ -| -| This product includes software developed by Sal Mangano (included in the XSLT Cookbook published by O'Reilly) -| https://www.oreilly.com/catalog/xsltckbk/ -| -| This product includes software developed by The Werken Company. -| http://jaxen.werken.com/ -| -| This product includes software developed by the jfor project -| http://www.jfor.org/ diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 00000000000..31f167d8712 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,61 @@ +# Apache Avro website + +This website is based on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme. +Before building the website, you need to initialize submodules. + +``` +git submodule update --init --recursive +``` + +## Previewing the website locally + +``` +# From the doc directory, you will need to do this at least once for our SCSS modifications +(cd doc && npm install) + +# Serve the website dynamically using extended hugo: +hugo server --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged + +# You can do the same thing without installing hugo via docker. 
+# From the Avro root directory: +docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest --source doc/ server \ + --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged +``` + +## Building the website in a distribution + +When you build an Avro distribution with the script, there is currently a manual step required. + +After all the binary artifacts and source have been created and copied to the `dist/` directory, the process will +stop with **Build build/staging-web/ manually now. Press a key to continue...** + +At this point, from another terminal and in the Avro root directory, you can build the website: + +``` +# Install the necessary npm packages +docker run --entrypoint=sh --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \ + -c "cd build/staging-web && npm install" +# Generate the website and the release documentation +docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \ + --source build/staging-web/ --gc --minify +# Optional: docker leaves some files with unmanageable permissions +sudo chown -R $USER:$USER build/staging-web +``` + +## Avro version + +(TODO) + +When a new version of Apache Avro is released: + +1. Change the value of `params.avroversion` in `config.toml` +2. 
Add a new entry to the `Releases` pages in the `Blog` section, for example: + +``` +cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md +``` + +## Updating the https://avro.apache.org website from a distribution + +(TODO) + diff --git a/doc/assets/icons/logo-black-text.png b/doc/assets/icons/logo-black-text.png new file mode 100644 index 00000000000..c593f891c1d Binary files /dev/null and b/doc/assets/icons/logo-black-text.png differ diff --git a/doc/assets/icons/logo-text.svg b/doc/assets/icons/logo-text.svg new file mode 100644 index 00000000000..6f842518618 --- /dev/null +++ b/doc/assets/icons/logo-text.svg @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + diff --git a/doc/assets/icons/logo-white-text.png b/doc/assets/icons/logo-white-text.png new file mode 100644 index 00000000000..29ad9ed4fd0 Binary files /dev/null and b/doc/assets/icons/logo-white-text.png differ diff --git a/doc/assets/icons/logo.png b/doc/assets/icons/logo.png new file mode 100644 index 00000000000..5651de93f58 Binary files /dev/null and b/doc/assets/icons/logo.png differ diff --git a/doc/assets/icons/logo.svg b/doc/assets/icons/logo.svg new file mode 100644 index 00000000000..beee014a3d5 --- /dev/null +++ b/doc/assets/icons/logo.svg @@ -0,0 +1,27 @@ + + + + + + + + diff --git a/doc/assets/scss/PTMono-Regular.ttf b/doc/assets/scss/PTMono-Regular.ttf new file mode 100644 index 00000000000..b1983838c66 Binary files /dev/null and b/doc/assets/scss/PTMono-Regular.ttf differ diff --git a/doc/assets/scss/_styles_project.scss b/doc/assets/scss/_styles_project.scss new file mode 100644 index 00000000000..b69a1eb167a --- /dev/null +++ b/doc/assets/scss/_styles_project.scss @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +@font-face { + font-family: 'PT Mono'; + font-style: normal; + font-weight: 400; + font-display: swap; + src: url(./PTMono-Regular.ttf) format('truetype'); +} + +// Disable all github editing links for now +.td-page-meta--view { display: none !important; } +.td-page-meta--edit { display: none !important; } +.td-page-meta--child { display: none !important; } +.td-page-meta--issue { display: none !important; } +.td-page-meta--project-issue { display: none !important; } + +.navbar-brand { + font-family: "PT Mono", monospace; +} diff --git a/doc/assets/scss/_variables_project.scss b/doc/assets/scss/_variables_project.scss new file mode 100644 index 00000000000..fb5495c8669 --- /dev/null +++ b/doc/assets/scss/_variables_project.scss @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +.dropdown-menu .show { + overflow-y: auto; + max-height: 700px; +} diff --git a/doc/build.xml b/doc/build.xml deleted file mode 100644 index d711608a36d..00000000000 --- a/doc/build.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - - <!ENTITY AvroVersion "${version}"> - - - - - - - - - - - - - - - - - - - - diff --git a/doc/config.toml b/doc/config.toml new file mode 100644 index 00000000000..4b58b37f02a --- /dev/null +++ b/doc/config.toml @@ -0,0 +1,419 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +baseURL = "/" +title = "Apache Avro" + +# Language settings +contentDir = "content/en" +defaultContentLanguage = "en" +defaultContentLanguageInSubdir = false +# Useful when translating. 
+enableMissingTranslationPlaceholders = true + +enableRobotsTXT = true + +# Hugo allows theme composition (and inheritance). The precedence is from left to right. +theme = ["docsy"] + +# Will give values to .Lastmod etc. +enableGitInfo = true + +# Comment out to disable taxonomies in Docsy +# disableKinds = ["taxonomy", "taxonomyTerm"] + +# You can add your own taxonomies +[taxonomies] +tag = "tags" +category = "categories" + +[params.taxonomy] +# set taxonomyCloud = [] to hide taxonomy clouds +taxonomyCloud = ["tags", "categories"] + +# If used, must have same lang as taxonomyCloud +taxonomyCloudTitle = ["Tag Cloud", "Categories"] + +# set taxonomyPageHeader = [] to hide taxonomies on the page headers +taxonomyPageHeader = ["tags", "categories"] + + +# Highlighting config +pygmentsCodeFences = true +pygmentsUseClasses = false +# Use the new Chroma Go highlighter in Hugo. +pygmentsUseClassic = false +#pygmentsOptions = "linenos=table" +# See https://help.farbox.com/pygments.html +pygmentsStyle = "tango" + +# Configure how URLs look like per section. +[permalinks] +blog = "/:section/:year/:month/:day/:slug/" + +## Configuration for BlackFriday markdown parser: https://github.com/russross/blackfriday +[blackfriday] +plainIDAnchors = true +hrefTargetBlank = true +angledQuotes = false +latexDashes = true + +# Image processing configuration. +[imaging] +resampleFilter = "CatmullRom" +quality = 75 +anchor = "smart" + +[services] +[services.googleAnalytics] +# Comment out the next line to disable GA tracking. Also disables the feature described in [params.ui.feedback]. +# id = "UA-00000000-0" + +# Language configuration + +[languages.params] +[languages.en.params] +title = "Apache Avro" +description = "" +languageName ="English" +# Weight used for sorting. 
+weight = 1 + +[markup] + [markup.goldmark] + [markup.goldmark.renderer] + unsafe = true + [markup.highlight] + # See a complete list of available styles at https://xyproto.github.io/splash/docs/all.html + style = "tango" + # Uncomment if you want your chosen highlight style used for code blocks without a specified language + # guessSyntax = "true" + +# Everything below this are Site Params + +# Comment out if you don't want the "print entire section" link enabled. +[outputs] +section = ["HTML", "print", "RSS"] + +[params] +avroversion = "++version++" +copyright = "The Apache Software Foundation" +apache_foundation = "https://www.apache.org/" +apache_events_logo = "https://www.apache.org/events/current-event-234x60.png" +apache_events_url = "https://www.apache.org/events/current-event.html" +privacy_policy = "http://www.apache.org/foundation/policies/privacy.html" +license = "http://www.apache.org/licenses/" + +# First one is picked as the Twitter card image if not set on page. +# images = ["images/project-illustration.png"] + +# Menu title if your navbar has a versions selector to access old versions of your site. +# This menu appears only if you have at least one [params.versions] set. +version_menu = "Releases" + +# Flag used in the "version-banner" partial to decide whether to display a +# banner on every page indicating that this is an archived version of the docs. +# Set this flag to "true" if you want to display the banner. +archived_version = false + +# The version number for the version of the docs represented in this doc set. +# Used in the "version-banner" partial to display a version number for the +# current doc set. +version = "++version++" + +# A link to latest version of the docs. Used in the "version-banner" partial to +# point people to the main doc site. 
+url_latest_version = "https://avro.apache.org" + +# Repository configuration (URLs for in-page links to opening issues and suggesting changes) +github_repo = "https://github.com/apache/avro" +github_subdir = "doc" + +# An optional link to a related project repo. For example, the sibling repository where your product code lives. +github_project_repo = "https://github.com/apache/avro" + +# Specify a value here if your content directory is not in your repo's root directory +# github_subdir = "" + +# Uncomment this if you have a newer GitHub repo with "main" as the default branch, +# or specify a new value if you want to reference another branch in your GitHub links +# github_branch= "main" + +# Google Custom Search Engine ID. Remove or comment out to disable search. +# gcs_engine_id = "d72aa9b2712488cc3" + +# Enable Algolia DocSearch +algolia_docsearch = false + +# Enable Lunr.js offline search +offlineSearch = false + +# Enable syntax highlighting and copy buttons on code blocks with Prism +prism_syntax_highlighting = true + +# User interface configuration +[params.ui] +# Set to true to disable breadcrumb navigation. +breadcrumb_disable = false +# Set to true to disable the About link in the site footer +footer_about_disable = true +# Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar +navbar_logo = true +# Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage. +navbar_translucent_over_cover_disable = false +# Enable to show the side bar menu in its compact state. +sidebar_menu_compact = false +# Set to true to hide the sidebar search box (the top nav search box will still be displayed if search is enabled) +sidebar_search_disable = true +sidebar_menu_foldable = true + +# Adds a H2 section titled "Feedback" to the bottom of each doc. The responses are sent to Google Analytics as events. 
+# This feature depends on [services.googleAnalytics] and will be disabled if "services.googleAnalytics.id" is not set. +# If you want this feature, but occasionally need to remove the "Feedback" section from a single page, +# add "hide_feedback: true" to the page's front matter. +[params.ui.feedback] +enable = false +# The responses that the user sees after clicking "yes" (the page was helpful) or "no" (the page was not helpful). +yes = 'Glad to hear it! Please tell us how we can improve.' +no = 'Sorry to hear that. Please tell us how we can improve.' + +# Adds a reading time to the top of each doc. +# If you want this feature, but occasionally need to remove the Reading time from a single page, +# add "hide_readingtime: true" to the page's front matter +[params.ui.readingtime] +enable = true + +[params.asf] +[[params.asf.links]] +name = "ASF Web Site" +url = "http://www.apache.org/" + +[[params.asf.links]] +name = "License" +url = "http://www.apache.org/licenses/" + +[[params.asf.links]] +name = "Donate" +url = "http://www.apache.org/foundation/sponsorship.html" + + +[[params.asf.links]] +name = "Thanks" +url = "http://www.apache.org/foundation/thanks.html" + +[[params.asf.links]] +name = "Security" +url = "http://www.apache.org/security/" + + +[params.links] +# End user relevant links. These will show up on left side of footer and in the community page if you have one. +[[params.links.user]] + name = "User mailing list" + url = "https://lists.apache.org/list.html?user@avro.apache.org" + icon = "fa fa-envelope" + desc = "Discussion and help from your fellow users" +[[params.links.user]] + name ="Twitter" + url = "https://twitter.com/ApacheAvro" + icon = "fab fa-twitter" + desc = "Follow us on Twitter to get the latest news!" +[[params.links.user]] + name = "Stack Overflow" + url = "https://stackoverflow.com/questions/tagged/avro" + icon = "fab fa-stack-overflow" + desc = "Practical questions and curated answers" +# Developer relevant links. 
These will show up on right side of footer and in the community page if you have one. +[[params.links.developer]] + name = "GitHub" + url = "https://github.com/apache/avro" + icon = "fab fa-github" + desc = "Development takes place here!" +[[params.links.developer]] + name = "Issues" + url = "https://issues.apache.org/jira/projects/AVRO/issues" + icon = "fab fa-jira" + desc = "Track bugs and new features" +[[params.links.developer]] + name = "Chat with other project developers at Slack" + url = "https://the-asf.slack.com/" + icon = "fab fa-slack" + desc = "Chat with other project developers at #avro channel" +[[params.links.developer]] + name = "Developer mailing list" + url = "https://lists.apache.org/list.html?dev@avro.apache.org" + icon = "fa fa-envelope" + desc = "Discuss development issues around the project" + +[[params.versions]] + version = "++version++ (Current)" + url = "/docs/++version++/" + +[[params.versions]] +version = "1.11.2" +url = "https://avro.apache.org/docs/1.11.2/" + +[[params.versions]] +version = "1.11.1" +url = "https://avro.apache.org/docs/1.11.1/" + +[[params.versions]] +version = "1.11.0" +url = "https://avro.apache.org/docs/1.11.0/" + +[[params.versions]] + version = "1.10.2" + url = "https://avro.apache.org/docs/1.10.2/" + +[[params.versions]] + version = "1.10.1" + url = "https://avro.apache.org/docs/1.10.1/" + +[[params.versions]] + version = "1.10.0" + url = "https://avro.apache.org/docs/1.10.0/" + +[[params.versions]] + version = "1.9.2" + url = "https://avro.apache.org/docs/1.9.2/" + +[[params.versions]] + version = "1.9.1" + url = "https://avro.apache.org/docs/1.9.1/" + +[[params.versions]] + version = "1.9.0" + url = "https://avro.apache.org/docs/1.9.0/" + +[[params.versions]] + version = "1.8.2" + url = "https://avro.apache.org/docs/1.8.2/" + +[[params.versions]] + version = "1.8.1" + url = "https://avro.apache.org/docs/1.8.1/" + +[[params.versions]] + version = "1.8.0" + url = "https://avro.apache.org/docs/1.8.0/" + 
+[[params.versions]] + version = "1.7.7" + url = "https://avro.apache.org/docs/1.7.7/" + +[[params.versions]] + version = "1.7.6" + url = "https://avro.apache.org/docs/1.7.6/" + +[[params.versions]] + version = "1.7.5" + url = "https://avro.apache.org/docs/1.7.5/" + +[[params.versions]] + version = "1.7.4" + url = "https://avro.apache.org/docs/1.7.4/" + +[[params.versions]] + version = "1.7.3" + url = "https://avro.apache.org/docs/1.7.3/" + +[[params.versions]] + version = "1.7.2" + url = "https://avro.apache.org/docs/1.7.2/" + +[[params.versions]] + version = "1.7.1" + url = "https://avro.apache.org/docs/1.7.1/" + +[[params.versions]] + version = "1.7.0" + url = "https://avro.apache.org/docs/1.7.0/" + +[[params.versions]] + version = "1.6.3" + url = "https://avro.apache.org/docs/1.6.3/" + +[[params.versions]] + version = "1.6.2" + url = "https://avro.apache.org/docs/1.6.2/" + +[[params.versions]] + version = "1.6.1" + url = "https://avro.apache.org/docs/1.6.1/" + +[[params.versions]] + version = "1.6.0" + url = "https://avro.apache.org/docs/1.6.0/" + +[[params.versions]] + version = "1.5.4" + url = "https://avro.apache.org/docs/1.5.4/" + +[[params.versions]] + version = "1.5.3" + url = "https://avro.apache.org/docs/1.5.3/" + +[[params.versions]] + version = "1.5.2" + url = "https://avro.apache.org/docs/1.5.2/" + +[[params.versions]] + version = "1.5.1" + url = "https://avro.apache.org/docs/1.5.1/" + +[[params.versions]] + version = "1.5.0" + url = "https://avro.apache.org/docs/1.5.0/" + +[[params.versions]] + version = "1.4.1" + url = "https://avro.apache.org/docs/1.4.1/" + +[[params.versions]] + version = "1.4.0" + url = "https://avro.apache.org/docs/1.4.0/" + +[[params.versions]] + version = "1.3.3" + url = "https://avro.apache.org/docs/1.3.3/" + +[[params.versions]] + version = "1.3.2" + url = "https://avro.apache.org/docs/1.3.2/" + +[[params.versions]] + version = "1.3.1" + url = "https://avro.apache.org/docs/1.3.1/" + +[[params.versions]] + version = "1.3.0" 
+ url = "https://avro.apache.org/docs/1.3.0/" + +[[params.versions]] + version = "1.2.0" + url = "https://avro.apache.org/docs/1.2.0/" + +[[params.versions]] + version = "1.1.0" + url = "https://avro.apache.org/docs/1.1.0/" + +[[params.versions]] + version = "1.0.0" + url = "https://avro.apache.org/docs/1.0.0/" + diff --git a/doc/content/en/_index.html b/doc/content/en/_index.html new file mode 100644 index 00000000000..618a99a14bf --- /dev/null +++ b/doc/content/en/_index.html @@ -0,0 +1,72 @@ ++++ +title = "Apache Avro" +linkTitle = "Apache Avro" + ++++ + + + + +
+
+
+
+
+

Apache Avro™ - a data serialization system

+ +
+
+
+
+
+ +{{% blocks/lead color="primary" %}} +Apache Avro™ is the leading serialization format for record data, and the first choice for streaming data pipelines. +It offers excellent schema evolution, and has implementations for the JVM (Java, Kotlin, Scala, …), Python, C/C++/C#, PHP, Ruby, +Rust, JavaScript, and even Perl. +{{% /blocks/lead %}} + +{{< blocks/section color="dark" type="features">}} + + +{{% blocks/feature icon="fab fa-java" title="Getting started with Java" url="/docs/++version++/getting-started-java" %}} +For Java / JVM users, find out everything you need to know about specifying a schema, (de)serializing Avro data and code generation. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-python" title="Getting started with Python" url="/docs/++version++/getting-started-python" %}} +For Python users, find out everything you need to know about specifying a schema and (de)serializing Avro data. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fad fa-comments" title="Join Our Community!" url="/community/" %}} +Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! +{{% /blocks/feature %}} + +{{< /blocks/section >}} \ No newline at end of file diff --git a/doc/content/en/blog/_index.md b/doc/content/en/blog/_index.md new file mode 100644 index 00000000000..85f97bd211d --- /dev/null +++ b/doc/content/en/blog/_index.md @@ -0,0 +1,33 @@ +--- +title: "Blog" +linkTitle: "Blog" +menu: + main: + weight: 30 +--- + + + +This is the **blog** section. It has two categories: News and Releases. + +Files in these directories will be listed in reverse chronological order. 
+ diff --git a/doc/content/en/blog/news/_index.md b/doc/content/en/blog/news/_index.md new file mode 100644 index 00000000000..243dcf5f4a2 --- /dev/null +++ b/doc/content/en/blog/news/_index.md @@ -0,0 +1,26 @@ +--- +title: "News About Apache Avro" +linkTitle: "News" +weight: 20 +--- + + diff --git a/doc/content/en/blog/news/avro-joins-apache.md b/doc/content/en/blog/news/avro-joins-apache.md new file mode 100755 index 00000000000..dbc1872644d --- /dev/null +++ b/doc/content/en/blog/news/avro-joins-apache.md @@ -0,0 +1,28 @@ +--- +title: "Avro joins Apache" +linkTitle: "Avro joins Apache" +date: 2009-04-10 +--- + + + +Avro has joined the Apache Software Foundation as a Hadoop subproject. diff --git a/doc/content/en/blog/news/new-committer-christophe-le-saec.md b/doc/content/en/blog/news/new-committer-christophe-le-saec.md new file mode 100755 index 00000000000..1522c1722b9 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-christophe-le-saec.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Christophe Le Saec" +linkTitle: "New committer: Christophe Le Saec" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Christophe +Le Saec to become a committer and we are pleased to announce that +he has accepted. + +Christophe definitely puts in the work and has an impressive breadth of +knowledge about the languages of the Avro SDK! + +As an ASF project, we tend to be very conservative about making changes, and +Christophe brings in fresh ideas and very quickly proposes concrete +implementations to prove them. He has a good understanding of Avro, the +motivation to move things forward, and the expertise to make changes! At the +same time, he's easy to talk to and flexible in coming to a consensus. + +Thanks for all your hard work! 
diff --git a/doc/content/en/blog/news/new-committer-david-mollitor.md b/doc/content/en/blog/news/new-committer-david-mollitor.md new file mode 100755 index 00000000000..eb793009466 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-david-mollitor.md @@ -0,0 +1,41 @@ +--- +title: "New committer: David Mollitor" +linkTitle: "New committer: David Mollitor" +date: 2021-10-05 +--- + + + +The Project Management Committee (PMC) for Apache Avro +has invited David Mollitor to become a committer and we are pleased +to announce that he has accepted. + +Since 2017, David has raised and fixed many issues in the +Java SDK. Recently he's been finding and providing fixes for subtle +performance issues. His work is always high-quality and he is +reactive and pleasant to talk with on code reviews and JIRA. + +Being a committer enables easier contribution to the +project since there is no need to go via the patch +submission process. This should enable better productivity. + +It's great to have you as part of the team, David! diff --git a/doc/content/en/blog/news/new-committer-martin-grigorov.md b/doc/content/en/blog/news/new-committer-martin-grigorov.md new file mode 100755 index 00000000000..78cc3b61648 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-martin-grigorov.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Martin Grigorov" +linkTitle: "New committer: Martin Grigorov" +date: 2022-01-04 +--- + + + +The Project Management Committee (PMC) for Apache Avro +has invited Martin Grigorov to become a committer and we are pleased +to announce that he has accepted. + +Over the last few months, he has been active, reliable and easy to +work with on PRs and on the mailing list. His work is of high +quality, and he has a breadth of experience in many of the SDK languages. +I'm especially keen to point out the work he's been doing on the website! 
+ +Being a committer enables easier contribution to the +project since there is no need to go via the patch +submission process. This should enable better productivity. + +It's great to have you as part of the team, Martin! diff --git a/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md new file mode 100755 index 00000000000..535a2d88185 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Oscar Westra van Holthe - Kind" +linkTitle: "New committer: Oscar Westra van Holthe - Kind" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Oscar +Westra van Holthe - Kind to become a committer and we are pleased to announce that +he has accepted. + +Oscar has done some really solid work on the IDL and JavaCC parts of the Java +SDK. We trust his work and think it's exceptionally high quality. From the +start, he has already been doing much of the work of a committer, demonstrated by +his continuous presence in commenting JIRA, reviewing PRs as well as +encouraging and insightful words on the mailing list. + +As a bonus, in his spare time, Oscar also maintains the IntelliJ plugin for +[IDL support](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)! + +Thanks for all your hard work, and welcome! 
diff --git a/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md new file mode 100755 index 00000000000..42834c551c5 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md @@ -0,0 +1,47 @@ +--- +title: "New committer: Zoltan Csizmadia" +linkTitle: "New committer: Zoltan Csizmadia" +date: 2022-03-29 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited +Zoltan Csizmadia to become a committer and we are pleased to announce +that he has accepted. + +Zoltan has been present in the C# SDK for over two years and has +really increased his activity in maintaining this language in the last +few months. He knows the technology, but more importantly, he is +patient and works well with those of us who rely on the expertise of +others. Recently, he has been engaging with other contributors to +increase the maintainability and quality of the dotnet code, and we +have confidence in his decisions to balance stability of the +established code and the expectations of modern C# developers. + +Being a committer enables easier contribution to the project since +there is no need to go via the patch submission process. This should +enable better productivity. + +Please join me in congratulating Zoltan on his recognition of great +work thus far in our community. + diff --git a/doc/content/en/blog/news/new-pmc-martin-grigorov.md b/doc/content/en/blog/news/new-pmc-martin-grigorov.md new file mode 100755 index 00000000000..659bdb30d42 --- /dev/null +++ b/doc/content/en/blog/news/new-pmc-martin-grigorov.md @@ -0,0 +1,30 @@ +--- +title: "New PMC member: Martin Grigorov" +linkTitle: "New PMC member: Martin Grigorov" +date: 2022-09-13 +--- + + + +The Project Management Committee (PMC) for Apache Avro is pleased to announce that Martin Grigorov has accepted our invitation to become a PMC member. 
He has been active, reliable and responsive to the community and a solid contributor to various SDKs, bringing well-thought-out reviews and comments to both old and new PRs and JIRA. He definitely stepped up for the website refactoring and preparing for the 1.11.1 release! + +Please join me in welcoming Martin to the Avro PMC! diff --git a/doc/content/en/blog/news/new-pmc-michael-a-smith.md b/doc/content/en/blog/news/new-pmc-michael-a-smith.md new file mode 100755 index 00000000000..2d203128eca --- /dev/null +++ b/doc/content/en/blog/news/new-pmc-michael-a-smith.md @@ -0,0 +1,34 @@ +--- +title: "New PMC member: Michael A. Smith" +linkTitle: "New PMC member: Michael A. Smith" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Michael A. +Smith to the PMC and we are pleased to announce that he has accepted. + +Notably, Michael has taken a leadership role in ensuring the quality of the +Python SDK, lending his expertise to ensure that Avro has a place in the +python community, while keeping our implementation up-to-date with standards +and modern versions. It's not an easy task, and we appreciate all he does! diff --git a/doc/content/en/blog/news/new-project-logo.md b/doc/content/en/blog/news/new-project-logo.md new file mode 100644 index 00000000000..24f886912da --- /dev/null +++ b/doc/content/en/blog/news/new-project-logo.md @@ -0,0 +1,50 @@ +--- +title: "New Project Logo" +linkTitle: "New Project Logo" +date: 2023-11-21 +--- + + + +The Apache Avro project has a new project logo! + +The old logo was derived from the logo of a (now defunct) aircraft manufacturer +in Great Britain. This posed a risk, as the Apache foundation would not contest +legal action (even if extremely unlikely). + +But thanks to Emma Kellam, we now have a new logo! 
She has made several logo +designs, and after some debate and several votes (it was a close call!), we can +announce the new logo: + +[//]: # (the logo scales to 100% high or all available width, so limit it) +

+{{< project_logo >}} +

+ +The new logo is an homage to the previous logo, which is also triangular and +uses blue colours. The paper airplane embodies keywords like 'fast', 'small' +and 'efficient'. The blobby tail left behind by the airplane makes the icon +unique and embodies 'flow' and 'transformation'. + +All in all a very nice logo for Apache Avro, which embodies the same keywords. + +Thanks for all your hard work Emma, and welcome! diff --git a/doc/content/en/blog/releases/_index.md b/doc/content/en/blog/releases/_index.md new file mode 100644 index 00000000000..55875726bd8 --- /dev/null +++ b/doc/content/en/blog/releases/_index.md @@ -0,0 +1,28 @@ +--- +title: "Releases" +linkTitle: "Releases" +weight: 20 +aliases: +- /releases.html +--- + + diff --git a/doc/content/en/blog/releases/avro-1.0.0-released.md b/doc/content/en/blog/releases/avro-1.0.0-released.md new file mode 100755 index 00000000000..1bf74fbbb0f --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.0.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.0.0" +linkTitle: "Avro 1.0.0" +date: 2010-07-15 +--- + + + +The first release of Avro is now available. To download it, use the "Download" link below. diff --git a/doc/content/en/blog/releases/avro-1.1.0-released.md b/doc/content/en/blog/releases/avro-1.1.0-released.md new file mode 100755 index 00000000000..f8876219ced --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.1.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.1.0" +linkTitle: "Avro 1.1.0" +date: 2009-09-15 +--- + + + +Apache Avro 1.1.0 is now available! diff --git a/doc/content/en/blog/releases/avro-1.10.0-released.md b/doc/content/en/blog/releases/avro-1.10.0-released.md new file mode 100755 index 00000000000..d7a41f5e39d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.0" +linkTitle: "Avro 1.10.0" +date: 2020-06-29 +--- + + + +Apache Avro 1.10.0 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.10.1-released.md b/doc/content/en/blog/releases/avro-1.10.1-released.md new file mode 100755 index 00000000000..b8e6b199bd2 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.1" +linkTitle: "Avro 1.10.1" +date: 2020-12-03 +--- + + + +Apache Avro 1.10.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.10.2-released.md b/doc/content/en/blog/releases/avro-1.10.2-released.md new file mode 100755 index 00000000000..ea1a74a7256 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.10.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.10.2" +linkTitle: "Avro 1.10.2" +date: 2021-03-15 +--- + + + +Apache Avro 1.10.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.11.0-released.md b/doc/content/en/blog/releases/avro-1.11.0-released.md new file mode 100755 index 00000000000..54fd8b7293c --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.0-released.md @@ -0,0 +1,70 @@ +--- +title: "Avro 1.11.0" +linkTitle: "Avro 1.11.0" +date: 2021-10-31 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.0! 
+ +All signed release artifacts, signatures and verification instructions can +be found <a href="{{< relref "/project/download" >}}">here</a> + +This release includes 120 Jira issues, including some interesting features: + +* Specification: AVRO-3212 Support documentation tags for FIXED types +* C#: AVRO-2961 Support dotnet framework 5.0 +* C#: AVRO-3225 Prevent memory errors when deserializing untrusted data +* C++: AVRO-2923 Logical type corrections +* Java: AVRO-2863 Support Avro core on android +* Javascript: AVRO-3131 Drop support for node.js 10 +* Perl: AVRO-3190 Fix error when reading from EOF +* Python: AVRO-2906 Improved performance validating deep record data +* Python: AVRO-2914 Drop Python 2 support +* Python: AVRO-3004 Drop Python 3.5 support +* Ruby: AVRO-3108 Drop Ruby 2.5 support + +For the first time, the 1.11.0 release includes experimental support for +**Rust**. Work is continuing on this donated SDK, but we have not versioned and +published official artifacts for this release. + +**Python**: The avro package fully supports Python 3. We will no longer publish a +separate avro-python3 package + +And of course upgraded dependencies to latest versions, CVE fixes and more: +https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.0 + +The link to all fixed JIRA issues and a brief summary can be found at: +https://github.com/apache/avro/releases/tag/release-1.11.0 + +In addition, language-specific release artifacts are available: + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.0 +* Java: from Maven Central, +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.0 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.0 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.0 + +Thanks to everyone for contributing! 
+ diff --git a/doc/content/en/blog/releases/avro-1.11.1-released.md b/doc/content/en/blog/releases/avro-1.11.1-released.md new file mode 100755 index 00000000000..d78d172930d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.1-released.md @@ -0,0 +1,130 @@ +--- +title: "Avro 1.11.1" +linkTitle: "Avro 1.11.1" +date: 2022-07-31 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.1! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +## Most interesting + +This release includes 256 Jira issues, including some interesting features: + +Avro specification +- [AVRO-3436](https://issues.apache.org/jira/browse/AVRO-3436) Clarify which names are allowed to be qualified with namespaces +- [AVRO-3370](https://issues.apache.org/jira/browse/AVRO-3370) Inconsistent behaviour on types as invalid names +- [AVRO-3275](https://issues.apache.org/jira/browse/AVRO-3275) Clarify how fullnames are created, with example +- [AVRO-3257](https://issues.apache.org/jira/browse/AVRO-3257) IDL: add syntax to create optional fields +- [AVRO-2019](https://issues.apache.org/jira/browse/AVRO-2019) Improve docs for logical type annotation + +C++ +- [AVRO-2722](https://issues.apache.org/jira/browse/AVRO-2722) Use of boost::mt19937 is not thread safe + +C# +- [AVRO-3383](https://issues.apache.org/jira/browse/AVRO-3383) Many completed subtasks for modernizing C# coding style +- [AVRO-3481](https://issues.apache.org/jira/browse/AVRO-3481) Input and output variable type mismatch +- [AVRO-3475](https://issues.apache.org/jira/browse/AVRO-3475) Enforce time-millis and time-micros specification +- [AVRO-3469](https://issues.apache.org/jira/browse/AVRO-3469) Build and test using .NET SDK 7.0 +- [AVRO-3468](https://issues.apache.org/jira/browse/AVRO-3468) Default values for logical types not supported +- [AVRO-3467](https://issues.apache.org/jira/browse/AVRO-3467) Use oracle-actions to test with Early Access JDKs +- 
[AVRO-3453](https://issues.apache.org/jira/browse/AVRO-3453) Avrogen Add Generated Code Attribute +- [AVRO-3432](https://issues.apache.org/jira/browse/AVRO-3432) Add command line option to skip creation of directories +- [AVRO-3411](https://issues.apache.org/jira/browse/AVRO-3411) Add Visual Studio Code Devcontainer support +- [AVRO-3388](https://issues.apache.org/jira/browse/AVRO-3388) Implement extra codecs for C# as seperate nuget packages +- [AVRO-3265](https://issues.apache.org/jira/browse/AVRO-3265) avrogen generates uncompilable code when namespace ends +with ".Avro" +- [AVRO-3219](https://issues.apache.org/jira/browse/AVRO-3219) Support nullable enum type fields + +Java +- [AVRO-3531](https://issues.apache.org/jira/browse/AVRO-3531) GenericDatumReader in multithread lead to infinite loop +- [AVRO-3482](https://issues.apache.org/jira/browse/AVRO-3482) Reuse MAGIC in DataFileReader +- [AVRO-3586](https://issues.apache.org/jira/browse/AVRO-3586) Make Avro Build Reproducible +- [AVRO-3441](https://issues.apache.org/jira/browse/AVRO-3441) Automatically register LogicalTypeFactory classes +- [AVRO-3375](https://issues.apache.org/jira/browse/AVRO-3375) Add union branch, array index and map key "path" +information to serialization errors +- [AVRO-3374](https://issues.apache.org/jira/browse/AVRO-3374) Fully qualified type reference "ns.int" loses namespace +- [AVRO-3294](https://issues.apache.org/jira/browse/AVRO-3294) IDL parsing allows doc comments in strange places +- [AVRO-3273](https://issues.apache.org/jira/browse/AVRO-3273) avro-maven-plugin breaks on old versions of Maven +- [AVRO-3266](https://issues.apache.org/jira/browse/AVRO-3266) Output stream incompatible with MagicS3GuardCommitter +- [AVRO-3243](https://issues.apache.org/jira/browse/AVRO-3243) Lock conflicts when using computeIfAbsent +- [AVRO-3120](https://issues.apache.org/jira/browse/AVRO-3120) Support Next Java LTS (Java 17) +- [AVRO-2498](https://issues.apache.org/jira/browse/AVRO-2498) UUID 
generation is not working + +Javascript +- [AVRO-3489](https://issues.apache.org/jira/browse/AVRO-3489) Replace istanbul with nyc for code coverage +- [AVRO-3322](https://issues.apache.org/jira/browse/AVRO-3322) Buffer is not defined in browser environment +- [AVRO-3084](https://issues.apache.org/jira/browse/AVRO-3084) Fix JavaScript interop test to work with other languages on CI + +Perl +- [AVRO-3263](https://issues.apache.org/jira/browse/AVRO-3263) Schema validation warning on invalid schema with a long field + +Python +- [AVRO-3542](https://issues.apache.org/jira/browse/AVRO-3542) Scale assignment optimization +- [AVRO-3521](https://issues.apache.org/jira/browse/AVRO-3521) "Scale" property from decimal object +- [AVRO-3380](https://issues.apache.org/jira/browse/AVRO-3380) Byte reading in avro.io does not assert bytes read +- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229) validate the default value of an enum field +- [AVRO-3218](https://issues.apache.org/jira/browse/AVRO-3218) Pass LogicalType to BytesDecimalSchema + +Ruby +- [AVRO-3277](https://issues.apache.org/jira/browse/AVRO-3277) Test against Ruby 3.1 + +Rust +- [AVRO-3558](https://issues.apache.org/jira/browse/AVRO-3558) Add a demo crate that shows usage as WebAssembly +- [AVRO-3526](https://issues.apache.org/jira/browse/AVRO-3526) Improve resolving Bytes and Fixed from string +- [AVRO-3506](https://issues.apache.org/jira/browse/AVRO-3506) Implement Single Object Writer +- [AVRO-3507](https://issues.apache.org/jira/browse/AVRO-3507) Implement Single Object Reader +- [AVRO-3405](https://issues.apache.org/jira/browse/AVRO-3405) Add API for user-provided metadata to file +- [AVRO-3339](https://issues.apache.org/jira/browse/AVRO-3339) Rename crate from avro-rs to apache-avro +- [AVRO-3479](https://issues.apache.org/jira/browse/AVRO-3479) Derive Avro Schema macro + +Website +- [AVRO-2175](https://issues.apache.org/jira/browse/AVRO-2175) Website refactor +- 
[AVRO-3450](https://issues.apache.org/jira/browse/AVRO-3450) Document IDL support in IDEs + + +## Rust + +This is the first release that provides the `apache-avro` crate at [crates.io](https://crates.io/crates/apache-avro)! + +## JIRA + +A list of all JIRA tickets fixed in 1.11.1 can be found [here](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.1). + +## Language repositories + +In addition, language-specific release artifacts are available: + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.1 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.1/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.1 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.1 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.1 +* Rust: https://crates.io/crates/apache-avro/0.14.0 + +Thanks to everyone for contributing! + diff --git a/doc/content/en/blog/releases/avro-1.11.2-released.md b/doc/content/en/blog/releases/avro-1.11.2-released.md new file mode 100755 index 00000000000..3949d5f52ed --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.2-released.md @@ -0,0 +1,98 @@ +--- +title: "Avro 1.11.2" +linkTitle: "Avro 1.11.2" +date: 2023-07-03 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.2! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release addresses 89 [Avro JIRA](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.2). 
+ +## Highlights + +C# +- [AVRO-3434](https://issues.apache.org/jira/browse/AVRO-3434): Support logical schemas in reflect reader and writer +- [AVRO-3670](https://issues.apache.org/jira/browse/AVRO-3670): Add NET 7.0 support +- [AVRO-3724](https://issues.apache.org/jira/browse/AVRO-3724): Fix C# JsonEncoder for nested array of records +- [AVRO-3756](https://issues.apache.org/jira/browse/AVRO-3756): Add a method to return types instead of writing them to disk + +C++ +- [AVRO-3601](https://issues.apache.org/jira/browse/AVRO-3601): C++ API header contains breaking include +- [AVRO-3705](https://issues.apache.org/jira/browse/AVRO-3705): C++17 support + +Java +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): Add new GenericData String/Utf8 ARRAY comparison test +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): improve GenericRecord MAP type comparison +- [AVRO-3473](https://issues.apache.org/jira/browse/AVRO-3473): Use ServiceLoader to discover Conversion +- [AVRO-3536](https://issues.apache.org/jira/browse/AVRO-3536): Inherit conversions for Union type +- [AVRO-3597](https://issues.apache.org/jira/browse/AVRO-3597): Allow custom readers to override string creation +- [AVRO-3560](https://issues.apache.org/jira/browse/AVRO-3560): Throw SchemaParseException on dangling content beyond end of schema +- [AVRO-3602](https://issues.apache.org/jira/browse/AVRO-3602): Support Map(with non-String keys) and Set in ReflectDatumReader +- [AVRO-3676](https://issues.apache.org/jira/browse/AVRO-3676): Produce valid toString() for UUID JSON +- [AVRO-3698](https://issues.apache.org/jira/browse/AVRO-3698): SpecificData.getClassName must replace reserved words +- [AVRO-3700](https://issues.apache.org/jira/browse/AVRO-3700): Publish Java SBOM artifacts with CycloneDX +- [AVRO-3783](https://issues.apache.org/jira/browse/AVRO-3783): Read LONG length for bytes, only allow INT sizes +- [AVRO-3706](https://issues.apache.org/jira/browse/AVRO-3706): accept space in 
folder name + +Python +- [AVRO-3761](https://issues.apache.org/jira/browse/AVRO-3761): Fix broken validation of nullable UUID field +- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229): Raise on invalid enum default only if validation enabled +- [AVRO-3622](https://issues.apache.org/jira/browse/AVRO-3622): Fix compatibility check for schemas having or missing namespace +- [AVRO-3669](https://issues.apache.org/jira/browse/AVRO-3669): Add py.typed marker file (PEP561 compliance) +- [AVRO-3672](https://issues.apache.org/jira/browse/AVRO-3672): Add CI testing for Python 3.11 +- [AVRO-3680](https://issues.apache.org/jira/browse/AVRO-3680): allow to disable name validation + +Ruby +- [AVRO-3775](https://issues.apache.org/jira/browse/AVRO-3775): Fix decoded default value of logical type +- [AVRO-3697](https://issues.apache.org/jira/browse/AVRO-3697): Test against Ruby 3.2 +- [AVRO-3722](https://issues.apache.org/jira/browse/AVRO-3722): Eagerly initialize instance variables for better inline cache hits + +Rust +- Many, many bug fixes and implementation progress in this experimental SDK. +- Rust CI builds and lints are passing, and has been released to crates.io as version 0.15.0 + +In addition: +- Upgrade dependencies to latest versions, including CVE fixes. +- Testing and build improvements. +- Performance fixes, other bug fixes, better documentation and more... + + +Known issues +- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789) Java: Problem when comparing empty MAP types. + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.2 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.2/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.2 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.2 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.2 +* Rust: https://crates.io/crates/apache-avro/0.15.0 + +Thanks to everyone for contributing! 
+ diff --git a/doc/content/en/blog/releases/avro-1.11.3-released.md b/doc/content/en/blog/releases/avro-1.11.3-released.md new file mode 100755 index 00000000000..50a0eef3fcf --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.3-released.md @@ -0,0 +1,79 @@ +--- +title: "Avro 1.11.3" +linkTitle: "Avro 1.11.3" +date: 2023-09-22 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.3! + +All signed release artifacts, signatures and verification instructions can +be found }}">here + +This release [addresses 39 Jira issues](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.3). + +## Highlights + +Java +- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789): Comparing maps in GenericData is wrong for certain combinations and fails for empty maps +- [AVRO-3713](https://issues.apache.org/jira/browse/AVRO-3713): Thread scalability problem with the use of SynchronizedMap +- [AVRO-3486](https://issues.apache.org/jira/browse/AVRO-3486): Protocol namespace not parsed correctly if protocol is defined by full name +- [AVRO-2771](https://issues.apache.org/jira/browse/AVRO-2771): Allow having Error in a Record +- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation + +Python +- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation +- [AVRO-312](https://issues.apache.org/jira/browse/AVRO-312): Generate documentation for Python with Sphinx + +Rust +- [AVRO-3853](https://issues.apache.org/jira/browse/AVRO-3853): Support local-timestamp logical types for the Rust SDK +- [AVRO-3851](https://issues.apache.org/jira/browse/AVRO-3851): Validate default value for record fields and enums on parsing +- [AVRO-3847](https://issues.apache.org/jira/browse/AVRO-3847): Record field doesn't accept default value if field type is union and the type of default value is pre-defined name +- 
[AVRO-3846](https://issues.apache.org/jira/browse/AVRO-3846): Race condition can happen among serde tests +- [AVRO-3838](https://issues.apache.org/jira/browse/AVRO-3838): Replace regex crate with regex-lite +- [AVRO-3837](https://issues.apache.org/jira/browse/AVRO-3837): Disallow invalid namespaces for the Rust binding +- [AVRO-3835](https://issues.apache.org/jira/browse/AVRO-3835): Get rid of byteorder and zerocopy dependencies +- [AVRO-3830](https://issues.apache.org/jira/browse/AVRO-3830): Handle namespace properly if a name starts with dot +- [AVRO-3827](https://issues.apache.org/jira/browse/AVRO-3827): Disallow duplicate field names +- [AVRO-3787](https://issues.apache.org/jira/browse/AVRO-3787): Deserialization fails to use default if an enum in a record in a union is given an unknown symbol +- [AVRO-3786](https://issues.apache.org/jira/browse/AVRO-3786): Deserialization results in FindUnionVariant error if the writer and reader have the same symbol but at different positions +- + +In addition: +- Upgrade dependencies to latest versions, including CVE fixes. +- Testing and build improvements. +- Performance fixes, other bug fixes, better documentation and more. + +Known issues: ∅ + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.3 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.3/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.3 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.3 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.3 +* Rust: https://crates.io/crates/apache-avro/0.16.0 + +Thanks to everyone for contributing! 
diff --git a/doc/content/en/blog/releases/avro-1.2.0-released.md b/doc/content/en/blog/releases/avro-1.2.0-released.md new file mode 100755 index 00000000000..24fc57ad92e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.2.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.2.0" +linkTitle: "Avro 1.2.0" +date: 2009-10-15 +--- + + + +Apache Avro 1.2.0 is now available! diff --git a/doc/content/en/blog/releases/avro-1.3.0-released.md b/doc/content/en/blog/releases/avro-1.3.0-released.md new file mode 100755 index 00000000000..dc29d337dc5 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.0" +linkTitle: "Avro 1.3.0" +date: 2010-02-26 +--- + + + +Apache Avro 1.3.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.1-released.md b/doc/content/en/blog/releases/avro-1.3.1-released.md new file mode 100755 index 00000000000..f767d50dd49 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.1" +linkTitle: "Avro 1.3.1" +date: 2010-03-19 +--- + + + +Apache Avro 1.3.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.2-released.md b/doc/content/en/blog/releases/avro-1.3.2-released.md new file mode 100755 index 00000000000..3a0492df121 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.2" +linkTitle: "Avro 1.3.2" +date: 2010-03-31 +--- + + + +Apache Avro 1.3.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.3.3-released.md b/doc/content/en/blog/releases/avro-1.3.3-released.md new file mode 100755 index 00000000000..15eeabd4ecf --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.3.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.3.3" +linkTitle: "Avro 1.3.3" +date: 2010-07-07 +--- + + + +Apache Avro 1.3.3 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.4.0-released.md b/doc/content/en/blog/releases/avro-1.4.0-released.md new file mode 100755 index 00000000000..ca9df708ba5 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.4.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.4.0" +linkTitle: "Avro 1.4.0" +date: 2010-09-08 +--- + + + +Apache Avro 1.4.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.4.1-released.md b/doc/content/en/blog/releases/avro-1.4.1-released.md new file mode 100755 index 00000000000..b2ef0836f84 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.4.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.4.1" +linkTitle: "Avro 1.4.1" +date: 2010-10-13 +--- + + + +Apache Avro 1.4.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.0-released.md b/doc/content/en/blog/releases/avro-1.5.0-released.md new file mode 100755 index 00000000000..94daaccc724 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.0" +linkTitle: "Avro 1.5.0" +date: 2011-03-11 +--- + + + +Apache Avro 1.5.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.1-released.md b/doc/content/en/blog/releases/avro-1.5.1-released.md new file mode 100755 index 00000000000..24354286ad1 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.1" +linkTitle: "Avro 1.5.1" +date: 2011-05-06 +--- + + + +Apache Avro 1.5.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.2-released.md b/doc/content/en/blog/releases/avro-1.5.2-released.md new file mode 100755 index 00000000000..25fb5b7549c --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.2" +linkTitle: "Avro 1.5.2" +date: 2011-08-12 +--- + + + +Apache Avro 1.5.2 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.5.3-released.md b/doc/content/en/blog/releases/avro-1.5.3-released.md new file mode 100755 index 00000000000..5be5225a925 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.3" +linkTitle: "Avro 1.5.3" +date: 2011-08-29 +--- + + + +Apache Avro 1.5.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.5.4-released.md b/doc/content/en/blog/releases/avro-1.5.4-released.md new file mode 100755 index 00000000000..8eeab4f4b54 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.5.4-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.5.4" +linkTitle: "Avro 1.5.4" +date: 2011-09-12 +--- + + + +Apache Avro 1.5.4 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.0-released.md b/doc/content/en/blog/releases/avro-1.6.0-released.md new file mode 100755 index 00000000000..e131f4534f1 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.0" +linkTitle: "Avro 1.6.0" +date: 2011-11-02 +--- + + + +Apache Avro 1.6.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.1-released.md b/doc/content/en/blog/releases/avro-1.6.1-released.md new file mode 100755 index 00000000000..724b9b04003 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.1" +linkTitle: "Avro 1.6.1" +date: 2011-11-14 +--- + + + +Apache Avro 1.6.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.6.2-released.md b/doc/content/en/blog/releases/avro-1.6.2-released.md new file mode 100755 index 00000000000..15fae250b4e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.2" +linkTitle: "Avro 1.6.2" +date: 2012-02-14 +--- + + + +Apache Avro 1.6.2 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.6.3-released.md b/doc/content/en/blog/releases/avro-1.6.3-released.md new file mode 100755 index 00000000000..3029f9e4739 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.6.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.6.3" +linkTitle: "Avro 1.6.3" +date: 2012-03-19 +--- + + + +Apache Avro 1.6.3 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.0-released.md b/doc/content/en/blog/releases/avro-1.7.0-released.md new file mode 100755 index 00000000000..02e5c15156a --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.0" +linkTitle: "Avro 1.7.0" +date: 2012-06-11 +--- + + + +Apache Avro 1.7.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.1-released.md b/doc/content/en/blog/releases/avro-1.7.1-released.md new file mode 100755 index 00000000000..6ef9278dae3 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.1" +linkTitle: "Avro 1.7.1" +date: 2012-07-18 +--- + + + +Apache Avro 1.7.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.2-released.md b/doc/content/en/blog/releases/avro-1.7.2-released.md new file mode 100755 index 00000000000..94e5719ed0e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.2" +linkTitle: "Avro 1.7.2" +date: 2012-09-25 +--- + + + +Apache Avro 1.7.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.3-released.md b/doc/content/en/blog/releases/avro-1.7.3-released.md new file mode 100755 index 00000000000..63ff58392f8 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.3-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.3" +linkTitle: "Avro 1.7.3" +date: 2012-12-07 +--- + + + +Apache Avro 1.7.3 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.7.4-released.md b/doc/content/en/blog/releases/avro-1.7.4-released.md new file mode 100755 index 00000000000..a91a8ebc18b --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.4-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.4" +linkTitle: "Avro 1.7.4" +date: 2013-02-26 +--- + + + +Apache Avro 1.7.4 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.5-released.md b/doc/content/en/blog/releases/avro-1.7.5-released.md new file mode 100755 index 00000000000..44288ccf66e --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.5-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.5" +linkTitle: "Avro 1.7.5" +date: 2013-08-19 +--- + + + +Apache Avro 1.7.5 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.6-released.md b/doc/content/en/blog/releases/avro-1.7.6-released.md new file mode 100755 index 00000000000..fe93cd5c2ff --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.6-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.6" +linkTitle: "Avro 1.7.6" +date: 2014-01-22 +--- + + + +Apache Avro 1.7.6 has been released! diff --git a/doc/content/en/blog/releases/avro-1.7.7-released.md b/doc/content/en/blog/releases/avro-1.7.7-released.md new file mode 100755 index 00000000000..07a378ec000 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.7.7-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.7.7" +linkTitle: "Avro 1.7.7" +date: 2014-07-23 +--- + + + +Apache Avro 1.7.7 has been released! diff --git a/doc/content/en/blog/releases/avro-1.8.0-released.md b/doc/content/en/blog/releases/avro-1.8.0-released.md new file mode 100755 index 00000000000..9ca4a129d1f --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.0" +linkTitle: "Avro 1.8.0" +date: 2016-01-29 +--- + + + +Apache Avro 1.8.0 has been released! 
diff --git a/doc/content/en/blog/releases/avro-1.8.1-released.md b/doc/content/en/blog/releases/avro-1.8.1-released.md new file mode 100755 index 00000000000..ed20e60219d --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.1" +linkTitle: "Avro 1.8.1" +date: 2016-05-19 +--- + + + +Apache Avro 1.8.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.8.2-released.md b/doc/content/en/blog/releases/avro-1.8.2-released.md new file mode 100755 index 00000000000..07720e05053 --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.8.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.8.2" +linkTitle: "Avro 1.8.2" +date: 2017-05-20 +--- + + + +Apache Avro 1.8.2 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.0-released.md b/doc/content/en/blog/releases/avro-1.9.0-released.md new file mode 100755 index 00000000000..0833216066a --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.0-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.0" +linkTitle: "Avro 1.9.0" +date: 2019-05-14 +--- + + + +Apache Avro 1.9.0 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.1-released.md b/doc/content/en/blog/releases/avro-1.9.1-released.md new file mode 100755 index 00000000000..6ae614e5b3b --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.1-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.1" +linkTitle: "Avro 1.9.1" +date: 2019-09-02 +--- + + + +Apache Avro 1.9.1 has been released! diff --git a/doc/content/en/blog/releases/avro-1.9.2-released.md b/doc/content/en/blog/releases/avro-1.9.2-released.md new file mode 100755 index 00000000000..e4580fa42bd --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.9.2-released.md @@ -0,0 +1,28 @@ +--- +title: "Avro 1.9.2" +linkTitle: "Avro 1.9.2" +date: 2020-02-19 +--- + + + +Apache Avro 1.9.2 has been released! 
diff --git a/doc/content/en/blog/releases/avro-joins-apache.md b/doc/content/en/blog/releases/avro-joins-apache.md new file mode 100755 index 00000000000..dbc1872644d --- /dev/null +++ b/doc/content/en/blog/releases/avro-joins-apache.md @@ -0,0 +1,28 @@ +--- +title: "Avro joins Apache" +linkTitle: "Avro joins Apache" +date: 2009-04-10 +--- + + + +Avro has joined the Apache Software Foundation as a Hadoop subproject. diff --git a/doc/content/en/community/_index.md b/doc/content/en/community/_index.md new file mode 100644 index 00000000000..643c532589a --- /dev/null +++ b/doc/content/en/community/_index.md @@ -0,0 +1,35 @@ +--- +title: Community +menu: + main: + weight: 40 +aliases: +- /irc.html +- /issue_tracking.html +- /mailing_lists.html +- /mail/ +- /version_control.html +--- + + + + \ No newline at end of file diff --git a/doc/content/en/docs/++version++/Getting started (Java)/_index.md b/doc/content/en/docs/++version++/Getting started (Java)/_index.md new file mode 100644 index 00000000000..429e9837641 --- /dev/null +++ b/doc/content/en/docs/++version++/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avro™ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. + +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avro™ Download]({{< relref "/project/download" >}}) page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. For the examples in this guide, download avro-{{< avro_version >}}.jar and avro-tools-{{< avro_version >}}.jar. 
+ +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml +<dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>{{< avro_version >}}</version> +</dependency> +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml +<plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <version>{{< avro_version >}}</version> + <configuration> + <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory> + <outputDirectory>${project.basedir}/src/main/java/</outputDirectory> + </configuration> + <executions> + <execution> + <phase>generate-sources</phase> + <goals> + <goal>schema</goal> + </goals> + </execution> + </executions> +</plugin> +<plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>1.8</source> + <target>1.8</target> + </configuration> +</plugin> +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON or IDL (the latter requires an extra dependency). Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). 
The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. Unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema <schema file> <destination> +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema user.avsc . +``` + +Note that if you are using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory. + +### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. 
+ +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it is set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the data structure before it is written. + +Note that we do not set user1's favorite color. Since that field is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class); +DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. 
We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class); +DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. 
Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the faster coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. 
As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying on-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the user objects. + +### Creating users +First, we use a SchemaParser to read our schema definition and create a Schema object. + +```java +Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema(); +``` + +Using this schema, let's create some users. + +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. 
Since that field is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); +DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); +DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. 
+user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. 
+ +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/++version++/Getting started (Python)/_index.md b/doc/content/en/docs/++version++/Getting started (Python)/_index.md new file mode 100644 index 00000000000..44e3a8f37fd --- /dev/null +++ b/doc/content/en/docs/++version++/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avro™ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package, which now supports both Python 2 and 3. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. They are mostly API compatible, but there are a few minor differences (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available on PyPI. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avro™ Releases page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. 
Download and unzip avro-{{< avro_version >}}.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-{{< avro_version >}}.tar.gz +$ cd avro-{{< avro_version >}} +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From the root Avro directory, run the commands + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. 
For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. + +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{'favorite_color': None, 'favorite_number': 256, 'name': 'Alyssa'} +{'favorite_color': 'red', 'favorite_number': 7, 'name': 'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. 
+ +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs an avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries in the dict that do not correspond to a field are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analogous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. 
diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md new file mode 100644 index 00000000000..7d0121274a9 --- /dev/null +++ b/doc/content/en/docs/++version++/IDL Language/_index.md @@ -0,0 +1,511 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. 
For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml +<build> + <plugins> + <plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>idl</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> +</build> +``` + +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string? title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. 
The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. + +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. 
+ +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5, which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema (either defined or imported) +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. 
The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "specification#local_timestamp_ms" >}})) +* _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Suit defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array<t>`. For example, an array of strings is denoted `array<string>`, and a multidimensional array of Foo records would be `array<array<Foo>>`. + +#### Maps +Map types are written similarly to array types. A map that contains values of type t is written `map<t>`. 
As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or an imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + + union { string, null } optionalString4 = "something"; + string? optionalString5 = "something else"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. + +## Defining RPC Messages +The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values.
+ +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments and documentation +All Java-style comments are supported within an Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one may want to distinguish between identifiers and language keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files.
+ +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array<string> myStrings; +} +``` +This can be used to support Java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map<string> fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as a property name.
+ +## Complete Example +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl +```java +/* + * Header with license information. + */ +// Optional default namespace (if absent, the default namespace is the null namespace). +namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); + +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first if there's no default). + MD5? anotherNullableHash = null; + + array<long> arrayOfLongs; +} +``` + +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given.
Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` + +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released on 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015.
It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/main/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/++version++/MapReduce guide/_index.md b/doc/content/en/docs/++version++/MapReduce guide/_index.md new file mode 100644 index 00000000000..f262bc6e2a7 --- /dev/null +++ b/doc/content/en/docs/++version++/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. 
In particular, the POM includes the following dependencies: +```xml +<dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>{{< avro_version >}}</version> +</dependency> +<dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-mapred</artifactId> + <version>{{< avro_version >}}</version> +</dependency> +<dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>3.1.2</version> +</dependency> +``` +And the following plugin: +```xml +<plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <version>{{< avro_version >}}</version> + <executions> + <execution> + <phase>generate-sources</phase> + <goals> + <goal>schema</goal> + </goals> + <configuration> + <sourceDirectory>${project.basedir}/../</sourceDirectory> + <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory> + </configuration> + </execution> + </executions> +</plugin> +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avro™ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-{{< avro_version >}}.jar* and *avro-mapred-{{< avro_version >}}.jar*, as well as *avro-tools-{{< avro_version >}}.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce job that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples.
+ +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. 
+ AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for 
(IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. 
The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair ``. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-examples` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount. 
We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-{{< avro_version >}}.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema. 
+```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, but it will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as its key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper<AvroKey<User>, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey<User> key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable<Integer>` representing the counts for that color (they should all be 1 in this example) and adds up the counts.
+```java +public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, Pair<CharSequence, Integer>> { + @Override + public void reduce(CharSequence key, Iterable<Integer> values, + AvroCollector<Pair<CharSequence, Integer>> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair<CharSequence, Integer>(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before, we will not detail every difference between the APIs. As with the _Mapper_ change, _ColorCountReducer_ is now a subclass of _Reducer_ and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed.
+```java + public static class ColorCountReducer extends + Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { + + @Override + public void reduce(Text key, Iterable<IntWritable> values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users to input Avro and output non-Avro or vice versa. + +API documentation is available for both the `org.apache.avro.mapred` package (old MapReduce API, `org.apache.hadoop.mapred`) and the `org.apache.avro.mapreduce` package (new MapReduce API, `org.apache.hadoop.mapreduce`). Similarly to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/++version++/SASL profile/_index.md b/doc/content/en/docs/++version++/SASL profile/_index.md new file mode 100644 index 00000000000..a938310414d --- /dev/null +++ b/doc/content/en/docs/++version++/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile.
This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodified. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation successfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. 
Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server sends a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2245.txt)) is quite simple to implement.
In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server receives some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md new file mode 100755 index 00000000000..75eda7b7f62 --- /dev/null +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -0,0 +1,896 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types.
+ +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. 
The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). 
+* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match with one element of the union.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required). 
+* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this fixed (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, a 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names +Records, enums and fixed are named types. Each has a fullname that is composed of two parts: a name and a namespace, separated by a dot. Equality of names is defined on the fullname – it is an error to specify two different types with the same name. + +Record fields and enum symbols have names as well (but no namespace). Equality of field names and enum symbols is defined within their scope (the record/enum that defines them). It is an error to define multiple fields or enum symbols with the same name in a single type. Fields and enum symbols across scopes are never equal, so field names and enum symbols can be reused in a different type. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names. 
So the grammar for a namespace is: +``` + <empty> | <name>[(<dot><name>)*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified. 
For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. 
For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +Aliases are alternative names, and thus subject to the same uniqueness constraints as names. Aliases should be valid names, but this is not required: any string is accepted as an alias. When aliases are used "to map a writer's schema to the reader's" (see above), this allows schema evolution to correct illegal names in old schemata. + +## Fixing an invalid, but previously accepted, schema +Over time, rules and validations on schemas have changed. It is therefore possible that a schema used to work with an older version of Avro, but now fails to parse. + +This can have several reasons, as listed below. Each reason also describes a fix, which can be applied using [schema resolution]({{< ref "#schema-resolution" >}}): you fix the problems in the schema in a way that is compatible, and then you can use the new schema to read the old data. + +### Invalid names +Invalid names of types and fields can be corrected by renaming (using an [alias]({{< ref "#aliases" >}})). This works for simple names, namespaces and fullnames. + +This fix is twofold: first, you add the invalid name as an alias to the type/field. Then, you change the name to any valid name. + +### Invalid defaults +Default values are only used to fill in missing data when reading. Invalid defaults create invalid values in these cases. 
The fix is to correct the default values. + + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. 
a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToRawIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToRawIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToRawLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToRawLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. 
+For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose `a` field has value 27 (encoded as hex 36) and whose `b` field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by an int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. 
+ +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. 
+ +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. 
The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. 
For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Synchronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described with more detail below. 
+ +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block. 
+ +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. 
Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. 
+ +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. 
Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. 
+ +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. 
The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. 
+ +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects as the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have the same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and the writer's schema does not have a field with the same name, then the reader should use the default value from its field.
+ * if the reader's record schema has a field with no default value, and the writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +If the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. If none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning.
At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that lets us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writer using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so.
Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. 
To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chance of a collision is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves.
+ +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. 
+ +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10<sup>-scale</sup>_. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most _floor(log<sub>10</sub>(2<sup>8 × n - 1</sup> - 1))_ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +**alternative** + +As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type restricted to Avro _bytes_. + +_Currently only available in Java and Rust_.
+ +```json +{ + "type": "bytes", + "logicalType": "big-decimal" +} +``` +Here, because the scale is stored in the value itself, it needs more bytes than the preceding `decimal` type, but it allows more flexibility. + +### UUID + +The `uuid` logical type represents a randomly generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string` or `fixed` of length 16. Both the string and `fixed` byte layout have to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt). + +The following schemas represent a uuid: + +```json +{ + "type": "string", + "logicalType": "uuid" +} +``` + +```json +{ + "type": "fixed", + "size": 16, + "logicalType": "uuid" +} +``` + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) {#time_ms} +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamps {#timestamps} + +The `timestamp-{millis,micros,nanos}` logical type represents an instant on the global timeline, independent of a particular time zone or calendar.
Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +- `timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000. +- `timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000. +- `timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from the unix epoch, 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is first shifted to UTC 2000-01-01T10:00:00 and that is then converted to Avro long 946720800000 (milliseconds) and written. + +### Local Timestamps {#local_timestamp} + +The `local-timestamp-{millis,micros,nanos}` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local. + +- `local-timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. +- `local-timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. +- `local-timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds, from 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is converted to Avro long 946728000000 (milliseconds) and then written. 
+ +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates an Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/++version++/_index.md b/doc/content/en/docs/++version++/_index.md new file mode 100755 index 00000000000..13b815d86ee --- /dev/null +++ b/doc/content/en/docs/++version++/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avro™ ++version++ Documentation" +linkTitle: "++version++" +type: docs +weight: 10 +--- + + + +## Introduction + +Apache Avro™ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation is an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing.
+ +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since client and server both have the other's full schema, correspondence between same-named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON. This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names.
+ + diff --git a/doc/content/en/docs/++version++/api-c++.md b/doc/content/en/docs/++version++/api-c++.md new file mode 100644 index 00000000000..4382750a46a --- /dev/null +++ b/doc/content/en/docs/++version++/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/++version++/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-c.md b/doc/content/en/docs/++version++/api-c.md new file mode 100644 index 00000000000..79a5209e526 --- /dev/null +++ b/doc/content/en/docs/++version++/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/++version++/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-csharp.md b/doc/content/en/docs/++version++/api-csharp.md new file mode 100644 index 00000000000..cfad0d1e343 --- /dev/null +++ b/doc/content/en/docs/++version++/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/++version++/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/++version++/api-java.md b/doc/content/en/docs/++version++/api-java.md new file mode 100644 index 00000000000..12d743567df --- /dev/null +++ b/doc/content/en/docs/++version++/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/++version++/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/++version++/api-py.md b/doc/content/en/docs/++version++/api-py.md new file mode 100644 index 00000000000..fb4f4ba13b5 --- /dev/null +++ b/doc/content/en/docs/++version++/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/++version++/api/py/html/ +--- + + + +The Python API documentation can be found here. 
diff --git a/doc/content/en/docs/_index.md b/doc/content/en/docs/_index.md new file mode 100755 index 00000000000..541db8d6b76 --- /dev/null +++ b/doc/content/en/docs/_index.md @@ -0,0 +1,58 @@ + +--- +title: "Documentation" +linkTitle: "Documentation" +--- + + + +## Introduction + +Apache Avro™ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation is an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. + +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since client and server both have the other's full schema, correspondence between same-named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON. This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc.
Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/project/Articles/_index.md b/doc/content/en/project/Articles/_index.md new file mode 100755 index 00000000000..e30d9ef1d07 --- /dev/null +++ b/doc/content/en/project/Articles/_index.md @@ -0,0 +1,84 @@ +--- +title: "Articles" +linkTitle: "Articles" +weight: 4 +--- + + +** ** + +**Guide to Apache Avro** +Feb 19, 2023, by baeldung. + +https://www.baeldung.com/java-apache-avro + +** ** + +**Apache Avro IDL Schema Support**, +Apr 11, 2022, by Oscar Westra van Holthe - Kind. + +https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support + +** ** + +**Generate random JSON data from an AVRO schema using Java**, +Jan 24, 2022, by Maarten Smeets. + +https://technology.amis.nl/soa/kafka/generate-random-json-data-from-an-avro-schema-using-java/ + +** ** + +**A Gentle (and Practical) Introduction to Apache Avro**, +Dec 22, 2020, by Anton Rodriguez. + +https://dzone.com/articles/gentle-and-practical-introduction-to-apache-avro-part-1 + +** ** + +**Apache Avro – A data serialization system** +Dec 09, 2018, by Dennis Vriend. + +https://binx.io/2018/12/09/apache-avro/ + +** ** + +**Introduction to Apache Avro** +Mar 12, 2016, by Bartosz Konieczny.
+ +https://www.waitingforcode.com/apache-avro/introduction-to-apache-avro/read + +** ** + +**Reading and Writing Avro Files from the Command Line**, +Mar 17, 2013, by Michael G. Noll. + +https://www.michael-noll.com/blog/2013/03/17/reading-and-writing-avro-files-from-the-command-line/ + +** ** + +**Using Apache Avro** +Jan 25, 2011, by Boris Lublinsky. + +https://www.infoq.com/articles/ApacheAvro/ + + + diff --git a/doc/content/en/project/Committer onboarding guide/_index.md b/doc/content/en/project/Committer onboarding guide/_index.md new file mode 100755 index 00000000000..eb865a42ae2 --- /dev/null +++ b/doc/content/en/project/Committer onboarding guide/_index.md @@ -0,0 +1,48 @@ +--- +title: "Committer onboarding guide" +linkTitle: "Committer onboarding guide" +weight: 7 +--- + + +** ** +For you, the new committer: + +1. File your ICLA and send it to secretary@apache.org +2. Log in to https://whimsy.apache.org; that will confirm a working ASF account +3. You can edit email routing for the account, and add other emails that you own +4. You can directly edit mailing list subscriptions (for example, you might switch them to your ASF account - you can still post from any of your registered emails) +5. Link your GitHub account with your ASF account at https://gitbox.apache.org/ (once you see the big green "Merge" button on pull requests, this is working) +6. Read the ASF new committer guide: https://www.apache.org/dev/new-committers-guide.html + +** ** + +A committer in JIRA can add a new contributor by following these steps: + +1. Log in to JIRA with your committer credentials. +2. Navigate to the project where you want to add the new contributor. +3. Click on the "People" tab at the top of the page. +4. Click on the "Add People" button. +5. Enter the email address of the new contributor in the "Email Address" field. +6. Select the appropriate role for the new contributor from the "Role" dropdown menu. +7.
Click the "Add" button to add the new contributor to the project. +8. An email will be sent to the new contributor asking them to accept the invitation to join the project. diff --git a/doc/content/en/project/Contributors onboarding guide/_index.md b/doc/content/en/project/Contributors onboarding guide/_index.md new file mode 100644 index 00000000000..e53b136c681 --- /dev/null +++ b/doc/content/en/project/Contributors onboarding guide/_index.md @@ -0,0 +1,39 @@ +--- +title: "Contributor onboarding guide" +linkTitle: "Contributor onboarding guide" +weight: 8 +--- + + + + +1. Familiarize yourself with Apache Avro: Before you start contributing to Apache Avro, it's essential to have a good understanding of what Apache Avro is and how it works. You can start by reading the Apache Avro documentation to get an overview of the project's features, use cases, and architecture. + +2. Join the Apache Avro community: Join the Apache Avro mailing lists, IRC channels, and forums to interact with other contributors and users. You can ask questions, discuss ideas, and get feedback on your contributions from experienced contributors. +3. Set up your development environment: To contribute to Apache Avro, you need to set up your development environment. The Apache Avro project uses Git for version control, and Apache Maven for building. You can follow the instructions in the Apache Avro documentation to set up your environment. +4. Choose a contribution: Apache Avro is an open-source project, and there are always new features, bug fixes, and improvements that can be made. You can choose from a wide range of contributions, from documentation updates to code changes. +5. Review existing issues and pull requests: Before you start working on a contribution, it's important to review existing issues and pull requests to avoid duplicating efforts. You can use the Apache Avro issue tracker to search for issues and pull requests related to your contribution. +6. 
Create a new issue or pull request: If you can't find an existing issue or pull request related to your contribution, you can create a new one. Make sure to provide detailed information about your contribution, including a description of the problem, proposed solution, and any relevant code changes. +7. Work on your contribution: Once you have a clear understanding of the contribution you want to make, you can start working on it. Make sure to follow the Apache Avro coding guidelines and best practices to ensure that your code is of high quality. +8. Submit your contribution: When you're ready to submit your contribution, create a pull request in the Apache Avro GitHub repository. Make sure to include a detailed description of your changes, and any relevant documentation or test cases. +9. Participate in reviews: Once you've submitted your contribution, it will be reviewed by other contributors. You may need to make additional changes based on their feedback before your contribution is accepted. +10. Celebrate your contribution: Once your contribution has been accepted, celebrate your achievement! You've helped improve Apache Avro and contributed to the open-source community.
\ No newline at end of file diff --git a/doc/content/en/project/Credits/_index.md b/doc/content/en/project/Credits/_index.md new file mode 100644 index 00000000000..4b3f892115b --- /dev/null +++ b/doc/content/en/project/Credits/_index.md @@ -0,0 +1,67 @@ +--- +title: "Credits" +linkTitle: "Credits" +weight: 2 +aliases: +- /credits.html +--- + + + +## Apache Avro credits + +### Committers + +Apache Avro's active committers are: + +| **username** | **name** | **organization** | **roles** | **timezone** | +|:-------------|:--------------------|:------------------------|:----------------------:|:------------:| +| blue | Ryan Blue | Netflix | spec, java, ruby | -8 | +| brucem | Bruce Mitchener | Army of Bruce | c | +7 | +| busbey | Sean Busbey | Cloudera | java, ruby | -6 | +| cutting | Doug Cutting | Cloudera | spec, java | -8 | +| dcreager | Douglas Creager | RedJack, LLC | c | -5 | +| hammer | Jeff Hammerbacher | Cloudera | python | -8 | +| iemejia | Ismaël Mejía | Talend | java, docker | +1 | +| kojiromike | Michael A. Smith | Independent | python, docker | -5 | +| massie | Matt Massie | UC Berkeley | c | -8 | +| martinkl | Martin Kleppmann | University of Cambridge | c, ruby | 0 | +| mgrigorov | Martin Grigorov | Huawei | rust | +2 | +| nielsbasjes | Niels Basjes | Bol.com | java, docker | +1 | +| philz | Philip Zeyliger | Cloudera | java | -8 | +| rskraba | Ryan Skraba | Talend | java, docker | +1 | +| sbanacho | Scott Banachowski | Microsoft | c++ | -8 | +| scottcarey | Scott Carey | RichRelevance | java | -8 | +| sekikn | Kengo Seki | NTT Data | perl, interoperability | +9 | +| sharadag | Sharad Agarwal | InMobi | python | +5.5 | +| thiru | Thiruvalluvan M. G.
| VertiCloud | java | +5.5 | +| tjwp | Tim Perkins | Shopify | ruby | -5 | +| tomwhite | Tom White | Cloudera | java | 0 | +------------- + +### Contributors +A list of Avro contributors and their contributions is available from [Jira](http://s.apache.org/AvroFixed) + +### Emeriti +Contributors who are no longer active on Avro are: + +* None diff --git a/doc/content/en/project/Donate/_index.md b/doc/content/en/project/Donate/_index.md new file mode 100755 index 00000000000..c87561fefc1 --- /dev/null +++ b/doc/content/en/project/Donate/_index.md @@ -0,0 +1,29 @@ +--- +title: "Donate" +linkTitle: "Donate" +weight: 13 +manualLink: https://www.apache.org/foundation/sponsorship.html +--- + + + +If you would like to donate please see the Apache Software Foundation [donation program](https://www.apache.org/foundation/sponsorship.html) diff --git a/doc/content/en/project/Download/_index.md b/doc/content/en/project/Download/_index.md new file mode 100755 index 00000000000..eff8aa3e621 --- /dev/null +++ b/doc/content/en/project/Download/_index.md @@ -0,0 +1,81 @@ +--- +title: "Download" +linkTitle: "Download" +weight: 1 +--- + + + +## Download +Releases may be downloaded from Apache mirrors: [Download](https://www.apache.org/dyn/closer.cgi/avro/) + +The latest release is: Avro {{< avro_version >}} (3.4M, source, [pgp](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.asc), [sha512](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.sha512)) + +* C#: https://www.nuget.org/packages/Apache.Avro/{{< avro_version >}} +* Java: from Maven Central, +* Javascript: https://www.npmjs.com/package/avro-js/v/{{< avro_version >}} +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/{{< avro_version >}} +* Ruby: https://rubygems.org/gems/avro/versions/{{< avro_version >}} + + +## Release Notes +Release notes for Avro releases are available in 
[Jira](https://issues.apache.org/jira/browse/AVRO?report=com.atlassian.jira.plugin.system.project:changelog-panel#selectedTab=com.atlassian.jira.plugin.system.project%3Achangelog-panel) + +## Verifying a release +It is essential that you verify the integrity of the downloaded files using the PGP signatures or SHA512 checksums. Please read [How to verify downloaded files](https://www.apache.org/info/verification.html) for more information on why you should verify our releases. + +The PGP signatures can be verified using PGP or GPG. First download the [KEYS](https://downloads.apache.org/avro/KEYS) file as well as the .asc signature files for the relevant release packages. Make sure you get these files from the main distribution directory, rather than from a mirror. Then verify the signatures using: + +```shell +% gpg --import KEYS +% gpg --verify downloaded_file.asc downloaded_file +``` + +or + +```shell +% pgpk -a KEYS +% pgpv downloaded_file.asc +``` + +or + +```shell +% pgp -ka KEYS +% pgp downloaded_file.asc +``` +Alternatively, you can verify the hash on the file. + +Hashes can be calculated using GPG: +```shell +% gpg --print-md SHA256 downloaded_file +``` +The output should be compared with the contents of the SHA256 file. Similarly for other hashes (SHA512, SHA1, MD5 etc) which may be provided. + +Windows 7 and later systems should all now have certUtil: +```shell +% certUtil -hashfile pathToFileToCheck +``` +HashAlgorithm choices: _MD2 MD4 MD5 SHA1 SHA256 SHA384 SHA512_ + +Unix-like systems (and macOS) will have a utility called _md5_, _md5sum_ or _shasum_.
diff --git a/doc/content/en/project/Events/_index.md b/doc/content/en/project/Events/_index.md new file mode 100755 index 00000000000..7d8646283f0 --- /dev/null +++ b/doc/content/en/project/Events/_index.md @@ -0,0 +1,28 @@ +--- +title: "Events" +linkTitle: "Events" +weight: 12 +--- + + + +Apache Avro members often participate in events organized by the [Apache Software Foundation](https://www.apache.org/events/current-event.html) diff --git a/doc/content/en/project/How to contribute/_index.md b/doc/content/en/project/How to contribute/_index.md new file mode 100755 index 00000000000..6514d7c3627 --- /dev/null +++ b/doc/content/en/project/How to contribute/_index.md @@ -0,0 +1,388 @@ +--- +title: "How to contribute" +linkTitle: "How to contribute" +weight: 5 +--- + + + +## Getting the source code + +First of all, you need the Avro source code. + +The easiest way is to clone or fork the GitHub mirror: + +```shell +git clone https://github.com/apache/avro.git -o github +``` + +## Making Changes + +Before you start, file an issue in [JIRA](https://issues.apache.org/jira/browse/AVRO) or discuss your ideas on the [Avro developer mailing list](http://avro.apache.org/mailing_lists.html). Describe your proposed changes and check that they fit in with what others are doing and have planned for the project. Be patient, it may take folks a while to understand your requirements. + +Modify the source code and add some (very) nice features using your favorite IDE. + +But take care about the following points + +**All Languages** +- Contributions should pass existing unit tests. +- Contributions should document public facing APIs. +- Contributions should add new tests to demonstrate bug fixes or test new features. + +**Java** + +- All public classes and methods should have informative [Javadoc comments](https://www.oracle.com/fr/technical-resources/articles/java/javadoc-tool.html). +- Do not use @author tags. 
+- Java code should be formatted according to [Oracle's conventions](https://www.oracle.com/java/technologies/javase/codeconventions-introduction.html), with one exception: + - Indent two spaces per level, not four. +- [JUnit](http://www.junit.org/) is our test framework: +- You must implement a class whose class name starts with Test. +- Define methods within your class and tag them with the @Test annotation. Call JUnit's many assert methods to verify conditions; these methods will be executed when you run mvn test. +- By default, do not let tests write any temporary files to /tmp. Instead, the tests should write to the location specified by the test.dir system property. +- Place your class in the src/test/java/ tree. +- You can run all the unit tests with the command mvn test, or you can run a specific unit test with the command mvn -Dtest= test (for example mvn -Dtest=TestFoo test) + + +## Code Style (Autoformatting) + +For Java code we use [Spotless](https://github.com/diffplug/spotless/) to format the code to comply with Avro's code style conventions (see above). Automatic formatting relies on [Avro's Eclipse JDT formatter definition](https://github.com/apache/avro/blob/main/lang/java/eclipse-java-formatter.xml). You can use the same definition to auto format from Eclipse or from IntelliJ configuring the Eclipse formatter plugin. + +If you use maven code styles issues are checked at the compile phase. If your code breaks because of bad formatting, you can format it automatically by running the command: +```shell +mvn spotless:apply +``` + +## Unit Tests + +Please make sure that all unit tests succeed before constructing your patch and that no new compiler warnings are introduced by your patch. Each language has its own directory and test process. + +
Java + +```shell +cd avro-trunk/lang/java +mvn clean test +``` +
+ +
Python + +```shell +cd avro-trunk/lang/py +./setup.py build test +``` +
+ +
Rust + +```shell +cd avro-trunk/lang/rust +./build.sh clean test +``` +
+ +
C# + +```shell +cd avro-trunk/lang/csharp +./build.sh clean test +``` +
+ +
C + +```shell +cd avro-trunk/lang/c +./build.sh clean +./build.sh test +``` +
+ +
C++ + +```shell +cd avro-trunk/lang/c++ +./build.sh clean test +``` +
+ +
Ruby + +```shell +cd avro-trunk/lang/ruby +gem install echoe +rake clean test +``` +
+ +
PHP + +```shell +cd avro-trunk/lang/php +./build.sh clean +./build.sh test +``` +
+ + +## Contributing your code + +Contribution can be made directly via github with a Pull Request, or via a patch. + +**Via Github** + +Method is to create a [pull request](https://help.github.com/articles/using-pull-requests/). + +On your fork, create a branch named with JIRA (avro-1234_fixNpe for example) +On source, go to it +```shell +git pull +git switch avro-1234_fixNpe +``` + +code your changes (following preceding recommendations) + +check and add updated sources +```shell +git status + +# Add any new or changed files with: +git add src/.../MyNewClass.java +git add src/.../TestMyNewClass.java +``` + +Finally, create a commit with your changes and a good log message, and push it: +```shell +git commit -m "AVRO-1234: Fix NPE by adding check to ..." +git push +``` +On your github fork site, a button will propose you to build the Pull Request. +Click on it, fill Conversation form, and create it. +Link this PR to the corresponding JIRA ticket (on JIRA ticket, add PR to "Issue Links" chapter, and add label 'pull-request-available' to it . + + + +## Jira Guidelines + +Please comment on issues in [Jira](https://issues.apache.org/jira/projects/AVRO/issues), making your concerns known. Please also vote for issues that are a high priority for you. + +Please refrain from editing descriptions and comments if possible, as edits spam the mailing list and clutter Jira's "All" display, which is otherwise very useful. Instead, preview descriptions and comments using the preview button (on the right) before posting them. Keep descriptions brief and save more elaborate proposals for comments, since descriptions are included in Jira's automatically sent messages. If you change your mind, note this in a new comment, rather than editing an older comment. The issue should preserve this history of the discussion. + +## Stay involved + +Contributors should join the Avro mailing lists. 
In particular, the commit list (to see changes as they are made), the dev list (to join discussions of changes) and the user list (to help others). + +## Workflow + +Building and running the site locally requires a recent extended version of Hugo. Install [Hugo](https://gohugo.io/installation/) for your environment. Once you've made your working copy of the site repo, from the repo root folder, run: + +```shell +hugo server --navigateToChanged +``` +Edit .md and .html files in content/ folder + +Once satisfied with the changes, commit them: +```shell +git commit -a +``` +Generate the HTML file stop hugo server --navigateToChanged (with Ctrl+C) and run +```shell +hugo +``` +This will generate the HTMLs in public/ folder and this is actually what is being deployed + +Add the modified HTML files to Git + +```shell +git add . +git rm offline-search-index.<>.json +git commit -a +git push +``` +This way even when the PR modifies a lot of files we can review only the first commit, the meaningful one, with the modified files in content/ folder + + +## Running a container locally +You can also run avro-website inside a Docker container, the container runs with a volume bound to the avro-website folder. This approach doesn't require you to install any dependencies other than Docker Desktop on Windows and Mac, and Docker Compose on Linux. + +Build the docker image + +```shell +docker-compose build +``` +Run the built image + ```shell +docker-compose up +``` +NOTE: You can run both commands at once with docker-compose up --build. + +Verify that the service is working. + +Open your web browser and type http://localhost:1313 in your navigation bar, This opens a local instance of the docsy-example homepage. You can now make changes to the docsy example and those changes will immediately show up in your browser after you save. + +**Cleanup** + +To stop Docker Compose, on your terminal window, press Ctrl + C. 
+ +To remove the produced images run: + ```shell +docker-compose rm +``` + +## Troubleshooting +As you run the website locally, you may run into the following error: + ```shell +➜ hugo server + +INFO 2021/01/21 21:07:55 Using config file: +Building sites … INFO 2021/01/21 21:07:55 syncing static files to / +Built in 288 ms +Error: Error building site: TOCSS: failed to transform "scss/main.scss" (text/x-scss): resource "scss/scss/main.scss_9fadf33d895a46083cdd64396b57ef68" not found in file cache + ``` +This error occurs if you have not installed the extended version of Hugo. See our user guide for instructions on how to install Hugo. + +## Edit content +The website content is in content/en folder. It contains .md (Markdown) and .html (HTML) files. + +**Layouts** + +To change the layout of any page edit layouts//**.html. If there is no layout for a given page at that location then copy the one provided by the theme and edit it: + ```shell + cp themes/docsy/layouts/ layouts/ + ``` +**Avro version** + +When a new version of Apache Avro is released: + +Change the value of params.avroversion in config.toml +Add a new entry to the Releases pages in the Blog section, for example: + ```shell +cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md + ``` +**API documentation for C/C++/C# modules** + +The API documentation for C/C++/C# is built by their respective build.sh dist implementations. The final HTML should be copied to the external folder, for example: + ```shell +cp ../avro/build/avro-doc-1.12.0-SNAPSHOT/api/c/* content/en/docs/external/c/ + ``` + +## JIRA conventions + +Issue types: JIRA issues are categorized into different types such as bugs, improvements, new features, etc. Each issue type has a unique icon and a set of fields that are specific to that type. + +Workflow: JIRA issues follow a predefined workflow that defines the steps that an issue goes through from creation to resolution.
Each step in the workflow can have its own set of conditions and actions. + +Priority: JIRA allows users to set priorities for issues to help determine the order in which they should be addressed. The priority can be set to one of five levels: Blocker, Critical, Major, Minor, and Trivial. Blocker is the highest priority and Trivial is the lowest priority. + +Labels: Labels are used to tag issues with keywords or phrases that can help with searching and filtering. + +Components: Components are used to group related issues together. For example, a software project might have components for the user interface, database, and networking. + +## See Also + +- [Apache contributor documentation](http://www.apache.org/dev/contributors.html) +- [Apache voting documentation](http://www.apache.org/foundation/voting.html) + diff --git a/doc/content/en/project/License/_index.md b/doc/content/en/project/License/_index.md new file mode 100755 index 00000000000..1840ccfc0aa --- /dev/null +++ b/doc/content/en/project/License/_index.md @@ -0,0 +1,29 @@ +--- +title: "License" +linkTitle: "License" +weight: 11 +manualLink: https://www.apache.org/licenses/ +--- + + + +Apache Avro project is licensed under [Apache Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0) diff --git a/doc/content/en/project/Papers/_index.md b/doc/content/en/project/Papers/_index.md new file mode 100755 index 00000000000..beaace16894 --- /dev/null +++ b/doc/content/en/project/Papers/_index.md @@ -0,0 +1,73 @@ +--- +title: "Papers" +linkTitle: "Papers" +weight: 3 +--- + + +** ** + +**A Benchmark of JSON-compatible Binary Serialization Specifications** +Jan 9 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.03051 + +** ** + +**A Survey of JSON-compatible Binary Serialization Specifications** +Jan 6 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.02089 + +** ** + +**Putting Avro into Hive** +Apr 2017, by S. 
Sreekanth, A Sai Ram Pramodhini, Ch S Likita, Chiluka Manisha. + +https://journals.pen2print.org/index.php/ijr/article/view/7377/0 + + +** ** + +**Benchmarking Performance of Data Serialization and RPC Frameworks in Microservices Architecture: gRPC vs. Apache Thrift vs. Apache Avro** +Oct 27 2016, by Nguyen, Thuy. + +https://aaltodoc.aalto.fi/handle/123456789/23386 + +** ** + +**Apache Avro** +Sep 30 2016, by Deepak Vohra. + +https://link.springer.com/chapter/10.1007/978-1-4842-2199-0_7 + +** ** + + +**Object serialization vs relational data modelling in Apache Cassandra: a performance evaluation** +Apr 2015, by Valdemar Johansen. + +https://www.diva-portal.org/smash/get/diva2:839521/FULLTEXT02.pdf + + + + diff --git a/doc/content/en/project/Privacy policy/_index.md b/doc/content/en/project/Privacy policy/_index.md new file mode 100755 index 00000000000..0be9694d7d4 --- /dev/null +++ b/doc/content/en/project/Privacy policy/_index.md @@ -0,0 +1,29 @@ +--- +title: "Privacy policy" +linkTitle: "Privacy policy" +weight: 9 +manualLink: https://privacy.apache.org/policies/privacy-policy-public.html +--- + + + +Apache Avro project shares the same privacy policy as the [Apache Software Foundation](https://privacy.apache.org/policies/privacy-policy-public.html) diff --git a/doc/content/en/project/Security/_index.md b/doc/content/en/project/Security/_index.md new file mode 100755 index 00000000000..baa55da933f --- /dev/null +++ b/doc/content/en/project/Security/_index.md @@ -0,0 +1,29 @@ +--- +title: "Security" +linkTitle: "Security" +weight: 10 +manualLink: https://www.apache.org/security/ +--- + + + +Apache Avro project shares the same security policy as the [Apache Software Foundation](https://www.apache.org/security/) diff --git a/doc/content/en/project/Thanks/_index.md b/doc/content/en/project/Thanks/_index.md new file mode 100755 index 00000000000..b2ae7dff925 --- /dev/null +++ b/doc/content/en/project/Thanks/_index.md @@ -0,0 +1,29 @@ +--- +title: "Thanks" +linkTitle: 
"Thanks" +weight: 14 +manualLink: https://www.apache.org/foundation/thanks.html +--- + + + +Apache Avro project could not exist without the continued generous support from the community! We would like to take this opportunity to thank the ASF [Sponsors](https://www.apache.org/foundation/thanks.html). diff --git a/doc/content/en/project/_index.md b/doc/content/en/project/_index.md new file mode 100755 index 00000000000..19a78e4205d --- /dev/null +++ b/doc/content/en/project/_index.md @@ -0,0 +1,35 @@ +--- +title: "Project" +linkTitle: "Project" +weight: 1 +layout: project +menu: + main: + weight: 1 +aliases: +- /linkmap.html + +--- + + + +Apache Avro project is a member of the Apache Software Foundation! diff --git a/doc/content/en/project/pmc onboarding guide/_index.md b/doc/content/en/project/pmc onboarding guide/_index.md new file mode 100644 index 00000000000..22d7545eda8 --- /dev/null +++ b/doc/content/en/project/pmc onboarding guide/_index.md @@ -0,0 +1,34 @@ +--- +title: "PMC onboarding guide" +linkTitle: "PMC onboarding guide" +weight: 6 +--- + + + +1. Use https://whimsy.apache.org you can check that you got added to the PMC list properly +2. Validate you are in the PMC group in JIRA and the Confluence Wiki +3. Subscribe to private@avro.apache.org; you can use whimsy to do this for whatever email account you want, or send mail from that mail address to private-subscribe@ +4. You should have access also to https://reporter.apache.org which seeds our board reports +5. You can now access and read the private list archive (for linking to vote threads, etc) at https://lists.apache.org/list.html?private@avro.apache.org +6. Review the ASF PMC guides. There are a few, but you should re-read what the responsibilities are. +7. 
The PMC keeps a set of valuable resources in https://svn.apache.org/repos/private/pmc \ No newline at end of file diff --git a/doc/content/en/search.md b/doc/content/en/search.md new file mode 100644 index 00000000000..5ac34d9ea48 --- /dev/null +++ b/doc/content/en/search.md @@ -0,0 +1,25 @@ +--- +title: Search Results +layout: search +--- + + diff --git a/doc/docker-compose.yaml b/doc/docker-compose.yaml new file mode 100644 index 00000000000..833d8839a0b --- /dev/null +++ b/doc/docker-compose.yaml @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +version: "3.3" + +services: + + site: + image: docsy/docsy-example + build: + context: . + command: server + ports: + - "1313:1313" + volumes: + - .:/src diff --git a/doc/examples/example.py b/doc/examples/example.py index 7b88c1cc195..f81bbe67cc4 100644 --- a/doc/examples/example.py +++ b/doc/examples/example.py @@ -16,18 +16,28 @@ # specific language governing permissions and limitations # under the License. 
# +from pathlib import Path + import avro.schema from avro.datafile import DataFileReader, DataFileWriter from avro.io import DatumReader, DatumWriter -schema = avro.schema.parse(open("user.avsc").read()) +# read in the schema file +schema_text = Path("user.avsc").read_text() +# then parse it +schema = avro.schema.parse(schema_text) -writer = DataFileWriter(open("/tmp/users.avro", "w"), DatumWriter(), schema) -writer.append({"name": "Alyssa", "favorite_number": 256, "WTF": 2}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -writer.close() +# create a DataFileWriter to write data to a file +users_file = Path("/tmp/users.avro") +with users_file.open("wb") as users_fh, DataFileWriter( + users_fh, DatumWriter(), schema +) as writer: + writer.append({"name": "Alyssa", "favorite_number": 256}) + writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -reader = DataFileReader(open("/tmp/users.avro", "r"), DatumReader()) -for user in reader: - print user -reader.close() +# create a DataFileReader to read data from a file +with users_file.open("rb") as users_fh, DataFileReader( + users_fh, DatumReader() +) as reader: + for user in reader: + print(user) diff --git a/doc/examples/java-example/pom.xml b/doc/examples/java-example/pom.xml index d741dfe7a4f..66502ed0b24 100644 --- a/doc/examples/java-example/pom.xml +++ b/doc/examples/java-example/pom.xml @@ -26,6 +26,7 @@ java-example https://maven.apache.org + 1.11.3 UTF-8 @@ -38,7 +39,7 @@ org.apache.avro avro - 1.10.2 + ${avro.version} @@ -48,36 +49,27 @@ maven-compiler-plugin 3.8.1 - 1.8 - 1.8 + 11 + 11 org.apache.avro avro-maven-plugin - 1.10.2 + ${avro.version} + + ${project.basedir}/../ + ${project.basedir}/src/main/java/ + generate-sources schema - - ${project.basedir}/../ - ${project.basedir}/src/main/java/ - - - org.apache.maven.plugins - maven-plugin - 1.10.2 - - 1.8 - 1.8 - - @@ -92,7 +84,7 @@ org.apache.avro avro-maven-plugin - [1.10.2,) + [${avro.version},) 
schema diff --git a/doc/examples/mr-example/pom.xml b/doc/examples/mr-example/pom.xml index 2f64b35ec8e..be6b689b07c 100644 --- a/doc/examples/mr-example/pom.xml +++ b/doc/examples/mr-example/pom.xml @@ -28,6 +28,7 @@ mr-example + 1.11.3 UTF-8 @@ -38,14 +39,14 @@ maven-compiler-plugin 3.8.1 - 1.8 - 1.8 + 11 + 11 org.apache.avro avro-maven-plugin - 1.10.0 + ${avro.version} generate-sources @@ -73,7 +74,7 @@ org.apache.avro avro-maven-plugin - [1.10.0,) + [${avro.version},) schema @@ -94,12 +95,12 @@ org.apache.avro avro - 1.10.2 + ${avro.version} org.apache.avro avro-mapred - 1.10.2 + ${avro.version} org.apache.hadoop diff --git a/doc/layouts/404.html b/doc/layouts/404.html new file mode 100644 index 00000000000..4d5d5158a77 --- /dev/null +++ b/doc/layouts/404.html @@ -0,0 +1,29 @@ + + +{{ define "main"}} +
+
+

Not found

+

Oops! This page doesn't exist. Try going back to our home page.

+
+
+{{ end }} diff --git a/doc/layouts/partials/favicons.html b/doc/layouts/partials/favicons.html new file mode 100644 index 00000000000..7ff1b9f01c1 --- /dev/null +++ b/doc/layouts/partials/favicons.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + diff --git a/doc/layouts/partials/footer.html b/doc/layouts/partials/footer.html new file mode 100644 index 00000000000..0638c0074de --- /dev/null +++ b/doc/layouts/partials/footer.html @@ -0,0 +1,63 @@ + + +{{ $links := .Site.Params.links }} +
+
+
+
+ {{ with $links }} + {{ with index . "user"}} + {{ template "footer-links-block" . }} + {{ end }} + {{ end }} +
+
+ {{ with $links }} + {{ with index . "developer"}} + {{ template "footer-links-block" . }} + {{ end }} + {{ end }} +
+
+ {{ with .Site.Params }}© {{ now.Year}} {{ .copyright }} {{ T "footer_all_rights_reserved" }}{{ end }} + {{ if not .Site.Params.ui.footer_about_disable }} + {{ with .Site.GetPage "about" }}

{{ .Title }}

{{ end }} + {{ end }} +

Apache Avro, Avro™, Apache®, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.

+
+
+ {{ with .Site.Params }}{{ end }} +
+
+
+
+{{ define "footer-links-block" }} +
    + {{ range . }} +
  • + + + +
  • + {{ end }} +
+{{ end }} diff --git a/doc/layouts/partials/navbar-asf-links.html b/doc/layouts/partials/navbar-asf-links.html new file mode 100644 index 00000000000..54e3b8dcf08 --- /dev/null +++ b/doc/layouts/partials/navbar-asf-links.html @@ -0,0 +1,29 @@ + + + + diff --git a/doc/layouts/partials/navbar-docs-selector.html b/doc/layouts/partials/navbar-docs-selector.html new file mode 100644 index 00000000000..a60c934cb8c --- /dev/null +++ b/doc/layouts/partials/navbar-docs-selector.html @@ -0,0 +1,29 @@ + + + + diff --git a/doc/layouts/partials/navbar.html b/doc/layouts/partials/navbar.html new file mode 100644 index 00000000000..a57cdd1f31c --- /dev/null +++ b/doc/layouts/partials/navbar.html @@ -0,0 +1,61 @@ + + +{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} + diff --git a/doc/layouts/project/baseof.html b/doc/layouts/project/baseof.html new file mode 100644 index 00000000000..9ec1e4d1793 --- /dev/null +++ b/doc/layouts/project/baseof.html @@ -0,0 +1,53 @@ + + + + + + {{ partial "head.html" . }} + + +
+ {{ partial "navbar.html" . }} +
+
+
+
+ + +
+ {{ partial "version-banner.html" . }} + {{ if not .Site.Params.ui.breadcrumb_disable }}{{ partial "breadcrumb.html" . }}{{ end }} + {{ block "main" . }}{{ end }} +
+
+
+ {{ partial "footer.html" . }} +
+ {{ partial "scripts.html" . }} + + diff --git a/doc/layouts/project/baseof.print.html b/doc/layouts/project/baseof.print.html new file mode 100644 index 00000000000..b74e38c0e2e --- /dev/null +++ b/doc/layouts/project/baseof.print.html @@ -0,0 +1,47 @@ + + + + + + {{ partial "head.html" . }} + + +
+ {{ partial "navbar.html" . }} +
+
+
+
+
+
+
+
+
+ {{ block "main" . }}{{ end }} +
+
+
+ {{ partial "footer.html" . }} +
+ {{ partial "scripts.html" . }} + + diff --git a/doc/layouts/project/list.html b/doc/layouts/project/list.html new file mode 100644 index 00000000000..885d754e559 --- /dev/null +++ b/doc/layouts/project/list.html @@ -0,0 +1,52 @@ +{{ define "main" }} + +
+

{{ .Title }}

+ {{ with .Params.description }}
{{ . | markdownify }}
{{ end }} + + {{ .Content }} + {{ partial "section-index.html" . }} + {{ if (and (not .Params.hide_feedback) (.Site.Params.ui.feedback.enable) (.Site.GoogleAnalytics)) }} + {{ partial "feedback.html" .Site.Params.ui.feedback }} +
+ {{ end }} + {{ if (.Site.DisqusShortname) }} +
+ {{ partial "disqus-comment.html" . }} + {{ end }} + {{ partial "page-meta-lastmod.html" . }} +
+{{ end }} diff --git a/doc/layouts/project/list.print.html b/doc/layouts/project/list.print.html new file mode 100644 index 00000000000..33fa25d9fe1 --- /dev/null +++ b/doc/layouts/project/list.print.html @@ -0,0 +1,23 @@ +{{ define "main" }} + +{{ partial "print/render" . }} +{{ end }} diff --git a/doc/layouts/project/single.html b/doc/layouts/project/single.html new file mode 100644 index 00000000000..bbc65acfe9d --- /dev/null +++ b/doc/layouts/project/single.html @@ -0,0 +1,24 @@ + + +{{ define "main" }} +{{ .Render "content" }} +{{ end }} diff --git a/doc/layouts/shortcodes/avro_version.html b/doc/layouts/shortcodes/avro_version.html new file mode 100644 index 00000000000..04a4bf5ef24 --- /dev/null +++ b/doc/layouts/shortcodes/avro_version.html @@ -0,0 +1,24 @@ +{{/* + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +*/}}{{/* + +This file must not have a trailing newline. 
+ +*/}}{{ $.Site.Params.avroversion }} \ No newline at end of file diff --git a/doc/layouts/shortcodes/project_logo.html b/doc/layouts/shortcodes/project_logo.html new file mode 100644 index 00000000000..42503d33a25 --- /dev/null +++ b/doc/layouts/shortcodes/project_logo.html @@ -0,0 +1,22 @@ +{{/* + +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. + +*/}}{{/* +This file should not have a trailing newline. +*/}}{{ with resources.Get "/icons/logo-text.svg" }}{{ (.|minify).Content | safeHTML }}{{ end }} diff --git a/doc/package.json b/doc/package.json new file mode 100644 index 00000000000..02c3dc72247 --- /dev/null +++ b/doc/package.json @@ -0,0 +1,7 @@ +{ + "devDependencies": { + "autoprefixer": "^10.4.0", + "postcss": "^8.3.7", + "postcss-cli": "^11.0.0" + } +} diff --git a/doc/src/cli.xconf b/doc/src/cli.xconf deleted file mode 100644 index 85712ac715b..00000000000 --- a/doc/src/cli.xconf +++ /dev/null @@ -1,328 +0,0 @@ - - - - - - - - . 
- WEB-INF/cocoon.xconf - ../tmp/cocoon-work - ../site - - - - - - - - - - - - - - - index.html - - - - - - - */* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/content/htmldocs/canonical-completeness.html b/doc/src/content/htmldocs/canonical-completeness.html deleted file mode 100644 index 0827d57812e..00000000000 --- a/doc/src/content/htmldocs/canonical-completeness.html +++ /dev/null @@ -1,204 +0,0 @@ - - - -Completeness of "Parsing Canonical Form" - - - -

Completeness of "Parsing Canonical Form"

- -

1.0 Introduction

- -

One of the defining characteristics of Avro is that a reader is assumed to have the "same" schema used by the writer of the data the reader is reading. This assumption leads to a data format that's compact and amenable to many forms of schema evolution. However, there are nuances to defining exactly what it means for the reader to have "the same" schema used by the writer. We want to allow, for example, trivial transformations, such as the insertion of whitespace. But we can't allow transformations that change the real meaning of schemas, such as a reordering of fields in a record.

- -

To clearly define what it means for a reader to have "the same" schema as a writer, the Avro specification defines Parsing Canonical Form (PCF), a set of transformations on Avro schemas that strip away irrelevancies (e.g., "doc" attributes) and normalize the JSON text (e.g., dealing with whitespace). Two schemas are defined to be "the same" as far as a reader is concerned if and only if their PCFs are textually equal.

- -

We believe that PCF is sound and complete. Soundness means that the PCF of a schema is logically equivalent to the original form, i.e., we can use the PCF in place of the original form without introducing bugs. Completeness is "maximal soundness:" if two schemas are logically equivalent, then their PCFs will be textually identical. The Avro specification claims that PCF is complete when it says: "[if two schemas have the same PCF, then] there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writing using the other original schema."

- -

We believe that the transformations that define PCF are "self-evidently" sound to people familiar with Avro. For example, fixing the order of fields in a JSON object, or eliminating irrelevant attributes like doc, or using the simple int in place of {"type":"int"} clearly don't change the meaning of a schema.

- -

Completeness, on the other hand, is much less obvious. How do we know that there aren't two logically equivalent schemas that happen to reduce to different canonical forms? All it takes is one such pair to foil our claim of completeness.

- -

In general, completeness properties like this can be tricky to prove. It turns out that, while soundness is critical to us, completeness is not. If two schemas are operationally equivalent (i.e., a reader can't tell their output apart), but we accidentally treat them as if they are different, then typically all that happens is that we'll do more work. For example, we might generate a decoder object to decode some incoming data when it turns out that we had already cached a decoder object that could do the job. This is not likely to happen often, and thus incompleteness isn't a huge problem.

- -

At the same time, if we knew that our canonical forms were complete, then we might take advantage of that fact in some circumstances (e.g., to serialize schemas). Also, the Schema.equals(Object) method provided in the Avro implementation makes many of the same assumptions made in the PCF definition. Thus, a completeness proof for our canonicalization would give us confidence in the correctness of this equality algorithm. So this issue is not entirely academic.

- -

We haven't worked out a full, formal proof (we hope someone from the community will step up to that task!). However, we've been thinking about it quite a bit, and we thought we'd share our thoughts so far.

- - -

2.0 Completeness argument for Parsing Canonical Form

- -

Our formalization of Avro schemas would be based on interpreting them as grammars. In this interpretation, Avro schemas are grammars that generate tagged data streams. Consider, for example, the following schema for a linked-list: -

-  {"type":"record", "name":"list", "fields":[
-     {"name":"value", "type":"int"},
-     {"name":"tail",  "type":["null", "list"]}
-   ]}
-
-Interpreted as a grammar, it can generate a tagged data-stream that looks like this: -
-  [record,"list"][field,"value"][int,10][field,"tail"][union,1]
-    [record,"list"][field,"value"][int,22][field,"tail"][union,0]
-
-(this is a two-record linked list whose first cell contains the value "10" and second cell the value "22"). Avro schemas can trivially be interpreted as grammars for such tagged data streams. Formal proofs involving Avro schemas can be carried out as proofs about languages and grammars.

- -

So what does it mean for the canonical form of a schema to be "complete?" Let L(S) denote the language generated by the Avro schema S, and C(S) denote the canonical form of the schema. The canonicalization is complete if: -

-For all schemas S1 and S2,
-    L(S1) = L(S2) ⇒ C(S1) = C(S2) -
-That is, for any two schemas that generate the same language, their canonicalizations are textually equivalent. - -

To prove this, we need to define some functions: -

-J is a variable name we often use to denote a JSON expression representing an Avro schema
-C(J) is the Parsing Canonical Form of J as defined in the Avro specification
-P(J) is the ASG for an Avro schema generated by parsing J (think of P(J) as a Schema Java object)
-S is a variable name we often use to denote such ASGs
-L(S) is the language generated by a schema ASG -
-

With all these symbols defined, our completeness criterion is now rendered as: -

-∀ J1, J2: -L(P(J1)) = L(P(J2)) ⇒ C(J1) = C(J2) -
-We'll prove this by breaking it into two parts: -
-(1): ∀ S1, S2: -L(S1) = L(S2) ⇒ S1 ≅ S2
-(2): ∀ J1, J2: -P(J1) ≅ P(J2) ⇒ C(J1) = C(J2) -
-
-In this two-step decomposition, we've introduced a new operator ≅, which compares the ASGs of two Avro schemas. The ASG of an Avro schema can be viewed as a rooted, labeled, directed graph. Because Avro schemas can be recursive, these graphs can be cyclic. The ≅ operator is "true" between two ASGs when the set of minimal labeled paths (no cycles, starting from the root) on the two ASGs are the same. (The Schema.equals(Object) method in the Avro implementation computes something close to this ≅ relation, except that ≅ ignores "irrelevant" attributes like doc and aliases.) - -

It turns out that, implicit in the Avro Specification, there are "canonicalization" rules that are important to our proof of completeness. In particular, the Avro Specification says that a name must be defined "before" it is used, and that a name cannot be defined more than once in a schema. Consider the following redefinition of the linked-list schema, for example: -

-  {"type":"record", "name":"list", "fields":[
-    {"name":"value", "type":"int"},
-    {"name":"tail",
-      "type":["null", {"type":"record", "name":"list", "fields":[
-                        {"name":"value", "type":"int"},
-                        {"name":"tail", "type":["null", "list"]}]}]}
-  ]}
-
-In this redefinition, we've "unpacked" the recursion in the linked list by one level. In some sense, this is a perfectly fine definition of a linked list, and is operationally equivalent to the more compact version given earlier. So it makes sense that our claim of completeness is dependent upon this kind of "unpacking" not occurring in real schemas.

- -

To deal with this issue in our proof, we pretend that the Avro specification does not require that named schemas be defined just once, and be defined "before" they are used. Rather, we treat this requirement as an additional transformation rule in the definition of Parsing Canonical Form: -

    -
  • [MINIMIZE] Eliminate redundant definitions of named types (records, enums, and fixeds). That is, for each named type, have a defining instance that appears at first use, and then use just the name (rather than the full schema) everywhere else.
  • -
-(As in the Avro spec, "first use" is defined as the first occurrence in a depth-first, left-to-right traversal of the schema abstract-syntax graph (ASG).) - -

Getting back to the proof of (1) and (2) from above, we need to introduce more functions: -

-P(J)=PA(PJ(J)) - decompose parser into:
-  PJ is the JSON parser
-  PA is the Avro parser (takes JSON ASTs as input)
-C(J)=CJ(CA(CM(J))) - decompose canonicalization into:
-  CM(J) the MINIMIZE step
-  CA(J) Avro normalizations
-  CJ(J) JSON normalizations
-M(S) is the "named-schema NFA minimization" of S
-
-"Named-schema NFA minimization" is similar to general NFA minimization, except that we only collapse nodes and edges related to named schema entities and not other nodes. For example, we would not collapse the nodes associated with int or union schemas. - -

Our proof of (1) looks like this (this proof refers to lemmas (3) and (4), which are defined later): -

- - - - - - - - - - -
S1,S2:L(S1)=L(S2)
M(S1)=M(S2)by (3)
S1≅S2by (4)
-
-Here's the proof of (2) (this proof refers to lemmas (4)-(7), which are defined later): -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
J1,J2:P(J1)≅P(J2)
M(P(J1))=M(P(J2))by (4)
P(CM(J1))=P(CM(J2))by (5)
PA(PJ(CM(J1)))=PA(PJ(CM(J2)))by definition of P
PJ(CA(CM(J1)))=PJ(CA(CM(J2)))by (6)
CJ(CA(CM(J1)))=CJ(CA(CM(J2)))by (7)
C(J1)=C(J2)by definition of C
-
- -Here are the lemmas needed above: -
-(3): ∀ S1, S2: -L(S1) = L(S2) ⇒ M(S1) = M(S2)
- -(4): ∀ S1, S2: -M(S1) = M(S2) ⇔ S1 ≅ S2
- -(5): ∀ J: M(P(J)) = P(CM(J))
- -(6): ∀ J1, J2: -PA(PJ(J1)) = PA(PJ(J2)) ⇒ PJ(CA(J1)) = PJ(CA(J2))
- -(7): ∀ J1, J2: -PJ(J1) = PJ(J2) ⇒ CJ(J1) = CJ(J2)
-
- -

Proving the lemmas: -

    -
  1. This says that the language-related part of our canonicalization is complete, i.e., M finds the equivalence-classes of L. I would imagine one could prove this by modifying a proof that the equality of LL(1) grammars is a decidable problem. I haven't gotten very far in showing this, however. -
  2. The right-hand direction of this follows from the definition of minimization. The left-hand direction seems correct, but I'm not sure how to prove it (I think it also follows from the definition of minimization). -
  3. This is showing that the MINIMIZE step (which is done on JSON expressions) is equivalent to doing a named-schema NFA minimization on the ASG representation. This should follow pretty directly from a detailed definition of M, if we provided one. -
  4. This says that the Avro-related part of our canonicalization is complete, i.e., that CA finds equivalence-classes of PA. -
  5. This says that the JSON-related part of our canonicalization is complete, i.e., that CJ finds equivalence-classes of PJ. Note that, implicitly, this lemma ranges over only JSON expressions that are legal Avro schemas with no doc strings or default values, and thus (for example) doesn't need to worry about normalization of floating-point literals. -
- - -

3.0 Concluding remarks

- -Engineers have a history of running ahead of formal mathematical proofs, when things "seem correct" to them. In this case, it seems pretty obvious that Parsing Canonical Form is complete as well as sound, and we should go ahead and treat it as such. At the same time, formal proofs often turn up corner cases and exceptions that are valuable to document and account for. Thus, it'd be nice if someone could provide a better completeness argument than we've been able to so far. - - - diff --git a/doc/src/content/htmldocs/performance-testing.html b/doc/src/content/htmldocs/performance-testing.html deleted file mode 100644 index d98992e4118..00000000000 --- a/doc/src/content/htmldocs/performance-testing.html +++ /dev/null @@ -1,173 +0,0 @@ - - - -Testing performance improvements - - - - -(Note: This document pertains only to the Java implementation of Avro.) - - -

1.0 Introduction

- -

Recent work on improving the performance of "specific record" (AVRO-2090 and AVRO-2247) has highlighted the need for a benchmark that can be used to test the validity of alleged performance "improvements."

- -

As a starting point, the Avro project has a class called Perf (in the test source of the ipc subproject). Perf is a command-line tool that contains close to 70 individual performance tests. These tests include tests for reading and writing primitive values, arrays and maps, plus tests for reading and writing records through all of the APIs (generic, specific, reflect).

- -

When using Perf for some recent performance work, we encountered two problems. First, because it depends on build artifacts from across the Avro project, it can be tricky to invoke. Second, and more seriously, independent runs of the tests in Perf can vary in performance by as much as 40%. While typical variance is less than that, the variance is high enough that it makes it impossible to tell if a change in performance is simply this noise, or can be properly attributed to a proposed optimization.

- -

This document addresses both problems, the usability problem in Section 2 and the variability issue in Section 3. Regarding the variability issue, as you will see, we haven't really been able to manage it in a fundamental manner. As suggested by Zoltan Farkas, we should look into porting Perf over to using the Java Microbenchmark Harness (JMH).

- - -

2.0 Invoking Perf

- -

2.1 Simple invocation

- -

Here is the easiest way we found to directly invoke Perf.

- -

As mentioned in the Introduction, Perf is dependent upon build artifacts from some of the other Avro subprojects. When you invoke Perf, it should be invoked with your most recent build of those artifacts (assuming you're performance-testing your current work). We have found that the easiest way to ensure the proper artifacts are used is to use Maven to invoke Perf.

- -

The recipe for using Maven in this way is simple. First, from the lang/java directory, you need to build and install Avro:

- -

    mvn clean install

- -

(You can add -DskipTests to the above command line if you don't need to run the test suite.) When this is done, you need to change your working directory to the lang/java/ipc directory. From there, you can invoke Perf with the following command line:

- -

-    mvn exec:java -Dexec.classpathScope=test -Dexec.mainClass=org.apache.avro.io.Perf -Dexec.args="..." -

- -

The exec.args string contains the arguments you want to pass through to the Perf.main function.

- -

To speed up your edit-compile-test loop, you can do a selective build of Avro in addition to skipping tests: - -

    mvn clean && mvn -pl "avro,compiler,maven-plugin,ipc" install -DskipTests

- - - -

2.2 Using the run-perf.sh script

- -

If you're using Perf, chances are that you want to compare the performance of a proposed optimization against the performance of a baseline (that baseline most likely being the current master branch of Avro). Generating this comparative data can be tedious if you're running Perf by hand. To relieve this tedium, you can use the run-perf.sh script instead (found in the share/test directory from the Avro top-level directory).

- -

To use this script, you put different implementations of Avro onto different branches of your Avro git repository. One of these branches is designated the "baseline" branch and the others are the "treatment" branches. The script will run the baseline and all the treatments, and will generate a CSV file containing a comparison of the treatments against the baseline.

- -

Running run-perf.sh --help will output a detailed manual-page for this script. Appendix A of this document contains sample invocations of this test script for different use cases.

- -

NOTE: as mentioned in run-perf.sh --help, this script is designed to be run from the lang/java/ipc directory, which is the Maven project containing the Perf program.

- - - -

3.0 Managing variance

- -As mentioned in the introduction, we tried a number of different mechanisms to reduce variance, including: -
    -
  • Varying org.apache.avro.io.perf.count, org.apache.avro.io.perf.cycles, and org.apache.avro.io.perf.use-direct, as well as the number of times we run Perf.java within a single "run" of a test. -

  • Taking the minimum times across runs, rather than the maximum times, using the second or third run as a baseline rather than the first, using statistical methods to eliminate outlying values. - -

  • Modified the code slightly, for example: starting the timer of a cycle after, rather than before, encoders or decoders are constructed; caching encoders and decoders; and reusing record objects during read tests rather than constructing new ones for each record being read. -

  • Using Docker's --cpuset-cpus flag to force the tests onto a single core. - -

  • Using a dedicated EC2 instance (c5d.2xlarge). -
-Of the above, the only change that made a significant difference was the last: in going from a laptop and desktop computer to a dedicated EC2 instance, we went from over 70 tests (out of 200) with a variance of 5% or more between runs to 35. As mentioned in the introduction, we should switch to a framework like JMH to attack this problem more fundamentally. -

If you want to setup your own EC2 instance for testing, here's how we did it. We launched a dedicated EC2 c5d.2xlarge instance from the AWS console, using the "Amazon Linux 64-bit HVM GP2" AMI. We logged into this instance and ran the following commands to install Docker and Git (we did all our Avro build and testing inside the Docker image): -

-  sudo yum update
-  sudo yum install -y git-all
-  git config --global user.name "Your Name"
-  git config --global user.email email-address-used@github.com
-  git config --global core.editor emacs
-  sudo yum install -y docker
-  sudo usermod -aG docker ec2-user ## Need to log back in for this to take effect
-  sudo service docker start
-
-At this point you can checkout Avro and launch your Docker container: -
-  git clone https://github.com/apache/avro.git
-  cd avro
-  screen
-  ./build.sh docker --args "--cpuset-cpus 2,6"
-
-Note the use of screen here: executions of run-perf.sh can take a few hours, depending on the configuration. By running it inside of screen, you are protected from an SSH disconnection causing run-perf.sh to prematurely terminate. - -

The --args flag in the last command deserves some explanation. In general, the --args allows you to pass additional arguments to the docker run command executed inside build.sh. In this case, the --cpuset-cpus flag for docker tells docker to schedule the container exclusively on the listed (virtual) CPUs. We identified vCPUs 2 and 6 using the lscpu Linux command: -

-  [ec2-user@ip-0-0-0-0 avro]$ lscpu --extended
-  CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE
-  0   0    0      0    0:0:0:0       yes
-  1   0    0      1    1:1:1:0       yes
-  2   0    0      2    2:2:2:0       yes
-  3   0    0      3    3:3:3:0       yes
-  4   0    0      0    0:0:0:0       yes
-  5   0    0      1    1:1:1:0       yes
-  6   0    0      2    2:2:2:0       yes
-  7   0    0      3    3:3:3:0       yes
-
-Notice that (v)CPUs 2 and 6 are both on core 2: it's sufficient to schedule the container on the same core, vs a single vCPU. One final tip: to confirm that your container is running on the expected CPUs, run top and then press the 1 key -- this will show you the load on each individual CPU. - - -

Appendix A: Sample uses of run-perf.sh

- -

A detailed explanation of run-perf.sh is printed when you give it the --help flag. To help you more quickly understand how to use run-perf.sh we present here a few examples of how we used it in our recent testing efforts. - -

To summarize, you invoke it as follows: -

-    ../../../share/test/run-perf.sh [--out-dir D] \
-       [--perf-args STRING] [-Dkey=value]* [--] \
-       [-Dkey=value]* branch_baseline[:name_baseline_run] \
-       [-Dkey=value]* branch_1[:name_treatment_run_1] \
-       ... 
- [-Dkey=value]* branch_n[:name_treatment_run_n]
-
-The path given here is relative to the lang/java/ipc directory, which needs to be the current working directory when calling this script. The script executes multiple runs of testing. The first run is called the baseline run, the subsequent runs are the treatment runs. Each run consists of four identical executions of Perf.java. The running times for each Perf.java test are averaged to obtain the final running time for the test. For each treatment run, the final running times for each test are compared, as a percentage, to the running time for the test in the baseline run. These percentages are output in the file summary.csv. - -

The following invocation is what we used to measure the variance of Perf.java: -

-../../../share/test/run-perf.sh --out-dir ~/calibration \
-    -Dorg.apache.avro.specific.use_custom_coders=true \
-    AVRO-2269:baseline AVRO-2269:run1 AVRO-2269:run2 AVRO-2269:run3
-
-In this invocation, the baseline run and all three treatment runs come from the same Git branch: AVRO-2269. We need to give a name to each run: in this case runs have been named "baseline"--the baseline run--and "run1", "run2", and "run3"--the treatment runs. Note that the name of the Git branch to be used for a run must always be provided, but the name for the run itself (e.g., "baseline") is optional. If a name for the run is not provided, then the name of the Git branch will be used as the name of the run. However, each run must have a unique name, so in this example we had to explicitly name the branches since all runs are on the same branch. - -

run-perf.sh uses Maven to invoke Perf.java. The -D flag is used to pass system properties to Maven, which in turn will pass them through to Perf.java. In the example above, we use this flag to turn on the custom-coders feature recently checked into Avro. Note that initial -D flags will be passed to all runs, while -D switches that come just before the name of the Git branch of a run apply to only that run. In the case of the baseline run, which comes first, if you want to pass -D flags to just that run, then use the -- flag to indicate that all global parameters for run-perf.sh have been provided, followed by the -D flags you want to pass to only the baseline run. -

Finally, note that run-perf.sh generates a lot of intermediate files as well as the final summary.csv file. Thus, it is recommended that the output of each execution of run-perf.sh is sent to a dedicated directory, provided by the --out-dir flag. If that directory does not exist, it will be created. (Observe that run-perf.sh outputs a file called command.txt containing the full command-line used to invoke it. This can be helpful if you run a lot of experiments and forget the detailed setup of some of them along the way.) -

The next invocation is what we used to ensure that the new "custom coders" optimization for specific records does indeed improve performance: -

-../../../share/test/run-perf.sh --out-dir ~/retest-codegen \
-    --perf-args "-Sf" \
-    AVRO-2269:baseline \
-    -Dorg.apache.avro.specific.use_custom_coders=true AVRO-2269:custom-coders
-
-In this case, unlike the previous one, the -D flag that turns on the use of custom coders is applied specifically to the treatment run, and not globally. Also, since this flag only affects the Specific Record case, we use the --perf-args flag to pass additional arguments to Perf.java; in this case, the -Sf flag tells Perf.java to run just the specific-record related tests and not the entire test suite. - -

This last example shows how we checked the performance impact of two new feature-branches we've been developing: -

-../../../share/test/run-perf.sh --out-dir ~/new-branches \
-    -Dorg.apache.avro.specific.use_custom_coders=true \
-    AVRO-2269:baseline combined-opts full-refactor
-
-In this case, once again, we turn on custom-coders for all runs. In this case, again, the Git branch AVRO-2269 is used for our baseline run. However, in this case, the treatment runs come from two other Git branches: combined-opts and full-refactor. We didn't provide run-names for these runs because the Git branch-names were fine to be used as run names (we explicitly named the first run "baseline" not because we had to, but because we like the convention of using that name). -

Although we didn't state it before, in preparing for a run, run-perf.sh will checkout the Git branch to be used for the run and use mvn install to build and install it. It does this for each branch, so the invocation just given will checkout and build three different branches during its overall execution. (As an optimization, if one run uses the same branch as the previous run, then the branch is not checked-out or rebuilt between runs.) - - - diff --git a/doc/src/content/mddocs/refactoring-resolution.md b/doc/src/content/mddocs/refactoring-resolution.md deleted file mode 100644 index 860f5c802db..00000000000 --- a/doc/src/content/mddocs/refactoring-resolution.md +++ /dev/null @@ -1,143 +0,0 @@ - - -# Refactoring Resolution -by Raymie Stata - - -## Problem statement - -In the early days of Avro, Schema resolution was implemented in a -number of places, e.g., `GenericDatumReader` as well as -`ResolvingGrammarGenerator`. However, Schema resolution is -complicated and thus error prone. Multiple implementations were hard -to maintain, both for correctness and for updates to the -schema-resolution spec. - -To address the problems of multiple implementations, we converged on -the implementation found in `ResolvingGrammarGenerator` (together with -`ResolvingDecoder`) as the single implementation, and refactored other -parts of Avro to depend on this implementation. - -Converging on a single implementation solved the maintenance problem, -and has served well for a number of years. However, the logic in -`ResolvingGrammarGenerator` does _two_ things: it contains the logic -for _schema resolution_ itself, and it contains the logic for -embedding that logic into a grammar that can be used by -`ResolvingDecoder`. - -Recently, Avro contributors have wanted access to the logic of schema -resolution _apart from_ `ResolvingDecoder`. For example, -[AVRO-2247](https://issues.apache.org/jira/browse/AVRO-2247) proposes -a new, faster approach to implementing `DatumReaders`. 
The initial -implementation of AVRO-2247 was forced to reimplement Schema -resolution -- going back to the world of multiple implementations -- -because there isn't a reusable implementation of our resolution logic. - -Similarly, as I've been working on extending the performance -improvements of -[AVRO-2090](https://issues.apache.org/jira/browse/AVRO-2090) when -writing data, I've been thinking about the possibilities of dynamic -code generation. Here too, I can't reuse `ResolvingGrammarGenerator`, -which would force me to reimplement the schema-resolution logic. - - -## Proposed solution - -We introduce a new class to encapsulate the logic of schema resolution -independent from the logic of implementing schema resolution as a -`ResolvingDecoder` grammar. In particular, we introduce a new class -`org.apache.avro.Resolver` with the following key function: - - public static Resolver.Action resolve(Schema writer, Schema reader); - -The subclasses of `Resolver.Action` encapsulate various ways to -resolve schemas. The `resolve` function walks the reader's and -writer's schema parse trees together, and generates a tree of -`Resolver.Action` nodes indicating how to resolve each subtree of the -writer's schema into the corresponding subtree of the reader's. - -`Resolver.Action` has the following subclasses: - - * `DoNothing` -- nothing needs to be done to resolve the writer's - data into the reader's schema. That is, the reader should read - the data written by the writer as if it were written using the - reader's own schema. This can be generated for any kind of - schema -- for example, if the reader's and writer's schemas are - the exact same union schema, a `DoNothing` will be generated -- - so consumers of `Resolver` need to be able to handle `DoNothing` - for all schemas. - - * `Promote` -- the writer's value needs to be promoted to the - reader's schema. Generated only for numeric and byte/string - types. 
- - * `ContainerAction` -- no resolution is needed directly on - container schemas, but a `ContainerAction` contains the `Action` - needed for the contained schema - - * `EnumAdjust` -- resolution involves dealing with reordering of - symbols and symbols that have been removed from the enumeration. - An `EnumAdjust` object contains the information needed to do so. - - * `RecordAdjust` -- resolution involves recursively resolving the - schemas for each field, and dealing with reordering and removal - of fields. A `RecordAdjust` object contains the information - needed to do so. - - * `SkipAction` -- only generated as a sub-action of a - `RecordAdjust` action. Used to indicate that a writer's field - does not appear in the reader's schema and thus should be - skipped. - - * `WriterUnion` -- generated when the writer's schema is a union - and the reader's schema is not the identical union. Has - subactions for resolving each branch of the writer's union - against the reader's schema. - - * `ReaderUnion` -- generated when the reader's schema is a union - and the writer's was not. Has information indicating which of - the reader's union branches was the best fit for the writer's - schema, and a subaction for resolving the schema of that branch - against the writer's schema. - - * `ErrorAction` -- generated when the (sub)schemas can't be - resolved. - -These new classes are similar to the family of `Symbol` objects we've -defined for `ResolvingGrammarGenerator`. For example, -`Action.RecordAdjust` is similar to `Symbol.FieldOrderAction`, and -`Action.EnumAdjust` to `Symbol.EnumAdjustAction`. This similarity is -not surprising, since those `Symbol` objects were designed to -encapsulate the logic of schema resolution as well. - -However, `ResolvingGrammarGenerator` embeds those `Symbol` -objects into flattened productions highly optimized for the LL(1) -parser implemented by `ResolvingDecoder`. 
The `Resolver`, in -contrast, captures the schema-resolution logic in a tree-like -structure that closely mirrors the syntax-tree of the schemas being -resolved. This tree-like representation is easily consumed by -multiple implementations of resolution -- be it the grammar-based -implementation of `ResolvingDecoder`, the "action-sequence"-based -implementation of AVRO-2247, or the dynamic code-gen implementation -being considered as an extension to AVRO-2090. - -We have reimplemented `ResolvingGrammarGenerator` to eliminate its -implementation of schema-resolution logic and instead consume the -output of `Resolver.resolve`. Thus, it might be helpful to study -`ResolvingGrammarGenerator` to better understand how to consume this -output in other circumstances. diff --git a/doc/src/content/xdocs/gettingstartedjava.xml b/doc/src/content/xdocs/gettingstartedjava.xml deleted file mode 100644 index 5440b07efe4..00000000000 --- a/doc/src/content/xdocs/gettingstartedjava.xml +++ /dev/null @@ -1,527 +0,0 @@ - - - - %avro-entities; -]> - -

- Apache Avro™ &AvroVersion; Getting Started (Java) -
- -

- This is a short guide for getting started with Apache Avro™ using - Java. This guide only covers using Avro for data serialization; see - Patrick Hunt's Avro - RPC Quick Start for a good introduction to using Avro for RPC. -

-
- Download -

- Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be - downloaded from the Apache Avro™ - Releases page. This guide uses Avro &AvroVersion;, the latest - version at the time of writing. For the examples in this guide, - download avro-&AvroVersion;.jar and - avro-tools-&AvroVersion;.jar. -

-

- Alternatively, if you are using Maven, add the following dependency to - your POM: -

- -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>&AvroVersion;</version> -</dependency> - -

- As well as the Avro Maven plugin (for performing code generation): -

- -<plugin> - <groupId>org.apache.avro</groupId> - <artifactId>avro-maven-plugin</artifactId> - <version>&AvroVersion;</version> - <executions> - <execution> - <phase>generate-sources</phase> - <goals> - <goal>schema</goal> - </goals> - <configuration> - <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory> - <outputDirectory>${project.basedir}/src/main/java/</outputDirectory> - </configuration> - </execution> - </executions> -</plugin> -<plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-compiler-plugin</artifactId> - <configuration> - <source>1.8</source> - <target>1.8</target> - </configuration> -</plugin> - -

- You may also build the required Avro jars from source. Building Avro is - beyond the scope of this guide; see the Build - Documentation page in the wiki for more information. -

-
- -
- Defining a schema -

- Avro schemas are defined using JSON. Schemas are composed of primitive types - (null, boolean, int, - long, float, double, - bytes, and string) and complex types (record, - enum, array, map, - union, and fixed). You can learn more about - Avro schemas and types from the specification, but for now let's start - with a simple schema example, user.avsc: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema defines a record representing a hypothetical user. (Note - that a schema file can only contain a single schema definition.) At - minimum, a record definition must include its type ("type": - "record"), a name ("name": "User"), and fields, in - this case name, favorite_number, and - favorite_color. We also define a namespace - ("namespace": "example.avro"), which together with the name - attribute defines the "full name" of the schema - (example.avro.User in this case). - -

-

- Fields are defined via an array of objects, each of which defines a name - and type (other attributes are optional, see the record specification for more - details). The type attribute of a field is another schema object, which - can be either a primitive or complex type. For example, the - name field of our User schema is the primitive type - string, whereas the favorite_number and - favorite_color fields are both unions, - represented by JSON arrays. unions are a complex type that - can be any of the types listed in the array; e.g., - favorite_number can either be an int or - null, essentially making it an optional field. -

-
- -
- Serializing and deserializing with code generation -
- Compiling the schema -

- Code generation allows us to automatically create classes based on our - previously-defined schema. Once we have defined the relevant classes, - there is no need to use the schema directly in our programs. We use the - avro-tools jar to generate code as follows: -

- -java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema <schema file> <destination> - -

- This will generate the appropriate source files in a package based on - the schema's namespace in the provided destination folder. For - instance, to generate a User class in package - example.avro from the schema defined above, run -

- -java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema user.avsc . - -

- Note that if you are using the Avro Maven plugin, there is no need to - manually invoke the schema compiler; the plugin automatically - performs code generation on any .avsc files present in the configured - source directory. -

-
-
- Creating Users -

- Now that we've completed the code generation, let's create some - Users, serialize them to a data file on disk, and then - read back the file and deserialize the User objects. -

-

- First let's create some Users and set their fields. -

- -User user1 = new User(); -user1.setName("Alyssa"); -user1.setFavoriteNumber(256); -// Leave favorite color null - -// Alternate constructor -User user2 = new User("Ben", 7, "red"); - -// Construct via builder -User user3 = User.newBuilder() - .setName("Charlie") - .setFavoriteColor("blue") - .setFavoriteNumber(null) - .build(); - -

- As shown in this example, Avro objects can be created either by - invoking a constructor directly or by using a builder. Unlike - constructors, builders will automatically set any default values - specified in the schema. Additionally, builders validate the data as - it is set, whereas objects constructed directly will not cause an error - until the object is serialized. However, using constructors directly - generally offers better performance, as builders create a copy of the - data structure before it is written. -

-

- Note that we do not set user1's favorite color. Since - that field is of type ["string", "null"], we can either - set it to a string or leave it null; it is - essentially optional. Similarly, we set user3's favorite - number to null (using a builder requires setting all fields, even if - they are null). -

-
-
- Serializing -

- Now let's serialize our Users to disk. -

- -// Serialize user1, user2 and user3 to disk -DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class); -DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter); -dataFileWriter.create(user1.getSchema(), new File("users.avro")); -dataFileWriter.append(user1); -dataFileWriter.append(user2); -dataFileWriter.append(user3); -dataFileWriter.close(); - -

- We create a DatumWriter, which converts Java objects into - an in-memory serialized format. The SpecificDatumWriter - class is used with generated classes and extracts the schema from the - specified generated type. -

-

- Next we create a DataFileWriter, which writes the - serialized records, as well as the schema, to the file specified in the - dataFileWriter.create call. We write our users to the file - via calls to the dataFileWriter.append method. When we are - done writing, we close the data file. -

-
-
- Deserializing -

- Finally, let's deserialize the data file we just created. -

- -// Deserialize Users from disk -DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class); -DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader); -User user = null; -while (dataFileReader.hasNext()) { -// Reuse user object by passing it to next(). This saves us from -// allocating and garbage collecting many objects for files with -// many items. -user = dataFileReader.next(user); -System.out.println(user); -} - -

- This snippet will output: -

- -{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} -{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} -{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} - -

- Deserializing is very similar to serializing. We create a - SpecificDatumReader, analogous to the - SpecificDatumWriter we used in serialization, which - converts in-memory serialized items into instances of our generated - class, in this case User. We pass the - DatumReader and the previously created File - to a DataFileReader, analogous to the - DataFileWriter, which reads both the schema used by the - writer as well as the data from the file on disk. The data will be - read using the writer's schema included in the file and the - schema provided by the reader, in this case the User - class. The writer's schema is needed to know the order in which - fields were written, while the reader's schema is needed to know what - fields are expected and how to fill in default values for fields - added since the file was written. If there are differences between - the two schemas, they are resolved according to the - Schema Resolution - specification. -

-

- Next we use the DataFileReader to iterate through the - serialized Users and print the deserialized object to - stdout. Note how we perform the iteration: we create a single - User object which we store the current deserialized user - in, and pass this record object to every call of - dataFileReader.next. This is a performance optimization - that allows the DataFileReader to reuse the same - User object rather than allocating a new - User for every iteration, which can be very expensive in - terms of object allocation and garbage collection if we deserialize a - large data file. While this technique is the standard way to iterate - through a data file, it's also possible to use for (User user : - dataFileReader) if performance is not a concern. -

-
-
- Compiling and running the example code -

- This example code is included as a Maven project in the - examples/java-example directory in the Avro docs. From this - directory, execute the following commands to build and run the - example: -

- -$ mvn compile # includes code generation via Avro Maven plugin -$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain - -
-
- Beta feature: Generating faster code -

- In this release we have introduced a new approach to - generating code that speeds up decoding of objects by more - than 10% and encoding by more than 30% (future performance - enhancements are underway). To ensure a smooth introduction - of this change into production systems, this feature is - controlled by a feature flag, the system - property org.apache.avro.specific.use_custom_coders. - In this first release, this feature is off by default. To - turn it on, set the system flag to true at - runtime. In the sample above, for example, you could enable - the faster coders as follows: -

- -$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ - -Dorg.apache.avro.specific.use_custom_coders=true - -

- Note that you do not have to recompile your Avro - schema to have access to this feature. The feature is - compiled and built into your code, and you turn it on and - off at runtime using the feature flag. As a result, you can - turn it on during testing, for example, and then off in - production. Or you can turn it on in production, and - quickly turn it off if something breaks. -

-

- We encourage the Avro community to exercise this new feature - early to help build confidence. (For those paying - on-demand for compute resources in the cloud, it can lead - to meaningful cost savings.) As confidence builds, we will - turn this feature on by default, and eventually eliminate - the feature flag (and the old code). -

-
-
- -
- Serializing and deserializing without code generation -

- Data in Avro is always stored with its corresponding schema, meaning we - can always read a serialized item regardless of whether we know the - schema ahead of time. This allows us to perform serialization and - deserialization without code generation. -

-

- Let's go over the same example as in the previous section, but without - using code generation: we'll create some users, serialize them to a data - file on disk, and then read back the file and deserialize the user - objects. -

-
- Creating users -

- First, we use a Parser to read our schema definition and - create a Schema object. -

- -Schema schema = new Schema.Parser().parse(new File("user.avsc")); - -

- Using this schema, let's create some users. -

- -GenericRecord user1 = new GenericData.Record(schema); -user1.put("name", "Alyssa"); -user1.put("favorite_number", 256); -// Leave favorite color null - -GenericRecord user2 = new GenericData.Record(schema); -user2.put("name", "Ben"); -user2.put("favorite_number", 7); -user2.put("favorite_color", "red"); - -

- Since we're not using code generation, we use - GenericRecords to represent users. - GenericRecord uses the schema to verify that we only - specify valid fields. If we try to set a non-existent field (e.g., - user1.put("favorite_animal", "cat")), we'll get an - AvroRuntimeException when we run the program. -

-

- Note that we do not set user1's favorite color. Since - that field is of type ["string", "null"], we can either - set it to a string or leave it null; it is - essentially optional. -

-
-
- Serializing -

- Now that we've created our user objects, serializing and deserializing - them is almost identical to the example above which uses code - generation. The main difference is that we use generic instead of - specific readers and writers. -

-

- First we'll serialize our users to a data file on disk. -

- -// Serialize user1 and user2 to disk -File file = new File("users.avro"); -DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); -DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter); -dataFileWriter.create(schema, file); -dataFileWriter.append(user1); -dataFileWriter.append(user2); -dataFileWriter.close(); - -

- We create a DatumWriter, which converts Java objects into - an in-memory serialized format. Since we are not using code - generation, we create a GenericDatumWriter. It requires - the schema both to determine how to write the - GenericRecords and to verify that all non-nullable fields - are present. -

-

- As in the code generation example, we also create a - DataFileWriter, which writes the serialized records, as - well as the schema, to the file specified in the - dataFileWriter.create call. We write our users to the - file via calls to the dataFileWriter.append method. When - we are done writing, we close the data file. -

-
-
- Deserializing -

- Finally, we'll deserialize the data file we just created. -

- -// Deserialize users from disk -DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); -DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); -GenericRecord user = null; -while (dataFileReader.hasNext()) { -// Reuse user object by passing it to next(). This saves us from -// allocating and garbage collecting many objects for files with -// many items. -user = dataFileReader.next(user); -System.out.println(user); - -

This outputs:

- -{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} -{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} - -

- Deserializing is very similar to serializing. We create a - GenericDatumReader, analogous to the - GenericDatumWriter we used in serialization, which - converts in-memory serialized items into GenericRecords. - We pass the DatumReader and the previously created - File to a DataFileReader, analogous to the - DataFileWriter, which reads both the schema used by the - writer as well as the data from the file on disk. The data will be - read using the writer's schema included in the file, and the reader's - schema provided to the GenericDatumReader. The writer's - schema is needed to know the order in which fields were written, - while the reader's schema is needed to know what fields are expected - and how to fill in default values for fields added since the file - was written. If there are differences between the two schemas, they - are resolved according to the - Schema Resolution - specification. -

-

- Next, we use the DataFileReader to iterate through the - serialized users and print the deserialized object to stdout. Note - how we perform the iteration: we create a single - GenericRecord object which we store the current - deserialized user in, and pass this record object to every call of - dataFileReader.next. This is a performance optimization - that allows the DataFileReader to reuse the same record - object rather than allocating a new GenericRecord for - every iteration, which can be very expensive in terms of object - allocation and garbage collection if we deserialize a large data file. - While this technique is the standard way to iterate through a data - file, it's also possible to use for (GenericRecord user : - dataFileReader) if performance is not a concern. -

-
-
- Compiling and running the example code -

- This example code is included as a Maven project in the - examples/java-example directory in the Avro docs. From this - directory, execute the following commands to build and run the - example: -

- -$ mvn compile -$ mvn -q exec:java -Dexec.mainClass=example.GenericMain - -
-
- - diff --git a/doc/src/content/xdocs/gettingstartedpython.xml b/doc/src/content/xdocs/gettingstartedpython.xml deleted file mode 100644 index f6216b116d8..00000000000 --- a/doc/src/content/xdocs/gettingstartedpython.xml +++ /dev/null @@ -1,258 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Getting Started (Python) -
- -

- This is a short guide for getting started with Apache Avro™ using - Python. This guide only covers using Avro for data serialization; see - Patrick Hunt's Avro - RPC Quick Start for a good introduction to using Avro for RPC. -

- -
- Notice for Python 3 users -

- A package called "avro-python3" had been provided to support - Python 3 previously, but the codebase was consolidated into - the "avro" package that supports Python 3 now. - - The avro-python3 package will be removed in the near future, - so users should use the "avro" package instead. - They are mostly API compatible, but there are a few minor differences - (e.g., function name capitalization, - such as avro.schema.Parse vs avro.schema.parse). -

-
- -
- Download and Install -

- The easiest way to get started in Python is to install avro from PyPI - using pip, the Python Package Installer. -

- -$ python3 -m pip install avro - -

Consider doing a local install or using a virtualenv to avoid permissions problems and interfering with system packages:

- -$ python3 -m pip install --user avro - -

or

- - $ python3 -m venv avro-venv - $ avro-venv/bin/pip install avro - -

- The official releases of the Avro implementations for C, C++, C#, Java, - PHP, Python, and Ruby can be downloaded from the Apache Avro™ - Releases page. This guide uses Avro &AvroVersion;, the latest - version at the time of writing. Download and install - avro-&AvroVersion;-py2.py3-none-any.whl or - avro-&AvroVersion;.tar.gz via - python -m pip install avro-&AvroVersion;-py2.py3-none-any.whl - or - python -m pip install avro-&AvroVersion;.tar.gz. - (As above, consider using a virtualenv or user-local install.) -

-

Check that you can import avro from a Python prompt.

- -$ python3 -c 'import avro; print(avro.__version__)' - -

The above should print &AvroVersion;. It should not raise an ImportError.

-

- Alternatively, you may build the Avro Python library from source. From - the root Avro directory, run the commands -

- -$ cd lang/py/ -$ python3 -m pip install -e . -$ python3 - -
- -
- Defining a schema -

- Avro schemas are defined using JSON. Schemas are composed of primitive types - (null, boolean, int, - long, float, double, - bytes, and string) and complex types (record, - enum, array, map, - union, and fixed). You can learn more about - Avro schemas and types from the specification, but for now let's start - with a simple schema example, user.avsc: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema defines a record representing a hypothetical user. (Note - that a schema file can only contain a single schema definition.) At - minimum, a record definition must include its type ("type": - "record"), a name ("name": "User"), and fields, in - this case name, favorite_number, and - favorite_color. We also define a namespace - ("namespace": "example.avro"), which together with the name - attribute defines the "full name" of the schema - (example.avro.User in this case). - -

-

- Fields are defined via an array of objects, each of which defines a name - and type (other attributes are optional, see the record specification for more - details). The type attribute of a field is another schema object, which - can be either a primitive or complex type. For example, the - name field of our User schema is the primitive type - string, whereas the favorite_number and - favorite_color fields are both unions, - represented by JSON arrays. unions are a complex type that - can be any of the types listed in the array; e.g., - favorite_number can either be an int or - null, essentially making it an optional field. -

-
- -
- Serializing and deserializing without code generation -

- Data in Avro is always stored with its corresponding schema, meaning we - can always read a serialized item, regardless of whether we know the - schema ahead of time. This allows us to perform serialization and - deserialization without code generation. Note that the Avro Python - library does not support code generation. -

-

- Try running the following code snippet, which serializes two users to a - data file on disk, and then reads back and deserializes the data file: -

- -import avro.schema -from avro.datafile import DataFileReader, DataFileWriter -from avro.io import DatumReader, DatumWriter - -schema = avro.schema.parse(open("user.avsc", "rb").read()) - -writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) -writer.append({"name": "Alyssa", "favorite_number": 256}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) -writer.close() - -reader = DataFileReader(open("users.avro", "rb"), DatumReader()) -for user in reader: - print user -reader.close() - -

This outputs:

- -{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} -{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} - -

- Do make sure that you open your files in binary mode (i.e. using the modes - wb or rb respectively). Otherwise you might - generate corrupt files due to - - automatic replacement of newline characters with the - platform-specific representations. -

-

- Let's take a closer look at what's going on here. -

- -schema = avro.schema.parse(open("user.avsc", "rb").read()) - -

- avro.schema.parse takes a string containing a JSON schema - definition as input and outputs an avro.schema.Schema object - (specifically a subclass of Schema, in this case - RecordSchema). We're passing in the contents of our - user.avsc schema file here. -

- -writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) - -

- We create a DataFileWriter, which we'll use to write - serialized items to a data file on disk. The - DataFileWriter constructor takes three arguments: -

-
    -
  • The file we'll serialize to
  • -
  • A DatumWriter, which is responsible for actually - serializing the items to Avro's binary format - (DatumWriters can be used separately from - DataFileWriters, e.g., to perform IPC with Avro).
  • -
  • The schema we're using. The DataFileWriter needs the - schema both to write the schema to the data file, and to verify that - the items we write are valid items and write the appropriate - fields.
  • -
- -writer.append({"name": "Alyssa", "favorite_number": 256}) -writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) - -

- We use DataFileWriter.append to add items to our data - file. Avro records are represented as Python dicts. - Since the field favorite_color has type ["string", - "null"], we are not required to specify this field, as shown in - the first append. Were we to omit the required name - field, an exception would be raised. Any extra entries in the - dict that do not correspond to a field are - ignored. -

- -reader = DataFileReader(open("users.avro", "rb"), DatumReader()) - -

- We open the file again, this time for reading back from disk. We use - a DataFileReader and DatumReader analogous - to the DataFileWriter and DatumWriter above. -

- -for user in reader: - print user - -

- The DataFileReader is an iterator that returns - dicts corresponding to the serialized items. -

-
- -
diff --git a/doc/src/content/xdocs/idl.xml b/doc/src/content/xdocs/idl.xml deleted file mode 100644 index 52a607503b3..00000000000 --- a/doc/src/content/xdocs/idl.xml +++ /dev/null @@ -1,486 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; IDL -
- - -
- Introduction - -

This document defines Avro IDL, a higher-level language for authoring Avro schemata. - Before reading this document, you should have familiarity with the concepts of schemata and protocols, - as well as the various primitive and complex types available in Avro. -

-
- -
- Overview -
- Purpose -

The aim of the Avro IDL language is to enable developers to author schemata in a way that - feels more similar to common programming languages like Java, C++, or Python. Additionally, - the Avro IDL language may feel more familiar for those users who have previously used the - interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. -

-
-
- Usage -

- Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format - Avro Protocol file with extension .avpr. -

-

- To convert a .avdl file into a .avpr file, it may be processed by the - idl tool. For example: -

- -$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr -$ head /tmp/namespaces.avpr -{ - "protocol" : "TestNamespace", - "namespace" : "avro.test.protocol", - -

- The idl tool can also process input to and from stdin and stdout. - See idl --help for full usage information. -

-

A Maven plugin is also provided to compile .avdl files. To - use it, add something like the following to your pom.xml:

- - - - org.apache.avro - avro-maven-plugin - - - - idl-protocol - - - - - - -]]> -
-
- -
- Defining a Protocol in Avro IDL - -

An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined - by the following code: -

- -protocol MyProtocol { -} - -

- This is equivalent to (and generates) the following JSON protocol definition: -

- - -{ -"protocol" : "MyProtocol", - "types" : [ ], - "messages" : { - } -} - -

- The namespace of the protocol may be changed using the @namespace annotation: -

- -@namespace("mynamespace") -protocol MyProtocol { -} - -

- This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, - as will be described later in this document. -

-

- Protocols in Avro IDL can contain the following items: -

-
    -
  • Imports of external protocol and schema files.
  • -
  • Definitions of named schemata, including records, errors, enums, and fixeds.
  • -
  • Definitions of RPC messages
  • -
-
-
- Imports -

Files may be imported in one of three formats:

-
    -
  • An IDL file may be imported with a statement like: - import idl "foo.avdl"; -
  • -
  • A JSON protocol file may be imported with a statement like: - import protocol "foo.avpr"; -
  • -
  • A JSON schema file may be imported with a statement like: - import schema "foo.avsc"; -
  • -
-

Messages and types in the imported file are added to this - file's protocol.

-

Imported file names are resolved relative to the current IDL file.

-
-
- Defining an Enumeration -

- Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. - In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. - This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. -

-

- Example Writer Enum Definition -

- -enum Shapes { - SQUARE, TRIANGLE, CIRCLE, OVAL -} - -

- Example Reader Enum Definition -

- -enum Shapes { - SQUARE, TRIANGLE, CIRCLE -} = CIRCLE; - -

- In the above example, the reader will use the default value of CIRCLE whenever reading data written with the OVAL symbol of the writer. - Also note that, unlike the JSON format, anonymous enums cannot be defined. -

-
-
- Defining a Fixed Length Field -

- Fixed fields are defined using the following syntax: -

- -fixed MD5(16); - -

This example defines a fixed-length type called MD5 which contains 16 bytes.

-
- -
- Defining Records and Errors -

- Records are defined in Avro IDL using a syntax similar to a struct definition in C: -

- -record Employee { - string name; - boolean active = true; - long salary; -} - -

- The above example defines a record with the name “Employee” with three fields. -

-

- To define an error, simply use the keyword error instead of record. - For example: -

- -error Kaboom { - string explanation; - int result_code = -1; -} - -

- Each field in a record or error consists of a type and a name, - optional property annotations and an optional default value. -

-

A type reference in Avro IDL must be one of:

-
    -
  • A primitive type
  • -
  • A logical type
  • -
  • A named schema defined prior to this usage in the same Protocol
  • -
  • A complex type (array, map, or union)
  • -
- -
- Primitive Types -

The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. - This list includes int, long, string, boolean, - float, double, null, and bytes. -

-
- -
- Logical Types -

Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. - The currently supported types are: -

- -

For example:

- -record Job { - string jobid; - date submitDate; - time_ms submitTime; - timestamp_ms finishTime; - decimal(9,2) finishRatio; -} - -
- -
- References to Named Schemata -

If a named schema has already been defined in the same Avro IDL file, it may be referenced by name - as if it were a primitive type: -

- -record Card { - Suit suit; // refers to the enum Suit defined above - int number; -} - -
-
- Default Values - -

Default values for fields may be optionally - specified by using an equals sign after the field name - followed by a JSON expression indicating the default value. - This JSON is interpreted as described in - the spec.

- -
-
- Complex Types - -
- Arrays -

- Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of - any type t is denoted array<t>. For example, an array of strings is - denoted array<string>, and a multidimensional array of Foo records - would be array<array<Foo>>. -

-
- -
- Maps -

Map types are written similarly to array types. A map that contains values of type - t is written map<t>. As in the JSON schema format, all - maps contain string-type keys.

-
- -
- Unions -

Union types are denoted as union { typeA, typeB, typeC, ... }. For example, - this record contains a string field that is optional (unioned with null): -

- -record RecordWithUnion { - union { null, string } optionalString; -} - -

- Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the - JSON format; namely, a union may not contain multiple elements of the same type. -

-
-
-
-
- Defining RPC Messages -

The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for - a method declaration within a C header file or a Java interface. To define an RPC message - add which takes two arguments named foo and bar, - returning an int, simply include the following definition within the protocol: -

- -int add(int foo, int bar = 0); - -

Message arguments, like record fields, may specify default - values.

-

To define a message with no response, you may use the alias void, equivalent - to the Avro null type: -

- -void logMessage(string message); - -

- If you have previously defined an error type within the same protocol, you may declare that - a message can throw this error using the syntax: -

- -void goKaboom() throws Kaboom; - -

To define a one-way message, use the - keyword oneway after the parameter list, for example: -

- -void fireAndForget(string message) oneway; - -
-
- Other Language Features -
- Comments -

All Java-style comments are supported within an Avro IDL file. Any text following - // on a line is ignored, as is any text between /* and - */, possibly spanning multiple lines.

-

Comments that begin with /** are used as the - documentation string for the type or field definition that - follows the comment.

-
-
- Escaping Identifiers -

Occasionally, one will need to use a reserved language keyword as an identifier. In order - to do so, backticks (`) may be used to escape the identifier. For example, to define - a message with the literal name error, you may write: -

- -void `error`(); - -

This syntax is allowed anywhere an identifier is expected.

-
-
- Annotations for Ordering and Namespaces -

Java-style annotations may be used to add additional - properties to types and fields throughout Avro IDL.

- -

For example, to specify the sort order of a field within - a record, one may use the @order annotation - before the field name as follows:

- -record MyRecord { - string @order("ascending") myAscendingSortField; - string @order("descending") myDescendingField; - string @order("ignore") myIgnoredField; -} - -

A field's type may also be preceded by annotations, e.g.:

- -record MyRecord { - @java-class("java.util.ArrayList") array<string> myStrings; -} - - -

This can be used to support java classes that can be - serialized/deserialized via their toString/String constructor, e.g.:

- -record MyRecord { - @java-class("java.math.BigDecimal") string value; - @java-key-class("java.io.File") map<string> fileStates; - array<@java-class("java.math.BigDecimal") string> weights; -} - - -

Similarly, a @namespace annotation may be used to modify the namespace - when defining a named schema. For example: -

- -@namespace("org.apache.avro.firstNamespace") -protocol MyProto { - @namespace("org.apache.avro.someOtherNamespace") - record Foo {} - - record Bar {} -} - -

- will define a protocol in the firstNamespace namespace. The record Foo will be - defined in someOtherNamespace and Bar will be defined in firstNamespace - as it inherits its default from its container. -

-

Type and field aliases are specified with - the @aliases annotation as follows:

- -@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) -record MyRecord { - string @aliases(["oldField", "ancientField"]) myNewField; -} - -

Some annotations like those listed above are handled - specially. All other annotations are added as properties to - the protocol, message, schema or field.

-
-
-
- Complete Example -

The following is a complete example of an Avro IDL file that shows most of the above features:

- -/** - * An example protocol in Avro IDL - */ -@namespace("org.apache.avro.test") -protocol Simple { - - @aliases(["org.foo.KindOf"]) - enum Kind { - FOO, - BAR, // the bar enum value - BAZ - } - - fixed MD5(16); - - record TestRecord { - @order("ignore") - string name; - - @order("descending") - Kind kind; - - MD5 hash; - - union { MD5, null} @aliases(["hash"]) nullableHash; - - array<long> arrayOfLongs; - } - - error TestError { - string message; - } - - string hello(string greeting); - TestRecord echo(TestRecord `record`); - int add(int arg1, int arg2); - bytes echoBytes(bytes data); - void `error`() throws TestError; - void ping() oneway; -} - -

Additional examples may be found in the Avro source tree under the src/test/idl/input directory.

-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/index.xml b/doc/src/content/xdocs/index.xml deleted file mode 100644 index 4247e212ec3..00000000000 --- a/doc/src/content/xdocs/index.xml +++ /dev/null @@ -1,96 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Documentation -
- -
- Introduction -

Apache Avro™ is a data serialization system.

-

Avro provides:

-
    -
  • Rich data structures.
  • -
  • A compact, fast, binary data format.
  • -
  • A container file, to store persistent data.
  • -
  • Remote procedure call (RPC).
  • -
  • Simple integration with dynamic languages. Code - generation is not required to read or write data files nor - to use or implement RPC protocols. Code generation is an - optional optimization, only worth implementing for - statically typed languages.
  • -
-
-
- Schemas -

Avro relies on schemas. When Avro data is read, the - schema used when writing it is always present. This permits - each datum to be written with no per-value overheads, making - serialization both fast and small. This also facilitates use - with dynamic, scripting languages, since data, together with - its schema, is fully self-describing.

-

When Avro data is stored in a file, its schema is stored with - it, so that files may be processed later by any program. If - the program reading the data expects a different schema this - can be easily resolved, since both schemas are present.

-

When Avro is used in RPC, the client and server exchange - schemas in the connection handshake. (This can be optimized - so that, for most calls, no schemas are actually transmitted.) - Since client and server both have the other's full - schema, correspondence between same named fields, missing - fields, extra fields, etc. can all be easily resolved.

-

Avro schemas are defined with - JSON . This - facilitates implementation in languages that already have - JSON libraries.

-
-
- Comparison with other systems -

Avro provides functionality similar to systems such - as Thrift, - Protocol - Buffers, etc. Avro differs from these systems in the - following fundamental aspects.

-
    -
  • Dynamic typing: Avro does not require that code - be generated. Data is always accompanied by a schema that - permits full processing of that data without code - generation, static datatypes, etc. This facilitates - construction of generic data-processing systems and - languages.
  • -
  • Untagged data: Since the schema is present when - data is read, considerably less type information need be - encoded with data, resulting in smaller serialization size.
  • -
  • No manually-assigned field IDs: When a schema - changes, both the old and new schema are always present when - processing data, so differences may be resolved - symbolically, using field names.
  • -
-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/mr.xml b/doc/src/content/xdocs/mr.xml deleted file mode 100644 index f5a70b95a58..00000000000 --- a/doc/src/content/xdocs/mr.xml +++ /dev/null @@ -1,580 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Hadoop MapReduce guide -
- -

- Avro provides a convenient way to represent complex data structures within - a Hadoop MapReduce job. Avro data can be used as both input to and output - from a MapReduce job, as well as the intermediate format. The example in - this guide uses Avro data for all three, but it's possible to mix and - match; for instance, MapReduce can be used to aggregate a particular field - in an Avro record. -

-

- This guide assumes basic familiarity with both Hadoop MapReduce and Avro. - See the Hadoop - documentation and the Avro getting - started guide for introductions to these projects. This guide uses - the old MapReduce API (org.apache.hadoop.mapred) and the new - MapReduce API (org.apache.hadoop.mapreduce). -

-
- Setup -

- The code from this guide is included in the Avro docs under - examples/mr-example. The example is set up as a Maven project - that includes the necessary Avro and MapReduce dependencies and the Avro - Maven plugin for code generation, so no external jars are needed to run - the example. In particular, the POM includes the following dependencies: -

- -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>&AvroVersion;</version> -</dependency> -<dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-mapred</artifactId> - <version>&AvroVersion;</version> -</dependency> -<dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>3.1.2</version> -</dependency> - -

- And the following plugin: -

- -<plugin> - <groupId>org.apache.avro</groupId> - <artifactId>avro-maven-plugin</artifactId> - <version>&AvroVersion;</version> - <executions> - <execution> - <phase>generate-sources</phase> - <goals> - <goal>schema</goal> - </goals> - <configuration> - <sourceDirectory>${project.basedir}/../</sourceDirectory> - <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory> - </configuration> - </execution> - </executions> -</plugin> - -

- If you do not configure the sourceDirectory and outputDirectory - properties, the defaults will be used. The sourceDirectory property - defaults to src/main/avro. The outputDirectory property - defaults to target/generated-sources. You can change the paths to - match your project layout. -

-

- Alternatively, Avro jars can be downloaded directly from the Apache Avro™ - Releases page. The relevant Avro jars for this guide are - avro-&AvroVersion;.jar and - avro-mapred-&AvroVersion;.jar, as well as - avro-tools-&AvroVersion;.jar for code generation and viewing - Avro data files as JSON. In addition, you will need to install Hadoop - in order to use MapReduce. -

-
- -
- Example: ColorCount -

- Below is a simple example of a MapReduce that uses Avro. There is an example - for both the old (org.apache.hadoop.mapred) and new - (org.apache.hadoop.mapreduce) APIs under - examples/mr-example/src/main/java/example/. MapredColorCount - is the example for the older mapred API while MapReduceColorCount is - the example for the newer mapreduce API. Both examples are below, but - we will detail the mapred API in our subsequent examples. -

- -

MapredColorCount:

- -package example; - -import java.io.IOException; - -import org.apache.avro.*; -import org.apache.avro.Schema.Type; -import org.apache.avro.mapred.*; -import org.apache.hadoop.conf.*; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.*; -import org.apache.hadoop.util.*; - -import example.avro.User; - -public class MapredColorCount extends Configured implements Tool { - - public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> { - @Override - public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter) - throws IOException { - CharSequence color = user.getFavoriteColor(); - // We need this check because the User.favorite_color field has type ["string", "null"] - if (color == null) { - color = "none"; - } - collector.collect(new Pair<CharSequence, Integer>(color, 1)); - } - } - - public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, - Pair<CharSequence, Integer>> { - @Override - public void reduce(CharSequence key, Iterable<Integer> values, - AvroCollector<Pair<CharSequence, Integer>> collector, - Reporter reporter) - throws IOException { - int sum = 0; - for (Integer value : values) { - sum += value; - } - collector.collect(new Pair<CharSequence, Integer>(key, sum)); - } - } - - public int run(String[] args) throws Exception { - if (args.length != 2) { - System.err.println("Usage: MapredColorCount <input path> <output path>"); - return -1; - } - - JobConf conf = new JobConf(getConf(), MapredColorCount.class); - conf.setJobName("colorcount"); - - FileInputFormat.setInputPaths(conf, new Path(args[0])); - FileOutputFormat.setOutputPath(conf, new Path(args[1])); - - AvroJob.setMapperClass(conf, ColorCountMapper.class); - AvroJob.setReducerClass(conf, ColorCountReducer.class); - - // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set - // relevant config options such as input/output format, map output - // classes, and output key class. 
- AvroJob.setInputSchema(conf, User.getClassSchema()); - AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), - Schema.create(Type.INT))); - - JobClient.runJob(conf); - return 0; - } - - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); - System.exit(res); - } -} - - -

MapReduceColorCount:

- -package example; - -import java.io.IOException; - -import org.apache.avro.Schema; -import org.apache.avro.mapred.AvroKey; -import org.apache.avro.mapred.AvroValue; -import org.apache.avro.mapreduce.AvroJob; -import org.apache.avro.mapreduce.AvroKeyInputFormat; -import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -import example.avro.User; - -public class MapReduceColorCount extends Configured implements Tool { - - public static class ColorCountMapper extends - Mapper<AvroKey<User>, NullWritable, Text, IntWritable> { - - @Override - public void map(AvroKey<User> key, NullWritable value, Context context) - throws IOException, InterruptedException { - - CharSequence color = key.datum().getFavoriteColor(); - if (color == null) { - color = "none"; - } - context.write(new Text(color.toString()), new IntWritable(1)); - } - } - - public static class ColorCountReducer extends - Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { - - @Override - public void reduce(Text key, Iterable<IntWritable> values, - Context context) throws IOException, InterruptedException { - - int sum = 0; - for (IntWritable value : values) { - sum += value.get(); - } - context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); - } - } - - public int run(String[] args) throws Exception { - if (args.length != 2) { - System.err.println("Usage: MapReduceColorCount <input path> <output path>"); - return -1; - } - - Job 
job = new Job(getConf()); - job.setJarByClass(MapReduceColorCount.class); - job.setJobName("Color Count"); - - FileInputFormat.setInputPaths(job, new Path(args[0])); - FileOutputFormat.setOutputPath(job, new Path(args[1])); - - job.setInputFormatClass(AvroKeyInputFormat.class); - job.setMapperClass(ColorCountMapper.class); - AvroJob.setInputKeySchema(job, User.getClassSchema()); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(IntWritable.class); - - job.setOutputFormatClass(AvroKeyValueOutputFormat.class); - job.setReducerClass(ColorCountReducer.class); - AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); - AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); - - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new MapReduceColorCount(), args); - System.exit(res); - } -} - - -

- ColorCount reads in data files containing User records, - defined in examples/user.avsc, and counts the number of - instances of each favorite color. (This example draws inspiration from - the canonical WordCount MapReduce application.) This example uses the - old MapReduce API. See MapReduceAvroWordCount, found under - doc/examples/mr-example/src/main/java/example/ to see the new MapReduce - API example. The User - schema is defined as follows: -

- -{"namespace": "example.avro", - "type": "record", - "name": "User", - "fields": [ - {"name": "name", "type": "string"}, - {"name": "favorite_number", "type": ["int", "null"]}, - {"name": "favorite_color", "type": ["string", "null"]} - ] -} - -

- This schema is compiled into the User class used by - ColorCount via the Avro Maven plugin (see - examples/mr-example/pom.xml for how this is set up). -

-

- ColorCountMapper essentially takes a User as input and - extracts the User's favorite color, emitting the key-value - pair <favoriteColor, 1>. - ColorCountReducer then adds up how many occurrences of a particular - favorite color were emitted, and outputs the result as a - Pair record. These Pairs are serialized to an - Avro data file. -

-
- Running ColorCount -

- The ColorCount application is provided as a Maven project in the Avro - docs under examples/mr-example. To build the project, - including the code generation of the User schema, run: -

- -mvn compile - -

- Next, run GenerateData from examples/mr-example to create an Avro data - file, input/users.avro, containing 20 Users with - favorite colors chosen randomly from a list: -

- -mvn exec:java -q -Dexec.mainClass=example.GenerateData - -

- Besides creating the data file, GenerateData prints the JSON - representations of the Users generated to stdout, for example: -

- -{"name": "user", "favorite_number": null, "favorite_color": "red"} -{"name": "user", "favorite_number": null, "favorite_color": "green"} -{"name": "user", "favorite_number": null, "favorite_color": "purple"} -{"name": "user", "favorite_number": null, "favorite_color": null} -... - -

- Now we're ready to run ColorCount. We specify our freshly-generated - input folder as the input path and output as our - output folder (note that MapReduce will not start a job if the output - folder already exists): -

- -mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" - -

- Once ColorCount completes, checking the contents of the new - output directory should yield the following: -

- -$ ls output/ -part-00000.avro _SUCCESS - -

- You can check the contents of the generated Avro file using the avro-tools jar: -

- -$ java -jar /path/to/avro-tools-&AvroVersion;.jar tojson output/part-00000.avro -{"value": 3, "key": "blue"} -{"value": 7, "key": "green"} -{"value": 1, "key": "none"} -{"value": 2, "key": "orange"} -{"value": 3, "key": "purple"} -{"value": 2, "key": "red"} -{"value": 2, "key": "yellow"} - -
-
-

Now let's go over the ColorCount example in detail.

-
- Mapper - org.apache.hadoop.mapred API -

- The easiest way to use Avro data files as input to a MapReduce job is to - subclass AvroMapper. An AvroMapper defines a - map function that takes an Avro datum as input and outputs a key/value - pair represented as a Pair record. In the ColorCount - example, ColorCountMapper is an AvroMapper - that takes a User as input and outputs a - Pair<CharSequence, Integer>, where the - CharSequence key is the user's favorite color and the - Integer value is 1. -

- -public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> { - @Override - public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter) - throws IOException { - CharSequence color = user.getFavoriteColor(); - // We need this check because the User.favorite_color field has type ["string", "null"] - if (color == null) { - color = "none"; - } - collector.collect(new Pair<CharSequence, Integer>(color, 1)); - } -} - -

- In order to use our AvroMapper, we must call - AvroJob.setMapperClass and - AvroJob.setInputSchema. -

- -AvroJob.setMapperClass(conf, ColorCountMapper.class); -AvroJob.setInputSchema(conf, User.getClassSchema()); - -

- Note that AvroMapper does not implement the - Mapper interface. Under the hood, the specified Avro data - files are deserialized into AvroWrappers containing the - actual data, which are processed by a Mapper that calls the - configured AvroMapper's map function. - AvroJob.setInputSchema sets up the relevant configuration - parameters needed to make this happen, thus you should not need to call - JobConf.setMapperClass, - JobConf.setInputFormat, - JobConf.setMapOutputKeyClass, - JobConf.setMapOutputValueClass, or - JobConf.setOutputKeyComparatorClass. -

-
-
- Mapper - org.apache.hadoop.mapreduce API -

- This document will not go into all the differences between the mapred and mapreduce APIs, - but it will describe the main differences. As you can see, ColorCountMapper is now a - subclass of the Hadoop Mapper class and is passed an AvroKey as its key. - - Additionally, the AvroJob method calls were slightly changed. -

- - public static class ColorCountMapper extends - Mapper<AvroKey<User>, NullWritable, Text, IntWritable> { - - @Override - public void map(AvroKey<User> key, NullWritable value, Context context) - throws IOException, InterruptedException { - - CharSequence color = key.datum().getFavoriteColor(); - if (color == null) { - color = "none"; - } - context.write(new Text(color.toString()), new IntWritable(1)); - } - } - -
-
- Reducer - org.apache.hadoop.mapred API -

- Analogously to AvroMapper, an AvroReducer - defines a reducer function that takes the key/value types output by an - AvroMapper (or any mapper that outputs Pairs) - and outputs a key/value pair represented as a Pair record. In - the ColorCount example, ColorCountReducer is an - AvroReducer that takes the CharSequence key - representing a favorite color and the Iterable<Integer> - representing the counts for that color (they should all be 1 in this - example) and adds up the counts. -

- -public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, - Pair<CharSequence, Integer>> { - @Override - public void reduce(CharSequence key, Iterable<Integer> values, - AvroCollector<Pair<CharSequence, Integer>> collector, - Reporter reporter) - throws IOException { - int sum = 0; - for (Integer value : values) { - sum += value; - } - collector.collect(new Pair<CharSequence, Integer>(key, sum)); - } -} - -

- In order to use our AvroReducer, we must call - AvroJob.setReducerClass and - AvroJob.setOutputSchema. -

- -AvroJob.setReducerClass(conf, ColorCountReducer.class); -AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), - Schema.create(Type.INT))); - -

- Note that AvroReducer does not implement the - Reducer interface. The intermediate Pairs - output by the mapper are split into AvroKeys and - AvroValues, which are processed by a Reducer - that calls the configured AvroReducer's reduce function. - AvroJob.setOutputSchema sets up the relevant configuration - parameters needed to make this happen, thus you should not need to call - JobConf.setReducerClass, - JobConf.setOutputFormat, - JobConf.setOutputKeyClass, - JobConf.setMapOutputKeyClass, - JobConf.setMapOutputValueClass, or - JobConf.setOutputKeyComparatorClass. -

-
-
- Reduce - org.apache.hadoop.mapreduce API -

- As before, we will not detail every difference between the APIs. As with the Mapper - change, ColorCountReducer is now a subclass of Reducer, and AvroKey and AvroValue - are emitted. - - Additionally, the AvroJob method calls were slightly changed. -

- - public static class ColorCountReducer extends - Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> { - - @Override - public void reduce(Text key, Iterable<IntWritable> values, - Context context) throws IOException, InterruptedException { - - int sum = 0; - for (IntWritable value : values) { - sum += value.get(); - } - context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum)); - } - } - -
-
- Learning more -

- The mapred API allows users to mix Avro AvroMappers and - AvroReducers with non-Avro Mappers and - Reducers and the mapreduce API allows users to input Avro - and output non-Avro or vice versa. -

- -

- The mapred package has API - org.apache.avro.mapred documentation as does the - org.apache.avro.mapreduce package. - MapReduce API (org.apache.hadoop.mapreduce). Similarly to the mapreduce package, - it's possible with the mapred API to implement your own Mappers and - Reducers directly using the public classes provided in - these libraries. See the AvroWordCount application, found under - examples/mr-example/src/main/java/example/AvroWordCount.java in - the Avro documentation, for an example of implementing a - Reducer that outputs Avro data using the old MapReduce API. - See the MapReduceAvroWordCount application, found under - examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java in - the Avro documentation, for an example of implementing a - Reducer that outputs Avro data using the new MapReduce API. -

-
- -
diff --git a/doc/src/content/xdocs/sasl.xml b/doc/src/content/xdocs/sasl.xml deleted file mode 100644 index 514cca5aff1..00000000000 --- a/doc/src/content/xdocs/sasl.xml +++ /dev/null @@ -1,152 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; SASL Profile -
- -
- Introduction -

SASL (RFC 2222) - provides a framework for authentication and security of network - protocols. Each protocol that uses SASL is meant to define a - SASL profile. This document provides a SASL profile - for connection-based Avro RPC.

-
- -
- Overview -

SASL negotiation proceeds as a series of message interactions - over a connection between a client and server using a selected - SASL mechanism. The client starts this negotiation by - sending its chosen mechanism name with an initial (possibly - empty) message. Negotiation proceeds with the exchange of - messages until either side indicates success or failure. The - content of the messages is mechanism-specific. If the - negotiation succeeds, then the session can proceed over the - connection, otherwise it must be abandoned.

-

Some mechanisms continue to process session data after - negotiation (e.g., encrypting it), while some specify that - further session data is transmitted unmodified.

-
- -
- Negotiation -
- Commands -

Avro SASL negotiation uses four one-byte commands.

-
    -
  • 0: START Used in a client's initial message.
  • -
  • 1: CONTINUE Used while negotiation is ongoing.
  • -
  • 2: FAIL Terminates negotiation unsuccessfully.
  • -
  • 3: COMPLETE Terminates negotiation successfully.
  • -
- -

The format of a START message is:

- | 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data | - -

The format of a CONTINUE message is:

- | 1 | 4-byte payload length | payload data | - -

The format of a FAIL message is:

- | 2 | 4-byte message length | UTF-8 message | - -

The format of a COMPLETE message is:

- | 3 | 4-byte payload length | payload data | -
- -
- Process -

Negotiation is initiated by a client sending a START command - containing the client's chosen mechanism name and any - mechanism-specific payload data.

- -

The server and client then interchange some number - (possibly zero) of CONTINUE messages. Each message contains - payload data that is processed by the security mechanism to - generate the next message.

- -

Once either the client or server send a FAIL message then - negotiation has failed. UTF-8-encoded text is included in - the failure message. Once either a FAIL message has been - sent or received, or any other error occurs in the - negotiation, further communication on this connection must - cease.

- -

Once either the client or server send a COMPLETE message - then negotiation has completed successfully. Session data - may now be transmitted over the connection until it is - closed by either side.

-
- -
- -
- Session Data -

If no SASL QOP (quality of protection) is negotiated, then - all subsequent writes to/reads over this connection are - written/read unmodified. In particular, messages use - Avro framing, and are - of the form:

- | 4-byte frame length | frame data | ... | 4 zero bytes | -

If a SASL QOP is negotiated, then it must be used by the - connection for all subsequent messages. This is done by - wrapping each non-empty frame written using the security - mechanism and unwrapping each non-empty frame read. The - length written in each non-empty frame is the length of the - wrapped data. Complete frames must be passed to the security - mechanism for unwrapping. Unwrapped data is then passed to - the application as the content of the frame.

-

If at any point processing fails due to wrapping, unwrapping - or framing errors, then all further communication on this - connection must cease.

-
- -
- Anonymous Mechanism -

The SASL anonymous mechanism - (RFC 2245) is - quite simple to implement. In particular, an initial anonymous - request may be prefixed by the following static sequence:

- | 0 | 0009 | ANONYMOUS | 0000 | -

If a server uses the anonymous mechanism, it should check - that the mechanism name in the start message prefixing the first - request received is 'ANONYMOUS', then simply prefix its initial - response with a COMPLETE message of:

- | 3 | 0000 | -

If an anonymous server receives some other mechanism name, - then it may respond with a FAIL message as simple as:

- | 2 | 0000 | -

Note that the anonymous mechanism need add no additional - round-trip messages between client and server. The START - message can be piggybacked on the initial request and the - COMPLETE or FAIL message can be piggybacked on the initial - response.

-
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/site.xml b/doc/src/content/xdocs/site.xml deleted file mode 100644 index d3dcbb9435c..00000000000 --- a/doc/src/content/xdocs/site.xml +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/doc/src/content/xdocs/spec.xml b/doc/src/content/xdocs/spec.xml deleted file mode 100644 index 09a9d353d12..00000000000 --- a/doc/src/content/xdocs/spec.xml +++ /dev/null @@ -1,1624 +0,0 @@ - - - - %avro-entities; -]> - -
- Apache Avro™ &AvroVersion; Specification -
- - -
- Introduction - -

This document defines Apache Avro. It is intended to be the - authoritative specification. Implementations of Avro must - adhere to this document. -

- -
- -
- Schema Declaration -

A Schema is represented in JSON by one of:

-
    -
  • A JSON string, naming a defined type.
  • - -
  • A JSON object, of the form: - - {"type": "typeName" ...attributes...} - - where typeName is either a primitive or derived - type name, as defined below. Attributes not defined in this - document are permitted as metadata, but must not affect - the format of serialized data. -
  • -
  • A JSON array, representing a union of embedded types.
  • -
- -
- Primitive Types -

The set of primitive type names is:

-
    -
  • null: no value
  • -
  • boolean: a binary value
  • -
  • int: 32-bit signed integer
  • -
  • long: 64-bit signed integer
  • -
  • float: single precision (32-bit) IEEE 754 floating-point number
  • -
  • double: double precision (64-bit) IEEE 754 floating-point number
  • -
  • bytes: sequence of 8-bit unsigned bytes
  • -
  • string: unicode character sequence
  • -
- -

Primitive types have no specified attributes.

- -

Primitive type names are also defined type names. Thus, for - example, the schema "string" is equivalent to:

- - {"type": "string"} - -
- -
- Complex Types - -

Avro supports six kinds of complex types: records, enums, - arrays, maps, unions and fixed.

- -
- Records - -

Records use the type name "record" and support the following attributes:

-
    -
  • name: a JSON string providing the name - of the record (required).
  • -
  • namespace, a JSON string that qualifies the name;
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • aliases: a JSON array of strings, providing - alternate names for this record (optional).
  • -
  • fields: a JSON array, listing fields (required). - Each field is a JSON object with the following attributes: -
      -
    • name: a JSON string providing the name - of the field (required), and
    • -
    • doc: a JSON string describing this field - for users (optional).
    • -
    • type: a schema, as defined above
    • -
    • default: A default value for this - field, only used when reading instances that lack - the field for schema evolution purposes. The - presence of a default value does not make the - field optional at encoding time. Permitted values - depend on the field's schema type, according to the - table below. Default values for union fields correspond - to the first schema in the union. Default values for bytes - and fixed fields are JSON strings, where Unicode - code points 0-255 are mapped to unsigned 8-bit byte - values 0-255. Avro encodes a field even if its - value is equal to its default. - - - - - - - - - - - - - - -
      field default values
      avro typejson typeexample
      nullnullnull
      booleanbooleantrue
      int,longinteger1
      float,doublenumber1.1
      bytesstring"\u00FF"
      stringstring"foo"
      recordobject{"a": 1}
      enumstring"FOO"
      arrayarray[1]
      mapobject{"a": 1}
      fixedstring"\u00ff"
      -
    • -
    • order: specifies how this field - impacts sort ordering of this record (optional). - Valid values are "ascending" (the default), - "descending", or "ignore". For more details on how - this is used, see the sort - order section below.
    • -
    • aliases: a JSON array of strings, providing - alternate names for this field (optional).
    • -
    -
  • -
- -

For example, a linked-list of 64-bit values may be defined with:

- -{ - "type": "record", - "name": "LongList", - "aliases": ["LinkedLongs"], // old name for this - "fields" : [ - {"name": "value", "type": "long"}, // each element has a long - {"name": "next", "type": ["null", "LongList"]} // optional next element - ] -} - -
- -
- Enums - -

Enums use the type name "enum" and support the following - attributes:

-
    -
  • name: a JSON string providing the name - of the enum (required).
  • -
  • namespace, a JSON string that qualifies the name;
  • -
  • aliases: a JSON array of strings, providing - alternate names for this enum (optional).
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • symbols: a JSON array, listing symbols, - as JSON strings (required). All symbols in an enum must - be unique; duplicates are prohibited. Every symbol must - match the regular expression [A-Za-z_][A-Za-z0-9_]* - (the same requirement as for names).
  • -
  • default: A default value for this - enumeration, used during resolution when the reader - encounters a symbol from the writer that isn't defined - in the reader's schema (optional). The value provided - here must be a JSON string that's a member of - the symbols array. - See documentation on schema resolution for how this gets - used.
  • -
-

For example, playing card suits might be defined with:

- -{ - "type": "enum", - "name": "Suit", - "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] -} - -
- -
- Arrays -

Arrays use the type name "array" and support - a single attribute:

-
    -
  • items: the schema of the array's items.
  • -
-

For example, an array of strings is declared - with:

- -{ - "type": "array", - "items" : "string", - "default": [] -} - -
- -
- Maps -

Maps use the type name "map" and support - one attribute:

-
    -
  • values: the schema of the map's values.
  • -
-

Map keys are assumed to be strings.

-

For example, a map from string to long is declared - with:

- -{ - "type": "map", - "values" : "long", - "default": {} -} - -
- -
- Unions -

Unions, as mentioned above, are represented using JSON - arrays. For example, ["null", "string"] - declares a schema which may be either a null or string.

-

(Note that when a default - value is specified for a record field whose type is a - union, the type of the default value must match the - first element of the union. Thus, for unions - containing "null", the "null" is usually listed first, since - the default value of such unions is typically null.)

-

Unions may not contain more than one schema with the same - type, except for the named types record, fixed and enum. For - example, unions containing two array types or two map types - are not permitted, but two types with different names are - permitted. (Names permit efficient resolution when reading - and writing unions.)

-

Unions may not immediately contain other unions.

-
- -
- Fixed -

Fixed uses the type name "fixed" and supports - the following attributes:

-
    -
  • name: a string naming this fixed (required).
  • -
  • namespace, a string that qualifies the name;
  • -
  • aliases: a JSON array of strings, providing - alternate names for this fixed (optional).
  • -
  • doc: a JSON string providing documentation to the - user of this schema (optional).
  • -
  • size: an integer, specifying the number - of bytes per value (required).
  • -
-

For example, a 16-byte quantity may be declared with:

- {"type": "fixed", "size": 16, "name": "md5"} -
- - -
- -
- Names -

Record, enums and fixed are named types. Each has - a fullname that is composed of two parts; - a name and a namespace. Equality of names - is defined on the fullname.

-

The name portion of a fullname, record field names, and - enum symbols must:

-
    -
  • start with [A-Za-z_]
  • -
  • subsequently contain only [A-Za-z0-9_]
  • -
-

A namespace is a dot-separated sequence of such names. - The empty string may also be used as a namespace to indicate the - null namespace. - Equality of names (including field names and enum symbols) - as well as fullnames is case-sensitive.

-

The null namespace may not be used in a dot-separated - sequence of names. So the grammar for a namespace - is:

-

  <empty> | <name>[(<dot><name>)*]

-

In record, enum and fixed definitions, the fullname is - determined in one of the following ways:

-
    -
  • A name and namespace are both specified. For example, - one might use "name": "X", "namespace": - "org.foo" to indicate the - fullname org.foo.X.
  • -
  • A fullname is specified. If the name specified contains - a dot, then it is assumed to be a fullname, and any - namespace also specified is ignored. For example, - use "name": "org.foo.X" to indicate the - fullname org.foo.X.
  • -
  • A name only is specified, i.e., a name that contains no - dots. In this case the namespace is taken from the most - tightly enclosing schema or protocol. For example, - if "name": "X" is specified, and this occurs - within a field of the record definition - of org.foo.Y, then the fullname - is org.foo.X. If there is no enclosing - namespace then the null namespace is used.
  • -
-

References to previously defined names are as in the latter - two cases above: if they contain a dot they are a fullname, if - they do not contain a dot, the namespace is the namespace of - the enclosing definition.

-

Primitive type names have no namespace and their names may - not be defined in any namespace.

-

A schema or protocol may not contain multiple definitions - of a fullname. Further, a name must be defined before it is - used ("before" in the depth-first, left-to-right traversal of - the JSON parse tree, where the types attribute of - a protocol is always deemed to come "before" the - messages attribute.) -

-
- -
- Aliases -

Named types and fields may have aliases. An implementation - may optionally use aliases to map a writer's schema to the - reader's. This facilitates both schema evolution as well as - processing disparate datasets.

-

Aliases function by re-writing the writer's schema using - aliases from the reader's schema. For example, if the - writer's schema was named "Foo" and the reader's schema is - named "Bar" and has an alias of "Foo", then the implementation - would act as though "Foo" were named "Bar" when reading. - Similarly, if data was written as a record with a field named - "x" and is read as a record with a field named "y" with alias - "x", then the implementation would act as though "x" were - named "y" when reading.

-

A type alias may be specified either as a fully - namespace-qualified, or relative to the namespace of the name - it is an alias for. For example, if a type named "a.b" has - aliases of "c" and "x.y", then the fully qualified names of - its aliases are "a.c" and "x.y".

-
- -
- -
- Data Serialization and Deserialization - -

Binary encoded Avro data does not include type information or - field names. The benefit is that the serialized data is small, but - as a result a schema must always be used in order to read Avro data - correctly. The best way to ensure that the schema is structurally - identical to the one used to write the data is to use the exact same - schema.

- -

Therefore, files or systems that store Avro data should always - include the writer's schema for that data. Avro-based remote procedure - call (RPC) systems must also guarantee that remote recipients of data - have a copy of the schema used to write that data. In general, it is - advisable that any reader of Avro data should use a schema that is - the same (as defined more fully in - Parsing Canonical Form for - Schemas) as the schema that was used to write the data in order to - deserialize it correctly. Deserializing data into a newer schema is - accomplished by specifying an additional schema, the results of which are - described in Schema Resolution.

- -

In general, both serialization and deserialization proceed as a - depth-first, left-to-right traversal of the schema, serializing or - deserializing primitive types as they are encountered. Therefore, it is - possible, though not advisable, to read Avro data with a schema that - does not have the same Parsing Canonical Form as the schema with which - the data was written. In order for this to work, the serialized primitive - values must be compatible, in order value by value, with the items in the - deserialization schema. For example, int and long are always serialized - the same way, so an int could be deserialized as a long. Since the - compatibility of two schemas depends on both the data and the - serialization format (eg. binary is more permissive than JSON because JSON - includes field names, eg. a long that is too large will overflow an int), - it is simpler and more reliable to use schemas with identical Parsing - Canonical Form.

- -
- Encodings -

Avro specifies two serialization encodings: binary and - JSON. Most applications will use the binary encoding, as it - is smaller and faster. But, for debugging and web-based - applications, the JSON encoding may sometimes be - appropriate.

-
- -
- Binary Encoding -

Binary encoding does not include field names, self-contained - information about the types of individual bytes, nor field or - record separators. Therefore readers are wholly reliant on - the schema used when the data was encoded.

- -
- Primitive Types -

Primitive types are encoded in binary as follows:

-
    -
  • null is written as zero bytes.
  • -
  • a boolean is written as a single byte whose - value is either 0 (false) or 1 - (true).
  • -
  • int and long values are written - using variable-length - zig-zag coding. Some examples: - - - - - - - - - - - -
    valuehex
    000
    -101
    102
    -203
    204
    ...
    -647f
    64 80 01
    ...
    -
  • -
  • a float is written as 4 bytes. The float is - converted into a 32-bit integer using a method equivalent - to Java's floatToIntBits and then encoded - in little-endian format.
  • -
  • a double is written as 8 bytes. The double - is converted into a 64-bit integer using a method equivalent - to Java's - doubleToLongBits and then encoded in little-endian - format.
  • -
  • bytes are encoded as - a long followed by that many bytes of data. -
  • -
  • a string is encoded as - a long followed by that many bytes of UTF-8 - encoded character data. -

    For example, the three-character string "foo" would - be encoded as the long value 3 (encoded as - hex 06) followed by the UTF-8 encoding of - 'f', 'o', and 'o' (the hex bytes 66 6f - 6f): -

    - 06 66 6f 6f -
  • -
- -
- - -
- Complex Types -

Complex types are encoded in binary as follows:

- -
- Records -

A record is encoded by encoding the values of its - fields in the order that they are declared. In other - words, a record is encoded as just the concatenation of - the encodings of its fields. Field values are encoded per - their schema.

-

For example, the record schema

- - { - "type": "record", - "name": "test", - "fields" : [ - {"name": "a", "type": "long"}, - {"name": "b", "type": "string"} - ] - } - -

An instance of this record whose a field has - value 27 (encoded as hex 36) and - whose b field has value "foo" (encoded as hex - bytes 06 66 6f 6f), would be encoded simply - as the concatenation of these, namely the hex byte - sequence:

- 36 06 66 6f 6f -
- -
- Enums -

An enum is encoded by an int, representing - the zero-based position of the symbol in the schema.

-

For example, consider the enum:

- - {"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } - -

This would be encoded by an int between - zero and three, with zero indicating "A", and 3 indicating - "D".

-
- - -
- Arrays -

Arrays are encoded as a series of blocks. - Each block consists of a long count - value, followed by that many array items. A block with - count zero indicates the end of the array. Each item is - encoded per the array's item schema.

- -

If a block's count is negative, its absolute value is used, - and the count is followed immediately by a long - block size indicating the number of bytes in the - block. This block size permits fast skipping through data, - e.g., when projecting a record to a subset of its fields.

- -

For example, the array schema

- {"type": "array", "items": "long"} -

an array containing the items 3 and 27 could be encoded - as the long value 2 (encoded as hex 04) followed by long - values 3 and 27 (encoded as hex 06 36) - terminated by zero:

- 04 06 36 00 - -

The blocked representation permits one to read and write - arrays larger than can be buffered in memory, since one can - start writing items without knowing the full length of the - array.

- -
- -
- Maps -

Maps are encoded as a series of blocks. Each - block consists of a long count - value, followed by that many key/value pairs. A block - with count zero indicates the end of the map. Each item - is encoded per the map's value schema.

- -

If a block's count is negative, its absolute value is used, - and the count is followed immediately by a long - block size indicating the number of bytes in the - block. This block size permits fast skipping through data, - e.g., when projecting a record to a subset of its fields.

- -

The blocked representation permits one to read and write - maps larger than can be buffered in memory, since one can - start writing items without knowing the full length of the - map.

- -
- -
- Unions -

A union is encoded by first writing an int - value indicating the zero-based position within the - union of the schema of its value. The value is then - encoded per the indicated schema within the union.

-

For example, the union - schema ["null","string"] would encode:

-
    -
  • null as zero (the index of "null" in the union): - 00
  • -
  • the string "a" as one (the index of - "string" in the union, encoded as hex 02), - followed by the serialized string: - 02 02 61
  • -
-

NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. - In reality, we don't expect unions with 215M members

-
- -
- Fixed -

Fixed instances are encoded using the number of bytes - declared in the schema.

-
- -
- -
- -
- JSON Encoding - -

Except for unions, the JSON encoding is the same as is used - to encode field default - values.

- -

The value of a union is encoded in JSON as follows:

- -
    -
  • if its type is null, then it is encoded as - a JSON null;
  • -
  • otherwise it is encoded as a JSON object with one - name/value pair whose name is the type's name and whose - value is the recursively encoded value. For Avro's named - types (record, fixed or enum) the user-specified name is - used, for other types the type name is used.
  • -
- -

For example, the union - schema ["null","string","Foo"], where Foo is a - record name, would encode:

-
    -
  • null as null;
  • -
  • the string "a" as - {"string": "a"}; and
  • -
  • a Foo instance as {"Foo": {...}}, - where {...} indicates the JSON encoding of a - Foo instance.
  • -
- -

Note that the original schema is still required to correctly - process JSON-encoded data. For example, the JSON encoding does not - distinguish between int - and long, float - and double, records and maps, enums and strings, - etc.

- -
- -
- Single-object encoding - -

In some situations a single Avro serialized object is to be stored for a - longer period of time. One very common example is storing Avro records - for several weeks in an Apache Kafka topic.

-

In the period after a schema change this persistence system will contain records - that have been written with different schemas. So the need arises to know which schema - was used to write a record to support schema evolution correctly. - In most cases the schema itself is too large to include in the message, - so this binary wrapper format supports the use case more effectively.

- -
- Single object encoding specification -

Single Avro objects are encoded as follows:

-
    -
  1. A two-byte marker, C3 01, to show that the message is Avro and uses this single-record format (version 1).
  2. -
  3. The 8-byte little-endian CRC-64-AVRO fingerprint of the object's schema
  4. -
  5. The Avro object encoded using Avro's binary encoding
  6. -
-
- -

Implementations use the 2-byte marker to determine whether a payload is Avro. - This check helps avoid expensive lookups that resolve the schema from a - fingerprint, when the message is not an encoded Avro payload.

- -
- -
- -
- Sort Order - -

Avro defines a standard sort order for data. This permits - data written by one system to be efficiently sorted by another - system. This can be an important optimization, as sort order - comparisons are sometimes the most frequent per-object - operation. Note also that Avro binary-encoded data can be - efficiently ordered without deserializing it to objects.

- -

Data items may only be compared if they have identical - schemas. Pairwise comparisons are implemented recursively - with a depth-first, left-to-right traversal of the schema. - The first mismatch encountered determines the order of the - items.

- -

Two items with the same schema are compared according to the - following rules.

-
    -
  • null data is always equal.
  • -
  • boolean data is ordered with false before true.
  • -
  • int, long, float - and double data is ordered by ascending numeric - value.
  • -
  • bytes and fixed data are - compared lexicographically by unsigned 8-bit values.
  • -
  • string data is compared lexicographically by - Unicode code point. Note that since UTF-8 is used as the - binary encoding for strings, sorting of bytes and string - binary data is identical.
  • -
  • array data is compared lexicographically by - element.
  • -
  • enum data is ordered by the symbol's position - in the enum schema. For example, an enum whose symbols are - ["z", "a"] would sort "z" values - before "a" values.
  • -
  • union data is first ordered by the branch - within the union, and, within that, by the type of the - branch. For example, an ["int", "string"] - union would order all int values before all string values, - with the ints and strings themselves ordered as defined - above.
  • -
  • record data is ordered lexicographically by - field. If a field specifies that its order is: -
      -
    • "ascending", then the order of its values - is unaltered.
    • -
    • "descending", then the order of its values - is reversed.
    • -
    • "ignore", then its values are ignored - when sorting.
    • -
    -
  • -
  • map data may not be compared. It is an error - to attempt to compare data containing maps unless those maps - are in an "order":"ignore" record field. -
  • -
-
- -
- Object Container Files -

Avro includes a simple object container file format. A file - has a schema, and all objects stored in the file must be written - according to that schema, using binary encoding. Objects are - stored in blocks that may be compressed. Synchronization markers - are used between blocks to permit efficient splitting of files - for MapReduce processing.

- -

Files may include arbitrary user-specified metadata.

- -

A file consists of:

-
    -
  • A file header, followed by
  • -
  • one or more file data blocks.
  • -
- -

A file header consists of:

-
    -
  • Four bytes, ASCII 'O', 'b', 'j', followed by 1.
  • -
  • file metadata, including the schema.
  • -
  • The 16-byte, randomly-generated sync marker for this file.
  • -
- -

File metadata is written as if defined by the following map schema:

- {"type": "map", "values": "bytes"} - -

All metadata properties that start with "avro." are reserved. - The following file metadata properties are currently used:

-
    -
  • avro.schema contains the schema of objects - stored in the file, as JSON data (required).
  • -
  • avro.codec the name of the compression codec - used to compress blocks, as a string. Implementations - are required to support the following codecs: "null" and "deflate". - If codec is absent, it is assumed to be "null". The codecs - are described with more detail below.
  • -
- -

A file header is thus described by the following schema:

- -{"type": "record", "name": "org.apache.avro.file.Header", - "fields" : [ - {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, - {"name": "meta", "type": {"type": "map", "values": "bytes"}}, - {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}, - ] -} - - -

A file data block consists of:

-
    -
  • A long indicating the count of objects in this block.
  • -
  • A long indicating the size in bytes of the serialized objects - in the current block, after any codec is applied
  • -
  • The serialized objects. If a codec is specified, this is - compressed by that codec.
  • -
  • The file's 16-byte sync marker.
  • -
-

Thus, each block's binary data can be efficiently extracted or skipped without - deserializing the contents. The combination of block size, object counts, and - sync markers enables detection of corrupt blocks and helps ensure data integrity.

-
- Required Codecs -
- null -

The "null" codec simply passes through data uncompressed.

-
- -
- deflate -

The "deflate" codec writes the data block using the - deflate algorithm as specified in - RFC 1951, - and typically implemented using the zlib library. Note that this - format (unlike the "zlib format" in RFC 1950) does not have a - checksum. -

-
-
-
- Optional Codecs -
- bzip2 -

The "bzip2" codec uses the bzip2 - compression library.

-
- -
- snappy -

The "snappy" codec uses - Google's Snappy - compression library. Each compressed block is followed - by the 4-byte, big-endian CRC32 checksum of the - uncompressed data in the block.

-
- -
- xz -

The "xz" codec uses the XZ - compression library.

-
- -
- zstandard -

The "zstandard" codec uses - Facebook's Zstandard - compression library.

-
-
-
- -
- Protocol Declaration -

Avro protocols describe RPC interfaces. Like schemas, they are - defined with JSON text.

- -

A protocol is a JSON object with the following attributes:

-
    -
  • protocol, a string, the name of the protocol - (required);
  • -
  • namespace, an optional string that qualifies the name;
  • -
  • doc, an optional string describing this protocol;
  • -
  • types, an optional list of definitions of named types - (records, enums, fixed and errors). An error definition is - just like a record definition except it uses "error" instead - of "record". Note that forward references to named types - are not permitted.
  • -
  • messages, an optional JSON object whose keys are - message names and whose values are objects whose attributes - are described below. No two messages may have the same - name.
  • -
-

The name and namespace qualification rules defined for schema objects - apply to protocols as well.

- -
- Messages -

A message has attributes:

-
    -
  • a doc, an optional description of the message,
  • -
  • a request, a list of named, - typed parameter schemas (this has the same form - as the fields of a record declaration);
  • -
  • a response schema;
  • -
  • an optional union of declared error schemas. - The effective union has "string" - prepended to the declared union, to permit transmission of - undeclared "system" errors. For example, if the declared - error union is ["AccessError"], then the - effective union is ["string", "AccessError"]. - When no errors are declared, the effective error union - is ["string"]. Errors are serialized using - the effective union; however, a protocol's JSON - declaration contains only the declared union. -
  • -
  • an optional one-way boolean parameter.
  • -
-

A request parameter list is processed equivalently to an - anonymous record. Since record field lists may vary between - reader and writer, request parameters may also differ - between the caller and responder, and such differences are - resolved in the same manner as record field differences.

-

The one-way parameter may only be true when the response type - is "null" and no errors are listed.

-
-
- Sample Protocol -

For example, one may define a simple HelloWorld protocol with:

- -{ - "namespace": "com.acme", - "protocol": "HelloWorld", - "doc": "Protocol Greetings", - - "types": [ - {"name": "Greeting", "type": "record", "fields": [ - {"name": "message", "type": "string"}]}, - {"name": "Curse", "type": "error", "fields": [ - {"name": "message", "type": "string"}]} - ], - - "messages": { - "hello": { - "doc": "Say hello.", - "request": [{"name": "greeting", "type": "Greeting" }], - "response": "Greeting", - "errors": ["Curse"] - } - } -} - -
-
- -
- Protocol Wire Format - -
- Message Transport -

Messages may be transmitted via - different transport mechanisms.

- -

To the transport, a message is an opaque byte sequence.

- -

A transport is a system that supports:

-
    -
  • transmission of request messages -
  • -
  • receipt of corresponding response messages -

    Servers may send a response message back to the client - corresponding to a request message. The mechanism of - correspondence is transport-specific. For example, in - HTTP it is implicit, since HTTP directly supports requests - and responses. But a transport that multiplexes many - client threads over a single socket would need to tag - messages with unique identifiers.

    -
  • -
- -

Transports may be either stateless - or stateful. In a stateless transport, messaging - assumes no established connection state, while stateful - transports establish connections that may be used for multiple - messages. This distinction is discussed further in - the handshake section below.

- -
- HTTP as Transport -

When - HTTP - is used as a transport, each Avro message exchange is an - HTTP request/response pair. All messages of an Avro - protocol should share a single URL at an HTTP server. - Other protocols may also use that URL. Both normal and - error Avro response messages should use the 200 (OK) - response code. The chunked encoding may be used for - requests and responses, but, regardless, the Avro request - and response are the entire content of an HTTP request and - response. The HTTP Content-Type of requests and responses - should be specified as "avro/binary". Requests should be - made using the POST method.

-

HTTP is used by Avro as a stateless transport.

-
-
- -
- Message Framing -

Avro messages are framed as a list of buffers.

-

Framing is a layer between messages and the transport. - It exists to optimize certain operations.

- -

The format of framed message data is:

-
    -
  • a series of buffers, where each buffer consists of: -
      -
    • a four-byte, big-endian buffer length, followed by
    • -
    • that many bytes of buffer data.
    • -
    -
  • -
  • A message is always terminated by a zero-length buffer.
  • -
- -

Framing is transparent to request and response message - formats (described below). Any message may be presented as a - single or multiple buffers.

- -

Framing can permit readers to more efficiently get - different buffers from different sources and for writers to - more efficiently store different buffers to different - destinations. In particular, it can reduce the number of - times large binary objects are copied. For example, if an RPC - parameter consists of a megabyte of file data, that data can - be copied directly to a socket from a file descriptor, and, on - the other end, it could be written directly to a file - descriptor, never entering user space.

- -

A simple, recommended, framing policy is for writers to - create a new segment whenever a single binary object is - written that is larger than a normal output buffer. Small - objects are then appended in buffers, while larger objects are - written as their own buffers. When a reader then tries to - read a large object the runtime can hand it an entire buffer - directly, without having to copy it.

-
- -
- Handshake - -

The purpose of the handshake is to ensure that the client - and the server have each other's protocol definition, so that - the client can correctly deserialize responses, and the server - can correctly deserialize requests. Both clients and servers - should maintain a cache of recently seen protocols, so that, - in most cases, a handshake will be completed without extra - round-trip network exchanges or the transmission of full - protocol text.

- -

RPC requests and responses may not be processed until a - handshake has been completed. With a stateless transport, all - requests and responses are prefixed by handshakes. With a - stateful transport, handshakes are only attached to requests - and responses until a successful handshake response has been - returned over a connection. After this, request and response - payloads are sent without handshakes for the lifetime of that - connection.

- -

The handshake process uses the following record schemas:

- - -{ - "type": "record", - "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", - "fields": [ - {"name": "clientHash", - "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "clientProtocol", "type": ["null", "string"]}, - {"name": "serverHash", "type": "MD5"}, - {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} - ] -} -{ - "type": "record", - "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", - "fields": [ - {"name": "match", - "type": {"type": "enum", "name": "HandshakeMatch", - "symbols": ["BOTH", "CLIENT", "NONE"]}}, - {"name": "serverProtocol", - "type": ["null", "string"]}, - {"name": "serverHash", - "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, - {"name": "meta", - "type": ["null", {"type": "map", "values": "bytes"}]} - ] -} - - -
    -
  • A client first prefixes each request with - a HandshakeRequest containing just the hash of - its protocol and of the server's protocol - (clientHash!=null, clientProtocol=null, - serverHash!=null), where the hashes are 128-bit MD5 - hashes of the JSON protocol text. If a client has never - connected to a given server, it sends its hash as a guess of - the server's hash, otherwise it sends the hash that it - previously obtained from this server.
  • - -
  • The server responds with - a HandshakeResponse containing one of: -
      -
    • match=BOTH, serverProtocol=null, - serverHash=null if the client sent the valid hash - of the server's protocol and the server knows what - protocol corresponds to the client's hash. In this case, - the request is complete and the response data - immediately follows the HandshakeResponse.
    • - -
    • match=CLIENT, serverProtocol!=null, - serverHash!=null if the server has previously - seen the client's protocol, but the client sent an - incorrect hash of the server's protocol. The request is - complete and the response data immediately follows the - HandshakeResponse. The client must use the returned - protocol to process the response and should also cache - that protocol and its hash for future interactions with - this server.
    • - -
    • match=NONE if the server has not - previously seen the client's protocol. - The serverHash - and serverProtocol may also be non-null if - the server's protocol hash was incorrect. - -

      In this case the client must then re-submit its request - with its protocol text (clientHash!=null, - clientProtocol!=null, serverHash!=null) and the - server should respond with a successful match - (match=BOTH, serverProtocol=null, - serverHash=null) as above.

      -
    • -
    -
  • -
- -

The meta field is reserved for future - handshake enhancements.

- -
- -
- Call Format -

A call consists of a request message paired with - its resulting response or error message. Requests and - responses contain extensible metadata, and both kinds of - messages are framed as described above.

- -

The format of a call request is:

-
    -
  • request metadata, a map with values of - type bytes
  • -
  • the message name, an Avro string, - followed by
  • -
  • the message parameters. Parameters are - serialized according to the message's request - declaration.
  • -
- -

When the empty string is used as a message name a server - should ignore the parameters and return an empty response. A - client may use this to ping a server or to perform a handshake - without sending a protocol message.

- -

When a message is declared one-way and a stateful - connection has been established by a successful handshake - response, no response data is sent. Otherwise the format of - the call response is:

-
    -
  • response metadata, a map with values of - type bytes
  • -
  • a one-byte error flag boolean, followed by either: -
      -
    • if the error flag is false, the message response, - serialized per the message's response schema.
    • -
    • if the error flag is true, the error, - serialized per the message's effective error union - schema.
    • -
    -
  • -
-
- -
- -
- Schema Resolution - -

A reader of Avro data, whether from an RPC or a file, can - always parse that data because the original schema must be - provided along with the data. However, the reader may be - programmed to read data into a different schema. - For example, if the data was written with a different version - of the software than it is read, then fields may have been - added or removed from records. This section specifies how such - schema differences should be resolved.

- -

We refer to the schema used to write the data as - the writer's schema, and the schema that the - application expects as the reader's schema. Differences - between these should be resolved as follows:

- -
    -
  • It is an error if the two schemas do not match.

    -

    To match, one of the following must hold:

    -
      -
    • both schemas are arrays whose item types match
    • -
    • both schemas are maps whose value types match
    • -
    • both schemas are enums whose (unqualified) names match
    • -
    • both schemas are fixed whose sizes and (unqualified) names match
    • -
    • both schemas are records with the same (unqualified) name
    • -
    • either schema is a union
    • -
    • both schemas have the same primitive type
    • -
    • the writer's schema may be promoted to the - reader's as follows: -
        -
      • int is promotable to long, float, or double
      • -
      • long is promotable to float or double
      • -
      • float is promotable to double
      • -
      • string is promotable to bytes
      • -
      • bytes is promotable to string
      • -
      -
    • -
    -
  • - -
  • if both are records: -
      -
    • the ordering of fields may be different: fields are - matched by name.
    • - -
    • schemas for fields with the same name in both records - are resolved recursively.
    • - -
    • if the writer's record contains a field with a name - not present in the reader's record, the writer's value - for that field is ignored.
    • - -
    • if the reader's record schema has a field that - contains a default value, and writer's schema does not - have a field with the same name, then the reader should - use the default value from its field.
    • - -
    • if the reader's record schema has a field with no - default value, and writer's schema does not have a field - with the same name, an error is signalled.
    • -
    -
  • - -
  • if both are enums: -

    if the writer's symbol is not present in the reader's - enum and the reader has a default value, then - that value is used, otherwise an error is signalled.

    -
  • - -
  • if both are arrays: -

    This resolution algorithm is applied recursively to the reader's and - writer's array item schemas.

    -
  • - -
  • if both are maps: -

    This resolution algorithm is applied recursively to the reader's and - writer's value schemas.

    -
  • - -
  • if both are unions: -

    The first schema in the reader's union that matches the - selected writer's union schema is recursively resolved - against it. If none match, an error is signalled.

    -
  • - -
  • if reader's is a union, but writer's is not -

    The first schema in the reader's union that matches the - writer's schema is recursively resolved against it. If none - match, an error is signalled.

    -
  • - -
  • if writer's is a union, but reader's is not -

    If the reader's schema matches the selected writer's schema, - it is recursively resolved against it. If they do not - match, an error is signalled.

    -
  • - -
- -

A schema's "doc" fields are ignored for the purposes of schema resolution. Hence, - the "doc" portion of a schema may be dropped at serialization.

- -
- -
- Parsing Canonical Form for Schemas - -

One of the defining characteristics of Avro is that a reader - must use the schema used by the writer of the data in - order to know how to read the data. This assumption results in a data - format that's compact and also amenable to many forms of schema - evolution. However, the specification so far has not defined - what it means for the reader to have the "same" schema as the - writer. Does the schema need to be textually identical? Well, - clearly adding or removing some whitespace to a JSON expression - does not change its meaning. At the same time, reordering the - fields of records clearly does change the meaning. So - what does it mean for a reader to have "the same" schema as a - writer?

- -

Parsing Canonical Form is a transformation of a - writer's schema that lets us define what it means for two - schemas to be "the same" for the purpose of reading data written - against the schema. It is called Parsing Canonical Form - because the transformations strip away parts of the schema, like - "doc" attributes, that are irrelevant to readers trying to parse - incoming data. It is called Canonical Form because the - transformations normalize the JSON text (such as the order of - attributes) in a way that eliminates unimportant differences - between schemas. If the Parsing Canonical Forms of two - different schemas are textually equal, then those schemas are - "the same" as far as any reader is concerned, i.e., there is no - serialized data that would allow a reader to distinguish data - generated by a writer using one of the original schemas from - data generated by a writer using the other original schema. - (We sketch a proof of this property in a companion - document.)

- -

The next subsection specifies the transformations that define - Parsing Canonical Form. But with a well-defined canonical form, - it can be convenient to go one step further, transforming these - canonical forms into simple integers ("fingerprints") that can - be used to uniquely identify schemas. The subsection after next - recommends some standard practices for generating such - fingerprints.

- -
- Transforming into Parsing Canonical Form - -

Assuming an input schema (in JSON form) that's already - UTF-8 text for a valid Avro schema (including all - quotes as required by JSON), the following transformations - will produce its Parsing Canonical Form:

-
    -
  • [PRIMITIVES] Convert primitive schemas to their simple - form (e.g., int instead of - {"type":"int"}).
  • - -
  • [FULLNAMES] Replace short names with fullnames, using - applicable namespaces to do so. Then eliminate - namespace attributes, which are now redundant.
  • - -
  • [STRIP] Keep only attributes that are relevant to - parsing data, which are: type, - name, fields, - symbols, items, - values, size. Strip all others - (e.g., doc and aliases).
  • - -
  • [ORDER] Order the appearance of fields of JSON objects - as follows: name, type, - fields, symbols, - items, values, size. - For example, if an object has type, - name, and size fields, then the - name field should appear first, followed by the - type and then the size fields.
  • - -
  • [STRINGS] For all JSON string literals in the schema - text, replace any escaped characters (e.g., \uXXXX escapes) - with their UTF-8 equivalents.
  • - -
  • [INTEGERS] Eliminate quotes around and any leading - zeros in front of JSON integer literals (which appear in the - size attributes of fixed schemas).
  • - -
  • [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.
  • -
-
- -
- Schema Fingerprints - -

"[A] fingerprinting algorithm is a procedure that maps an - arbitrarily large data item (such as a computer file) to a - much shorter bit string, its fingerprint, that - uniquely identifies the original data for all practical - purposes" (quoted from [Wikipedia]). - In the Avro context, fingerprints of Parsing Canonical Form - can be useful in a number of applications; for example, to - cache encoder and decoder objects, to tag data items with a - short substitute for the writer's full schema, and to quickly - negotiate common-case schemas between readers and writers.

- -

In designing fingerprinting algorithms, there is a - fundamental trade-off between the length of the fingerprint - and the probability of collisions. To help application - designers find appropriate points within this trade-off space, - while encouraging interoperability and ease of implementation, - we recommend using one of the following three algorithms when - fingerprinting Avro schemas:

- -
    -
  • When applications can tolerate longer fingerprints, we - recommend using the SHA-256 digest - algorithm to generate 256-bit fingerprints of Parsing - Canonical Forms. Most languages today have SHA-256 - implementations in their libraries.
  • - -
  • At the opposite extreme, the smallest fingerprint we - recommend is a 64-bit Rabin - fingerprint. Below, we provide pseudo-code for this - algorithm that can be easily translated into any programming - language. 64-bit fingerprints should guarantee uniqueness - for schema caches of up to a million entries (for such a - cache, the chance of a collision is 3E-8). We don't - recommend shorter fingerprints, as the chance of collisions - is too great (for example, with 32-bit fingerprints, a cache - with as few as 100,000 schemas has a 50% chance of having a - collision).
  • - -
  • Between these two extremes, we recommend using the MD5 message - digest to generate 128-bit fingerprints. These make - sense only where very large numbers of schemas are being - manipulated (tens of millions); otherwise, 64-bit - fingerprints should be sufficient. As with SHA-256, MD5 - implementations are found in most libraries today.
  • -
- -

These fingerprints are not meant to provide any - security guarantees, even the longer SHA-256-based ones. Most - Avro applications should be surrounded by security measures - that prevent attackers from writing random data and otherwise - interfering with the consumers of schemas. We recommend that - these surrounding mechanisms be used to prevent collision and - pre-image attacks (i.e., "forgery") on schema fingerprints, - rather than relying on the security properties of the - fingerprints themselves.

- -

Rabin fingerprints are cyclic - redundancy checks computed using irreducible polynomials. - In the style of the Appendix of RFC 1952 - (pg 10), which defines the CRC-32 algorithm, here's our - definition of the 64-bit AVRO fingerprinting algorithm:

- - -long fingerprint64(byte[] buf) { - if (FP_TABLE == null) initFPTable(); - long fp = EMPTY; - for (int i = 0; i < buf.length; i++) - fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; - return fp; -} - -static long EMPTY = 0xc15d213aa4d7a795L; -static long[] FP_TABLE = null; - -void initFPTable() { - FP_TABLE = new long[256]; - for (int i = 0; i < 256; i++) { - long fp = i; - for (int j = 0; j < 8; j++) - fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); - FP_TABLE[i] = fp; - } -} - - -

Readers interested in the mathematics behind this - algorithm may want to read - Chapter 14 of the Second Edition of Hacker's Delight. - (Unlike RFC-1952 and the book chapter, we prepend - a single one bit to messages. We do this because CRCs ignore - leading zero bits, which can be problematic. Our code - prepends a one-bit by initializing fingerprints using - EMPTY, rather than initializing using zero as in - RFC-1952 and the book chapter.)

-
-
- -
- Logical Types - -

A logical type is an Avro primitive or complex type with extra attributes to - represent a derived type. The attribute logicalType must - always be present for a logical type, and is a string with the name of one of - the logical types listed later in this section. Other attributes may be defined - for particular logical types.

- -

A logical type is always serialized using its underlying Avro type so - that values are encoded in exactly the same way as the equivalent Avro - type that does not have a logicalType attribute. Language - implementations may choose to represent logical types with an - appropriate native type, although this is not required.

- -

Language implementations must ignore unknown logical types when - reading, and should use the underlying Avro type. If a logical type is - invalid, for example a decimal with scale greater than its precision, - then implementations should ignore the logical type and use the - underlying Avro type.

- -
- Decimal -

The decimal logical type represents an arbitrary-precision signed - decimal number of the form $\text{unscaled} \times 10^{-\text{scale}}$.

- -

A decimal logical type annotates Avro - bytes or fixed types. The byte array must - contain the two's-complement representation of the unscaled integer - value in big-endian byte order. The scale is fixed, and is specified - using an attribute.

- -

The following attributes are supported:

-
    -
  • scale, a JSON integer representing the scale - (optional). If not specified the scale is 0.
  • -
  • precision, a JSON integer representing the (maximum) - precision of decimals stored in this type (required).
  • -
- -

For example, the following schema represents decimal numbers with a - maximum precision of 4 and a scale of 2:

- -{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 -} - - -

Precision must be a positive integer greater than zero. If the - underlying type is a fixed, then the precision is - limited by its size. An array of length n can store at - most $\lfloor \log_{10}(2^{8 \times n - 1} - 1) \rfloor$ - base-10 digits of precision.

- -

Scale must be zero or a positive integer less than or equal to the - precision.

- -

For the purposes of schema resolution, two schemas that are - decimal logical types match if their scales and - precisions match.

- -
- -
- UUID -

- The uuid logical type represents a randomly generated universally unique identifier (UUID). -

- -

- A uuid logical type annotates an Avro string. The string has to conform with RFC-4122 -

-
- -
- Date -

- The date logical type represents a date within the calendar, with no reference to a particular time zone or time of day. -

-

- A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). -

-

The following schema represents a date:

- -{ - "type": "int", - "logicalType": "date" -} - -
- -
- Time (millisecond precision) -

- The time-millis logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. -

-

- A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds after midnight, 00:00:00.000. -

-
- -
- Time (microsecond precision) -

- The time-micros logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. -

-

- A time-micros logical type annotates an Avro long, where the long stores the number of microseconds after midnight, 00:00:00.000000. -

-
- -
- Timestamp (millisecond precision) -

- The timestamp-millis logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. - Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. - In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. -

-

- A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. -

-
- -
- Timestamp (microsecond precision) -

- The timestamp-micros logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond. - Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. - In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. -

-

- A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. -

-
- -
- Local timestamp (millisecond precision) -

- The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. -

-

- A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. -

-
- -
- Local timestamp (microsecond precision) -

- The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond. -

-

- A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. -

-
- -
- Duration -

- The duration logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. -

-

- A duration logical type annotates an Avro fixed type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. -

-
- -
- -

Apache Avro, Avro, Apache, and the Avro and Apache logos are - trademarks of The Apache Software Foundation.

- - -
diff --git a/doc/src/content/xdocs/tabs.xml b/doc/src/content/xdocs/tabs.xml deleted file mode 100644 index 97e7d2c3813..00000000000 --- a/doc/src/content/xdocs/tabs.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - %avro-entities; -]> - - - - - - - - - - diff --git a/doc/src/resources/images/apache_feather.gif b/doc/src/resources/images/apache_feather.gif deleted file mode 100644 index 1a0c3e6b6d1..00000000000 Binary files a/doc/src/resources/images/apache_feather.gif and /dev/null differ diff --git a/doc/src/resources/images/avro-logo.png b/doc/src/resources/images/avro-logo.png deleted file mode 100644 index 4cbe12dd97b..00000000000 Binary files a/doc/src/resources/images/avro-logo.png and /dev/null differ diff --git a/doc/src/resources/images/favicon.ico b/doc/src/resources/images/favicon.ico deleted file mode 100644 index 161bcf7841c..00000000000 Binary files a/doc/src/resources/images/favicon.ico and /dev/null differ diff --git a/doc/src/skinconf.xml b/doc/src/skinconf.xml deleted file mode 100644 index 2328639c8ce..00000000000 --- a/doc/src/skinconf.xml +++ /dev/null @@ -1,350 +0,0 @@ - - - - - - - - - - - - true - - false - - true - - true - - - true - - - true - - - true - - - false - - - true - - - Avro - Serialization System - https://avro.apache.org/ - images/avro-logo.png - - - Apache - The Apache Software Foundation - https://www.apache.org/ - images/apache_feather.gif - - - - - - - images/favicon.ico - - - 2012 - The Apache Software Foundation. 
- https://www.apache.org/licenses/ - - - - - - - - - - - - - - - - - - - p.quote { - margin-left: 2em; - padding: .5em; - background-color: #f0f0f0; - font-family: monospace; - } - table.right { - text-align: right; - display: block; - } - - - - - - - - - - - - - - - - - - - - - - - - - 1in - 1in - 1.25in - 1in - - - - false - - - false - - - - - - Built with Apache Forrest - https://forrest.apache.org/ - images/built-with-forrest-button.png - 88 - 31 - - - - - - diff --git a/doc/themes/docsy b/doc/themes/docsy new file mode 160000 index 00000000000..a77761a6de8 --- /dev/null +++ b/doc/themes/docsy @@ -0,0 +1 @@ +Subproject commit a77761a6de8c5d4bb284dab5d0b47447883eb6d2 diff --git a/lang/c++/CMakeLists.txt b/lang/c++/CMakeLists.txt index 4a3793152e6..19059a41b13 100644 --- a/lang/c++/CMakeLists.txt +++ b/lang/c++/CMakeLists.txt @@ -16,17 +16,21 @@ # specific language governing permissions and limitations # under the License. # -cmake_minimum_required (VERSION 3.1) +cmake_minimum_required (VERSION 3.5) set (CMAKE_LEGACY_CYGWIN_WIN32 0) if (NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD 17) +endif() + +if (CMAKE_CXX_STANDARD LESS 17) + message(FATAL_ERROR "Avro requires at least C++17") endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) -if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.0) +if (APPLE) # Enable MACOSX_RPATH by default cmake_policy (SET CMP0042 NEW) endif() @@ -64,7 +68,7 @@ if (WIN32 AND NOT CYGWIN AND NOT MSYS) endif() if (CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wduplicated-cond -Wduplicated-branches -Wlogical-op -Wuseless-cast -Wconversion -pedantic -Werror") if (AVRO_ADD_PROTECTOR_FLAGS) set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fstack-protector-all -D_GLIBCXX_DEBUG") # Unset _GLIBCXX_DEBUG for avrogencpp.cc because using Boost Program Options @@ -78,6 +82,16 @@ endif () find_package (Boost 1.38 REQUIRED COMPONENTS 
filesystem iostreams program_options regex system) +include(FetchContent) +FetchContent_Declare( + fmt + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG 10.2.1 + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE +) +FetchContent_MakeAvailable(fmt) + find_package(Snappy) if (SNAPPY_FOUND) set(SNAPPY_PKG libsnappy) @@ -92,7 +106,9 @@ endif (SNAPPY_FOUND) add_definitions (${Boost_LIB_DIAGNOSTIC_DEFINITIONS}) -include_directories (api ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS}) +add_definitions (-DAVRO_VERSION="${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}") + +include_directories (include/avro ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS}) set (AVRO_SOURCE_FILES impl/Compiler.cc impl/Node.cc impl/LogicalType.cc @@ -109,6 +125,7 @@ set (AVRO_SOURCE_FILES impl/json/JsonIO.cc impl/json/JsonDom.cc impl/Resolver.cc impl/Validator.cc + impl/CustomAttributes.cc ) add_library (avrocpp SHARED ${AVRO_SOURCE_FILES}) @@ -118,6 +135,7 @@ set_property (TARGET avrocpp add_library (avrocpp_s STATIC ${AVRO_SOURCE_FILES}) target_include_directories(avrocpp_s PRIVATE ${SNAPPY_INCLUDE_DIR}) +target_link_libraries(avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES} fmt::fmt-header-only) set_property (TARGET avrocpp avrocpp_s APPEND PROPERTY COMPILE_DEFINITIONS AVRO_SOURCE) @@ -128,19 +146,19 @@ set_target_properties (avrocpp PROPERTIES set_target_properties (avrocpp_s PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}) -target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) +target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES} fmt::fmt-header-only) target_include_directories(avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) add_executable (precompile test/precompile.cc) -target_link_libraries (precompile avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) +target_link_libraries (precompile avrocpp_s) macro (gen file ns) add_custom_command (OUTPUT ${file}.hh COMMAND avrogencpp -p - -i 
${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file} - -o ${file}.hh -n ${ns} -U + -o ${file}.hh -n ${ns} DEPENDS avrogencpp ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file}) add_custom_target (${file}_hh DEPENDS ${file}.hh) endmacro (gen) @@ -153,6 +171,7 @@ gen (tweet testgen3) gen (union_array_union uau) gen (union_map_union umu) gen (union_conflict uc) +gen (union_empty_record uer) gen (recursive rec) gen (reuse ru) gen (circulardep cd) @@ -161,9 +180,19 @@ gen (tree2 tr2) gen (crossref cr) gen (primitivetypes pt) gen (cpp_reserved_words cppres) +gen (cpp_reserved_words_union_typedef cppres_union) add_executable (avrogencpp impl/avrogencpp.cc) -target_link_libraries (avrogencpp avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES}) +target_link_libraries (avrogencpp avrocpp_s) + +target_include_directories(avrocpp_s PUBLIC + $ + $ +) +target_include_directories(avrocpp PUBLIC + $ + $ +) enable_testing() @@ -186,6 +215,7 @@ unittest (JsonTests) unittest (AvrogencppTests) unittest (CompilerTests) unittest (AvrogencppTestReservedWords) +unittest (CommonsSchemasTests) add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh) @@ -193,7 +223,8 @@ add_dependencies (AvrogencppTests bigrecord_hh bigrecord_r_hh bigrecord2_hh tweet_hh union_array_union_hh union_map_union_hh union_conflict_hh recursive_hh reuse_hh circulardep_hh tree1_hh tree2_hh crossref_hh - primitivetypes_hh empty_record_hh) + primitivetypes_hh empty_record_hh cpp_reserved_words_union_typedef_hh + union_empty_record_hh) include (InstallRequiredSystemLibraries) @@ -208,7 +239,7 @@ install (TARGETS avrocpp avrocpp_s install (TARGETS avrogencpp RUNTIME DESTINATION bin) -install (DIRECTORY api/ DESTINATION include/avro +install (DIRECTORY include/avro DESTINATION include FILES_MATCHING PATTERN *.hh) if (NOT CMAKE_BUILD_TYPE) diff --git a/lang/c++/LICENSE b/lang/c++/LICENSE index d641439cded..d7f066e1d81 100644 --- a/lang/c++/LICENSE +++ b/lang/c++/LICENSE @@ -201,36 +201,6 @@ See the License for the 
specific language governing permissions and limitations under the License. ----------------------------------------------------------------------- -License for the m4 macros used by the C++ implementation: - -Files: -* lang/c++/m4/m4_ax_boost_system.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal - Copyright (c) 2008 Daniel Casimiro -* lang/c++/m4/m4_ax_boost_asio.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Pete Greenwell -* lang/c++/m4/m4_ax_boost_filesystem.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal - Copyright (c) 2009 Roman Rybalko -* lang/c++/m4/m4_ax_boost_thread.m4 - Copyright (c) 2009 Thomas Porschberg - Copyright (c) 2009 Michael Tindal -* lang/c++/m4/m4_ax_boost_regex.m4 - Copyright (c) 2008 Thomas Porschberg - Copyright (c) 2008 Michael Tindal -* lang/c++/m4/m4_ax_boost_base.m4 - Copyright (c) 2008 Thomas Porschberg - -License text: -| Copying and distribution of this file, with or without modification, are -| permitted in any medium without royalty provided the copyright notice -| and this notice are preserved. This file is offered as-is, without any -| warranty. - ---------------------------------------------------------------------- License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation: File: lang/c++/api/Boost.hh diff --git a/lang/c++/MainPage.dox b/lang/c++/MainPage.dox index eab49d3e627..91977fca2f1 100644 --- a/lang/c++/MainPage.dox +++ b/lang/c++/MainPage.dox @@ -55,15 +55,14 @@ One should be able to build Avro C++ on (1) any UNIX flavor including cygwin for In order to build Avro C++, one needs the following:
    -
  • A C++ compiler and runtime libraries. +
  • A C++17 or later compiler and runtime libraries.
  • Boost library version 1.38 or later. Apart from the header-only libraries of Boost, Avro C++ requires filesystem, iostreams, system and program_options libraries. Please see https://www.boost.org or your platform's documentation for details on how to set up Boost for your platform. -
  • CMake build tool version 2.6 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system. +
  • CMake build tool version 3.5 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system.
  • Python. If not already present, please consult your platform-specific documentation on how to install Python on your system.
For Ubuntu Linux, for example, you can have these by doing apt-get install for the following packages: -\ul \li cmake \li g++ \li libboost-dev @@ -73,7 +72,6 @@ For Ubuntu Linux, for example, you can have these by doing \li libboost-system-dev For Windows native builds, you need to install the following: -\ul \li cmake \li boost distribution from Boost consulting \li Visual studio @@ -336,4 +334,3 @@ corresponding to a given schema. Please see DataFile.hh for more details. */ - diff --git a/lang/c++/README b/lang/c++/README index 6b081f13a86..be5f2ff62d7 100644 --- a/lang/c++/README +++ b/lang/c++/README @@ -29,9 +29,9 @@ INSTRUCTIONS Pre-requisites: -To compile requires boost headers, and the boost regex library. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS). +To compile requires boost headers. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS). -To build one requires cmake 2.6 or later. +To build one requires cmake 3.5 or later and a compiler supporting C++17 or later. To generate a Makefile under Unix, MacOS (using GNU) or Cygwin use: @@ -39,8 +39,8 @@ mkdir build cd build cmake -G "Unix Makefiles" .. -If it doesn't work, either you are missing some packages (boost, flex or bison), -or you need to help configure locate them. +If it doesn't work, either you are missing boost package or you need to help +configure locate it. 
If the Makefile is configured correctly, then you can make and run tests: diff --git a/lang/c++/build.sh b/lang/c++/build.sh index ac9964c75e5..11e1599d1d9 100755 --- a/lang/c++/build.sh +++ b/lang/c++/build.sh @@ -58,8 +58,8 @@ function do_doc() { function do_dist() { rm -rf $BUILD_CPP/ mkdir -p $BUILD_CPP - cp -r api AUTHORS build.sh CMakeLists.txt ChangeLog \ - LICENSE NOTICE impl jsonschemas NEWS parser README test examples \ + cp -r include AUTHORS build.sh CMakeLists.txt ChangeLog \ + LICENSE NOTICE impl jsonschemas NEWS README test examples \ $BUILD_CPP find $BUILD_CPP -name '.svn' | xargs rm -rf cp ../../share/VERSION.txt $BUILD_CPP @@ -71,10 +71,10 @@ function do_dist() { fi } -(mkdir -p build; cd build; cmake --version; cmake -G "Unix Makefiles" ..) for target in "$@" do +cmake -S . -B build case "$target" in lint) # some versions of cppcheck seem to require an explicit @@ -83,16 +83,20 @@ case "$target" in ;; test) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 .. && make && cd .. \ + (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 && cmake --build build -- -k \ && ./build/buffertest \ && ./build/unittest \ + && ./build/AvrogencppTestReservedWords \ + && ./build/AvrogencppTests \ && ./build/CodecTests \ + && ./build/CommonsSchemasTests \ && ./build/CompilerTests \ - && ./build/StreamTests \ - && ./build/SpecificTests \ - && ./build/AvrogencppTests \ && ./build/DataFileTests \ - && ./build/SchemaTests) + && ./build/JsonTests \ + && ./build/LargeSchemaTests \ + && ./build/SchemaTests \ + && ./build/SpecificTests \ + && ./build/StreamTests) ;; xcode-test) @@ -104,7 +108,7 @@ case "$target" in ;; dist) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..) + (cd build && cmake -D CMAKE_BUILD_TYPE=Release ..) 
do_dist do_doc ;; @@ -118,12 +122,12 @@ case "$target" in ;; clean) - (cd build && make clean) + (cmake --build build --target clean) rm -rf doc test.avro test?.df test??.df test_skip.df test_lastSync.df test_readRecordUsingLastSync.df ;; install) - (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. && make install) + (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Release && cmake --build build --target install) ;; *) diff --git a/lang/c++/examples/cpx.hh b/lang/c++/examples/cpx.hh index 53c6ee130db..e240abb568a 100644 --- a/lang/c++/examples/cpx.hh +++ b/lang/c++/examples/cpx.hh @@ -16,14 +16,12 @@ * limitations under the License. */ - #ifndef CPX_HH_1278398428__H_ #define CPX_HH_1278398428__H_ - -#include "avro/Specific.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/Specific.hh" namespace c { struct cpx { @@ -31,18 +29,19 @@ struct cpx { double im; }; -} +} // namespace c namespace avro { -template<> struct codec_traits { - static void encode(Encoder& e, const c::cpx& v) { +template<> +struct codec_traits { + static void encode(Encoder &e, const c::cpx &v) { avro::encode(e, v.re); avro::encode(e, v.im); } - static void decode(Decoder& d, c::cpx& v) { + static void decode(Decoder &d, c::cpx &v) { avro::decode(d, v.re); avro::decode(d, v.im); } }; -} +} // namespace avro #endif diff --git a/lang/c++/examples/custom.cc b/lang/c++/examples/custom.cc index e3aa32da8c4..baf6d0cf796 100644 --- a/lang/c++/examples/custom.cc +++ b/lang/c++/examples/custom.cc @@ -18,19 +18,19 @@ #include -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" #include "avro/Specific.hh" namespace avro { template -struct codec_traits > { - static void encode(Encoder& e, const std::complex& c) { +struct codec_traits> { + static void encode(Encoder &e, const std::complex &c) { avro::encode(e, std::real(c)); avro::encode(e, std::imag(c)); } - static void decode(Decoder& d, std::complex& c) { + static 
void decode(Decoder &d, std::complex &c) { T re, im; avro::decode(d, re); avro::decode(d, im); @@ -38,10 +38,8 @@ struct codec_traits > { } }; -} -int -main() -{ +} // namespace avro +int main() { std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::binaryEncoder(); e->init(*out); diff --git a/lang/c++/examples/datafile.cc b/lang/c++/examples/datafile.cc index 2875e8fc738..6942a3074fa 100644 --- a/lang/c++/examples/datafile.cc +++ b/lang/c++/examples/datafile.cc @@ -18,25 +18,21 @@ #include -#include "cpx.hh" -#include "avro/Encoder.hh" -#include "avro/Decoder.hh" -#include "avro/ValidSchema.hh" #include "avro/Compiler.hh" #include "avro/DataFile.hh" +#include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/ValidSchema.hh" +#include "cpx.hh" - -avro::ValidSchema loadSchema(const char* filename) -{ +avro::ValidSchema loadSchema(const char *filename) { std::ifstream ifs(filename); avro::ValidSchema result; avro::compileJsonSchema(ifs, result); return result; } -int -main() -{ +int main() { avro::ValidSchema cpxSchema = loadSchema("cpx.json"); { @@ -59,4 +55,3 @@ main() } return 0; } - diff --git a/lang/c++/examples/generated.cc b/lang/c++/examples/generated.cc index f1394ee602a..42a8dd923f1 100644 --- a/lang/c++/examples/generated.cc +++ b/lang/c++/examples/generated.cc @@ -16,14 +16,11 @@ * limitations under the License. */ -#include "cpx.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "cpx.hh" - -int -main() -{ +int main() { std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::binaryEncoder(); e->init(*out); @@ -41,4 +38,3 @@ main() std::cout << '(' << c2.re << ", " << c2.im << ')' << std::endl; return 0; } - diff --git a/lang/c++/examples/generic.cc b/lang/c++/examples/generic.cc index 2675564e8ba..3abd37eccbc 100644 --- a/lang/c++/examples/generic.cc +++ b/lang/c++/examples/generic.cc @@ -16,20 +16,18 @@ * limitations under the License. 
*/ -#include #include +#include #include "cpx.hh" #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" -#include "avro/Specific.hh" +#include "avro/Encoder.hh" #include "avro/Generic.hh" +#include "avro/Specific.hh" -int -main() -{ +int main() { std::ifstream ifs("cpx.json"); avro::ValidSchema cpxSchema; @@ -51,14 +49,14 @@ main() avro::decode(*d, datum); std::cout << "Type: " << datum.type() << std::endl; if (datum.type() == avro::AVRO_RECORD) { - const avro::GenericRecord& r = datum.value(); + const avro::GenericRecord &r = datum.value(); std::cout << "Field-count: " << r.fieldCount() << std::endl; if (r.fieldCount() == 2) { - const avro::GenericDatum& f0 = r.fieldAt(0); + const avro::GenericDatum &f0 = r.fieldAt(0); if (f0.type() == avro::AVRO_DOUBLE) { std::cout << "Real: " << f0.value() << std::endl; } - const avro::GenericDatum& f1 = r.fieldAt(1); + const avro::GenericDatum &f1 = r.fieldAt(1); if (f1.type() == avro::AVRO_DOUBLE) { std::cout << "Imaginary: " << f1.value() << std::endl; } diff --git a/lang/c++/examples/imaginary.hh b/lang/c++/examples/imaginary.hh index 774aefd1172..a268ea1c835 100644 --- a/lang/c++/examples/imaginary.hh +++ b/lang/c++/examples/imaginary.hh @@ -16,31 +16,30 @@ * limitations under the License. 
*/ - #ifndef IMAGINARY_HH_3460301992__H_ #define IMAGINARY_HH_3460301992__H_ - -#include "boost/any.hpp" -#include "avro/Specific.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" +#include "avro/Specific.hh" +#include "boost/any.hpp" namespace i { struct cpx { double im; }; -} +} // namespace i namespace avro { -template<> struct codec_traits { - static void encode(Encoder& e, const i::cpx& v) { +template<> +struct codec_traits { + static void encode(Encoder &e, const i::cpx &v) { avro::encode(e, v.im); } - static void decode(Decoder& d, i::cpx& v) { + static void decode(Decoder &d, i::cpx &v) { avro::decode(d, v.im); } }; -} +} // namespace avro #endif diff --git a/lang/c++/examples/resolving.cc b/lang/c++/examples/resolving.cc index 8ce9addb894..a3aec70bc05 100644 --- a/lang/c++/examples/resolving.cc +++ b/lang/c++/examples/resolving.cc @@ -22,24 +22,19 @@ #include "imaginary.hh" #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" -#include "avro/Specific.hh" +#include "avro/Encoder.hh" #include "avro/Generic.hh" +#include "avro/Specific.hh" - - -avro::ValidSchema load(const char* filename) -{ +avro::ValidSchema load(const char *filename) { std::ifstream ifs(filename); avro::ValidSchema result; avro::compileJsonSchema(ifs, result); return result; } -int -main() -{ +int main() { avro::ValidSchema cpxSchema = load("cpx.json"); avro::ValidSchema imaginarySchema = load("imaginary.json"); @@ -53,11 +48,10 @@ main() std::unique_ptr in = avro::memoryInputStream(*out); avro::DecoderPtr d = avro::resolvingDecoder(cpxSchema, imaginarySchema, - avro::binaryDecoder()); + avro::binaryDecoder()); d->init(*in); i::cpx c2; avro::decode(*d, c2); std::cout << "Imaginary: " << c2.im << std::endl; - } diff --git a/lang/c++/examples/schemaload.cc b/lang/c++/examples/schemaload.cc index d6b442dd960..63375af9a54 100644 --- a/lang/c++/examples/schemaload.cc +++ b/lang/c++/examples/schemaload.cc @@ -18,13 +18,10 @@ 
#include -#include "avro/ValidSchema.hh" #include "avro/Compiler.hh" +#include "avro/ValidSchema.hh" - -int -main() -{ +int main() { std::ifstream in("cpx.json"); avro::ValidSchema cpxSchema; diff --git a/lang/c++/examples/validating.cc b/lang/c++/examples/validating.cc index 64f0649fa5d..5479edeb3d4 100644 --- a/lang/c++/examples/validating.cc +++ b/lang/c++/examples/validating.cc @@ -16,23 +16,23 @@ * limitations under the License. */ -#include #include +#include #include "avro/Compiler.hh" -#include "avro/Encoder.hh" #include "avro/Decoder.hh" +#include "avro/Encoder.hh" #include "avro/Specific.hh" namespace avro { template -struct codec_traits > { - static void encode(Encoder& e, const std::complex& c) { +struct codec_traits> { + static void encode(Encoder &e, const std::complex &c) { avro::encode(e, std::real(c)); avro::encode(e, std::imag(c)); } - static void decode(Decoder& d, std::complex& c) { + static void decode(Decoder &d, std::complex &c) { T re, im; avro::decode(d, re); avro::decode(d, im); @@ -40,10 +40,8 @@ struct codec_traits > { } }; -} -int -main() -{ +} // namespace avro +int main() { std::ifstream ifs("cpx.json"); avro::ValidSchema cpxSchema; @@ -51,14 +49,14 @@ main() std::unique_ptr out = avro::memoryOutputStream(); avro::EncoderPtr e = avro::validatingEncoder(cpxSchema, - avro::binaryEncoder()); + avro::binaryEncoder()); e->init(*out); std::complex c1(1.0, 2.0); avro::encode(*e, c1); std::unique_ptr in = avro::memoryInputStream(*out); avro::DecoderPtr d = avro::validatingDecoder(cpxSchema, - avro::binaryDecoder()); + avro::binaryDecoder()); d->init(*in); std::complex c2; diff --git a/lang/c++/impl/BinaryDecoder.cc b/lang/c++/impl/BinaryDecoder.cc index 248b503342a..a970d605207 100644 --- a/lang/c++/impl/BinaryDecoder.cc +++ b/lang/c++/impl/BinaryDecoder.cc @@ -74,14 +74,13 @@ bool BinaryDecoder::decodeBool() { } else if (v == 1) { return true; } - throw Exception(boost::format("Invalid value for bool: %1%") % v); + throw Exception("Invalid 
value for bool: {}", v); } int32_t BinaryDecoder::decodeInt() { auto val = doDecodeLong(); if (val < INT32_MIN || val > INT32_MAX) { - throw Exception( - boost::format("Value out of range for Avro int: %1%") % val); + throw Exception("Value out of range for Avro int: {}", val); } return static_cast(val); } @@ -105,8 +104,7 @@ double BinaryDecoder::decodeDouble() { size_t BinaryDecoder::doDecodeLength() { ssize_t len = decodeInt(); if (len < 0) { - throw Exception( - boost::format("Cannot have negative length: %1%") % len); + throw Exception("Cannot have negative length: {}", len); } return len; } diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc index d76546f317d..3b287c9eeb0 100644 --- a/lang/c++/impl/Compiler.cc +++ b/lang/c++/impl/Compiler.cc @@ -17,9 +17,12 @@ */ #include #include +#include #include #include "Compiler.hh" +#include "CustomAttributes.hh" +#include "NodeConcepts.hh" #include "Schema.hh" #include "Stream.hh" #include "Types.hh" @@ -93,7 +96,7 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) { if (it != st.end()) { return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second)); } - throw Exception(boost::format("Unknown type: %1%") % n.fullname()); + throw Exception("Unknown type: {}", n); } /** Returns "true" if the field is in the container */ @@ -109,7 +112,7 @@ json::Object::const_iterator findField(const Entity &e, template void ensureType(const Entity &e, const string &name) { if (e.type() != json::type_traits::type()) { - throw Exception(boost::format("Json field \"%1%\" is not a %2%: %3%") % name % json::type_traits::name() % e.toString()); + throw Exception("Json field \"{}\" is not a {}: {}", name, json::type_traits::name(), e.toString()); } } @@ -144,16 +147,20 @@ string getDocField(const Entity &e, const Object &m) { struct Field { const string name; + const vector aliases; const NodePtr schema; const GenericDatum defaultValue; - Field(string n, NodePtr v, GenericDatum dv) : 
name(std::move(n)), schema(std::move(v)), defaultValue(std::move(dv)) {} + const CustomAttributes customAttributes; + + Field(string n, vector a, NodePtr v, GenericDatum dv, const CustomAttributes &ca) + : name(std::move(n)), aliases(std::move(a)), schema(std::move(v)), defaultValue(std::move(dv)), customAttributes(ca) {} }; static void assertType(const Entity &e, EntityType et) { if (e.type() != et) { - throw Exception(boost::format("Unexpected type for default value: " - "Expected %1%, but found %2% in line %3%") - % json::typeToString(et) % json::typeToString(e.type()) % e.line()); + throw Exception( + "Unexpected type for default value: Expected {}, but found {} in line {}", + json::typeToString(et), json::typeToString(e.type()), e.line()); } } @@ -212,9 +219,9 @@ static GenericDatum makeGenericDatum(NodePtr n, for (size_t i = 0; i < n->leaves(); ++i) { auto it = v.find(n->nameAt(i)); if (it == v.end()) { - throw Exception(boost::format( - "No value found in default for %1%") - % n->nameAt(i)); + throw Exception( + "No value found in default for {}", + n->nameAt(i)); } result.setFieldAt(i, makeGenericDatum(n->leafAt(i), it->second, st)); @@ -252,13 +259,39 @@ static GenericDatum makeGenericDatum(NodePtr n, case AVRO_FIXED: assertType(e, json::EntityType::String); return GenericDatum(n, GenericFixed(n, toBin(e.bytesValue()))); - default: throw Exception(boost::format("Unknown type: %1%") % t); + default: throw Exception("Unknown type: {}", t); + } +} + +static const std::unordered_set &getKnownFields() { + // return known fields + static const std::unordered_set kKnownFields = + {"name", "type", "aliases", "default", "doc", "size", "logicalType", + "values", "precision", "scale", "namespace"}; + return kKnownFields; +} + +static void getCustomAttributes(const Object &m, CustomAttributes &customAttributes) { + // Don't add known fields on primitive type and fixed type into custom + // fields. 
+ const std::unordered_set &kKnownFields = getKnownFields(); + for (const auto &entry : m) { + if (kKnownFields.find(entry.first) == kKnownFields.end()) { + customAttributes.addAttribute(entry.first, entry.second.stringValue()); + } } } static Field makeField(const Entity &e, SymbolTable &st, const string &ns) { const Object &m = e.objectValue(); - const string &n = getStringField(e, m, "name"); + string n = getStringField(e, m, "name"); + vector aliases; + string aliasesName = "aliases"; + if (containsField(m, aliasesName)) { + for (const auto &alias : getArrayField(e, m, aliasesName)) { + aliases.emplace_back(alias.stringValue()); + } + } auto it = findField(e, m, "type"); auto it2 = m.find("default"); NodePtr node = makeNode(it->second, st, ns); @@ -266,31 +299,38 @@ static Field makeField(const Entity &e, SymbolTable &st, const string &ns) { node->setDoc(getDocField(e, m)); } GenericDatum d = (it2 == m.end()) ? GenericDatum() : makeGenericDatum(node, it2->second, st); - return Field(n, node, d); + // Get custom attributes + CustomAttributes customAttributes; + getCustomAttributes(m, customAttributes); + return Field(std::move(n), std::move(aliases), node, d, customAttributes); } // Extended makeRecordNode (with doc). 
static NodePtr makeRecordNode(const Entity &e, const Name &name, const string *doc, const Object &m, SymbolTable &st, const string &ns) { - const Array &v = getArrayField(e, m, "fields"); concepts::MultiAttribute fieldNames; + vector> fieldAliases; concepts::MultiAttribute fieldValues; + concepts::MultiAttribute customAttributes; vector defaultValues; - - for (const auto &it : v) { + string fields = "fields"; + for (const auto &it : getArrayField(e, m, fields)) { Field f = makeField(it, st, ns); fieldNames.add(f.name); + fieldAliases.push_back(f.aliases); fieldValues.add(f.schema); defaultValues.push_back(f.defaultValue); + customAttributes.add(f.customAttributes); } + NodeRecord *node; if (doc == nullptr) { node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames, - defaultValues); + fieldAliases, defaultValues, customAttributes); } else { node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc), - fieldValues, fieldNames, defaultValues); + fieldValues, fieldNames, fieldAliases, defaultValues, customAttributes); } return NodePtr(node); } @@ -305,9 +345,10 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { if (typeField == "decimal") { LogicalType decimalType(LogicalType::DECIMAL); try { - decimalType.setPrecision(getLongField(e, m, "precision")); + // Precision probably won't go over 38 and scale beyond -77/+77 + decimalType.setPrecision(static_cast(getLongField(e, m, "precision"))); if (containsField(m, "scale")) { - decimalType.setScale(getLongField(e, m, "scale")); + decimalType.setScale(static_cast(getLongField(e, m, "scale"))); } } catch (Exception &ex) { // If any part of the logical type is malformed, per the standard we @@ -337,11 +378,12 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) { static NodePtr makeEnumNode(const Entity &e, const Name &name, const Object &m) { - const Array &v = getArrayField(e, m, "symbols"); + string symbolsName = "symbols"; + const Array &v = 
getArrayField(e, m, symbolsName); concepts::MultiAttribute symbols; for (const auto &it : v) { if (it.type() != json::EntityType::String) { - throw Exception(boost::format("Enum symbol not a string: %1%") % it.toString()); + throw Exception("Enum symbol not a string: {}", it.toString()); } symbols.add(it.stringValue()); } @@ -354,12 +396,12 @@ static NodePtr makeEnumNode(const Entity &e, static NodePtr makeFixedNode(const Entity &e, const Name &name, const Object &m) { - int v = static_cast(getLongField(e, m, "size")); + int64_t v = getLongField(e, m, "size"); if (v <= 0) { - throw Exception(boost::format("Size for fixed is not positive: %1%") % e.toString()); + throw Exception("Size for fixed is not positive: {}", e.toString()); } NodePtr node = - NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(v))); + NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(static_cast(v)))); if (containsField(m, "doc")) { node->setDoc(getDocField(e, m)); } @@ -392,21 +434,31 @@ static NodePtr makeMapNode(const Entity &e, const Object &m, static Name getName(const Entity &e, const Object &m, const string &ns) { const string &name = getStringField(e, m, "name"); + Name result; if (isFullName(name)) { - return Name(name); + result = Name(name); } else { auto it = m.find("namespace"); if (it != m.end()) { if (it->second.type() != json::type_traits::type()) { - throw Exception(boost::format( - "Json field \"%1%\" is not a %2%: %3%") - % "namespace" % json::type_traits::name() % it->second.toString()); + throw Exception( + "Json field \"namespace\" is not a string: {}", + it->second.toString()); } - Name result = Name(name, it->second.stringValue()); - return result; + result = Name(name, it->second.stringValue()); + } else { + result = Name(name, ns); } - return Name(name, ns); } + + std::string aliases = "aliases"; + if (containsField(m, aliases)) { + for (const auto &alias : getArrayField(e, m, aliases)) { + result.addAlias(alias.stringValue()); + } + } + + 
return result; } static NodePtr makeNode(const Entity &e, const Object &m, @@ -452,11 +504,10 @@ static NodePtr makeNode(const Entity &e, const Object &m, return result; } - throw Exception(boost::format("Unknown type definition: %1%") - % e.toString()); + throw Exception("Unknown type definition: {}", e.toString()); } -static NodePtr makeNode(const Entity &e, const Array &m, +static NodePtr makeNode(const Entity &, const Array &m, SymbolTable &st, const string &ns) { concepts::MultiAttribute mm; for (const auto &it : m) { @@ -470,13 +521,13 @@ static NodePtr makeNode(const json::Entity &e, SymbolTable &st, const string &ns case json::EntityType::String: return makeNode(e.stringValue(), st, ns); case json::EntityType::Obj: return makeNode(e, e.objectValue(), st, ns); case json::EntityType::Arr: return makeNode(e, e.arrayValue(), st, ns); - default: throw Exception(boost::format("Invalid Avro type: %1%") % e.toString()); + default: throw Exception("Invalid Avro type: {}", e.toString()); } } json::Object::const_iterator findField(const Entity &e, const Object &m, const string &fieldName) { auto it = m.find(fieldName); if (it == m.end()) { - throw Exception(boost::format("Missing Json field \"%1%\": %2%") % fieldName % e.toString()); + throw Exception("Missing Json field \"{}\": {}", fieldName, e.toString()); } else { return it; } diff --git a/lang/c++/impl/CustomAttributes.cc b/lang/c++/impl/CustomAttributes.cc new file mode 100644 index 00000000000..13c56a5e926 --- /dev/null +++ b/lang/c++/impl/CustomAttributes.cc @@ -0,0 +1,53 @@ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "CustomAttributes.hh" +#include "Exception.hh" +#include +#include + +namespace avro { + +boost::optional CustomAttributes::getAttribute(const std::string &name) const { + boost::optional result; + std::map::const_iterator iter = + attributes_.find(name); + if (iter == attributes_.end()) { + return result; + } + result = iter->second; + return result; +} + +void CustomAttributes::addAttribute(const std::string &name, + const std::string &value) { + auto iter_and_find = + attributes_.insert(std::pair(name, value)); + if (!iter_and_find.second) { + throw Exception(name + " already exists and cannot be added"); + } +} + +void CustomAttributes::printJson(std::ostream &os, + const std::string &name) const { + if (attributes().find(name) == attributes().end()) { + throw Exception(name + " doesn't exist"); + } + os << "\"" << name << "\": \"" << attributes().at(name) << "\""; +} +} // namespace avro diff --git a/lang/c++/impl/DataFile.cc b/lang/c++/impl/DataFile.cc index 18fb3f61a68..66281ae9820 100644 --- a/lang/c++/impl/DataFile.cc +++ b/lang/c++/impl/DataFile.cc @@ -93,9 +93,9 @@ DataFileWriterBase::DataFileWriterBase(std::unique_ptr outputStrea void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) { if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) { - throw Exception(boost::format("Invalid sync interval: %1%. " - "Should be between %2% and %3%") - % syncInterval % minSyncInterval % maxSyncInterval); + throw Exception( + "Invalid sync interval: {}. 
Should be between {} and {}", + syncInterval, minSyncInterval, maxSyncInterval); } setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC); @@ -108,7 +108,7 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, co setMetadata(AVRO_CODEC_KEY, AVRO_SNAPPY_CODEC); #endif } else { - throw Exception(boost::format("Unknown codec: %1%") % codec); + throw Exception("Unknown codec: {}", int(codec)); } setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false)); @@ -120,7 +120,9 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, co DataFileWriterBase::~DataFileWriterBase() { if (stream_) { - close(); + try { + close(); + } catch (...) {} } } @@ -193,10 +195,10 @@ void DataFileWriterBase::sync() { os.push(boost::iostreams::back_inserter(temp)); boost::iostreams::write(os, compressed.c_str(), compressed_size); } - temp.push_back((checksum >> 24) & 0xFF); - temp.push_back((checksum >> 16) & 0xFF); - temp.push_back((checksum >> 8) & 0xFF); - temp.push_back(checksum & 0xFF); + temp.push_back(static_cast((checksum >> 24) & 0xFF)); + temp.push_back(static_cast((checksum >> 16) & 0xFF)); + temp.push_back(static_cast((checksum >> 8) & 0xFF)); + temp.push_back(static_cast(checksum & 0xFF)); std::unique_ptr in = memoryInputStream( reinterpret_cast(temp.data()), temp.size()); int64_t byteCount = temp.size(); @@ -232,9 +234,8 @@ void DataFileWriterBase::flush() { sync(); } -boost::mt19937 random(static_cast(time(nullptr))); - DataFileSync DataFileWriterBase::makeSync() { + boost::mt19937 random(static_cast(time(nullptr))); DataFileSync sync; std::generate(sync.begin(), sync.end(), random); return sync; @@ -257,14 +258,14 @@ void DataFileWriterBase::setMetadata(const string &key, const string &value) { metadata_[key] = v; } -DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), codec_(NULL_CODEC), stream_(fileSeekableInputStream(filename)), - decoder_(binaryDecoder()), objectCount_(0), eof_(false), blockStart_(-1), - 
blockEnd_(-1) { +DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), stream_(fileSeekableInputStream(filename)), + decoder_(binaryDecoder()), objectCount_(0), eof_(false), + codec_(NULL_CODEC), blockStart_(-1), blockEnd_(-1) { readHeader(); } -DataFileReaderBase::DataFileReaderBase(std::unique_ptr inputStream) : codec_(NULL_CODEC), stream_(std::move(inputStream)), - decoder_(binaryDecoder()), objectCount_(0), eof_(false) { +DataFileReaderBase::DataFileReaderBase(std::unique_ptr inputStream) : stream_(std::move(inputStream)), + decoder_(binaryDecoder()), objectCount_(0), eof_(false), codec_(NULL_CODEC) { readHeader(); } @@ -392,6 +393,9 @@ void DataFileReaderBase::readDataBlock() { compressed_.insert(compressed_.end(), data, data + len); } len = compressed_.size(); + if (len < 4) + throw Exception("Cannot read compressed data, expected at least 4 bytes, got " + std::to_string(len)); + int b1 = compressed_[len - 4] & 0xFF; int b2 = compressed_[len - 3] & 0xFF; int b3 = compressed_[len - 2] & 0xFF; @@ -407,8 +411,8 @@ void DataFileReaderBase::readDataBlock() { uint32_t c = crc(); if (checksum != c) { throw Exception( - boost::format("Checksum did not match for Snappy compression: Expected: %1%, computed: %2%") % checksum - % c); + "Checksum did not match for Snappy compression: Expected: {}, computed: {}", + checksum, c); } os_.reset(new boost::iostreams::filtering_istream()); os_->push( @@ -451,7 +455,7 @@ static ValidSchema makeSchema(const vector &v) { istringstream iss(toString(v)); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } void DataFileReaderBase::readHeader() { @@ -523,8 +527,7 @@ void DataFileReaderBase::sync(int64_t position) { eof_ = true; return; } - int len = - std::min(static_cast(SyncSize - i), n); + size_t len = std::min(SyncSize - i, n); memcpy(&sync_buffer[i], p, len); p += len; n -= len; diff --git a/lang/c++/impl/FileStream.cc b/lang/c++/impl/FileStream.cc index 
749fd835abd..9063cf1f734 100644 --- a/lang/c++/impl/FileStream.cc +++ b/lang/c++/impl/FileStream.cc @@ -49,9 +49,9 @@ struct BufferCopyIn { struct FileBufferCopyIn : public BufferCopyIn { #ifdef _WIN32 HANDLE h_; - FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { + explicit FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { if (h_ == INVALID_HANDLE_VALUE) { - throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError()); + throw Exception("Cannot open file: {}", ::GetLastError()); } } @@ -59,16 +59,16 @@ struct FileBufferCopyIn : public BufferCopyIn { ::CloseHandle(h_); } - void seek(size_t len) { + void seek(size_t len) override { if (::SetFilePointer(h_, len, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER && ::GetLastError() != NO_ERROR) { - throw Exception(boost::format("Cannot skip file: %1%") % ::GetLastError()); + throw Exception("Cannot skip file: {}", ::GetLastError()); } } - bool read(uint8_t *b, size_t toRead, size_t &actual) { + bool read(uint8_t *b, size_t toRead, size_t &actual) override { DWORD dw = 0; if (!::ReadFile(h_, b, toRead, &dw, NULL)) { - throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError()); + throw Exception("Cannot read file: {}", ::GetLastError()); } actual = static_cast(dw); return actual != 0; @@ -78,7 +78,7 @@ struct FileBufferCopyIn : public BufferCopyIn { explicit FileBufferCopyIn(const char *filename) : fd_(open(filename, O_RDONLY | O_BINARY)) { if (fd_ < 0) { - throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno)); + throw Exception("Cannot open file: {}", strerror(errno)); } } @@ -89,12 +89,12 @@ struct FileBufferCopyIn : public BufferCopyIn { void seek(size_t len) final { off_t r = ::lseek(fd_, len, SEEK_CUR); if (r == static_cast(-1)) { - throw Exception(boost::format("Cannot skip file: %1%") % 
strerror(errno)); + throw Exception("Cannot skip file: {}", strerror(errno)); } } bool read(uint8_t *b, size_t toRead, size_t &actual) final { - int n = ::read(fd_, b, toRead); + auto n = ::read(fd_, b, toRead); if (n > 0) { actual = n; return true; @@ -232,9 +232,9 @@ struct BufferCopyOut { struct FileBufferCopyOut : public BufferCopyOut { #ifdef _WIN32 HANDLE h_; - FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { + explicit FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) { if (h_ == INVALID_HANDLE_VALUE) { - throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError()); + throw Exception("Cannot open file: {}", ::GetLastError()); } } @@ -242,11 +242,11 @@ struct FileBufferCopyOut : public BufferCopyOut { ::CloseHandle(h_); } - void write(const uint8_t *b, size_t len) { + void write(const uint8_t *b, size_t len) override { while (len > 0) { DWORD dw = 0; if (!::WriteFile(h_, b, len, &dw, NULL)) { - throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError()); + throw Exception("Cannot read file: {}", ::GetLastError()); } b += dw; len -= dw; @@ -258,7 +258,7 @@ struct FileBufferCopyOut : public BufferCopyOut { explicit FileBufferCopyOut(const char *filename) : fd_(::open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644)) { if (fd_ < 0) { - throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno)); + throw Exception("Cannot open file: {}", ::strerror(errno)); } } @@ -268,7 +268,7 @@ struct FileBufferCopyOut : public BufferCopyOut { void write(const uint8_t *b, size_t len) final { if (::write(fd_, b, len) < 0) { - throw Exception(boost::format("Cannot write file: %1%") % ::strerror(errno)); + throw Exception("Cannot write file: {}", ::strerror(errno)); } } #endif diff --git a/lang/c++/impl/Generic.cc b/lang/c++/impl/Generic.cc index 
6e0436ae344..1535c604be7 100644 --- a/lang/c++/impl/Generic.cc +++ b/lang/c++/impl/Generic.cc @@ -29,7 +29,7 @@ typedef vector bytes; void GenericContainer::assertType(const NodePtr &schema, Type type) { if (schema->type() != type) { - throw Exception(boost::format("Schema type %1 expected %2") % toString(schema->type()) % toString(type)); + throw Exception("Schema type {} expected {}", schema->type(), type); } } @@ -129,7 +129,7 @@ void GenericReader::read(GenericDatum &datum, Decoder &d, bool isResolving) { } } break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type())); + throw Exception("Unknown schema type {}", datum.type()); } } @@ -217,7 +217,7 @@ void GenericWriter::write(const GenericDatum &datum, Encoder &e) { e.mapEnd(); } break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type())); + throw Exception("Unknown schema type {}", datum.type()); } } diff --git a/lang/c++/impl/GenericDatum.cc b/lang/c++/impl/GenericDatum.cc index 7b2bf93bca9..49700a927f5 100644 --- a/lang/c++/impl/GenericDatum.cc +++ b/lang/c++/impl/GenericDatum.cc @@ -83,7 +83,7 @@ void GenericDatum::init(const NodePtr &schema) { value_ = GenericUnion(sc); break; default: - throw Exception(boost::format("Unknown schema type %1%") % toString(type_)); + throw Exception("Unknown schema type {}", toString(type_)); } } diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc index 1aa24bf20de..5e03a313d8f 100644 --- a/lang/c++/impl/LogicalType.cc +++ b/lang/c++/impl/LogicalType.cc @@ -28,22 +28,22 @@ LogicalType::Type LogicalType::type() const { return type_; } -void LogicalType::setPrecision(int precision) { +void LogicalType::setPrecision(int32_t precision) { if (type_ != DECIMAL) { throw Exception("Only logical type DECIMAL can have precision"); } if (precision <= 0) { - throw Exception(boost::format("Precision cannot be: %1%") % precision); + throw Exception("Precision cannot be: {}", precision); 
} precision_ = precision; } -void LogicalType::setScale(int scale) { +void LogicalType::setScale(int32_t scale) { if (type_ != DECIMAL) { throw Exception("Only logical type DECIMAL can have scale"); } if (scale < 0) { - throw Exception(boost::format("Scale cannot be: %1%") % scale); + throw Exception("Scale cannot be: {}", scale); } scale_ = scale; } diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc index 46310d0f9ef..14ce6ecf05b 100644 --- a/lang/c++/impl/Node.cc +++ b/lang/c++/impl/Node.cc @@ -17,6 +17,7 @@ */ #include +#include #include "Node.hh" @@ -26,12 +27,44 @@ using std::string; Node::~Node() = default; +struct Name::Aliases { + std::vector raw; + std::unordered_set fullyQualified; +}; + +Name::Name() = default; + Name::Name(const std::string &name) { fullname(name); } +Name::Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) { + check(); +} + +Name::Name(const Name &other) { + *this = other; +} + +Name &Name::operator=(const Name &other) { + if (this != &other) { + ns_ = other.ns_; + simpleName_ = other.simpleName_; + // Assign unconditionally so a source without aliases clears any stale ones. + aliases_ = other.aliases_ ? std::make_unique<Aliases>(*other.aliases_) + : nullptr; + } + return *this; +} + +Name::Name(Name &&other) = default; + +Name &Name::operator=(Name &&other) = default; + +Name::~Name() = default; + string Name::fullname() const { - return (ns_.empty()) ? simpleName_ : ns_ + "." + simpleName_; + return ns_.empty() ? simpleName_ : ns_ + "." + simpleName_; } void Name::fullname(const string &name) { @@ -46,6 +79,23 @@ void Name::fullname(const string &name) { check(); } +const std::vector &Name::aliases() const { + static const std::vector emptyAliases; + return aliases_ ? aliases_->raw : emptyAliases; +} + +void Name::addAlias(const std::string &alias) { + if (!aliases_) { + aliases_ = std::make_unique(); + } + aliases_->raw.push_back(alias); + if (!ns_.empty() && alias.find_last_of('.') == string::npos) { + aliases_->fullyQualified.emplace(ns_ + "." 
+ alias); + } else { + aliases_->fullyQualified.insert(alias); + } +} + bool Name::operator<(const Name &n) const { return (ns_ < n.ns_) || (!(n.ns_ < ns_) && (simpleName_ < n.simpleName_)); } @@ -72,6 +122,16 @@ bool Name::operator==(const Name &n) const { return ns_ == n.ns_ && simpleName_ == n.simpleName_; } +bool Name::equalOrAliasedBy(const Name &n) const { + return *this == n || (n.aliases_ && n.aliases_->fullyQualified.find(fullname()) != n.aliases_->fullyQualified.end()); +} + +void Name::clear() { + ns_.clear(); + simpleName_.clear(); + aliases_.reset(); +} + void Node::setLogicalType(LogicalType logicalType) { checkLock(); @@ -86,14 +146,13 @@ void Node::setLogicalType(LogicalType logicalType) { if (type_ == AVRO_FIXED) { // Max precision that can be supported by the current size of // the FIXED type. - long maxPrecision = floor(log10(2.0) * (8.0 * fixedSize() - 1)); + auto maxPrecision = static_cast(floor(log10(2.0) * (8.0 * static_cast(fixedSize()) - 1))); if (logicalType.precision() > maxPrecision) { throw Exception( - boost::format( - "DECIMAL precision %1% is too large for the " - "FIXED type of size %2%, precision cannot be " - "larger than %3%") - % logicalType.precision() % fixedSize() % maxPrecision); + "DECIMAL precision {} is too large for the " + "FIXED type of size {}, precision cannot be " + "larger than {}", + logicalType.precision(), fixedSize(), maxPrecision); } } if (logicalType.scale() > logicalType.precision()) { diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc index 810e1641ed9..e3073aaaef2 100644 --- a/lang/c++/impl/NodeImpl.cc +++ b/lang/c++/impl/NodeImpl.cc @@ -71,7 +71,7 @@ string escape(const string &unescaped) { // Wrap an indentation in a struct for ostream operator<< struct indent { explicit indent(size_t depth) : d(depth) {} - int d; + size_t d; }; /// ostream operator for indent @@ -83,6 +83,18 @@ std::ostream &operator<<(std::ostream &os, indent x) { return os; } +void printCustomAttributes(const 
CustomAttributes &customAttributes, size_t depth, + std::ostream &os) { + std::map::const_iterator iter = + customAttributes.attributes().begin(); + while (iter != customAttributes.attributes().end()) { + os << ",\n" + << indent(depth); + customAttributes.printJson(os, iter->first); + ++iter; + } +} + } // anonymous namespace const int kByteStringSize = 6; @@ -101,7 +113,7 @@ NodePrimitive::resolve(const Node &reader) const { return RESOLVE_PROMOTABLE_TO_LONG; } - // fall-through intentional + [[fallthrough]]; case AVRO_LONG: @@ -109,7 +121,7 @@ NodePrimitive::resolve(const Node &reader) const { return RESOLVE_PROMOTABLE_TO_FLOAT; } - // fall-through intentional + [[fallthrough]]; case AVRO_FLOAT: @@ -243,17 +255,35 @@ static void printName(std::ostream &os, const Name &n, size_t depth) { void NodeRecord::printJson(std::ostream &os, size_t depth) const { os << "{\n"; os << indent(++depth) << "\"type\": \"record\",\n"; - printName(os, nameAttribute_.get(), depth); + const Name &name = nameAttribute_.get(); + printName(os, name, depth); + + const auto &aliases = name.aliases(); + if (!aliases.empty()) { + os << indent(depth) << "\"aliases\": ["; + ++depth; + for (size_t i = 0; i < aliases.size(); ++i) { + if (i > 0) { + os << ','; + } + os << '\n' + << indent(depth) << "\"" << aliases[i] << "\""; + } + os << '\n' + << indent(--depth) << "],\n"; + } + if (!getDoc().empty()) { os << indent(depth) << R"("doc": ")" << escape(getDoc()) << "\",\n"; } - os << indent(depth) << "\"fields\": ["; + os << indent(depth) << "\"fields\": ["; size_t fields = leafAttributes_.size(); ++depth; - // Serialize "default" field: - assert(defaultValues.empty() || (defaultValues.size() == fields)); + assert(fieldsAliases_.empty() || (fieldsAliases_.size() == fields)); + assert(fieldsDefaultValues_.empty() || (fieldsDefaultValues_.size() == fields)); + assert(customAttributes_.size() == 0 || customAttributes_.size() == fields); for (size_t i = 0; i < fields; ++i) { if (i > 0) { os << ','; @@ 
-264,16 +294,37 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const { os << indent(depth) << "\"type\": "; leafAttributes_.get(i)->printJson(os, depth); - if (!defaultValues.empty()) { - if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) { + if (!fieldsAliases_.empty() && !fieldsAliases_[i].empty()) { + os << ",\n" + << indent(depth) << "\"aliases\": ["; + ++depth; + for (size_t j = 0; j < fieldsAliases_[i].size(); ++j) { + if (j > 0) { + os << ','; + } + os << '\n' + << indent(depth) << "\"" << fieldsAliases_[i][j] << "\""; + } + os << '\n' + << indent(--depth) << ']'; + } + + // Serialize "default" field: + if (!fieldsDefaultValues_.empty()) { + if (!fieldsDefaultValues_[i].isUnion() && fieldsDefaultValues_[i].type() == AVRO_NULL) { // No "default" field. } else { os << ",\n" << indent(depth) << "\"default\": "; - leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os, + leafAttributes_.get(i)->printDefaultToJson(fieldsDefaultValues_[i], os, depth); } } + + if (customAttributes_.size() == fields) { + printCustomAttributes(customAttributes_.get(i), depth, os); + } + os << '\n'; os << indent(--depth) << '}'; } @@ -283,7 +334,7 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const { } void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(isPrimitive(g.type())); switch (g.type()) { @@ -324,13 +375,13 @@ void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os, } void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(g.type() == AVRO_ENUM); os << "\"" << g.value().symbol() << "\""; } void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os, - size_t depth) const { + size_t) const { assert(g.type() == AVRO_FIXED); // ex: "\uOOff" // Convert to a string @@ -409,16 +460,38 @@ void NodeRecord::printDefaultToJson(const GenericDatum 
&g, std::ostream &os, << indent(--depth) << "}"; } } -NodeRecord::NodeRecord(const HasName &name, - const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()), - defaultValues(std::move(dv)) { + +NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector dv) + : NodeRecord(name, HasDoc(), fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {} + +NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector dv) + : NodeRecord(name, doc, fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {} + +NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes) + : NodeRecord(name, HasDoc(), fields, fieldsNames, std::move(fieldsAliases), std::move(dv), customAttributes) {} + +NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes) + : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()), + fieldsAliases_(std::move(fieldsAliases)), + fieldsDefaultValues_(std::move(dv)) { + for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format( - "Cannot add duplicate field: %1%") - % leafNameAttributes_.get(i)); + throw Exception("Cannot add duplicate field: {}", leafNameAttributes_.get(i)); + } + + if (!fieldsAliases_.empty()) { + for (const auto &alias : fieldsAliases_[i]) { + if (!nameIndex_.add(alias, i)) { + throw Exception("Cannot add duplicate field: {}", alias); + } + } } } } @@ -458,9 +531,9 @@ void NodeEnum::printJson(std::ostream &os, size_t depth) const { 
printName(os, nameAttribute_.get(), depth); os << indent(depth) << "\"symbols\": [\n"; - int names = leafNameAttributes_.size(); + auto names = leafNameAttributes_.size(); ++depth; - for (int i = 0; i < names; ++i) { + for (size_t i = 0; i < names; ++i) { if (i > 0) { os << ",\n"; } @@ -504,9 +577,9 @@ NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) { void NodeUnion::printJson(std::ostream &os, size_t depth) const { os << "[\n"; - int fields = leafAttributes_.size(); + auto fields = leafAttributes_.size(); ++depth; - for (int i = 0; i < fields; ++i) { + for (size_t i = 0; i < fields; ++i) { if (i > 0) { os << ",\n"; } diff --git a/lang/c++/impl/Resolver.cc b/lang/c++/impl/Resolver.cc index 919345e8a2d..5fdd551a317 100644 --- a/lang/c++/impl/Resolver.cc +++ b/lang/c++/impl/Resolver.cc @@ -51,7 +51,7 @@ class PrimitiveSkipper : public Resolver { public: PrimitiveSkipper() : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { T val; reader.readValue(val); DEBUG_OUT("Skipping " << val); @@ -93,7 +93,7 @@ class PrimitivePromoter : public Resolver { DEBUG_OUT("Promoting " << val); } - void parseIt(Reader &reader, uint8_t *, const std::false_type &) const {} + void parseIt(Reader &, uint8_t *, const std::false_type &) const {} template void parseIt(Reader &reader, uint8_t *address) const { @@ -108,7 +108,7 @@ class PrimitiveSkipper> : public Resolver { public: PrimitiveSkipper() : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { std::vector val; reader.readBytes(val); DEBUG_OUT("Skipping bytes"); @@ -276,9 +276,9 @@ class ArrayParser : public Resolver { class EnumSkipper : public Resolver { public: - EnumSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {} + EnumSkipper(ResolverFactory &, const NodePtr &) : Resolver() {} - void parse(Reader &reader, uint8_t *address) const final { + void parse(Reader 
&reader, uint8_t *) const final { int64_t val = reader.readEnum(); DEBUG_OUT("Skipping enum" << val); } @@ -290,9 +290,9 @@ class EnumParser : public Resolver { VAL }; - EnumParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), - offset_(offsets.at(0).offset()), - readerSize_(reader->names()) { + EnumParser(ResolverFactory &, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), + offset_(offsets.at(0).offset()), + readerSize_(reader->names()) { const size_t writerSize = writer->names(); mapping_.reserve(writerSize); @@ -307,7 +307,7 @@ class EnumParser : public Resolver { void parse(Reader &reader, uint8_t *address) const final { auto val = static_cast(reader.readEnum()); - assert(static_cast(val) < mapping_.size()); + assert(val < mapping_.size()); if (mapping_[val] < readerSize_) { auto *location = reinterpret_cast(address + offset_); @@ -349,7 +349,7 @@ class UnionParser : public Resolver { *readerChoice = choiceMapping_[writerChoice]; auto *setter = reinterpret_cast(address + setFuncOffset_); - auto *value = reinterpret_cast(address + offset_); + uint8_t *value = address + offset_; uint8_t *location = (*setter)(value, *readerChoice); resolvers_[writerChoice]->parse(reader, location); @@ -397,7 +397,7 @@ class NonUnionToUnionParser : public Resolver { auto *choice = reinterpret_cast(address + choiceOffset_); *choice = choice_; auto *setter = reinterpret_cast(address + setFuncOffset_); - auto *value = reinterpret_cast(address + offset_); + uint8_t *value = address + offset_; uint8_t *location = (*setter)(value, choice_); resolver_->parse(reader, location); @@ -413,35 +413,35 @@ class NonUnionToUnionParser : public Resolver { class FixedSkipper : public Resolver { public: - FixedSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() { + FixedSkipper(ResolverFactory &, const NodePtr &writer) : Resolver() { size_ = writer->fixedSize(); } - void 
parse(Reader &reader, uint8_t *address) const final { + void parse(Reader &reader, uint8_t *) const final { DEBUG_OUT("Skipping fixed"); std::unique_ptr val(new uint8_t[size_]); reader.readFixed(&val[0], size_); } protected: - int size_; + size_t size_; }; class FixedParser : public Resolver { public: - FixedParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver() { + FixedParser(ResolverFactory &, const NodePtr &writer, const NodePtr &, const CompoundLayout &offsets) : Resolver() { size_ = writer->fixedSize(); offset_ = offsets.at(0).offset(); } void parse(Reader &reader, uint8_t *address) const final { DEBUG_OUT("Reading fixed"); - auto *location = reinterpret_cast(address + offset_); + uint8_t *location = address + offset_; reader.readFixed(location, size_); } protected: - int size_; + size_t size_; size_t offset_; }; @@ -449,7 +449,7 @@ class ResolverFactory : private boost::noncopyable { template unique_ptr - constructPrimitiveSkipper(const NodePtr &writer) { + constructPrimitiveSkipper(const NodePtr &) { return unique_ptr(new PrimitiveSkipper()); } @@ -710,8 +710,8 @@ NonUnionToUnionParser::NonUnionToUnionParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(), - offset_(offsets.offset()), choice_(0), + offset_(offsets.offset()), choiceOffset_(offsets.at(0).offset()), setFuncOffset_(offsets.at(1).offset()) { #ifndef NDEBUG diff --git a/lang/c++/impl/Schema.cc b/lang/c++/impl/Schema.cc index 42245292e67..8f42b850a09 100644 --- a/lang/c++/impl/Schema.cc +++ b/lang/c++/impl/Schema.cc @@ -18,6 +18,7 @@ #include +#include "CustomAttributes.hh" #include "Schema.hh" namespace avro { @@ -27,11 +28,18 @@ RecordSchema::RecordSchema(const std::string &name) : Schema(new NodeRecord) { } void RecordSchema::addField(const std::string &name, const Schema &fieldSchema) { + const CustomAttributes emptyCustomAttribute; + addField(name, 
fieldSchema, emptyCustomAttribute); +} + +void RecordSchema::addField(const std::string &name, const Schema &fieldSchema, const CustomAttributes &customFields) { // add the name first. it will throw if the name is a duplicate, preventing // the leaf from being added node_->addName(name); node_->addLeaf(fieldSchema.root()); + + node_->addCustomAttributesForField(customFields); } std::string RecordSchema::getDoc() const { diff --git a/lang/c++/impl/Stream.cc b/lang/c++/impl/Stream.cc index 63a8b4e8fc5..738b1e40520 100644 --- a/lang/c++/impl/Stream.cc +++ b/lang/c++/impl/Stream.cc @@ -117,7 +117,7 @@ class MemoryInputStream2 : public InputStream { } }; -class MemoryOutputStream : public OutputStream { +class MemoryOutputStream final : public OutputStream { public: const size_t chunkSize_; std::vector data_; @@ -129,7 +129,7 @@ class MemoryOutputStream : public OutputStream { ~MemoryOutputStream() final { for (std::vector::const_iterator it = data_.begin(); it != data_.end(); ++it) { - delete[] * it; + delete[] *it; } } diff --git a/lang/c++/impl/ValidSchema.cc b/lang/c++/impl/ValidSchema.cc index 63a3bbee919..d99d7e24198 100644 --- a/lang/c++/impl/ValidSchema.cc +++ b/lang/c++/impl/ValidSchema.cc @@ -16,7 +16,6 @@ * limitations under the License. 
*/ -#include #include #include #include @@ -25,7 +24,6 @@ #include "Schema.hh" #include "ValidSchema.hh" -using boost::format; using std::make_pair; using std::ostringstream; using std::shared_ptr; @@ -37,8 +35,7 @@ using SymbolMap = std::map; static bool validate(const NodePtr &node, SymbolMap &symbolMap) { if (!node->isValid()) { - throw Exception(format("Schema is invalid, due to bad node of type %1%") - % node->type()); + throw Exception("Schema is invalid, due to bad node of type {}", node->type()); } if (node->hasName()) { @@ -51,7 +48,7 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) { if (node->type() == AVRO_SYMBOLIC) { if (!found) { - throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name()); + throw Exception("Symbolic name \"{}\" is unknown", node->name()); } shared_ptr symNode = @@ -69,8 +66,8 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) { } node->lock(); - auto leaves = node->leaves(); - for (auto i = 0; i < leaves; ++i) { + size_t leaves = node->leaves(); + for (size_t i = 0; i < leaves; ++i) { const NodePtr &leaf(node->leafAt(i)); if (!validate(leaf, symbolMap)) { diff --git a/lang/c++/impl/Validator.cc b/lang/c++/impl/Validator.cc index 0e5fd8bedad..c00460480b1 100644 --- a/lang/c++/impl/Validator.cc +++ b/lang/c++/impl/Validator.cc @@ -62,7 +62,7 @@ bool Validator::countingSetup() { compoundStack_.pop_back(); proceed = false; } else { - counters_.push_back(static_cast(count_)); + counters_.push_back(count_); } } @@ -71,14 +71,14 @@ bool Validator::countingSetup() { void Validator::countingAdvance() { if (countingSetup()) { - auto index = (compoundStack_.back().pos)++; + size_t index = (compoundStack_.back().pos)++; const NodePtr &node = compoundStack_.back().node; if (index < node->leaves()) { setupOperation(node->leafAt(index)); } else { compoundStack_.back().pos = 0; - int count = --counters_.back(); + size_t count = --counters_.back(); if (count == 0) { counters_.pop_back(); 
compoundStarted_ = true; @@ -100,14 +100,13 @@ void Validator::unionAdvance() { waitingForCount_ = false; NodePtr node = compoundStack_.back().node; - if (count_ < static_cast(node->leaves())) { + if (count_ < node->leaves()) { compoundStack_.pop_back(); setupOperation(node->leafAt(static_cast(count_))); } else { throw Exception( - boost::format("Union selection out of range, got %1%," - " expecting 0-%2%") - % count_ % (node->leaves() - 1)); + "Union selection out of range, got {}, expecting 0-{}", + count_, node->leaves() - 1); } } } @@ -117,7 +116,7 @@ void Validator::fixedAdvance() { compoundStack_.pop_back(); } -int Validator::nextSizeExpected() const { +size_t Validator::nextSizeExpected() const { return compoundStack_.back().node->fixedSize(); } @@ -169,11 +168,9 @@ void Validator::advance() { } } -void Validator::setCount(int64_t count) { +void Validator::setCount(size_t count) { if (!waitingForCount_) { throw Exception("Not expecting count"); - } else if (count_ < 0) { - throw Exception("Count cannot be negative"); } count_ = count; diff --git a/lang/c++/impl/Zigzag.cc b/lang/c++/impl/Zigzag.cc index 538a89cbaa7..7875f789bd2 100644 --- a/lang/c++/impl/Zigzag.cc +++ b/lang/c++/impl/Zigzag.cc @@ -30,11 +30,11 @@ encodeInt64(int64_t input, std::array &output) noexcept { auto v = val & mask; size_t bytesOut = 0; while (val >>= 7) { - output[bytesOut++] = (v | 0x80); + output[bytesOut++] = static_cast(v | 0x80); v = val & mask; } - output[bytesOut++] = v; + output[bytesOut++] = static_cast(v); return bytesOut; } size_t @@ -46,11 +46,11 @@ encodeInt32(int32_t input, std::array &output) noexcept { auto v = val & mask; size_t bytesOut = 0; while (val >>= 7) { - output[bytesOut++] = (v | 0x80); + output[bytesOut++] = static_cast(v | 0x80); v = val & mask; } - output[bytesOut++] = v; + output[bytesOut++] = static_cast(v); return bytesOut; } diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc index 0b6b35a2f23..39da7af3539 100644 --- 
a/lang/c++/impl/avrogencpp.cc +++ b/lang/c++/impl/avrogencpp.cc @@ -32,8 +32,6 @@ #include #include -#include - #include "Compiler.hh" #include "NodeImpl.hh" #include "ValidSchema.hh" @@ -53,12 +51,6 @@ using boost::lexical_cast; using avro::compileJsonSchema; using avro::ValidSchema; -#if __cplusplus >= 201703L -#define ANY_NS "std" -#else -#define ANY_NS "boost" -#endif - struct PendingSetterGetter { string structName; string type; @@ -108,6 +100,7 @@ class CodeGen { void generateRecordTraits(const NodePtr &n); void generateUnionTraits(const NodePtr &n); void emitCopyright(); + void emitGeneratedWarning(); public: CodeGen(std::ostream &os, std::string ns, @@ -117,7 +110,9 @@ class CodeGen { schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)), includePrefix_(std::move(includePrefix)), noUnion_(noUnion), guardString_(std::move(guardString)), - random_(static_cast(::time(nullptr))) {} + random_(static_cast(::time(nullptr))) { + } + void generate(const ValidSchema &schema); }; @@ -125,7 +120,7 @@ static string decorate(const std::string &name) { static const char *cppReservedWords[] = { "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break", "case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept", - "const", "consteval", "constexpr", "const_cast", "continue", "co_await", "co_return", + "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if", "import", "inline", "int", "long", "module", "mutable", "namespace", "new", "noexcept", "not", @@ -250,6 +245,11 @@ string CodeGen::generateRecordType(const NodePtr &n) { if (n->leafAt(i)->type() == avro::AVRO_UNION) { os_ << " typedef " << types[i] << ' ' << n->nameAt(i) << "_t;\n"; + types[i] = n->nameAt(i) + "_t"; + } + if 
(n->leafAt(i)->type() == avro::AVRO_ARRAY && n->leafAt(i)->leafAt(0)->type() == avro::AVRO_UNION) { + os_ << " typedef " << types[i] << "::value_type" + << ' ' << n->nameAt(i) << "_item_t;\n"; } } } @@ -257,11 +257,7 @@ string CodeGen::generateRecordType(const NodePtr &n) { // the nameAt(i) does not take c++ reserved words into account // so we need to call decorate on it std::string decoratedNameAt = decorate(n->nameAt(i)); - if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) { - os_ << " " << decoratedNameAt << "_t"; - } else { - os_ << " " << types[i]; - } + os_ << " " << types[i]; os_ << ' ' << decoratedNameAt << ";\n"; } @@ -275,13 +271,7 @@ string CodeGen::generateRecordType(const NodePtr &n) { // so we need to call decorate on it std::string decoratedNameAt = decorate(n->nameAt(i)); os_ << " " << decoratedNameAt << "("; - if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) { - // the nameAt(i) does not take c++ reserved words into account - // so we need to call decorate on it - os_ << decoratedNameAt << "_t"; - } else { - os_ << types[i]; - } + os_ << types[i]; os_ << "())"; if (i != (c - 1)) { os_ << ','; @@ -326,9 +316,9 @@ static void generateGetterAndSetter(ostream &os, os << type << sn << "get_" << name << "() const {\n" << " if (idx_ != " << idx << ") {\n" << " throw avro::Exception(\"Invalid type for " - << "union\");\n" + << "union " << structName << "\");\n" << " }\n" - << " return " << ANY_NS << "::any_cast<" << type << " >(value_);\n" + << " return std::any_cast<" << type << " >(value_);\n" << "}\n\n"; os << "inline\n" @@ -385,7 +375,7 @@ string CodeGen::generateUnionType(const NodePtr &n) { os_ << "struct " << result << " {\n" << "private:\n" << " size_t idx_;\n" - << " " << ANY_NS << "::any value_;\n" + << " std::any value_;\n" << "public:\n" << " size_t idx() const { return idx_; }\n"; @@ -397,7 +387,7 @@ string CodeGen::generateUnionType(const NodePtr &n) { << " }\n" << " void set_null() {\n" << " idx_ = " << i << ";\n" - << " 
value_ = " << ANY_NS << "::any();\n" + << " value_ = std::any();\n" << " }\n"; } else { const string &type = types[i]; @@ -551,8 +541,22 @@ void CodeGen::generateRecordTraits(const NodePtr &n) { } string fn = fullname(decorate(n->name())); - os_ << "template<> struct codec_traits<" << fn << "> {\n" - << " static void encode(Encoder& e, const " << fn << "& v) {\n"; + os_ << "template<> struct codec_traits<" << fn << "> {\n"; + + if (c == 0) { + os_ << " static void encode(Encoder&, const " << fn << "&) {}\n"; + // ResolvingDecoder::fieldOrder mutates the state of the decoder, so if that decoder is + // passed in, we need to call the method even though it will return an empty vector. + os_ << " static void decode(Decoder& d, " << fn << "&) {\n"; + os_ << " if (avro::ResolvingDecoder *rd = dynamic_cast(&d)) {\n"; + os_ << " rd->fieldOrder();\n"; + os_ << " }\n"; + os_ << " }\n"; + os_ << "};\n"; + return; + } + + os_ << " static void encode(Encoder& e, const " << fn << "& v) {\n"; for (size_t i = 0; i < c; ++i) { // the nameAt(i) does not take c++ reserved words into account @@ -702,17 +706,22 @@ void CodeGen::emitCopyright() { " * See the License for the specific language governing " "permissions and\n" " * limitations under the License.\n" - " */\n\n\n"; + " */\n\n"; +} + +void CodeGen::emitGeneratedWarning() { + os_ << "/* This code was generated by avrogencpp " << AVRO_VERSION << ". Do not edit.*/\n\n"; } string CodeGen::guard() { string h = headerFile_; makeCanonical(h, true); - return h + "_" + lexical_cast(random_()) + "__H_"; + return h + "_" + lexical_cast(random_()) + "_H"; } void CodeGen::generate(const ValidSchema &schema) { emitCopyright(); + emitGeneratedWarning(); string h = guardString_.empty() ? 
guard() : guardString_; @@ -720,24 +729,14 @@ void CodeGen::generate(const ValidSchema &schema) { os_ << "#define " << h << "\n\n\n"; os_ << "#include \n" -#if __cplusplus >= 201703L << "#include \n" -#else - << "#include \"boost/any.hpp\"\n" -#endif << "#include \"" << includePrefix_ << "Specific.hh\"\n" << "#include \"" << includePrefix_ << "Encoder.hh\"\n" << "#include \"" << includePrefix_ << "Decoder.hh\"\n" << "\n"; - vector nsVector; if (!ns_.empty()) { - boost::algorithm::split_regex(nsVector, ns_, boost::regex("::")); - for (vector::const_iterator it = - nsVector.begin(); - it != nsVector.end(); ++it) { - os_ << "namespace " << *it << " {\n"; - } + os_ << "namespace " << ns_ << " {\n"; inNamespace_ = true; } @@ -760,11 +759,7 @@ void CodeGen::generate(const ValidSchema &schema) { if (!ns_.empty()) { inNamespace_ = false; - for (vector::const_iterator it = - nsVector.begin(); - it != nsVector.end(); ++it) { - os_ << "}\n"; - } + os_ << "}\n"; } os_ << "namespace avro {\n"; @@ -810,14 +805,32 @@ int main(int argc, char **argv) { const string NO_UNION_TYPEDEF("no-union-typedef"); po::options_description desc("Allowed options"); - desc.add_options()("help,h", "produce help message")("include-prefix,p", po::value()->default_value("avro"), - "prefix for include headers, - for none, default: avro")("no-union-typedef,U", "do not generate typedefs for unions in records")("namespace,n", po::value(), "set namespace for generated code")("input,i", po::value(), "input file")("output,o", po::value(), "output file to generate"); + // clang-format off + desc.add_options() + ("help,h", "produce help message") + ("version,V", "produce version information") + ("include-prefix,p", po::value()->default_value("avro"), "prefix for include headers, - for none, default: avro") + ("no-union-typedef,U", "do not generate typedefs for unions in records") + ("namespace,n", po::value(), "set namespace for generated code") + ("input,i", po::value(), "input file") + ("output,o", 
po::value(), "output file to generate"); + // clang-format on po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); - if (vm.count("help") || vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) { + if (vm.count("help")) { + std::cout << desc << std::endl; + return 0; + } + + if (vm.count("version")) { + std::cout << AVRO_VERSION << std::endl; + return 0; + } + + if (vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) { std::cout << desc << std::endl; return 1; } @@ -827,6 +840,7 @@ int main(int argc, char **argv) { string inf = vm.count(IN_FILE) > 0 ? vm[IN_FILE].as() : string(); string incPrefix = vm[INCLUDE_PREFIX].as(); bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0; + if (incPrefix == "-") { incPrefix.clear(); } else if (*incPrefix.rbegin() != '/') { diff --git a/lang/c++/impl/json/JsonDom.cc b/lang/c++/impl/json/JsonDom.cc index 5bffda2559c..c2696d827ad 100644 --- a/lang/c++/impl/json/JsonDom.cc +++ b/lang/c++/impl/json/JsonDom.cc @@ -25,9 +25,6 @@ #include "JsonIO.hh" #include "Stream.hh" -using boost::format; -using std::string; - namespace avro { namespace json { const char *typeToString(EntityType t) { @@ -142,8 +139,7 @@ void writeEntity(JsonGenerator &g, const Entity &n) { void Entity::ensureType(EntityType type) const { if (type_ != type) { - format msg = format("Invalid type. Expected \"%1%\" actual %2%") % typeToString(type) % typeToString(type_); - throw Exception(msg); + throw Exception("Invalid type. 
Expected \"{}\" actual {}", typeToString(type), typeToString(type_)); } } diff --git a/lang/c++/impl/json/JsonDom.hh b/lang/c++/impl/json/JsonDom.hh index 3fb5670b70b..2a0695adff6 100644 --- a/lang/c++/impl/json/JsonDom.hh +++ b/lang/c++/impl/json/JsonDom.hh @@ -76,22 +76,22 @@ public: explicit Entity(size_t line = 0) : type_(EntityType::Null), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {} + explicit Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {} + explicit Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {} + explicit Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {} // Not explicit because do want implicit conversion // NOLINTNEXTLINE(google-explicit-constructor) - Entity(const std::shared_ptr &v, size_t line 
= 0) : type_(EntityType::Obj), value_(v), line_(line) {} + explicit Entity(const std::shared_ptr &v, size_t line = 0) : type_(EntityType::Obj), value_(v), line_(line) {} EntityType type() const { return type_; } diff --git a/lang/c++/impl/json/JsonIO.cc b/lang/c++/impl/json/JsonIO.cc index 62549484a92..8273f392e88 100644 --- a/lang/c++/impl/json/JsonIO.cc +++ b/lang/c++/impl/json/JsonIO.cc @@ -55,7 +55,8 @@ void JsonParser::expectToken(Token tk) { if (cur() == Token::String && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN")) { curToken = Token::Double; - dv = sv == "Infinity" ? std::numeric_limits::infinity() : sv == "-Infinity" ? -std::numeric_limits::infinity() : std::numeric_limits::quiet_NaN(); + dv = sv == "Infinity" ? std::numeric_limits::infinity() : sv == "-Infinity" ? -std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); return; } else if (cur() == Token::Long) { dv = double(lv); @@ -146,7 +147,8 @@ JsonParser::Token JsonParser::tryNumber(char ch) { sv.push_back(ch); hasNext = false; - int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2; + int state = (ch == '-') ? 0 : (ch == '0') ? 1 + : 2; for (;;) { switch (state) { case 0: @@ -314,12 +316,40 @@ JsonParser::Token JsonParser::tryString() { } } +// Decode the given string and return contents as UTF8-encoded bytes. +// The input does not have the enclosing double-quotes. 
string JsonParser::decodeString(const string &s, bool binary) { string result; - for (string::const_iterator it = s.begin(); it != s.end(); ++it) { - char ch = *it; + auto it = s.cbegin(); + const auto end = s.cend(); + const auto readNextByte = [&]() -> char { + if (it == end) { + throw Exception("Unexpected EOF"); + } + return *it++; + }; + const auto unicodeParse = [&]() { + uint32_t n = 0; + for (int i = 0; i < 4; i++) { + auto c = readNextByte(); + n *= 16; + if (isdigit(c)) { + n += c - '0'; + } else if (c >= 'a' && c <= 'f') { + n += c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + n += c - 'A' + 10; + } else { + throw Exception("Invalid hex character: {}", c); + } + } + return n; + }; + while (it != end) { + string::const_iterator startSeq = it; + char ch = readNextByte(); if (ch == '\\') { - ch = *++it; + ch = readNextByte(); switch (ch) { case '"': case '\\': @@ -343,48 +373,48 @@ string JsonParser::decodeString(const string &s, bool binary) { continue; case 'u': case 'U': { - uint32_t n = 0; - char e[4]; - for (char &i : e) { - n *= 16; - char c = *++it; - i = c; - if (isdigit(c)) { - n += c - '0'; - } else if (c >= 'a' && c <= 'f') { - n += c - 'a' + 10; - } else if (c >= 'A' && c <= 'F') { - n += c - 'A' + 10; - } - } + uint32_t n = unicodeParse(); if (binary) { if (n > 0xff) { - throw Exception(boost::format( - "Invalid byte for binary: %1%%2%") - % ch % string(e, 4)); + throw Exception("Invalid byte for binary: {}{}", ch, string(startSeq, ++it)); } else { - result.push_back(n); + result.push_back(static_cast(n)); continue; } } + if (n >= 0xd800 && n < 0xdc00) { + ch = readNextByte(); + if (ch != '\\') { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } + ch = readNextByte(); + if (ch != 'u' && ch != 'U') { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } + uint32_t m = unicodeParse(); + if (m < 0xdc00 || m > 0xdfff) { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + 
} + n = 0x10000 + (((n - 0xd800) << 10) | (m - 0xdc00)); + } else if (n >= 0xdc00 && n < 0xdfff) { + throw Exception("Invalid unicode sequence: {}", string(startSeq, it)); + } if (n < 0x80) { - result.push_back(n); + result.push_back(static_cast(n)); } else if (n < 0x800) { - result.push_back((n >> 6) | 0xc0); - result.push_back((n & 0x3f) | 0x80); + result.push_back(static_cast((n >> 6) | 0xc0)); + result.push_back(static_cast((n & 0x3f) | 0x80)); } else if (n < 0x10000) { - result.push_back((n >> 12) | 0xe0); - result.push_back(((n >> 6) & 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); - } else if (n < 110000) { - result.push_back((n >> 18) | 0xf0); - result.push_back(((n >> 12) & 0x3f) | 0x80); - result.push_back(((n >> 6) & 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); + result.push_back(static_cast((n >> 12) | 0xe0)); + result.push_back(static_cast(((n >> 6) & 0x3f) | 0x80)); + result.push_back(static_cast((n & 0x3f) | 0x80)); + } else if (n < 0x110000) { + result.push_back(static_cast((n >> 18) | 0xf0)); + result.push_back(static_cast(((n >> 12) & 0x3f) | 0x80)); + result.push_back(static_cast(((n >> 6) & 0x3f) | 0x80)); + result.push_back(static_cast((n & 0x3f) | 0x80)); } else { - throw Exception(boost::format( - "Invalid unicode value: %1%i%2%") - % ch % string(e, 4)); + throw Exception("Invalid unicode value: {}{}", n, string(startSeq, ++it)); } } continue; diff --git a/lang/c++/impl/json/JsonIO.hh b/lang/c++/impl/json/JsonIO.hh index 94889e5d010..203bf895fd5 100644 --- a/lang/c++/impl/json/JsonIO.hh +++ b/lang/c++/impl/json/JsonIO.hh @@ -34,7 +34,7 @@ namespace avro { namespace json { inline char toHex(unsigned int n) { - return (n < 10) ? (n + '0') : (n + 'a' - 10); + return static_cast((n < 10) ? 
(n + '0') : (n + 'a' - 10)); } class AVRO_DECL JsonParser : boost::noncopyable { @@ -263,11 +263,22 @@ class AVRO_DECL JsonGenerator { out_.write(toHex((static_cast(c)) % 16)); } - void escapeUnicode(uint32_t c) { + void escapeUnicode16(uint32_t c) { out_.write('\\'); out_.write('u'); - writeHex((c >> 8) & 0xff); - writeHex(c & 0xff); + writeHex(static_cast((c >> 8) & 0xff)); + writeHex(static_cast(c & 0xff)); + } + void escapeUnicode(uint32_t c) { + if (c < 0x10000) { + escapeUnicode16(c); + } else if (c < 0x110000) { + c -= 0x10000; + escapeUnicode16(((c >> 10) & 0x3ff) | 0xd800); + escapeUnicode16((c & 0x3ff) | 0xdc00); + } else { + throw Exception("Invalid code-point: {}", c); + } } void doEncodeString(const char *b, size_t len, bool binary) { const char *e = b + len; @@ -310,7 +321,6 @@ class AVRO_DECL JsonGenerator { switch (*p) { case '\\': case '"': - case '/': escape(*p, b, p); break; case '\b': diff --git a/lang/c++/impl/parsing/JsonCodec.cc b/lang/c++/impl/parsing/JsonCodec.cc index 4fd04816069..84b366606f9 100644 --- a/lang/c++/impl/parsing/JsonCodec.cc +++ b/lang/c++/impl/parsing/JsonCodec.cc @@ -162,8 +162,7 @@ class JsonDecoderHandler { case Symbol::Kind::Field: expectToken(in_, JsonParser::Token::String); if (s.extra() != in_.stringValue()) { - throw Exception(boost::format("Incorrect field: expected \"%1%\" but got \"%2%\".") % - s.extra() % in_.stringValue()); + throw Exception(R"(Incorrect field: expected "{}" but got "{}".)", s.extra(), in_.stringValue()); } break; default: @@ -241,8 +240,7 @@ int32_t JsonDecoder

::decodeInt() { expect(JsonParser::Token::Long); int64_t result = in_.longValue(); if (result < INT32_MIN || result > INT32_MAX) { - throw Exception(boost::format("Value out of range for Avro int: %1%") - % result); + throw Exception("Value out of range for Avro int: {}", result); } return static_cast(result); } @@ -496,6 +494,7 @@ class JsonEncoder : public Encoder { template void JsonEncoder::init(OutputStream &os) { out_.init(os); + parser_.reset(); } template diff --git a/lang/c++/impl/parsing/ResolvingDecoder.cc b/lang/c++/impl/parsing/ResolvingDecoder.cc index d86f6e58293..1553b8a4b62 100644 --- a/lang/c++/impl/parsing/ResolvingDecoder.cc +++ b/lang/c++/impl/parsing/ResolvingDecoder.cc @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -38,17 +40,14 @@ using std::make_shared; namespace parsing { -using std::make_shared; using std::shared_ptr; using std::static_pointer_cast; -using std::find_if; -using std::istringstream; using std::make_pair; using std::map; -using std::ostringstream; using std::pair; using std::reverse; +using std::set; using std::stack; using std::string; using std::unique_ptr; @@ -67,16 +66,7 @@ class ResolvingGrammarGenerator : public ValidatingGrammarGenerator { const NodePtr &reader, map &m, map &m2); - static vector> fields(const NodePtr &n) { - vector> result; - size_t c = n->names(); - for (size_t i = 0; i < c; ++i) { - result.emplace_back(n->nameAt(i), i); - } - return result; - } - - static int bestBranch(const NodePtr &writer, const NodePtr &reader); + static std::optional bestBranch(const NodePtr &writer, const NodePtr &reader); ProductionPtr getWriterProduction(const NodePtr &n, map &m2); @@ -101,8 +91,8 @@ Symbol ResolvingGrammarGenerator::generate( return Symbol::rootSymbol(main, backup); } -int ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, - const NodePtr &reader) { +std::optional ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, + const NodePtr &reader) { Type t = 
writer->type(); const size_t c = reader->leaves(); @@ -141,7 +131,7 @@ int ResolvingGrammarGenerator::bestBranch(const NodePtr &writer, break; } } - return -1; + return std::nullopt; } static shared_ptr> getAvroBinary( @@ -154,15 +144,6 @@ static shared_ptr> getAvroBinary( return snapshot(*os); } -template -struct equalsFirst { - const T1 &v_; - explicit equalsFirst(const T1 &v) : v_(v) {} - bool operator()(const pair &p) { - return p.first == v_; - } -}; - ProductionPtr ResolvingGrammarGenerator::getWriterProduction( const NodePtr &n, map &m2) { const NodePtr &nn = (n->type() == AVRO_SYMBOLIC) ? static_cast(*n).getNode() : n; @@ -182,10 +163,18 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( map &m2) { ProductionPtr result = make_shared(); - vector> wf = fields(writer); - vector> rf = fields(reader); + vector wf(writer->names()); + for (size_t i = 0; i < wf.size(); ++i) { + wf[i] = writer->nameAt(i); + } + + set rf; + for (size_t i = 0; i < reader->names(); ++i) { + rf.emplace(i); + } + vector fieldOrder; - fieldOrder.reserve(reader->names()); + fieldOrder.reserve(rf.size()); /* * We look for all writer fields in the reader. If found, recursively @@ -193,19 +182,15 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( * If no matching field is found for reader, arrange to skip the writer * field. 
*/ - for (vector>::const_iterator it = wf.begin(); - it != wf.end(); ++it) { - auto it2 = find_if(rf.begin(), rf.end(), - equalsFirst(it->first)); - if (it2 != rf.end()) { - ProductionPtr p = doGenerate2(writer->leafAt(it->second), - reader->leafAt(it2->second), m, m2); + for (size_t wi = 0; wi != wf.size(); ++wi) { + size_t ri; + if (reader->nameIndex(wf[wi], ri)) { + ProductionPtr p = doGenerate2(writer->leafAt(wi), reader->leafAt(ri), m, m2); copy(p->rbegin(), p->rend(), back_inserter(*result)); - fieldOrder.push_back(it2->second); - rf.erase(it2); + fieldOrder.push_back(ri); + rf.erase(ri); } else { - ProductionPtr p = getWriterProduction( - writer->leafAt(it->second), m2); + ProductionPtr p = getWriterProduction(writer->leafAt(wi), m2); result->push_back(Symbol::skipStart()); if (p->size() == 1) { result->push_back((*p)[0]); @@ -216,24 +201,21 @@ ProductionPtr ResolvingGrammarGenerator::resolveRecords( } /* - * Examine the reader fields left out, (i.e. those didn't have corresponding + * Examine the reader fields left out (i.e. those didn't have corresponding * writer field). */ - for (vector>::const_iterator it = rf.begin(); - it != rf.end(); ++it) { - - NodePtr s = reader->leafAt(it->second); - fieldOrder.push_back(it->second); + for (const auto ri : rf) { + NodePtr s = reader->leafAt(ri); + fieldOrder.push_back(ri); if (s->type() == AVRO_SYMBOLIC) { s = resolveSymbol(s); } shared_ptr> defaultBinary = - getAvroBinary(reader->defaultValueAt(it->second)); + getAvroBinary(reader->defaultValueAt(ri)); result->push_back(Symbol::defaultStartAction(defaultBinary)); - map>::const_iterator it2 = - m.find(NodePair(s, s)); - ProductionPtr p = (it2 == m.end()) ? doGenerate2(s, s, m, m2) : it2->second; + auto it = m.find(NodePair(s, s)); + ProductionPtr p = it == m.end() ? 
doGenerate2(s, s, m, m2) : it->second; copy(p->rbegin(), p->rend(), back_inserter(*result)); result->push_back(Symbol::defaultEndAction()); } @@ -289,7 +271,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( case AVRO_BYTES: return make_shared(1, Symbol::bytesSymbol()); case AVRO_FIXED: - if (writer->name() == reader->name() && writer->fixedSize() == reader->fixedSize()) { + if (writer->name().equalOrAliasedBy(reader->name()) && writer->fixedSize() == reader->fixedSize()) { ProductionPtr result = make_shared(); result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize())); result->push_back(Symbol::fixedSymbol()); @@ -298,7 +280,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( } break; case AVRO_RECORD: - if (writer->name() == reader->name()) { + if (writer->name().equalOrAliasedBy(reader->name())) { const pair key(writer, reader); map::const_iterator kp = m.find(key); if (kp != m.end()) { @@ -312,7 +294,7 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( break; case AVRO_ENUM: - if (writer->name() == reader->name()) { + if (writer->name().equalOrAliasedBy(reader->name())) { ProductionPtr result = make_shared(); result->push_back(Symbol::enumAdjustSymbol(writer, reader)); result->push_back(Symbol::enumSymbol()); @@ -385,16 +367,18 @@ ProductionPtr ResolvingGrammarGenerator::doGenerate2( if (writerType == AVRO_INT || writerType == AVRO_LONG || writerType == AVRO_FLOAT) { return make_shared(1, - Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? Symbol::Kind::Long : Symbol::Kind::Float, Symbol::Kind::Double)); + Symbol::resolveSymbol(writerType == AVRO_INT ? Symbol::Kind::Int : writerType == AVRO_LONG ? 
Symbol::Kind::Long + : Symbol::Kind::Float, + Symbol::Kind::Double)); } break; case AVRO_UNION: { - int j = bestBranch(writer, reader); - if (j >= 0) { - ProductionPtr p = doGenerate2(writer, reader->leafAt(j), m, m2); + auto j = bestBranch(writer, reader); + if (j) { + ProductionPtr p = doGenerate2(writer, reader->leafAt(*j), m, m2); ProductionPtr result = make_shared(); - result->push_back(Symbol::unionAdjustSymbol(j, p)); + result->push_back(Symbol::unionAdjustSymbol(*j, p)); result->push_back(Symbol::unionSymbol()); return result; } @@ -530,13 +514,18 @@ int64_t ResolvingDecoderImpl

::decodeLong() { template float ResolvingDecoderImpl

::decodeFloat() { Symbol::Kind k = parser_.advance(Symbol::Kind::Float); - return k == Symbol::Kind::Int ? base_->decodeInt() : k == Symbol::Kind::Long ? base_->decodeLong() : base_->decodeFloat(); + return k == Symbol::Kind::Int ? static_cast(base_->decodeInt()) + : k == Symbol::Kind::Long ? static_cast(base_->decodeLong()) + : base_->decodeFloat(); } template double ResolvingDecoderImpl

::decodeDouble() { Symbol::Kind k = parser_.advance(Symbol::Kind::Double); - return k == Symbol::Kind::Int ? base_->decodeInt() : k == Symbol::Kind::Long ? base_->decodeLong() : k == Symbol::Kind::Float ? base_->decodeFloat() : base_->decodeDouble(); + return k == Symbol::Kind::Int ? static_cast(base_->decodeInt()) + : k == Symbol::Kind::Long ? static_cast(base_->decodeLong()) + : k == Symbol::Kind::Float ? base_->decodeFloat() + : base_->decodeDouble(); } template diff --git a/lang/c++/impl/parsing/Symbol.cc b/lang/c++/impl/parsing/Symbol.cc index b7a35517af8..fe87c5205b4 100644 --- a/lang/c++/impl/parsing/Symbol.cc +++ b/lang/c++/impl/parsing/Symbol.cc @@ -75,7 +75,7 @@ Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) { } size_t wc = writer->names(); - vector adj; + vector adj; // enums are encoded as ints adj.reserve(wc); vector err; @@ -85,10 +85,10 @@ Symbol Symbol::enumAdjustSymbol(const NodePtr &writer, const NodePtr &reader) { vector::const_iterator it = find(rs.begin(), rs.end(), s); if (it == rs.end()) { auto pos = err.size() + 1; - adj.push_back(-pos); + adj.push_back(static_cast(-pos)); err.push_back(s); } else { - adj.push_back(it - rs.begin()); + adj.push_back(static_cast(it - rs.begin())); } } return Symbol(Kind::EnumAdjust, make_pair(adj, err)); diff --git a/lang/c++/impl/parsing/Symbol.hh b/lang/c++/impl/parsing/Symbol.hh index 21e46a85ae4..c8760c34dfa 100644 --- a/lang/c++/impl/parsing/Symbol.hh +++ b/lang/c++/impl/parsing/Symbol.hh @@ -363,6 +363,10 @@ template class SimpleParser { Decoder *decoder_; Handler &handler_; + /* + * parsingStack always has root at the bottom of it. + * So it is safe to call top() on it. 
+ */ std::stack parsingStack; static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) { @@ -742,6 +746,14 @@ public: } else if (s.kind() == Symbol::Kind::SkipStart) { parsingStack.pop(); skip(*decoder_); + } else if (s.kind() == Symbol::Kind::Indirect) { + ProductionPtr pp = s.extra(); + parsingStack.pop(); + append(pp); + } else if (s.kind() == Symbol::Kind::Symbolic) { + ProductionPtr pp(s.extra>()); + parsingStack.pop(); + append(pp); } else { break; } @@ -756,6 +768,8 @@ public: while (parsingStack.size() > 1) { parsingStack.pop(); } + Symbol &s = parsingStack.top(); + append(boost::tuples::get<0>(*s.extrap())); } }; diff --git a/lang/c++/impl/parsing/ValidatingCodec.cc b/lang/c++/impl/parsing/ValidatingCodec.cc index cfb82225f15..7a1f8d91bc8 100644 --- a/lang/c++/impl/parsing/ValidatingCodec.cc +++ b/lang/c++/impl/parsing/ValidatingCodec.cc @@ -152,7 +152,7 @@ ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr &n, } struct DummyHandler { - static size_t handle(const Symbol &s) { + static size_t handle(const Symbol &) { return 0; } }; @@ -502,6 +502,7 @@ void ValidatingEncoder

::setItemCount(size_t count) { template void ValidatingEncoder

::startItem() { + parser_.processImplicitActions(); if (parser_.top() != Symbol::Kind::Repeater) { throw Exception("startItem at not an item boundary"); } diff --git a/lang/c++/api/AvroParse.hh b/lang/c++/include/avro/AvroParse.hh similarity index 100% rename from lang/c++/api/AvroParse.hh rename to lang/c++/include/avro/AvroParse.hh diff --git a/lang/c++/api/AvroSerialize.hh b/lang/c++/include/avro/AvroSerialize.hh similarity index 100% rename from lang/c++/api/AvroSerialize.hh rename to lang/c++/include/avro/AvroSerialize.hh diff --git a/lang/c++/api/AvroTraits.hh b/lang/c++/include/avro/AvroTraits.hh similarity index 94% rename from lang/c++/api/AvroTraits.hh rename to lang/c++/include/avro/AvroTraits.hh index 7b5a636ec33..465470a9382 100644 --- a/lang/c++/api/AvroTraits.hh +++ b/lang/c++/include/avro/AvroTraits.hh @@ -60,10 +60,10 @@ struct is_defined { typedef char no[2]; template - static yes &test(char (*)[sizeof(U)]) { throw 0; }; + static yes &test(char (*)[sizeof(U)]) { throw 0; } template - static no &test(...) { throw 0; }; + static no &test(...) { throw 0; } static const bool value = sizeof(test(0)) == sizeof(yes); }; @@ -82,10 +82,10 @@ struct is_not_defined { typedef char no[2]; template - static yes &test(char (*)[sizeof(U)]) { throw 0; }; + static yes &test(char (*)[sizeof(U)]) { throw 0; } template - static no &test(...) { throw 0; }; + static no &test(...) 
{ throw 0; } static const bool value = sizeof(test(0)) == sizeof(no); }; diff --git a/lang/c++/api/Compiler.hh b/lang/c++/include/avro/Compiler.hh similarity index 100% rename from lang/c++/api/Compiler.hh rename to lang/c++/include/avro/Compiler.hh diff --git a/lang/c++/api/Config.hh b/lang/c++/include/avro/Config.hh similarity index 100% rename from lang/c++/api/Config.hh rename to lang/c++/include/avro/Config.hh diff --git a/lang/c++/include/avro/CustomAttributes.hh b/lang/c++/include/avro/CustomAttributes.hh new file mode 100644 index 00000000000..b4cc6fbbaf8 --- /dev/null +++ b/lang/c++/include/avro/CustomAttributes.hh @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_CustomAttributes_hh__ +#define avro_CustomAttributes_hh__ + +#include "Config.hh" +#include +#include +#include +#include + +namespace avro { + +// CustomAttributes class stores avro custom attributes. +// Each attribute is represented by a unique name and value. +// User is supposed to create CustomAttributes object and then add it to Schema. +class AVRO_DECL CustomAttributes { +public: + // Retrieves the custom attribute json entity for that attributeName, returns an + // null if the attribute doesn't exist. 
+ boost::optional getAttribute(const std::string &name) const; + + // Adds a custom attribute. If the attribute already exists, throw an exception. + void addAttribute(const std::string &name, const std::string &value); + + // Provides a way to iterate over the custom attributes or check attribute size. + const std::map &attributes() const { + return attributes_; + } + + // Prints the attribute value for the specific attribute. + void printJson(std::ostream &os, const std::string &name) const; + +private: + std::map attributes_; +}; + +} // namespace avro + +#endif diff --git a/lang/c++/api/DataFile.hh b/lang/c++/include/avro/DataFile.hh similarity index 100% rename from lang/c++/api/DataFile.hh rename to lang/c++/include/avro/DataFile.hh diff --git a/lang/c++/api/Decoder.hh b/lang/c++/include/avro/Decoder.hh similarity index 100% rename from lang/c++/api/Decoder.hh rename to lang/c++/include/avro/Decoder.hh diff --git a/lang/c++/api/Encoder.hh b/lang/c++/include/avro/Encoder.hh similarity index 100% rename from lang/c++/api/Encoder.hh rename to lang/c++/include/avro/Encoder.hh diff --git a/lang/c++/api/Exception.hh b/lang/c++/include/avro/Exception.hh similarity index 84% rename from lang/c++/api/Exception.hh rename to lang/c++/include/avro/Exception.hh index 691869bed8c..234a1c93023 100644 --- a/lang/c++/api/Exception.hh +++ b/lang/c++/include/avro/Exception.hh @@ -20,19 +20,21 @@ #define avro_Exception_hh__ #include "Config.hh" -#include +#include #include namespace avro { /// Wrapper for std::runtime_error that provides convenience constructor -/// for boost::format objects +/// for formatted messages class AVRO_DECL Exception : public virtual std::runtime_error { public: explicit Exception(const std::string &msg) : std::runtime_error(msg) {} - explicit Exception(const boost::format &msg) : std::runtime_error(boost::str(msg)) {} + template + Exception(fmt::format_string fmt, Args &&...args) + : std::runtime_error(fmt::format(fmt, std::forward(args)...)) {} }; } 
// namespace avro diff --git a/lang/c++/api/Generic.hh b/lang/c++/include/avro/Generic.hh similarity index 100% rename from lang/c++/api/Generic.hh rename to lang/c++/include/avro/Generic.hh diff --git a/lang/c++/api/GenericDatum.hh b/lang/c++/include/avro/GenericDatum.hh similarity index 91% rename from lang/c++/api/GenericDatum.hh rename to lang/c++/include/avro/GenericDatum.hh index f58fd949950..a6ce2463ca8 100644 --- a/lang/c++/api/GenericDatum.hh +++ b/lang/c++/include/avro/GenericDatum.hh @@ -19,17 +19,12 @@ #ifndef avro_GenericDatum_hh__ #define avro_GenericDatum_hh__ +#include #include #include #include #include -#if __cplusplus >= 201703L -#include -#else -#include "boost/any.hpp" -#endif - #include "LogicalType.hh" #include "Node.hh" #include "ValidSchema.hh" @@ -62,11 +57,7 @@ class AVRO_DECL GenericDatum { protected: Type type_; LogicalType logicalType_; -#if __cplusplus >= 201703L std::any value_; -#else - boost::any value_; -#endif explicit GenericDatum(Type t) : type_(t), logicalType_(LogicalType::NONE) {} @@ -192,11 +183,7 @@ public: template GenericDatum(const NodePtr &schema, const T &v) : type_(schema->type()), logicalType_(schema->logicalType()) { init(schema); -#if __cplusplus >= 201703L *std::any_cast(&value_) = v; -#else - *boost::any_cast(&value_) = v; -#endif } /** @@ -539,65 +526,33 @@ public: }; inline Type GenericDatum::type() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().type() - : -#else - boost::any_cast(&value_)->datum().type() - : -#endif - type_; + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().type() + : type_; } inline LogicalType GenericDatum::logicalType() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().logicalType() : -#else - boost::any_cast(&value_)->datum().logicalType() : -#endif - logicalType_; + return (type_ == AVRO_UNION) ? 
std::any_cast(&value_)->datum().logicalType() + : logicalType_; } template T &GenericDatum::value() { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().value() + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().value() : *std::any_cast(&value_); -#else - boost::any_cast(&value_)->datum().value() - : *boost::any_cast(&value_); -#endif } template const T &GenericDatum::value() const { - return (type_ == AVRO_UNION) ? -#if __cplusplus >= 201703L - std::any_cast(&value_)->datum().value() + return (type_ == AVRO_UNION) ? std::any_cast(&value_)->datum().value() : *std::any_cast(&value_); -#else - boost::any_cast(&value_)->datum().value() - : *boost::any_cast(&value_); -#endif } inline size_t GenericDatum::unionBranch() const { -#if __cplusplus >= 201703L return std::any_cast(&value_)->currentBranch(); -#else - return boost::any_cast(&value_)->currentBranch(); -#endif } inline void GenericDatum::selectBranch(size_t branch) { -#if __cplusplus >= 201703L std::any_cast(&value_)->selectBranch(branch); -#else - boost::any_cast(&value_)->selectBranch(branch); -#endif } } // namespace avro diff --git a/lang/c++/api/Layout.hh b/lang/c++/include/avro/Layout.hh similarity index 100% rename from lang/c++/api/Layout.hh rename to lang/c++/include/avro/Layout.hh diff --git a/lang/c++/api/LogicalType.hh b/lang/c++/include/avro/LogicalType.hh similarity index 87% rename from lang/c++/api/LogicalType.hh rename to lang/c++/include/avro/LogicalType.hh index 4d06e74f635..ff430fd086a 100644 --- a/lang/c++/api/LogicalType.hh +++ b/lang/c++/include/avro/LogicalType.hh @@ -47,17 +47,17 @@ public: // Precision must be positive and scale must be either positive or zero. The // setters will throw an exception if they are called on any type other // than DECIMAL. 
- void setPrecision(int precision); - int precision() const { return precision_; } - void setScale(int scale); - int scale() const { return scale_; } + void setPrecision(int32_t precision); + int32_t precision() const { return precision_; } + void setScale(int32_t scale); + int32_t scale() const { return scale_; } void printJson(std::ostream &os) const; private: Type type_; - int precision_; - int scale_; + int32_t precision_; + int32_t scale_; }; } // namespace avro diff --git a/lang/c++/api/Node.hh b/lang/c++/include/avro/Node.hh similarity index 83% rename from lang/c++/api/Node.hh rename to lang/c++/include/avro/Node.hh index 3c9389da50a..f76078b052b 100644 --- a/lang/c++/api/Node.hh +++ b/lang/c++/include/avro/Node.hh @@ -26,6 +26,7 @@ #include #include +#include "CustomAttributes.hh" #include "Exception.hh" #include "LogicalType.hh" #include "SchemaResolution.hh" @@ -39,30 +40,38 @@ class GenericDatum; using NodePtr = std::shared_ptr; class AVRO_DECL Name { + struct Aliases; + std::string ns_; std::string simpleName_; + std::unique_ptr aliases_; public: - Name() = default; - explicit Name(const std::string &fullname); - Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) { check(); } + Name(); + explicit Name(const std::string &name); + Name(std::string simpleName, std::string ns); + Name(const Name &other); + Name &operator=(const Name &other); + Name(Name &&other); + Name &operator=(Name &&other); + ~Name(); std::string fullname() const; const std::string &ns() const { return ns_; } const std::string &simpleName() const { return simpleName_; } + const std::vector &aliases() const; void ns(std::string n) { ns_ = std::move(n); } void simpleName(std::string n) { simpleName_ = std::move(n); } void fullname(const std::string &n); + void addAlias(const std::string &alias); bool operator<(const Name &n) const; void check() const; bool operator==(const Name &n) const; bool operator!=(const Name &n) const { return 
!((*this) == n); } - void clear() { - ns_.clear(); - simpleName_.clear(); - } + bool equalOrAliasedBy(const Name &n) const; + void clear(); explicit operator std::string() const { return fullname(); } @@ -135,7 +144,7 @@ public: virtual size_t leaves() const = 0; virtual const NodePtr &leafAt(size_t index) const = 0; virtual const GenericDatum &defaultValueAt(size_t index) { - throw Exception(boost::format("No default value at: %1%") % index); + throw Exception("No default value at: {}", index); } void addName(const std::string &name) { @@ -153,6 +162,11 @@ public: } virtual size_t fixedSize() const = 0; + void addCustomAttributesForField(const CustomAttributes &customAttributes) { + checkLock(); + doAddCustomAttribute(customAttributes); + } + virtual bool isValid() const = 0; virtual SchemaResolution resolve(const Node &reader) const = 0; @@ -185,6 +199,7 @@ protected: virtual void doAddLeaf(const NodePtr &newLeaf) = 0; virtual void doAddName(const std::string &name) = 0; virtual void doSetFixedSize(size_t size) = 0; + virtual void doAddCustomAttribute(const CustomAttributes &customAttributes) = 0; private: const Type type_; @@ -201,4 +216,12 @@ inline std::ostream &operator<<(std::ostream &os, const avro::Node &n) { } } // namespace std +template<> +struct fmt::formatter : fmt::formatter { + template + auto format(const avro::Name &n, FormatContext &ctx) { + return fmt::formatter::format(n.fullname(), ctx); + } +}; + #endif diff --git a/lang/c++/api/NodeConcepts.hh b/lang/c++/include/avro/NodeConcepts.hh similarity index 98% rename from lang/c++/api/NodeConcepts.hh rename to lang/c++/include/avro/NodeConcepts.hh index 155c8ed9193..2f219cd94cc 100644 --- a/lang/c++/api/NodeConcepts.hh +++ b/lang/c++/include/avro/NodeConcepts.hh @@ -159,11 +159,11 @@ private: template struct NameIndexConcept { - bool lookup(const std::string &name, size_t &index) const { + bool lookup(const std::string &, size_t &) const { throw Exception("Name index does not exist"); } - bool 
add(const ::std::string &name, size_t) { + bool add(const ::std::string &, size_t) { throw Exception("Name index does not exist"); } }; diff --git a/lang/c++/api/NodeImpl.hh b/lang/c++/include/avro/NodeImpl.hh similarity index 82% rename from lang/c++/api/NodeImpl.hh rename to lang/c++/include/avro/NodeImpl.hh index c74d39e6b8b..3e5546c94ea 100644 --- a/lang/c++/api/NodeImpl.hh +++ b/lang/c++/include/avro/NodeImpl.hh @@ -30,6 +30,7 @@ #include #include +#include "CustomAttributes.hh" #include "Node.hh" #include "NodeConcepts.hh" @@ -42,6 +43,7 @@ template< class NameConcept, class LeavesConcept, class LeafNamesConcept, + class MultiAttributesConcept, class SizeConcept> class NodeImpl : public Node { @@ -51,17 +53,20 @@ protected: docAttribute_(), leafAttributes_(), leafNameAttributes_(), + customAttributes_(), sizeAttribute_() {} NodeImpl(Type type, const NameConcept &name, const LeavesConcept &leaves, const LeafNamesConcept &leafNames, + const MultiAttributesConcept &customAttributes, const SizeConcept &size) : Node(type), nameAttribute_(name), docAttribute_(), leafAttributes_(leaves), leafNameAttributes_(leafNames), + customAttributes_(customAttributes), sizeAttribute_(size) {} // Ctor with "doc" @@ -70,11 +75,13 @@ protected: const concepts::SingleAttribute &doc, const LeavesConcept &leaves, const LeafNamesConcept &leafNames, + const MultiAttributesConcept &customAttributes, const SizeConcept &size) : Node(type), nameAttribute_(name), docAttribute_(doc), leafAttributes_(leaves), leafNameAttributes_(leafNames), + customAttributes_(customAttributes), sizeAttribute_(size) {} void swap(NodeImpl &impl) { @@ -83,6 +90,7 @@ protected: std::swap(leafAttributes_, impl.leafAttributes_); std::swap(leafNameAttributes_, impl.leafNameAttributes_); std::swap(sizeAttribute_, impl.sizeAttribute_); + std::swap(customAttributes_, impl.customAttributes_); std::swap(nameIndex_, impl.nameIndex_); } @@ -121,7 +129,7 @@ protected: void doAddName(const std::string &name) override { if 
(!nameIndex_.add(name, leafNameAttributes_.size())) { - throw Exception(boost::format("Cannot add duplicate name: %1%") % name); + throw Exception("Cannot add duplicate name: {}", name); } leafNameAttributes_.add(name); } @@ -152,6 +160,10 @@ protected: void setLeafToSymbolic(size_t index, const NodePtr &node) override; + void doAddCustomAttribute(const CustomAttributes &customAttributes) override { + customAttributes_.add(customAttributes); + } + SchemaResolution furtherResolution(const Node &reader) const { SchemaResolution match = RESOLVE_NO_MATCH; @@ -195,6 +207,7 @@ protected: LeavesConcept leafAttributes_; LeafNamesConcept leafNameAttributes_; + MultiAttributesConcept customAttributes_; SizeConcept sizeAttribute_; concepts::NameIndexConcept nameIndex_; }; @@ -210,19 +223,21 @@ using MultiLeaves = concepts::MultiAttribute; using NoLeafNames = concepts::NoAttribute; using LeafNames = concepts::MultiAttribute; +using MultiAttributes = concepts::MultiAttribute; +using NoAttributes = concepts::NoAttribute; -using NoSize = concepts::NoAttribute; -using HasSize = concepts::SingleAttribute; +using NoSize = concepts::NoAttribute; +using HasSize = concepts::SingleAttribute; -using NodeImplPrimitive = NodeImpl; -using NodeImplSymbolic = NodeImpl; +using NodeImplPrimitive = NodeImpl; +using NodeImplSymbolic = NodeImpl; -using NodeImplRecord = NodeImpl; -using NodeImplEnum = NodeImpl; -using NodeImplArray = NodeImpl; -using NodeImplMap = NodeImpl; -using NodeImplUnion = NodeImpl; -using NodeImplFixed = NodeImpl; +using NodeImplRecord = NodeImpl; +using NodeImplEnum = NodeImpl; +using NodeImplArray = NodeImpl; +using NodeImplMap = NodeImpl; +using NodeImplUnion = NodeImpl; +using NodeImplFixed = NodeImpl; class AVRO_DECL NodePrimitive : public NodeImplPrimitive { public: @@ -245,9 +260,9 @@ class AVRO_DECL NodeSymbolic : public NodeImplSymbolic { public: NodeSymbolic() : NodeImplSymbolic(AVRO_SYMBOLIC) {} - explicit NodeSymbolic(const HasName &name) : 
NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()) {} + explicit NodeSymbolic(const HasName &name) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()) {} - NodeSymbolic(const HasName &name, const NodePtr &n) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()), actualNode_(n) {} + NodeSymbolic(const HasName &name, const NodePtr &n) : NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoAttributes(), NoSize()), actualNode_(n) {} SchemaResolution resolve(const Node &reader) const override; void printJson(std::ostream &os, size_t depth) const override; @@ -265,7 +280,7 @@ public: NodePtr getNode() const { NodePtr node = actualNode_.lock(); if (!node) { - throw Exception(boost::format("Could not follow symbol %1%") % name()); + throw Exception("Could not follow symbol {}", name()); } return node; } @@ -279,30 +294,30 @@ protected: }; class AVRO_DECL NodeRecord : public NodeImplRecord { - std::vector defaultValues; + std::vector> fieldsAliases_; + std::vector fieldsDefaultValues_; public: NodeRecord() : NodeImplRecord(AVRO_RECORD) {} + NodeRecord(const HasName &name, const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv); + const LeafNames &fieldsNames, std::vector dv); NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, - const LeafNames &fieldsNames, - std::vector dv) : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, NoSize()), - defaultValues(std::move(dv)) { - for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { - if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format( - "Cannot add duplicate field: %1%") - % leafNameAttributes_.get(i)); - } - } - } + const LeafNames &fieldsNames, std::vector dv); + + NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes 
&customAttributes); + + NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, std::vector> fieldsAliases, + std::vector dv, const MultiAttributes &customAttributes); void swap(NodeRecord &r) { NodeImplRecord::swap(r); - defaultValues.swap(r.defaultValues); + fieldsAliases_.swap(r.fieldsAliases_); + fieldsDefaultValues_.swap(r.fieldsDefaultValues_); } SchemaResolution resolve(const Node &reader) const override; @@ -310,11 +325,11 @@ public: void printJson(std::ostream &os, size_t depth) const override; bool isValid() const override { - return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size())); + return ((nameAttribute_.size() == 1) && (leafAttributes_.size() == leafNameAttributes_.size()) && (customAttributes_.size() == 0 || customAttributes_.size() == leafAttributes_.size())); } const GenericDatum &defaultValueAt(size_t index) override { - return defaultValues[index]; + return fieldsDefaultValues_[index]; } void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override; @@ -324,10 +339,10 @@ class AVRO_DECL NodeEnum : public NodeImplEnum { public: NodeEnum() : NodeImplEnum(AVRO_ENUM) {} - NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) { + NodeEnum(const HasName &name, const LeafNames &symbols) : NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoAttributes(), NoSize()) { for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { - throw Exception(boost::format("Cannot add duplicate enum: %1%") % leafNameAttributes_.get(i)); + throw Exception("Cannot add duplicate enum: {}", leafNameAttributes_.get(i)); } } } @@ -348,7 +363,7 @@ class AVRO_DECL NodeArray : public NodeImplArray { public: NodeArray() : NodeImplArray(AVRO_ARRAY) {} - explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY, NoName(), items, 
NoLeafNames(), NoSize()) {} + explicit NodeArray(const SingleLeaf &items) : NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoAttributes(), NoSize()) {} SchemaResolution resolve(const Node &reader) const override; @@ -365,7 +380,7 @@ class AVRO_DECL NodeMap : public NodeImplMap { public: NodeMap(); - explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoSize()) { + explicit NodeMap(const SingleLeaf &values) : NodeImplMap(AVRO_MAP, NoName(), MultiLeaves(values), NoLeafNames(), NoAttributes(), NoSize()) { // need to add the key for the map too NodePtr key(new NodePrimitive(AVRO_STRING)); doAddLeaf(key); @@ -389,7 +404,7 @@ class AVRO_DECL NodeUnion : public NodeImplUnion { public: NodeUnion() : NodeImplUnion(AVRO_UNION) {} - explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize()) {} + explicit NodeUnion(const MultiLeaves &types) : NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoAttributes(), NoSize()) {} SchemaResolution resolve(const Node &reader) const override; @@ -458,7 +473,7 @@ class AVRO_DECL NodeFixed : public NodeImplFixed { public: NodeFixed() : NodeImplFixed(AVRO_FIXED) {} - NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size) {} + NodeFixed(const HasName &name, const HasSize &size) : NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), NoAttributes(), size) {} SchemaResolution resolve(const Node &reader) const override; @@ -472,9 +487,9 @@ public: void printDefaultToJson(const GenericDatum &g, std::ostream &os, size_t depth) const override; }; -template +template inline void -NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { +NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { if (!B::hasAttribute) { throw Exception("Cannot change leaf node for nonexistent leaf"); } @@ -490,21 +505,21 @@ 
NodeImpl::setLeafToSymbolic(size_t index, const NodePtr &node) { replaceNode = symbol; } -template +template inline void -NodeImpl::printBasicInfo(std::ostream &os) const { +NodeImpl::printBasicInfo(std::ostream &os) const { os << type(); if (hasName()) { os << ' ' << nameAttribute_.get(); } - if (D::hasAttribute) { + if (E::hasAttribute) { os << " " << sizeAttribute_.get(); } os << '\n'; - int count = leaves(); + size_t count = leaves(); count = count ? count : names(); - for (int i = 0; i < count; ++i) { + for (size_t i = 0; i < count; ++i) { if (C::hasAttribute) { os << "name " << nameAt(i) << '\n'; } diff --git a/lang/c++/api/Parser.hh b/lang/c++/include/avro/Parser.hh similarity index 100% rename from lang/c++/api/Parser.hh rename to lang/c++/include/avro/Parser.hh diff --git a/lang/c++/api/Reader.hh b/lang/c++/include/avro/Reader.hh similarity index 96% rename from lang/c++/api/Reader.hh rename to lang/c++/include/avro/Reader.hh index ca6a719e31c..62d81c2365f 100644 --- a/lang/c++/api/Reader.hh +++ b/lang/c++/include/avro/Reader.hh @@ -84,7 +84,7 @@ public: union { double d; uint64_t i; - } v; + } v = {0}; reader_.read(v.i); val = v.d; } @@ -176,15 +176,15 @@ private: return encoded; } - int64_t readSize() { + size_t readSize() { uint64_t encoded = readVarInt(); - int64_t size = decodeZigzag64(encoded); + auto size = static_cast(decodeZigzag64(encoded)); return size; } - int64_t readCount() { + size_t readCount() { validator_.checkTypeExpected(AVRO_LONG); - int64_t count = readSize(); + size_t count = readSize(); validator_.setCount(count); return count; } diff --git a/lang/c++/api/Resolver.hh b/lang/c++/include/avro/Resolver.hh similarity index 100% rename from lang/c++/api/Resolver.hh rename to lang/c++/include/avro/Resolver.hh diff --git a/lang/c++/api/ResolverSchema.hh b/lang/c++/include/avro/ResolverSchema.hh similarity index 100% rename from lang/c++/api/ResolverSchema.hh rename to lang/c++/include/avro/ResolverSchema.hh diff --git 
a/lang/c++/api/ResolvingReader.hh b/lang/c++/include/avro/ResolvingReader.hh similarity index 100% rename from lang/c++/api/ResolvingReader.hh rename to lang/c++/include/avro/ResolvingReader.hh diff --git a/lang/c++/api/Schema.hh b/lang/c++/include/avro/Schema.hh similarity index 94% rename from lang/c++/api/Schema.hh rename to lang/c++/include/avro/Schema.hh index abd646f9fc7..6eec0e8b6e4 100644 --- a/lang/c++/api/Schema.hh +++ b/lang/c++/include/avro/Schema.hh @@ -20,6 +20,7 @@ #define avro_Schema_hh__ #include "Config.hh" +#include "CustomAttributes.hh" #include "NodeImpl.hh" #include @@ -100,6 +101,9 @@ class AVRO_DECL RecordSchema : public Schema { public: explicit RecordSchema(const std::string &name); void addField(const std::string &name, const Schema &fieldSchema); + // Add a field with custom attributes + void addField(const std::string &name, const Schema &fieldSchema, + const CustomAttributes &customAttributes); std::string getDoc() const; void setDoc(const std::string &); diff --git a/lang/c++/api/SchemaResolution.hh b/lang/c++/include/avro/SchemaResolution.hh similarity index 100% rename from lang/c++/api/SchemaResolution.hh rename to lang/c++/include/avro/SchemaResolution.hh diff --git a/lang/c++/api/Serializer.hh b/lang/c++/include/avro/Serializer.hh similarity index 100% rename from lang/c++/api/Serializer.hh rename to lang/c++/include/avro/Serializer.hh diff --git a/lang/c++/api/Specific.hh b/lang/c++/include/avro/Specific.hh similarity index 100% rename from lang/c++/api/Specific.hh rename to lang/c++/include/avro/Specific.hh diff --git a/lang/c++/api/Stream.hh b/lang/c++/include/avro/Stream.hh similarity index 99% rename from lang/c++/api/Stream.hh rename to lang/c++/include/avro/Stream.hh index fe2c97ee2dd..81448d26d02 100644 --- a/lang/c++/api/Stream.hh +++ b/lang/c++/include/avro/Stream.hh @@ -22,6 +22,7 @@ #include #include #include +#include #include "boost/utility.hpp" diff --git a/lang/c++/api/Types.hh b/lang/c++/include/avro/Types.hh 
similarity index 92% rename from lang/c++/api/Types.hh rename to lang/c++/include/avro/Types.hh index e3296ae0d00..84a33976e9f 100644 --- a/lang/c++/api/Types.hh +++ b/lang/c++/include/avro/Types.hh @@ -19,6 +19,7 @@ #ifndef avro_Types_hh__ #define avro_Types_hh__ +#include #include #include "Config.hh" @@ -109,4 +110,12 @@ std::ostream &operator<<(std::ostream &os, const Null &null); } // namespace avro +template<> +struct fmt::formatter : fmt::formatter { + template + auto format(avro::Type t, FormatContext &ctx) { + return fmt::formatter::format(avro::toString(t), ctx); + } +}; + #endif diff --git a/lang/c++/api/ValidSchema.hh b/lang/c++/include/avro/ValidSchema.hh similarity index 100% rename from lang/c++/api/ValidSchema.hh rename to lang/c++/include/avro/ValidSchema.hh diff --git a/lang/c++/api/Validator.hh b/lang/c++/include/avro/Validator.hh similarity index 81% rename from lang/c++/api/Validator.hh rename to lang/c++/include/avro/Validator.hh index ab5d068df0b..6437a549ff1 100644 --- a/lang/c++/api/Validator.hh +++ b/lang/c++/include/avro/Validator.hh @@ -32,10 +32,10 @@ namespace avro { class AVRO_DECL NullValidator : private boost::noncopyable { public: - explicit NullValidator(const ValidSchema &schema) {} + explicit NullValidator(const ValidSchema &) {} NullValidator() = default; - void setCount(int64_t) {} + void setCount(size_t) {} static bool typeIsExpected(Type) { return true; @@ -45,20 +45,20 @@ public: return AVRO_UNKNOWN; } - static int nextSizeExpected() { + static size_t nextSizeExpected() { return 0; } - static bool getCurrentRecordName(std::string &name) { + static bool getCurrentRecordName(std::string &) { return true; } - static bool getNextFieldName(std::string &name) { + static bool getNextFieldName(std::string &) { return true; } void checkTypeExpected(Type) {} - void checkFixedSizeExpected(int) {} + void checkFixedSizeExpected(size_t) {} }; /// This class is used by both the ValidatingSerializer and ValidationParser @@ -71,7 +71,7 @@ 
class AVRO_DECL Validator : private boost::noncopyable { public: explicit Validator(ValidSchema schema); - void setCount(int64_t val); + void setCount(size_t val); bool typeIsExpected(Type type) const { return (expectedTypesFlag_ & typeToFlag(type)) != 0; @@ -81,25 +81,21 @@ public: return nextType_; } - int nextSizeExpected() const; + size_t nextSizeExpected() const; bool getCurrentRecordName(std::string &name) const; bool getNextFieldName(std::string &name) const; void checkTypeExpected(Type type) { if (!typeIsExpected(type)) { - throw Exception( - boost::format("Type %1% does not match schema %2%") - % type % nextType_); + throw Exception("Type {} does not match schema {}", type, nextType_); } advance(); } - void checkFixedSizeExpected(int size) { + void checkFixedSizeExpected(size_t size) { if (nextSizeExpected() != size) { - throw Exception( - boost::format("Wrong size for fixed, got %1%, expected %2%") - % size % nextSizeExpected()); + throw Exception("Wrong size for fixed, got {}, expected {}", size, nextSizeExpected()); } checkTypeExpected(AVRO_FIXED); } @@ -108,7 +104,7 @@ private: using flag_t = uint32_t; static flag_t typeToFlag(Type type) { - flag_t flag = (1L << type); + flag_t flag = 1u << static_cast(type); return flag; } @@ -133,7 +129,7 @@ private: flag_t expectedTypesFlag_; bool compoundStarted_; bool waitingForCount_; - int64_t count_; + size_t count_; struct CompoundType { explicit CompoundType(NodePtr n) : node(std::move(n)), pos(0) {} diff --git a/lang/c++/api/Writer.hh b/lang/c++/include/avro/Writer.hh similarity index 100% rename from lang/c++/api/Writer.hh rename to lang/c++/include/avro/Writer.hh diff --git a/lang/c++/api/Zigzag.hh b/lang/c++/include/avro/Zigzag.hh similarity index 90% rename from lang/c++/api/Zigzag.hh rename to lang/c++/include/avro/Zigzag.hh index fefdc3f32e7..5d20e028b2d 100644 --- a/lang/c++/api/Zigzag.hh +++ b/lang/c++/include/avro/Zigzag.hh @@ -30,16 +30,14 @@ namespace avro { AVRO_DECL constexpr uint64_t 
encodeZigzag64(int64_t input) noexcept { - // cppcheck-suppress shiftTooManyBitsSigned - return ((input << 1) ^ (input >> 63)); + return ((static_cast(input) << 1) ^ (input >> 63)); } AVRO_DECL constexpr int64_t decodeZigzag64(uint64_t input) noexcept { return static_cast(((input >> 1) ^ -(static_cast(input) & 1))); } AVRO_DECL constexpr uint32_t encodeZigzag32(int32_t input) noexcept { - // cppcheck-suppress shiftTooManyBitsSigned - return ((input << 1) ^ (input >> 31)); + return (static_cast(input) << 1) ^ (input >> 31); } AVRO_DECL constexpr int32_t decodeZigzag32(uint32_t input) noexcept { return static_cast(((input >> 1) ^ -(static_cast(input) & 1))); diff --git a/lang/c++/api/buffer/Buffer.hh b/lang/c++/include/avro/buffer/Buffer.hh similarity index 98% rename from lang/c++/api/buffer/Buffer.hh rename to lang/c++/include/avro/buffer/Buffer.hh index bc3baf12330..16a22ef626e 100644 --- a/lang/c++/api/buffer/Buffer.hh +++ b/lang/c++/include/avro/buffer/Buffer.hh @@ -145,7 +145,7 @@ public: **/ size_type wroteTo(size_type size) { - int wrote = 0; + size_type wrote = 0; if (size) { if (size > freeSpace()) { throw std::length_error("Impossible to write more data than free space"); @@ -276,7 +276,7 @@ public: * Returns the number of chunks that contain free space. **/ - int numChunks() const { + size_t numChunks() const { return pimpl_->numFreeChunks(); } @@ -284,7 +284,7 @@ public: * Returns the number of chunks that contain data **/ - int numDataChunks() const { + size_t numDataChunks() const { return pimpl_->numDataChunks(); } @@ -384,7 +384,7 @@ public: * Returns the number of chunks containing data. 
**/ - int numChunks() const { + size_t numChunks() const { return pimpl_->numDataChunks(); } @@ -476,10 +476,10 @@ inline InputBuffer OutputBuffer::extractData(size_type bytes) { template inline void toIovec(BufferType &buf, std::vector &iov) { - const int chunks = buf.numChunks(); + const size_t chunks = buf.numChunks(); iov.resize(chunks); typename BufferType::const_iterator iter = buf.begin(); - for (int i = 0; i < chunks; ++i) { + for (size_t i = 0; i < chunks; ++i) { iov[i].iov_base = const_cast(iter->data()); iov[i].iov_len = iter->size(); ++iter; diff --git a/lang/c++/api/buffer/BufferPrint.hh b/lang/c++/include/avro/buffer/BufferPrint.hh similarity index 99% rename from lang/c++/api/buffer/BufferPrint.hh rename to lang/c++/include/avro/buffer/BufferPrint.hh index c8eb15b719a..8d4001529c9 100644 --- a/lang/c++/api/buffer/BufferPrint.hh +++ b/lang/c++/include/avro/buffer/BufferPrint.hh @@ -47,7 +47,7 @@ hexPrint(std::ostream &os, BufferReader &reader) { std::ios_base::fmtflags savedFlags = os.flags(); char sixteenBytes[16]; - int offset = 0; + size_t offset = 0; os << std::setfill('0'); os << std::hex; diff --git a/lang/c++/api/buffer/BufferReader.hh b/lang/c++/include/avro/buffer/BufferReader.hh similarity index 100% rename from lang/c++/api/buffer/BufferReader.hh rename to lang/c++/include/avro/buffer/BufferReader.hh diff --git a/lang/c++/api/buffer/BufferStream.hh b/lang/c++/include/avro/buffer/BufferStream.hh similarity index 100% rename from lang/c++/api/buffer/BufferStream.hh rename to lang/c++/include/avro/buffer/BufferStream.hh diff --git a/lang/c++/api/buffer/BufferStreambuf.hh b/lang/c++/include/avro/buffer/BufferStreambuf.hh similarity index 96% rename from lang/c++/api/buffer/BufferStreambuf.hh rename to lang/c++/include/avro/buffer/BufferStreambuf.hh index 2b7aea4d779..42eb20c21c6 100644 --- a/lang/c++/api/buffer/BufferStreambuf.hh +++ b/lang/c++/include/avro/buffer/BufferStreambuf.hh @@ -135,7 +135,11 @@ protected: memcpy(c, gptr(), toCopy); c 
+= toCopy; bytesCopied += toCopy; - gbump(toCopy); + while (toCopy > static_cast(std::numeric_limits::max())) { + gbump(std::numeric_limits::max()); + toCopy -= static_cast(std::numeric_limits::max()); + } + gbump(static_cast(toCopy)); } if (bytesCopied < len) { diff --git a/lang/c++/api/buffer/detail/BufferDetail.hh b/lang/c++/include/avro/buffer/detail/BufferDetail.hh similarity index 99% rename from lang/c++/api/buffer/detail/BufferDetail.hh rename to lang/c++/include/avro/buffer/detail/BufferDetail.hh index b487cdb3935..652e98d51ba 100644 --- a/lang/c++/api/buffer/detail/BufferDetail.hh +++ b/lang/c++/include/avro/buffer/detail/BufferDetail.hh @@ -481,13 +481,13 @@ public: } /// The number of chunks containing data. Used for debugging. - int numDataChunks() const { + size_t numDataChunks() const { return readChunks_.size(); } /// The number of chunks containing free space (note that an entire chunk /// may not be free). Used for debugging. - int numFreeChunks() const { + size_t numFreeChunks() const { return writeChunks_.size(); } diff --git a/lang/c++/api/buffer/detail/BufferDetailIterator.hh b/lang/c++/include/avro/buffer/detail/BufferDetailIterator.hh similarity index 100% rename from lang/c++/api/buffer/detail/BufferDetailIterator.hh rename to lang/c++/include/avro/buffer/detail/BufferDetailIterator.hh diff --git a/lang/c++/jsonschemas/cpp_reserved_words_union_typedef b/lang/c++/jsonschemas/cpp_reserved_words_union_typedef new file mode 100644 index 00000000000..215f2f4c0fc --- /dev/null +++ b/lang/c++/jsonschemas/cpp_reserved_words_union_typedef @@ -0,0 +1,13 @@ +{ + "type": "record", + "name": "Record", + "fields": [ + { + "name": "void", + "type": [ + "int", + "double" + ] + } + ] +} diff --git a/lang/c++/jsonschemas/union_empty_record b/lang/c++/jsonschemas/union_empty_record new file mode 100644 index 00000000000..5d2523165ff --- /dev/null +++ b/lang/c++/jsonschemas/union_empty_record @@ -0,0 +1,25 @@ +{ + "type": "record", + "name": "StackCalculator", 
+ "fields": [ + { + "name": "stack", + "type": { + "type": "array", + "items": [ + "int", + { + "type": "record", + "name": "Dup", + "fields": [] + }, + { + "type": "record", + "name": "Add", + "fields": [] + } + ] + } + } + ] +} diff --git a/lang/c++/m4/README b/lang/c++/m4/README deleted file mode 100644 index 6d90a5a133e..00000000000 --- a/lang/c++/m4/README +++ /dev/null @@ -1,3 +0,0 @@ -The macros in this directory came from https://www.nongnu.org/autoconf-archive/index.html - -Please refer to the files for their licensing info. diff --git a/lang/c++/m4/m4_ax_boost_asio.m4 b/lang/c++/m4/m4_ax_boost_asio.m4 deleted file mode 100644 index d0d070b017b..00000000000 --- a/lang/c++/m4/m4_ax_boost_asio.m4 +++ /dev/null @@ -1,108 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_asio.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_ASIO -# -# DESCRIPTION -# -# Test for Asio library from the Boost C++ libraries. The macro requires a -# preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_ASIO_LIB) -# -# And sets: -# -# HAVE_BOOST_ASIO -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Pete Greenwell -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 7 - -AC_DEFUN([AX_BOOST_ASIO], -[ - AC_ARG_WITH([boost-asio], - AS_HELP_STRING([--with-boost-asio@<:@=special-lib@:>@], - [use the ASIO library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-asio=boost_system-gcc41-mt-1_34 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_asio_lib="" - else - want_boost="yes" - ax_boost_user_asio_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::ASIO library is available, - ax_cv_boost_asio, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[ @%:@include - ]], - [[ - - boost::asio::io_service io; - boost::system::error_code timer_result; - boost::asio::deadline_timer t(io); - t.cancel(); - io.run_one(); - return 0; - ]]), - ax_cv_boost_asio=yes, ax_cv_boost_asio=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_asio" = "xyes"; then - AC_DEFINE(HAVE_BOOST_ASIO,,[define if the Boost::ASIO library is available]) - BN=boost_system - if test "x$ax_boost_user_asio_lib" = "x"; then - for ax_lib in $BN $BN-$CC $BN-$CC-mt $BN-$CC-mt-s $BN-$CC-s \ - lib$BN lib$BN-$CC lib$BN-$CC-mt lib$BN-$CC-mt-s lib$BN-$CC-s \ - $BN-mgw $BN-mgw $BN-mgw-mt $BN-mgw-mt-s $BN-mgw-s ; do - AC_CHECK_LIB($ax_lib, main, [BOOST_ASIO_LIB="-l$ax_lib" AC_SUBST(BOOST_ASIO_LIB) link_thread="yes" break], - [link_thread="no"]) - done - else - for ax_lib in $ax_boost_user_asio_lib $BN-$ax_boost_user_asio_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_ASIO_LIB="-l$ax_lib" AC_SUBST(BOOST_ASIO_LIB) link_asio="yes" break], - [link_asio="no"]) - done - - fi - if test "x$link_asio" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) 
- fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_base.m4 b/lang/c++/m4/m4_ax_boost_base.m4 deleted file mode 100644 index 34f63c751a8..00000000000 --- a/lang/c++/m4/m4_ax_boost_base.m4 +++ /dev/null @@ -1,219 +0,0 @@ -# =========================================================================== -# https://www.nongnu.org/autoconf-archive/ax_boost_base.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_BASE([MINIMUM-VERSION]) -# -# DESCRIPTION -# -# Test for the Boost C++ libraries of a particular version (or newer) -# -# If no path to the installed boost library is given the macro searchs -# under /usr, /usr/local, /opt and /opt/local and evaluates the -# $BOOST_ROOT environment variable. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS) -# -# And sets: -# -# HAVE_BOOST -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. - -AC_DEFUN([AX_BOOST_BASE], -[ -AC_ARG_WITH([boost], - AS_HELP_STRING([--with-boost@<:@=DIR@:>@], [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ac_boost_path="" - else - want_boost="yes" - ac_boost_path="$withval" - fi - ], - [want_boost="yes"]) - - -AC_ARG_WITH([boost-libdir], - AS_HELP_STRING([--with-boost-libdir=LIB_DIR], - [Force given directory for boost libraries. 
Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]), - [ - if test -d $withval - then - ac_boost_lib_path="$withval" - else - AC_MSG_ERROR(--with-boost-libdir expected directory name) - fi - ], - [ac_boost_lib_path=""] -) - -if test "x$want_boost" = "xyes"; then - boost_lib_version_req=ifelse([$1], ,1.20.0,$1) - boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'` - boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'` - boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'` - boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'` - if test "x$boost_lib_version_req_sub_minor" = "x" ; then - boost_lib_version_req_sub_minor="0" - fi - WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor` - AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req) - succeeded=no - - dnl first we check the system location for boost libraries - dnl this location ist chosen if boost libraries are installed with the --layout=system option - dnl or if you install boost with RPM - if test "$ac_boost_path" != ""; then - BOOST_LDFLAGS="-L$ac_boost_path/lib" - BOOST_CPPFLAGS="-I$ac_boost_path/include" - else - for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then - BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib" - BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include" - break; - fi - done - fi - - dnl overwrite ld flags if we have required special directory with - dnl --with-boost-libdir parameter - if test "$ac_boost_lib_path" != ""; then - BOOST_LDFLAGS="-L$ac_boost_lib_path" - fi - - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS 
- - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - - - - dnl if we found no boost with system layout we search for boost libraries - dnl built and installed without the --layout=system option or for a staged(not installed) version - if test "x$succeeded" != "xyes"; then - _version=0 - if test "$ac_boost_path" != ""; then - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - fi - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE" - done - fi - else - for ac_boost_path in /usr /usr/local /opt /opt/local ; do - if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then - for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do - _version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'` - V_CHECK=`expr $_version_tmp \> $_version` - if test "$V_CHECK" = "1" ; then - _version=$_version_tmp - best_path=$ac_boost_path - fi - done - fi - done - - VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'` - BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE" - if test "$ac_boost_lib_path" = "" - then - BOOST_LDFLAGS="-L$best_path/lib" - fi - - if test "x$BOOST_ROOT" != "x"; then - if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then - version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'` - 
stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'` - stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'` - V_CHECK=`expr $stage_version_shorten \>\= $_version` - if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then - AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT) - BOOST_CPPFLAGS="-I$BOOST_ROOT" - BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib" - fi - fi - fi - fi - - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_LANG_PUSH(C++) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - @%:@include - ]], [[ - #if BOOST_VERSION >= $WANT_BOOST_VERSION - // Everything is okay - #else - # error Boost version is too old - #endif - ]])],[ - AC_MSG_RESULT(yes) - succeeded=yes - found_system=yes - ],[ - ]) - AC_LANG_POP([C++]) - fi - - if test "$succeeded" != "yes" ; then - if test "$_version" = "0" ; then - AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in . 
See https://www.randspringer.de/boost for more documentation.]]) - else - AC_MSG_ERROR([Your boost libraries seems too old (version $_version).]) - fi - else - AC_SUBST(BOOST_CPPFLAGS) - AC_SUBST(BOOST_LDFLAGS) - AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available]) - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" -fi - -]) diff --git a/lang/c++/m4/m4_ax_boost_filesystem.m4 b/lang/c++/m4/m4_ax_boost_filesystem.m4 deleted file mode 100644 index a52ce6ad166..00000000000 --- a/lang/c++/m4/m4_ax_boost_filesystem.m4 +++ /dev/null @@ -1,115 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_filesystem.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_FILESYSTEM -# -# DESCRIPTION -# -# Test for Filesystem library from the Boost C++ libraries. The macro -# requires a preceding call to AX_BOOST_BASE. Further documentation is -# available at . -# -# This macro calls: -# -# AC_SUBST(BOOST_FILESYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_FILESYSTEM -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# Copyright (c) 2009 Roman Rybalko -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 13 - -AC_DEFUN([AX_BOOST_FILESYSTEM], -[ - AC_ARG_WITH([boost-filesystem], - AS_HELP_STRING([--with-boost-filesystem@<:@=special-lib@:>@], - [use the Filesystem library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-filesystem=boost_filesystem-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_filesystem_lib="" - else - want_boost="yes" - ax_boost_user_filesystem_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - LIBS_SAVED=$LIBS - LIBS="$LIBS $BOOST_SYSTEM_LIB" - export LIBS - - AC_CACHE_CHECK(whether the Boost::Filesystem library is available, - ax_cv_boost_filesystem, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[using namespace boost::filesystem; - path my_path( "foo/bar/data.txt" ); - return 0;]]), - ax_cv_boost_filesystem=yes, ax_cv_boost_filesystem=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_filesystem" = "xyes"; then - AC_DEFINE(HAVE_BOOST_FILESYSTEM,,[define if the Boost::Filesystem library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_filesystem_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_filesystem*.{so,dylib,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_filesystem.*\)\.so.*$;\1;' -e 's;^lib\(boost_filesystem.*\)\.a*$;\1;' -e 's;^lib\(boost_filesystem.*\)\.dylib$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - if test "x$link_program_options" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_filesystem*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_filesystem.*\)\.dll.*$;\1;' -e 's;^\(boost_filesystem.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; 
AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - fi - else - for ax_lib in $ax_boost_user_filesystem_lib boost_filesystem-$ax_boost_user_filesystem_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_FILESYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_FILESYSTEM_LIB) link_filesystem="yes"; break], - [link_filesystem="no"]) - done - - fi - if test "x$link_filesystem" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - LIBS="$LIBS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_regex.m4 b/lang/c++/m4/m4_ax_boost_regex.m4 deleted file mode 100644 index 4ec2eda39f9..00000000000 --- a/lang/c++/m4/m4_ax_boost_regex.m4 +++ /dev/null @@ -1,105 +0,0 @@ -# =========================================================================== -# https://www.nongnu.org/autoconf-archive/ax_boost_regex.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_REGEX -# -# DESCRIPTION -# -# Test for Regex library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_REGEX_LIB) -# -# And sets: -# -# HAVE_BOOST_REGEX -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. - -AC_DEFUN([AX_BOOST_REGEX], -[ - AC_ARG_WITH([boost-regex], - AS_HELP_STRING([--with-boost-regex@<:@=special-lib@:>@], - [use the Regex library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-regex=boost_regex-gcc-mt-d-1_33_1 ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_regex_lib="" - else - want_boost="yes" - ax_boost_user_regex_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Regex library is available, - ax_cv_boost_regex, - [AC_LANG_PUSH([C++]) - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include - ]], - [[boost::regex r(); return 0;]]), - ax_cv_boost_regex=yes, ax_cv_boost_regex=no) - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_regex" = "xyes"; then - AC_DEFINE(HAVE_BOOST_REGEX,,[define if the Boost::Regex library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - if test "x$ax_boost_user_regex_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_regex*.{so,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_regex.*\)\.so.*$;\1;' -e 's;^lib\(boost_regex.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - [link_regex="no"]) - done - if test "x$link_regex" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_regex*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_regex.*\)\.dll.*$;\1;' -e 's;^\(boost_regex.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - [link_regex="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_regex_lib boost_regex-$ax_boost_user_regex_lib; do - AC_CHECK_LIB($ax_lib, main, - [BOOST_REGEX_LIB="-l$ax_lib"; AC_SUBST(BOOST_REGEX_LIB) link_regex="yes"; break], - 
[link_regex="no"]) - done - fi - if test "x$link_regex" != "xyes"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_system.m4 b/lang/c++/m4/m4_ax_boost_system.m4 deleted file mode 100644 index 3a4cb611b2d..00000000000 --- a/lang/c++/m4/m4_ax_boost_system.m4 +++ /dev/null @@ -1,117 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_system.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_SYSTEM -# -# DESCRIPTION -# -# Test for System library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_SYSTEM_LIB) -# -# And sets: -# -# HAVE_BOOST_SYSTEM -# -# LICENSE -# -# Copyright (c) 2008 Thomas Porschberg -# Copyright (c) 2008 Michael Tindal -# Copyright (c) 2008 Daniel Casimiro -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 7 - -AC_DEFUN([AX_BOOST_SYSTEM], -[ - AC_ARG_WITH([boost-system], - AS_HELP_STRING([--with-boost-system@<:@=special-lib@:>@], - [use the System library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-system=boost_system-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_system_lib="" - else - want_boost="yes" - ax_boost_user_system_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::System library is available, - ax_cv_boost_system, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[boost::system::system_category]]), - ax_cv_boost_system=yes, ax_cv_boost_system=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_system" = "xyes"; then - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_SYSTEM,,[define if the Boost::System library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - if test "x$ax_boost_user_system_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_system*.{so,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_system.*\)\.so.*$;\1;' -e 's;^lib\(boost_system.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - if test "x$link_system" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_system*.{dll,a}* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_system.*\)\.dll.*$;\1;' -e 's;^\(boost_system.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_system_lib 
boost_system-$ax_boost_user_system_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_SYSTEM_LIB="-l$ax_lib"; AC_SUBST(BOOST_SYSTEM_LIB) link_system="yes"; break], - [link_system="no"]) - done - - fi - if test "x$link_system" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/m4/m4_ax_boost_thread.m4 b/lang/c++/m4/m4_ax_boost_thread.m4 deleted file mode 100644 index bff7defa7ad..00000000000 --- a/lang/c++/m4/m4_ax_boost_thread.m4 +++ /dev/null @@ -1,146 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_boost_thread.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_BOOST_THREAD -# -# DESCRIPTION -# -# Test for Thread library from the Boost C++ libraries. The macro requires -# a preceding call to AX_BOOST_BASE. Further documentation is available at -# . -# -# This macro calls: -# -# AC_SUBST(BOOST_THREAD_LIB) -# -# And sets: -# -# HAVE_BOOST_THREAD -# -# LICENSE -# -# Copyright (c) 2009 Thomas Porschberg -# Copyright (c) 2009 Michael Tindal -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 15 - -AC_DEFUN([AX_BOOST_THREAD], -[ - AC_ARG_WITH([boost-thread], - AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@], - [use the Thread library from boost - it is possible to specify a certain library for the linker - e.g. 
--with-boost-thread=boost_thread-gcc-mt ]), - [ - if test "$withval" = "no"; then - want_boost="no" - elif test "$withval" = "yes"; then - want_boost="yes" - ax_boost_user_thread_lib="" - else - want_boost="yes" - ax_boost_user_thread_lib="$withval" - fi - ], - [want_boost="yes"] - ) - - if test "x$want_boost" = "xyes"; then - AC_REQUIRE([AC_PROG_CC]) - AC_REQUIRE([AC_CANONICAL_BUILD]) - CPPFLAGS_SAVED="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" - export CPPFLAGS - - LDFLAGS_SAVED="$LDFLAGS" - LDFLAGS="$LDFLAGS $BOOST_LDFLAGS" - export LDFLAGS - - AC_CACHE_CHECK(whether the Boost::Thread library is available, - ax_cv_boost_thread, - [AC_LANG_PUSH([C++]) - CXXFLAGS_SAVE=$CXXFLAGS - - if test "x$build_os" = "xsolaris" ; then - CXXFLAGS="-pthreads $CXXFLAGS" - elif test "x$build_os" = "xming32" ; then - CXXFLAGS="-mthreads $CXXFLAGS" - else - CXXFLAGS="-pthread $CXXFLAGS" - fi - AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include ]], - [[boost::thread_group thrds; - return 0;]]), - ax_cv_boost_thread=yes, ax_cv_boost_thread=no) - CXXFLAGS=$CXXFLAGS_SAVE - AC_LANG_POP([C++]) - ]) - if test "x$ax_cv_boost_thread" = "xyes"; then - if test "x$build_os" = "xsolaris" ; then - BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS" - elif test "x$build_os" = "xming32" ; then - BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS" - else - BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS" - fi - - AC_SUBST(BOOST_CPPFLAGS) - - AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available]) - BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'` - - LDFLAGS_SAVE=$LDFLAGS - case "x$build_os" in - *bsd* ) - LDFLAGS="-pthread $LDFLAGS" - break; - ;; - esac - if test "x$ax_boost_user_thread_lib" = "x"; then - for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do - ax_lib=${libextension} - 
AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - if test "x$link_thread" != "xyes"; then - for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do - ax_lib=${libextension} - AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - fi - - else - for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do - AC_CHECK_LIB($ax_lib, exit, - [BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break], - [link_thread="no"]) - done - - fi - if test "x$link_thread" = "xno"; then - AC_MSG_ERROR(Could not link against $ax_lib !) - else - case "x$build_os" in - *bsd* ) - BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS" - break; - ;; - esac - - fi - fi - - CPPFLAGS="$CPPFLAGS_SAVED" - LDFLAGS="$LDFLAGS_SAVED" - fi -]) diff --git a/lang/c++/parser/AvroLex.ll b/lang/c++/parser/AvroLex.ll deleted file mode 100644 index 6070e089a9e..00000000000 --- a/lang/c++/parser/AvroLex.ll +++ /dev/null @@ -1,203 +0,0 @@ -%{ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -// on some systems, won't find an EOF definition -#ifndef EOF -#define EOF (-1) -#endif - -#include "AvroYacc.hh" - -// this undef is a hack for my mac implementation -#undef yyFlexLexer -#include "Compiler.hh" - -#define YY_STACK_USED 1 - -using std::cin; -using std::cout; -using std::cerr; - -%} - -%option c++ -%option noyywrap - -%{ - -int yylex(int *val, void *ctx) -{ - avro::CompilerContext *c = static_cast(ctx); - int ret = c->lexer().yylex(); - if( ret > AVRO_LEX_OUTPUT_TEXT_BEGIN && ret < AVRO_LEX_OUTPUT_TEXT_END ) { - c->setText( c->lexer().YYText()) ; - } - return ret; -} - -%} - -%x READTYPE -%x STARTTYPE -%x STARTSCHEMA -%x READNAME -%x READFIELD -%x READFIELDS -%x READFIELDNAME -%x READSYMBOLS -%x READSYMBOL -%x READSIZE -%x INUNION -%x INOBJECT -%x READMETADATA -%x SKIPJSONSTRING -%x SKIPJSONARRAY -%x SKIPJSONOBJECT - -ws [ \t\r\n] -nonws [^ \t\r\n] -delim {ws}*:{ws}* -avrotext [a-zA-Z_][a-zA-Z0-9_.]* -startunion \[ -startobject \{ -integer [0-9]+ -anytext .* - -%% -int return AVRO_LEX_INT; -long return AVRO_LEX_LONG; -null return AVRO_LEX_NULL; -boolean return AVRO_LEX_BOOL; -float return AVRO_LEX_FLOAT; -double return AVRO_LEX_DOUBLE; -string return AVRO_LEX_STRING; -bytes return AVRO_LEX_BYTES; -record return AVRO_LEX_RECORD; -enum return AVRO_LEX_ENUM; -map return AVRO_LEX_MAP; -array return AVRO_LEX_ARRAY; -fixed return AVRO_LEX_FIXED; -{avrotext} return AVRO_LEX_NAMED_TYPE; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_NAME; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_SYMBOL; -\" yy_pop_state(); - -{avrotext} return AVRO_LEX_FIELD_NAME; -\" yy_pop_state(); - -\"type\"{delim} yy_push_state(STARTSCHEMA); -\"name\"{delim}\" yy_push_state(READFIELDNAME); -\} yy_pop_state(); return AVRO_LEX_FIELD_END; -, return yytext[0]; -\"{avrotext}\"+{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA; -{ws} ; - -\{ 
yy_push_state(READFIELD); return AVRO_LEX_FIELD; -\] yy_pop_state(); return AVRO_LEX_FIELDS_END; -, return yytext[0]; -{ws} ; - -\" yy_push_state(READSYMBOL); -, return yytext[0]; -\] yy_pop_state(); return AVRO_LEX_SYMBOLS_END; -{ws} ; - -{integer} yy_pop_state(); return AVRO_LEX_SIZE; - -\" yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{startobject} yy_push_state(INOBJECT); return yytext[0]; -\] yy_pop_state(); return yytext[0]; -, return yytext[0]; -{ws} ; - -\" yy_pop_state(); -\\. ; -[^\"\\]+ ; - -\} yy_pop_state(); -\{ yy_push_state(SKIPJSONOBJECT); -\" yy_push_state(SKIPJSONSTRING); -[^\{\}\"]+ ; - -\] yy_pop_state(); -\[ yy_push_state(SKIPJSONARRAY); -\" yy_push_state(SKIPJSONSTRING); -[^\[\]\"]+ ; - -\" yy_pop_state(); yy_push_state(SKIPJSONSTRING); -\{ yy_pop_state(); yy_push_state(SKIPJSONOBJECT); -\[ yy_pop_state(); yy_push_state(SKIPJSONARRAY); -[^\"\{\[,\}]+ yy_pop_state(); - -\"type\"{delim} yy_push_state(STARTTYPE); return AVRO_LEX_TYPE; -\"name\"{delim}\" yy_push_state(READNAME); -\"size\"{delim} yy_push_state(READSIZE); -\"items\"{delim} yy_push_state(STARTSCHEMA); return AVRO_LEX_ITEMS; -\"values\"{delim} yy_push_state(STARTSCHEMA); return AVRO_LEX_VALUES; -\"fields\"{delim}\[ yy_push_state(READFIELDS); return AVRO_LEX_FIELDS; -\"symbols\"{delim}\[ yy_push_state(READSYMBOLS); return AVRO_LEX_SYMBOLS; -, return yytext[0]; -\} yy_pop_state(); return yytext[0]; -\"{avrotext}+\"{delim} yy_push_state(READMETADATA); return AVRO_LEX_METADATA; -{ws} ; - -\" yy_pop_state(); yy_push_state(READTYPE); -{startunion} yy_pop_state(); yy_push_state(INUNION); return yytext[0]; -{startobject} yy_pop_state(); yy_push_state(INOBJECT); return yytext[0]; - -\" yy_pop_state(); yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{startunion} yy_pop_state(); yy_push_state(INUNION); return yytext[0]; -{startobject} yy_pop_state(); yy_push_state(INOBJECT); return yytext[0]; - -{startobject} yy_push_state(INOBJECT); return yytext[0]; -{startunion} 
yy_push_state(INUNION); return yytext[0]; -\" yy_push_state(READTYPE); return AVRO_LEX_SIMPLE_TYPE; -{ws} ; -<> { -#if !YY_FLEX_SUBMINOR_VERSION || YY_FLEX_SUBMINOR_VERSION < 27 -// The versions of flex before 3.5.27 do not free their stack when done, so explcitly free it. -// Note that versions before did not actually define a subminor macro. - if (yy_start_stack) { - yy_flex_free(yy_start_stack); - yy_start_stack = 0; - } -#endif -#if YY_FLEX_SUBMINOR_VERSION > 35 -// At this time, 3.5.35 is the latest version. -#warning "Warning: untested version of flex" -#endif -#if YY_FLEX_SUBMINOR_VERSION >= 31 && YY_FLEX_SUBMINOR_VERSION < 34 -// The versions of flex starting 3.5.31 do not free yy_buffer_stack, so do so -// explicitly (first yy_delete_buffer must be called to free pointers stored on the stack, then it is -// safe to remove the stack). This was fixed in 3.4.34. - if(yy_buffer_stack) { - yy_delete_buffer(YY_CURRENT_BUFFER); - yyfree(yy_buffer_stack); - yy_buffer_stack = 0; - } -#endif - yyterminate(); - } - -%% - diff --git a/lang/c++/parser/AvroYacc.yy b/lang/c++/parser/AvroYacc.yy deleted file mode 100644 index 404d39585e3..00000000000 --- a/lang/c++/parser/AvroYacc.yy +++ /dev/null @@ -1,200 +0,0 @@ -%{ -/* - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include -#include "Compiler.hh" -#include "Exception.hh" - -#define YYLEX_PARAM ctx -#define YYPARSE_PARAM ctx - -void yyerror(const char *str) -{ - throw avro::Exception(boost::format("Parser error: %1%") % str); -} - -extern void *lexer; -extern int yylex(int *, void *); - -avro::CompilerContext &context(void *ctx) { - return *static_cast(ctx); -}; - -%} - -%pure-parser -%error-verbose - -%token AVRO_LEX_INT AVRO_LEX_LONG -%token AVRO_LEX_FLOAT AVRO_LEX_DOUBLE -%token AVRO_LEX_BOOL AVRO_LEX_NULL -%token AVRO_LEX_BYTES AVRO_LEX_STRING -%token AVRO_LEX_RECORD AVRO_LEX_ENUM AVRO_LEX_ARRAY AVRO_LEX_MAP AVRO_LEX_UNION AVRO_LEX_FIXED - -%token AVRO_LEX_METADATA - -%token AVRO_LEX_SYMBOLS AVRO_LEX_SYMBOLS_END -%token AVRO_LEX_FIELDS AVRO_LEX_FIELDS_END AVRO_LEX_FIELD AVRO_LEX_FIELD_END - -%token AVRO_LEX_TYPE AVRO_LEX_ITEMS AVRO_LEX_VALUES - -// Tokens that output text: -%token AVRO_LEX_OUTPUT_TEXT_BEGIN -%token AVRO_LEX_NAME -%token AVRO_LEX_NAMED_TYPE -%token AVRO_LEX_FIELD_NAME -%token AVRO_LEX_SYMBOL -%token AVRO_LEX_SIZE -%token AVRO_LEX_OUTPUT_TEXT_END - -%token AVRO_LEX_SIMPLE_TYPE - -%% - -avroschema: - simpleprimitive | object | union_t - ; - -primitive: - AVRO_LEX_INT { context(ctx).addType(avro::AVRO_INT); } - | - AVRO_LEX_LONG { context(ctx).addType(avro::AVRO_LONG); } - | - AVRO_LEX_FLOAT { context(ctx).addType(avro::AVRO_FLOAT); } - | - AVRO_LEX_DOUBLE { context(ctx).addType(avro::AVRO_DOUBLE); } - | - AVRO_LEX_BOOL { context(ctx).addType(avro::AVRO_BOOL); } - | - AVRO_LEX_NULL { context(ctx).addType(avro::AVRO_NULL); } - | - AVRO_LEX_BYTES { context(ctx).addType(avro::AVRO_BYTES); } - | - AVRO_LEX_STRING { context(ctx).addType(avro::AVRO_STRING); } - | - AVRO_LEX_NAMED_TYPE { context(ctx).addNamedType(); } - ; - -simpleprimitive: - AVRO_LEX_SIMPLE_TYPE { context(ctx).startType(); } primitive { context(ctx).stopType(); } - ; - -primitive_t: - 
AVRO_LEX_TYPE primitive - ; - -array_t: - AVRO_LEX_TYPE AVRO_LEX_ARRAY { context(ctx).addType(avro::AVRO_ARRAY); } - ; - -enum_t: - AVRO_LEX_TYPE AVRO_LEX_ENUM { context(ctx).addType(avro::AVRO_ENUM); } - ; - -fixed_t: - AVRO_LEX_TYPE AVRO_LEX_FIXED { context(ctx).addType(avro::AVRO_FIXED); } - ; - -map_t: - AVRO_LEX_TYPE AVRO_LEX_MAP { context(ctx).addType(avro::AVRO_MAP); } - ; - -record_t: - AVRO_LEX_TYPE AVRO_LEX_RECORD { context(ctx).addType(avro::AVRO_RECORD); } - ; - -type_attribute: - array_t | enum_t | fixed_t | map_t | record_t | primitive_t - ; - -union_t: - '[' { context(ctx).startType(); context(ctx).addType(avro::AVRO_UNION); context(ctx).setTypesAttribute(); } - unionlist - ']' { context(ctx).stopType(); } - ; - -object: - '{' { context(ctx).startType(); } - attributelist - '}' { context(ctx).stopType(); } - ; - -name_attribute: - AVRO_LEX_NAME { context(ctx).setNameAttribute(); } - ; - -size_attribute: - AVRO_LEX_SIZE { context(ctx).setSizeAttribute(); } - ; - -values_attribute: - AVRO_LEX_VALUES { context(ctx).setValuesAttribute(); } avroschema - ; - -fields_attribute: - AVRO_LEX_FIELDS { context(ctx).setFieldsAttribute(); } fieldslist AVRO_LEX_FIELDS_END - ; - -items_attribute: - AVRO_LEX_ITEMS { context(ctx).setItemsAttribute(); } avroschema - ; - -symbols_attribute: - AVRO_LEX_SYMBOLS symbollist AVRO_LEX_SYMBOLS_END - ; - -attribute: - type_attribute | name_attribute | fields_attribute | items_attribute | size_attribute | values_attribute | symbols_attribute | AVRO_LEX_METADATA - ; - -attributelist: - attribute | attributelist ',' attribute - ; - -symbol: - AVRO_LEX_SYMBOL { context(ctx).setSymbolsAttribute(); } - ; - -symbollist: - symbol | symbollist ',' symbol - ; - -fieldsetting: - fieldname | avroschema | AVRO_LEX_METADATA - ; - -fieldsettinglist: - fieldsetting | fieldsettinglist ',' fieldsetting - ; - -fields: - AVRO_LEX_FIELD fieldsettinglist AVRO_LEX_FIELD_END - ; - -fieldname: - AVRO_LEX_FIELD_NAME { 
context(ctx).textContainsFieldName(); } - ; - -fieldslist: - fields | fieldslist ',' fields - ; - -unionlist: - avroschema | unionlist ',' avroschema - ; diff --git a/lang/c++/test/AvrogencppTestReservedWords.cc b/lang/c++/test/AvrogencppTestReservedWords.cc index 7d305fb44d9..e3b9838e5b0 100644 --- a/lang/c++/test/AvrogencppTestReservedWords.cc +++ b/lang/c++/test/AvrogencppTestReservedWords.cc @@ -16,10 +16,11 @@ * limitations under the License. */ #include "cpp_reserved_words.hh" +#include "cpp_reserved_words_union_typedef.hh" #include "Compiler.hh" -#include +#include #ifdef min #undef min diff --git a/lang/c++/test/AvrogencppTests.cc b/lang/c++/test/AvrogencppTests.cc index 2130f818b55..d393e373dc8 100644 --- a/lang/c++/test/AvrogencppTests.cc +++ b/lang/c++/test/AvrogencppTests.cc @@ -21,9 +21,10 @@ #include "bigrecord_r.hh" #include "tweet.hh" #include "union_array_union.hh" +#include "union_empty_record.hh" #include "union_map_union.hh" -#include +#include #ifdef min #undef min @@ -219,16 +220,16 @@ void testNamespace() { twPoint.set_AvroPoint(point); } -void setRecord(uau::r1 &r) { +void setRecord(uau::r1 &) { } -void check(const uau::r1 &r1, const uau::r1 &r2) { +void check(const uau::r1 &, const uau::r1 &) { } -void setRecord(umu::r1 &r) { +void setRecord(umu::r1 &) { } -void check(const umu::r1 &r1, const umu::r1 &r2) { +void check(const umu::r1 &, const umu::r1 &) { } template @@ -267,13 +268,45 @@ void testEncoding2() { check(t2, t1); } -boost::unit_test::test_suite * -init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { +void testEmptyRecord() { + uer::StackCalculator calc; + uer::StackCalculator::stack_item_t item; + item.set_int(3); + calc.stack.push_back(item); + item.set_Dup(uer::Dup()); + calc.stack.push_back(item); + item.set_Add(uer::Add()); + calc.stack.push_back(item); + + ValidSchema s; + ifstream ifs("jsonschemas/union_empty_record"); + compileJsonSchema(ifs, s); + + unique_ptr os = memoryOutputStream(); + EncoderPtr e = 
validatingEncoder(s, binaryEncoder()); + e->init(*os); + avro::encode(*e, calc); + e->flush(); + + DecoderPtr d = validatingDecoder(s, binaryDecoder()); + unique_ptr is = memoryInputStream(*os); + d->init(*is); + uer::StackCalculator calc2; + avro::decode(*d, calc2); + + BOOST_CHECK_EQUAL(calc.stack.size(), calc2.stack.size()); + BOOST_CHECK_EQUAL(calc2.stack[0].idx(), 0); + BOOST_CHECK_EQUAL(calc2.stack[1].idx(), 1); + BOOST_CHECK_EQUAL(calc2.stack[2].idx(), 2); +} + +boost::unit_test::test_suite *init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { auto *ts = BOOST_TEST_SUITE("Code generator tests"); ts->add(BOOST_TEST_CASE(testEncoding)); ts->add(BOOST_TEST_CASE(testResolution)); ts->add(BOOST_TEST_CASE(testEncoding2)); ts->add(BOOST_TEST_CASE(testEncoding2)); ts->add(BOOST_TEST_CASE(testNamespace)); + ts->add(BOOST_TEST_CASE(testEmptyRecord)); return ts; } diff --git a/lang/c++/test/CodecTests.cc b/lang/c++/test/CodecTests.cc index a99cdd67498..fb888778368 100644 --- a/lang/c++/test/CodecTests.cc +++ b/lang/c++/test/CodecTests.cc @@ -25,7 +25,7 @@ #include "Specific.hh" #include "ValidSchema.hh" -#include +#include #include #include #include @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include @@ -68,7 +68,7 @@ static const unsigned int count = 10; * promote data type, convert from union to plain data type and vice versa, * insert or remove fields in records or reorder fields in a record. * - * To test Json encoder and decoder, we use the same technqiue with only + * To test Json encoder and decoder, we use the same technique with only * one difference - we use JsonEncoder and JsonDecoder. * * We also use the same infrastructure to test GenericReader and GenericWriter. 
@@ -160,7 +160,7 @@ static string randomString(size_t len) { if (c == '\0') { c = '\x7f'; } - result.push_back(c); + result.push_back(static_cast(c)); } return result; } @@ -169,7 +169,7 @@ static vector randomBytes(size_t len) { vector result; result.reserve(len); for (size_t i = 0; i < len; ++i) { - result.push_back(rnd()); + result.push_back(static_cast(rnd())); } return result; } @@ -335,7 +335,7 @@ struct StackElement { }; } // namespace -static vector::const_iterator skipCalls(Scanner &sc, Decoder &d, +static vector::const_iterator skipCalls(Scanner &sc, Decoder &, vector::const_iterator it, bool isArray) { char end = isArray ? ']' : '}'; int level = 0; @@ -364,7 +364,7 @@ static vector::const_iterator skipCalls(Scanner &sc, Decoder &d, case 'K': case 'b': case 'f': - case 'e': ++it; // Fall through. + case 'e': ++it; [[fallthrough]]; case 'c': case 'U': sc.extractInt(); @@ -525,7 +525,7 @@ ValidSchema makeValidSchema(const char *schema) { istringstream iss(schema); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } void testEncoder(const EncoderPtr &e, const char *writerCalls, @@ -594,7 +594,6 @@ struct TestData4 { const char *readerCalls; const char *readerValues[100]; unsigned int depth; - size_t recordCount; }; void appendSentinel(OutputStream &os) { @@ -838,7 +837,7 @@ void testGenericResolving(const TestData3 &td) { GenericReader gr(wvs, rvs, d1); GenericDatum datum; gr.read(datum); - d1->drain(); + gr.drain(); assertSentinel(*in1); EncoderPtr e2 = CodecFactory::newEncoder(rvs); @@ -963,6 +962,11 @@ static const TestData data[] = { {R"({"type":"map", "values": "boolean"})", "{c1sK5Bc2sK5BsK5B}", 2}, + // Record with no fields + {"{\"type\":\"record\",\"name\":\"empty\",\"fields\":[]}", + "", 1}, + + // Single-field records {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" "{\"name\":\"f\", \"type\":\"boolean\"}]}", "B", 1}, @@ -1002,6 +1006,16 @@ static const TestData data[] = { "{\"name\":\"f7\", 
\"type\":\"bytes\"}]}", "NBILFDS10b25", 1}, // record of records + {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" + "{\"name\":\"f1\",\"type\":\"boolean\"}," + "{\"name\":\"f2\", \"type\":{\"type\":\"record\"," + "\"name\":\"inner\",\"fields\":[]}}]}", + "B", 1}, + {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" + "{\"name\":\"f1\",\"type\":\"boolean\"}," + "{\"name\":\"f2\", \"type\":{\"type\":\"array\"," + "\"items\":\"r\"}}]}", + "B[]", 1}, {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" "{\"name\":\"f1\", \"type\":{\"type\":\"record\", " "\"name\":\"inner\", \"fields\":[" @@ -1264,249 +1278,558 @@ static const TestData3 data3[] = { {R"(["boolean", "int"])", "U1I", R"(["boolean", "long"])", "U1L", 1}, {R"(["boolean", "int"])", "U1I", R"(["long", "boolean"])", "U0L", 1}, + + // Aliases + {R"({"type": "record", "name": "r", "fields": [ + {"name": "f0", "type": "int"}, + {"name": "f1", "type": "boolean"}, + {"name": "f2", "type": "double"}]})", + "IBD", + R"({"type":"record", "name":"s", "aliases":["r"], "fields":[ + {"name":"g0", "type":"int", "aliases":["f0"]}, + {"name":"g1", "type":"boolean", "aliases":["f1"]}, + {"name":"f2", "type":"double", "aliases":["g2"]}]})", + "IBD", + 1}, + {R"({"type": "record", "name": "r", "namespace": "n", "fields": [ + {"name": "f0", "type": "int"}]})", + "I", + R"({"type": "record", "name": "s", "namespace": "n2", "aliases": ["t", "n.r"], "fields":[ + {"name": "f0", "type": "int"}]})", + "I", + 1}, + {R"({"type": "enum", "name": "e", "symbols": ["a", "b"]})", + "e1", + R"({"type": "enum", "name": "f", "aliases": ["e"], "symbols":["a", "b", "c"]})", + "e1", + 1}, + {R"({"type": "enum", "name": "e", "namespace": "n", "symbols": ["a", "b"]})", + "e1", + R"({"type": "enum", "name": "f", "namespace": "n2", "aliases": ["g", "n.e"], "symbols": ["a", "b"]})", + "e1", + 1}, + {R"({"type": "fixed", "name": "f", "size": 8})", + "f8", + R"({"type": "fixed", "name": "g", "aliases": ["f"], "size": 8})", + "f8", + 1}, + 
{R"({"type": "fixed", "name": "f", "namespace": "n", "size": 8})", + "f8", + R"({"type": "fixed", "name": "g", "namespace": "n2", "aliases": ["h", "n.f"], "size": 8})", + "f8", + 1}, + {R"({"type": "record", "name": "r1", "fields": [ + {"name": "f1", "type": ["null", {"type": "record", "name": "r2", "fields": [{"name": "f11", "type": "string"}]}]}, + {"name": "f2", "type": {"type": "array", "items": "r2"}} + ]})", + "U0N[c3sS1sS2sS3]", + R"({"type": "record", "name": "r1", "fields": [ + {"name": "f1", "type": [ + "null", + {"type": "record", "name": "r2", "fields": [{"name": "f11", "type": "string"}]}, + {"type": "record", "name": "r3", "fields": [ + {"name": "g11", "type": {"type": "array", "items": {"type": "record", "name": "r31", "fields": [{"name": "g111", "type": "double"}]}}} + ]} + ]}, + {"name": "f2", "type": {"type": "array", "items": "r2"}}, + {"name": "f3", "type": {"type": "array", "items": "r3"}, "default": []} + ]})", + "U0N[c3sS1sS2sS3][]", + 1}, + { + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } } + ]})", + "U0N[c3sS1sS2sS3]", + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]}, + { "name": "Record3", "type": "record", "fields": [ + { "name": "Record3_field1", "type": { "type": "array", "items": { "name": "Record2", "type": "record", + "fields":[{ "name": "Record2_field1", "type": "double" }]} + }} + ]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } }, + { "name": "field2", "type": { "type": "array", "items": "Record3" }, "default": [] } + ]})", + "U0N[c3sS1sS2sS3][]", + 1}, + { + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", 
"type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]}, + { "name": "Record3", "type": "record", "fields": [ + { "name": "Record3_field1", "type": { "type": "array", "items": { "name": "Record2", "type": "record", + "fields":[{ "name": "Record2_field1", "type": "double" }]} + }} + ]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } }, + { "name": "field2", "type": { "type": "array", "items": "Record3" }, "default": [] } + ]})", + "U0N[c3sS1sS2sS3][]", + R"({"name": "Project", "type": "record", "fields": [ + { "name": "_types", "type": [ + "null", + { "name": "Record1", "type": "record", "fields": [{ "name": "Record1_field1", "type": "string" }]} + ]}, + { "name": "field1", "type": { "type": "array", "items": "Record1" } } + ]})", + "U0N[c3sS1sS2sS3]", + 1}, }; static const TestData4 data4[] = { // Projection - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"string\"}," - "{\"name\":\"f2\", \"type\":\"string\"}," - "{\"name\":\"f3\", \"type\":\"int\"}]}", - "S10S10IS10S10I", - {"s1", "s2", "100", "t1", "t2", "200", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"string\" }," - "{\"name\":\"f2\", \"type\":\"string\"}]}", - "RS10S10RS10S10", - {"s1", "s2", "t1", "t2", nullptr}, - 1, - 2}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "string"}, + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "int"} + ] + })", + "S10S10IS10S10I", + {"s1", "s2", "100", "t1", "t2", "200", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "string"}, + {"name": "f2", "type": "string"} + ] + })", + "RS10S10RS10S10", + {"s1", "s2", "t1", "t2", nullptr}, + 1}, // Reordered fields - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\":\"string\"}]}", - "IS10", 
- {"10", "hello", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"string\" }," - "{\"name\":\"f1\", \"type\":\"long\"}]}", - "RLS10", - {"10", "hello", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "string"} + ] + })", + "IS10", + {"10", "hello", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f2", "type": "string" }, + {"name": "f1", "type": "long"} + ] + })", + "RLS10", + {"10", "hello", nullptr}, + 1}, // Default values - {R"({"type":"record","name":"r","fields":[]})", "", {nullptr}, "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":\"int\", \"default\": 100}]}", - "RI", - {"100", nullptr}, - 1, - 1}, - - {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"int\"}]}", + { + R"({"type": "record", "name": "r", "fields": []})", + "", + {nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [{"name": "f", "type": "int", "default": 100}] + })", + "RI", + {"100", nullptr}, + 1}, + + {R"({"type": "record", "name": "r", "fields": [{"name": "f2", "type": "int"}]})", "I", {"10", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," - "{\"name\":\"f2\", \"type\":\"int\"}]}", + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f1", "type": "int", "default": 101}, + {"name": "f2", "type": "int"} + ] + })", "RII", {"10", "101", nullptr}, - 1, 1}, - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" - "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "IL", - {"10", "11", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" 
- "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," - "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - "{\"name\": \"g2\", \"type\": \"long\"}]}}", - "RRIIL", - {"10", "101", "11", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner", + "fields": [{"name": "f2", "type": "int"}] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "IL", + {"10", "11", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner", + "fields": [ + { + "name": "f1", + "type": "int", + "default": 101 + }, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "RRIIL", + {"10", "101", "11", nullptr}, + 1}, // Default value for a record. - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "LIL", - {"10", "12", "13", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}," - "{\"name\": \"g3\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner2\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] }, " - "\"default\": { \"f1\": 15, \"f2\": 101 } }] } ", - "RRLILRLI", - {"10", "12", "13", "15", "101", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", 
"type": "long"} + ] + })", + "LIL", + {"10", "12", "13", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"}, + { + "name": "g3", + "type": { + "type": "record", + "name": "inner2", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + }, + "default": {"f1": 15, "f2": 101} + } + ] + })", + "RRLILRLI", + {"10", "12", "13", "15", "101", nullptr}, + 1}, - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}]}", - "LIL", - {"10", "12", "13", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\", " - "\"type\":{\"type\":\"record\",\"name\":\"inner1\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"long\" }," - "{\"name\":\"f2\", \"type\":\"int\"}] } }, " - "{\"name\": \"g2\", \"type\": \"long\"}," - "{\"name\": \"g3\", " - "\"type\":\"inner1\", " - "\"default\": { \"f1\": 15, \"f2\": 101 } }] } ", - "RRLILRLI", - {"10", "12", "13", "15", "101", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"} + ] + })", + "LIL", + {"10", "12", "13", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "record", + "name": "inner1", + "fields": [ + {"name": "f1", "type": "long"}, + {"name": "f2", "type": "int"} + ] + } + }, + {"name": "g2", "type": "long"}, + { + "name": "g3", + "type": "inner1", + 
"default": {"f1": 15, "f2": 101} + } + ] + })", + "RRLILRLI", + {"10", "12", "13", "15", "101", nullptr}, + 1}, - {R"({"type":"record","name":"r","fields":[]})", "", {nullptr}, "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":{ \"type\": \"array\", \"items\": \"int\" }," - "\"default\": [100]}]}", - "[c1sI]", - {"100", nullptr}, - 1, - 1}, + // TODO mkmkme HERE + { + R"({ + "type": "record", + "name": "r", + "fields": [] + })", + "", + {nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + { + "name": "f", + "type": {"type": "array", "items": "int"}, + "default": [100] + } + ] + })", + "[c1sI]", + {"100", nullptr}, + 1}, - {"{ \"type\": \"array\", \"items\": {\"type\":\"record\"," - "\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\": \"int\"}]} }", - "[c1sI]", - {"99", nullptr}, - "{ \"type\": \"array\", \"items\": {\"type\":\"record\"," - "\"name\":\"r\",\"fields\":[" - "{\"name\":\"f\", \"type\":\"int\", \"default\": 100}]} }", - "[Rc1sI]", - {"100", nullptr}, - 1, - 1}, + { + R"({ + "type": "array", + "items": { + "type": "record", + "name": "r", + "fields": [{"name": "f0", "type": "int"}] + } + })", + "[c1sI]", + {"99", nullptr}, + R"({ + "type": "array", + "items": { + "type": "record", + "name": "r", + "fields": [{"name": "f", "type": "int", "default": 100}] + } + })", + "[Rc1sI]", + {"100", nullptr}, + 1}, // Record of array of record with deleted field as last field - {"{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\"," - "\"type\":{\"type\":\"array\",\"items\":{" - "\"name\":\"item\",\"type\":\"record\",\"fields\":[" - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\": \"long\", \"default\": 0}]}}}]}", - "[c1sIL]", - {"10", "11", nullptr}, - "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" - "{\"name\": \"g1\"," - "\"type\":{\"type\":\"array\",\"items\":{" - "\"name\":\"item\",\"type\":\"record\",\"fields\":[" - "{\"name\":\"f1\", 
\"type\":\"int\"}]}}}]}", - "R[c1sI]", - {"10", nullptr}, - 2, - 1}, + { + R"({ + "type": "record", + "name": "outer", + "fields":[ + { + "name": "g1", + "type": { + "type": "array", + "items": { + "name": "item", + "type": "record", + "fields": [ + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long", "default": 0} + ] + } + } + } + ] + })", + "[c1sIL]", + {"10", "11", nullptr}, + R"({ + "type": "record", + "name": "outer", + "fields": [ + { + "name": "g1", + "type": { + "type": "array", + "items": { + "name": "item", + "type": "record", + "fields": [{"name": "f1", "type": "int"}] + } + } + } + ] + })", + "R[c1sI]", + {"10", nullptr}, + 2}, // Enum resolution - {R"({"type":"enum","name":"e","symbols":["x","y","z"]})", - "e2", - {nullptr}, - R"({"type":"enum","name":"e","symbols":[ "y", "z" ]})", - "e1", - {nullptr}, - 1, - 1}, - - {R"({"type":"enum","name":"e","symbols":[ "x", "y" ]})", + { + R"({"type":"enum","name":"e","symbols":["x","y","z"]})", + "e2", + {nullptr}, + R"({"type": "enum", "name": "e", "symbols": ["y", "z"]})", + "e1", + {nullptr}, + 1}, + + {R"({"type": "enum", "name": "e", "symbols": ["x", "y"]})", "e1", {nullptr}, - R"({"type":"enum","name":"e","symbols":[ "y", "z" ]})", + R"({"type": "enum", "name": "e", "symbols": ["y", "z"]})", "e0", {nullptr}, - 1, 1}, // Union - {"\"int\"", "I", {"100", nullptr}, R"([ "long", "int"])", "U1I", {"100", nullptr}, 1, 1}, - - {R"([ "long", "int"])", "U1I", {"100", nullptr}, "\"int\"", "I", {"100", nullptr}, 1, 1}, + { + R"("int")", + "I", + {"100", nullptr}, + R"(["long", "int"])", + "U1I", + {"100", nullptr}, + 1}, + + {R"(["long", "int"])", + "U1I", + {"100", nullptr}, + R"("int")", + "I", + {"100", nullptr}, + 1}, // Arrray of unions - {R"({"type":"array", "items":[ "long", "int"]})", - "[c2sU1IsU1I]", - {"100", "100", nullptr}, - R"({"type":"array", "items": "int"})", - "[c2sIsI]", - {"100", "100", nullptr}, - 2, - 1}, + { + R"({"type": "array", "items": ["long", "int"]})", + "[c2sU1IsU1I]", + 
{"100", "100", nullptr}, + R"({"type":"array", "items": "int"})", + "[c2sIsI]", + {"100", "100", nullptr}, + 2}, // Map of unions - {R"({"type":"map", "values":[ "long", "int"]})", - "{c2sS10U1IsS10U1I}", - {"k1", "100", "k2", "100", nullptr}, - R"({"type":"map", "values": "int"})", - "{c2sS10IsS10I}", - {"k1", "100", "k2", "100", nullptr}, - 2, - 1}, + { + R"({"type": "map", "values": ["long", "int"]})", + "{c2sS10U1IsS10U1I}", + {"k1", "100", "k2", "100", nullptr}, + R"({"type":"map", "values": "int"})", + "{c2sS10IsS10I}", + {"k1", "100", "k2", "100", nullptr}, + 2}, // Union + promotion - {"\"int\"", "I", {"100", nullptr}, R"([ "long", "string"])", "U0L", {"100", nullptr}, 1, 1}, - - {R"([ "int", "string"])", "U0I", {"100", nullptr}, "\"long\"", "L", {"100", nullptr}, 1, 1}, + { + R"("int")", + "I", + {"100", nullptr}, + R"(["long", "string"])", + "U0L", + {"100", nullptr}, + 1}, + + {R"(["int", "string"])", + "U0I", + {"100", nullptr}, + R"("long")", + "L", + {"100", nullptr}, + 1}, // Record where union field is skipped. 
- {"{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\":\"boolean\"}," - "{\"name\":\"f1\", \"type\":\"int\"}," - "{\"name\":\"f2\", \"type\":[\"int\", \"long\"]}," - "{\"name\":\"f3\", \"type\":\"float\"}" - "]}", - "BIU0IF", - {"1", "100", "121", "10.75", nullptr}, - "{\"type\":\"record\",\"name\":\"r\",\"fields\":[" - "{\"name\":\"f0\", \"type\":\"boolean\"}," - "{\"name\":\"f1\", \"type\":\"long\"}," - "{\"name\":\"f3\", \"type\":\"double\"}]}", - "BLD", - {"1", "100", "10.75", nullptr}, - 1, - 1}, + { + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": ["int", "long"]}, + {"name": "f3", "type": "float"} + ] + })", + "BIU0IF", + {"1", "100", "121", "10.75", nullptr}, + R"({ + "type": "record", + "name": "r", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "long"}, + {"name": "f3", "type": "double"} + ] + })", + "BLD", + {"1", "100", "10.75", nullptr}, + 1}, }; static const TestData4 data4BinaryOnly[] = { // Arrray of unions - {R"({"type":"array", "items":[ "long", "int"]})", - "[c1sU1Ic1sU1I]", - {"100", "100", nullptr}, - R"({"type":"array", "items": "int"})", - "[c1sIc1sI]", - {"100", "100", nullptr}, - 2}, + { + R"({ + "type":"array", + "items": ["long", "int"] + })", + "[c1sU1Ic1sU1I]", + {"100", "100", nullptr}, + R"({"type":"array", "items": "int"})", + "[c1sIc1sI]", + {"100", "100", nullptr}, + 2}, // Map of unions - {R"({"type":"map", "values":[ "long", "int"]})", - "{c1sS10U1Ic1sS10U1I}", - {"k1", "100", "k2", "100", nullptr}, - R"({"type":"map", "values": "int"})", - "{c1sS10Ic1sS10I}", - {"k1", "100", "k2", "100", nullptr}, - 2}, + { + R"({"type":"map", "values":[ "long", "int"]})", + "{c1sS10U1Ic1sS10U1I}", + {"k1", "100", "k2", "100", nullptr}, + R"({"type":"map", "values": "int"})", + "{c1sS10Ic1sS10I}", + {"k1", "100", "k2", "100", nullptr}, + 2}, }; #define COUNTOF(x) sizeof(x) / sizeof(x[0]) 
@@ -1524,13 +1847,13 @@ Test testWithData(const Test &test, const Data &) { testWithData(&testFunc, data), data, data + COUNTOF(data))) struct BinaryEncoderFactory { - static EncoderPtr newEncoder(const ValidSchema &schema) { + static EncoderPtr newEncoder(const ValidSchema &) { return binaryEncoder(); } }; struct BinaryDecoderFactory { - static DecoderPtr newDecoder(const ValidSchema &schema) { + static DecoderPtr newDecoder(const ValidSchema &) { return binaryDecoder(); } }; @@ -1791,7 +2114,7 @@ static void testByteCount() { } // namespace avro boost::unit_test::test_suite * -init_unit_test_suite(int argc, char *argv[]) { +init_unit_test_suite(int, char *[]) { using namespace boost::unit_test; auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for codecs"); diff --git a/lang/c++/test/CommonsSchemasTests.cc b/lang/c++/test/CommonsSchemasTests.cc new file mode 100644 index 00000000000..84ee74928be --- /dev/null +++ b/lang/c++/test/CommonsSchemasTests.cc @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#include "Compiler.hh" +#include "DataFile.hh" +#include "Generic.hh" +#include "ValidSchema.hh" +#include +#include +#include + +using avro::DataFileReader; +using avro::DataFileWriter; +using avro::GenericDatum; +using avro::GenericReader; +using avro::validatingDecoder; + +void testCommonSchema(const std::filesystem::path &dir_path) { + const std::filesystem::path &schemaFile = dir_path / "schema.json"; + std::ifstream in(schemaFile.c_str()); + + avro::ValidSchema schema; + avro::compileJsonSchema(in, schema); + + const std::filesystem::path &dataFile = dir_path / "data.avro"; + + GenericDatum datum(schema); + const std::filesystem::path &outputDataFile = dir_path / "data_out.avro"; + + DataFileReader reader(dataFile.c_str()); + DataFileWriter writer(outputDataFile.c_str(), schema); + + while (reader.read(datum)) { + datum.value(); + writer.write(datum); + } + writer.close(); + reader.close(); + + GenericDatum datumOrig(schema); + GenericDatum datumNew(schema); + + DataFileReader readerOrig(dataFile.c_str()); + DataFileReader readerNew(outputDataFile.c_str()); + while (readerOrig.read(datumOrig)) { + BOOST_CHECK(readerNew.read(datumNew)); + avro::GenericRecord &rec1 = datumOrig.value(); + avro::GenericRecord &rec2 = datumNew.value(); + BOOST_CHECK_EQUAL(rec1.fieldCount(), rec2.fieldCount()); + } + BOOST_CHECK(!readerNew.read(datumNew)); + + std::filesystem::remove(outputDataFile); +} + +void testCommonsSchemas() { + const std::filesystem::path commons_schemas{"../../share/test/data/schemas"}; + if (!std::filesystem::exists(commons_schemas)) { + std::cout << "\nWarn: Can't access share test folder '../../share/test/data/schemas'\n" + << std::endl; + return; + } + for (auto const &dir_entry : std::filesystem::directory_iterator{commons_schemas}) { + if (std::filesystem::is_directory(dir_entry)) { + testCommonSchema(dir_entry.path()); + } + } +} + +boost::unit_test::test_suite * +init_unit_test_suite(int /*argc*/, char * /*argv*/[]) { + using namespace 
boost::unit_test; + + auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for commons schemas"); + ts->add(BOOST_TEST_CASE(&testCommonsSchemas)); + return ts; +} diff --git a/lang/c++/test/CompilerTests.cc b/lang/c++/test/CompilerTests.cc index e3d4426a3c6..6fa5344ac1a 100644 --- a/lang/c++/test/CompilerTests.cc +++ b/lang/c++/test/CompilerTests.cc @@ -18,7 +18,7 @@ #include -#include +#include #include #include "Compiler.hh" diff --git a/lang/c++/test/DataFileTests.cc b/lang/c++/test/DataFileTests.cc index fec7f316a4c..ad6796fbcf8 100644 --- a/lang/c++/test/DataFileTests.cc +++ b/lang/c++/test/DataFileTests.cc @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -123,7 +123,7 @@ static ValidSchema makeValidSchema(const char *schema) { istringstream iss(schema); ValidSchema vs; compileJsonSchema(iss, vs); - return ValidSchema(vs); + return vs; } static const char sch[] = "{\"type\": \"record\"," @@ -217,7 +217,7 @@ class DataFileTest { #endif void testWriteWithCodec(avro::Codec codec) { - avro::DataFileWriter df(filename, writerSchema, 100); + avro::DataFileWriter df(filename, writerSchema, 100, codec); int64_t re = 3; int64_t im = 5; for (int i = 0; i < count; ++i, re *= im, im += 3) { @@ -405,7 +405,7 @@ class DataFileTest { } std::set> actual; int num = 0; - for (int i = sync_points.size() - 2; i >= 0; --i) { + for (ssize_t i = sync_points.size() - 2; i >= 0; --i) { df.seek(sync_points[i]); ComplexInteger ci; // Subtract avro::SyncSize here because sync and pastSync @@ -473,7 +473,7 @@ class DataFileTest { avro::DataFileReader df(filename, writerSchema); std::ifstream just_for_length( filename, std::ifstream::ate | std::ifstream::binary); - int length = just_for_length.tellg(); + int length = static_cast(just_for_length.tellg()); int splits = 10; int end = length; // end of split int remaining = end; // bytes remaining @@ -575,7 +575,7 @@ class DataFileTest { } { avro::DataFileReader reader(filename, dschema); - std::vector found; + 
std::vector found; ComplexInteger record; while (reader.read(record)) { found.push_back(record.re); @@ -948,7 +948,7 @@ void testReadRecordEfficientlyUsingLastSync(avro::Codec codec) { std::unique_ptr inputStream = avro::memoryInputStream(stitchedData.data(), stitchedData.size()); - int recordsUptoRecordToRead = recordToRead - recordsUptoLastSync; + size_t recordsUptoRecordToRead = recordToRead - recordsUptoLastSync; // Ensure this is not the first record in the chunk. BOOST_CHECK_GT(recordsUptoRecordToRead, 0); @@ -956,7 +956,7 @@ void testReadRecordEfficientlyUsingLastSync(avro::Codec codec) { avro::DataFileReader df(std::move(inputStream)); TestRecord readRecord("", 0); //::printf("\nReading %d rows until specific record is reached", recordsUptoRecordToRead); - for (int index = 0; index < recordsUptoRecordToRead; index++) { + for (size_t index = 0; index < recordsUptoRecordToRead; index++) { BOOST_CHECK_EQUAL(df.read(readRecord), true); int64_t expectedId = (recordToRead - recordsUptoRecordToRead + index); @@ -1004,7 +1004,7 @@ void testReadRecordEfficientlyUsingLastSyncSnappyCodec() { #endif test_suite * -init_unit_test_suite(int argc, char *argv[]) { +init_unit_test_suite(int, char *[]) { { auto *ts = BOOST_TEST_SUITE("DataFile tests: test0.df"); shared_ptr t1(new DataFileTest("test1.d0", sch, isch, 0)); diff --git a/lang/c++/test/JsonTests.cc b/lang/c++/test/JsonTests.cc index 3832e691317..afaa20f1ac4 100644 --- a/lang/c++/test/JsonTests.cc +++ b/lang/c++/test/JsonTests.cc @@ -18,7 +18,7 @@ #include -#include +#include #include #include @@ -65,9 +65,13 @@ TestData stringData[] = { {R"("\U000a")", EntityType::String, "\n", R"("\n")"}, {R"("\u000a")", EntityType::String, "\n", R"("\n")"}, {R"("\"")", EntityType::String, "\"", R"("\"")"}, - {R"("\/")", EntityType::String, "/", R"("\/")"}, + // While a solidus may be escaped according to the JSON standard, it need not be escaped. 
+ {R"("/\/")", EntityType::String, "//", R"("//")"}, + {R"("\b\f\n\r\t")", EntityType::String, "\b\f\n\r\t", R"("\b\f\n\r\t")"}, {R"("\u20ac")", EntityType::String, "\xe2\x82\xac", R"("\u20ac")"}, {R"("\u03c0")", EntityType::String, "\xcf\x80", R"("\u03c0")"}, + {R"("hello\n")", EntityType::String, "hello\n", R"("hello\n")"}, + {R"("\Ud8ab\udccd")", EntityType::String, "\xf0\xba\xb3\x8d", R"("\ud8ab\udccd")"}, }; void testBool(const TestData &d) { diff --git a/lang/c++/test/LargeSchemaTests.cc b/lang/c++/test/LargeSchemaTests.cc index a12974c0f75..d99e70988ae 100644 --- a/lang/c++/test/LargeSchemaTests.cc +++ b/lang/c++/test/LargeSchemaTests.cc @@ -21,7 +21,7 @@ #include "ValidSchema.hh" #include -#include +#include #include void testLargeSchema() { diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc old mode 100755 new mode 100644 index 3195eabd003..543f9e05087 --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -20,7 +20,7 @@ #include "GenericDatum.hh" #include "ValidSchema.hh" -#include +#include #include #include @@ -105,7 +105,13 @@ const char *basicSchemas[] = { // namespace with '$' in it. "{\"type\":\"record\",\"name\":\"Test\",\"namespace\":\"a.b$\",\"fields\":" "[{\"name\":\"f\",\"type\":\"long\"}]}", -}; + + // Custom attribute(s) for field in record + "{\"type\": \"record\",\"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\",\"type\": \"long\",\"extra field\": \"1\"}]}", + "{\"type\": \"record\",\"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\",\"type\": \"long\"," + "\"extra field1\": \"1\",\"extra field2\": \"2\"}]}"}; const char *basicSchemaErrors[] = { // Record @@ -217,6 +223,15 @@ const char *roundTripSchemas[] = { // namespace with '$' in it. 
"{\"type\":\"record\",\"namespace\":\"a.b$\",\"name\":\"Test\",\"fields\":" "[{\"name\":\"f\",\"type\":\"long\"}]}", + + // Custom fields + "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" + "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"}," + "{\"name\":\"f2\",\"type\":\"int\"}]}", + "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" + "[{\"name\":\"f1\",\"type\":\"long\",\"extra_field\":\"1\"}," + "{\"name\":\"f2\",\"type\":\"int\"," + "\"extra_field1\":\"21\",\"extra_field2\":\"22\"}]}", }; const char *malformedLogicalTypes[] = { @@ -352,7 +367,7 @@ static void testLogicalTypes() { \"logicalType\": \"uuid\"\n\ }"; // AVRO-2923 Union with LogicalType - const char* unionType = "[\n\ + const char *unionType = "[\n\ {\"type\":\"string\", \"logicalType\":\"uuid\"},\"null\"\n\ ]"; { diff --git a/lang/c++/test/SpecificTests.cc b/lang/c++/test/SpecificTests.cc index e027f9518d2..72f2897e45b 100644 --- a/lang/c++/test/SpecificTests.cc +++ b/lang/c++/test/SpecificTests.cc @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -#include +#include #include #include "Specific.hh" diff --git a/lang/c++/test/StreamTests.cc b/lang/c++/test/StreamTests.cc index 262e0600a32..2096197ef18 100644 --- a/lang/c++/test/StreamTests.cc +++ b/lang/c++/test/StreamTests.cc @@ -19,7 +19,7 @@ #include "Exception.hh" #include "Stream.hh" #include "boost/filesystem.hpp" -#include +#include #include namespace avro { @@ -51,7 +51,7 @@ struct Fill1 { StreamWriter w; w.reset(os); for (size_t i = 0; i < len; ++i) { - w.write(i % 10 + '0'); + w.write(static_cast(i % 10 + '0')); } w.flush(); } @@ -65,7 +65,7 @@ struct Fill2 { os.next(&b, &n); size_t j = 0; for (; i < len && j < n; ++j, ++i, ++b) { - *b = i % 10 + '0'; + *b = static_cast(i % 10 + '0'); } if (i == len) { os.backup(n - j); @@ -125,7 +125,7 @@ void testNonEmpty_memoryStream(const TestData &td) { void testNonEmpty2(const TestData &td) { std::vector v; for (size_t i = 0; i < td.dataSize; ++i) { - v.push_back(i % 10 + '0'); + v.push_back(static_cast(i % 10 + '0')); } uint8_t v2 = 0; diff --git a/lang/c++/test/buffertest.cc b/lang/c++/test/buffertest.cc index 1881028593b..23e0e806c21 100644 --- a/lang/c++/test/buffertest.cc +++ b/lang/c++/test/buffertest.cc @@ -16,9 +16,9 @@ * limitations under the License. */ -#include +#include -#include +#include #ifdef HAVE_BOOST_ASIO #include @@ -34,19 +34,18 @@ using detail::kMinBlockSize; using std::cout; using std::endl; +// Make a string of repeating 0123456789ABCDEF0123456789... 
std::string makeString(size_t len) { - std::string newstring; - newstring.reserve(len); + std::string result; + result.reserve(len); + + constexpr char chars[] = "0123456789ABCDEF"; for (size_t i = 0; i < len; ++i) { - char newchar = '0' + i % 16; - if (newchar > '9') { - newchar += 7; - } - newstring.push_back(newchar); + result.push_back(chars[i % 16]); } - return newstring; + return result; } void printBuffer(const InputBuffer &buf) { @@ -219,7 +218,7 @@ void TestDiscard() { BOOST_CHECK_EQUAL(ob.freeSpace(), kDefaultBlockSize / 2); BOOST_CHECK_EQUAL(ob.numChunks(), 1); - int chunks = 3 - (discarded / kDefaultBlockSize); + size_t chunks = 3 - (discarded / kDefaultBlockSize); BOOST_CHECK_EQUAL(ob.numDataChunks(), chunks); } @@ -331,7 +330,7 @@ void TestExtractToInput() { BOOST_CHECK_EQUAL(ob.freeSpace(), kDefaultBlockSize / 2); BOOST_CHECK_EQUAL(ob.numChunks(), 1); - int chunks = 3 - (extracted / kDefaultBlockSize); + size_t chunks = 3 - (extracted / kDefaultBlockSize); BOOST_CHECK_EQUAL(ob.numDataChunks(), chunks); } @@ -526,7 +525,7 @@ void TestSeek() { avro::InputBuffer buf(tmp1); cout << "Starting string: " << str << '\n'; - BOOST_CHECK_EQUAL(static_cast(buf.size()), str.size()); + BOOST_CHECK_EQUAL(buf.size(), str.size()); avro::istream is(buf); diff --git a/lang/c++/test/testgentest.cc b/lang/c++/test/testgentest.cc index 3d86329ce08..c204dd99633 100644 --- a/lang/c++/test/testgentest.cc +++ b/lang/c++/test/testgentest.cc @@ -16,7 +16,7 @@ * limitations under the License. */ -#include +#include #include #include #include diff --git a/lang/c++/test/unittest.cc b/lang/c++/test/unittest.cc index 82b92f520da..3558a0e2f89 100644 --- a/lang/c++/test/unittest.cc +++ b/lang/c++/test/unittest.cc @@ -16,8 +16,10 @@ * limitations under the License. 
*/ -#include +#include #include +#include +#include #include "Compiler.hh" #include "Decoder.hh" @@ -36,6 +38,10 @@ #include "buffer/BufferStream.hh" #include "AvroSerialize.hh" +#include "CustomAttributes.hh" +#include "NodeConcepts.hh" +#include "NodeImpl.hh" +#include "Types.hh" using namespace avro; @@ -67,7 +73,19 @@ struct TestSchema { void buildSchema() { RecordSchema record("RootRecord"); - record.addField("mylong", LongSchema()); + CustomAttributes customAttributeLong; + customAttributeLong.addAttribute("extra_info_mylong", std::string("it's a long field")); + // Validate that adding a custom attribute with same name is not allowed + bool caught = false; + try { + customAttributeLong.addAttribute("extra_info_mylong", std::string("duplicate")); + } catch (Exception &e) { + std::cout << "(intentional) exception: " << e.what() << '\n'; + caught = true; + } + BOOST_CHECK_EQUAL(caught, true); + // Add custom attribute for the field + record.addField("mylong", LongSchema(), customAttributeLong); IntSchema intSchema; avro::MapSchema map = MapSchema(IntSchema()); @@ -85,7 +103,7 @@ struct TestSchema { myenum.addSymbol("two"); myenum.addSymbol("three"); - bool caught = false; + caught = false; try { myenum.addSymbol("three"); } catch (Exception &e) { @@ -121,7 +139,12 @@ struct TestSchema { } BOOST_CHECK_EQUAL(caught, true); - record.addField("mylong2", LongSchema()); + CustomAttributes customAttributeLong2; + customAttributeLong2.addAttribute("extra_info_mylong2", + std::string("it's a long field")); + customAttributeLong2.addAttribute("more_info_mylong2", + std::string("it's still a long field")); + record.addField("mylong2", LongSchema(), customAttributeLong2); record.addField("anotherint", intSchema); @@ -251,7 +274,7 @@ struct TestSchema { out << is.rdbuf(); } - void printNext(Parser &p) { + void printNext(Parser &) { // no-op printer } @@ -387,6 +410,93 @@ struct TestSchema { readData(p); } + void testNodeRecord(const NodeRecord &nodeRecord, + const 
std::string &expectedJson) { + BOOST_CHECK_EQUAL(nodeRecord.isValid(), true); + + std::ostringstream oss; + nodeRecord.printJson(oss, 0); + std::string actual = oss.str(); + actual.erase(std::remove_if(actual.begin(), actual.end(), + ::isspace), + actual.end()); + + std::string expected = expectedJson; + expected.erase(std::remove_if(expected.begin(), expected.end(), + ::isspace), + expected.end()); + + BOOST_CHECK_EQUAL(actual, expected); + } + + // Create NodeRecord with custom attributes at field level + // validate json serialization + void checkNodeRecordWithCustomAttribute() { + Name recordName("Test"); + HasName nameConcept(recordName); + concepts::MultiAttribute fieldNames; + std::vector> fieldAliases; + concepts::MultiAttribute fieldValues; + std::vector defaultValues; + concepts::MultiAttribute customAttributes; + + CustomAttributes cf; + cf.addAttribute("stringField", std::string("\\\"field value with \\\"double quotes\\\"\\\"")); + cf.addAttribute("booleanField", std::string("true")); + cf.addAttribute("numberField", std::string("1.23")); + cf.addAttribute("nullField", std::string("null")); + cf.addAttribute("arrayField", std::string("[1]")); + cf.addAttribute("mapField", std::string("{\\\"key1\\\":\\\"value1\\\", \\\"key2\\\":\\\"value2\\\"}")); + fieldNames.add("f1"); + fieldValues.add(NodePtr(new NodePrimitive(Type::AVRO_LONG))); + customAttributes.add(cf); + + NodeRecord nodeRecordWithCustomAttribute(nameConcept, fieldValues, + fieldNames, fieldAliases, defaultValues, + customAttributes); + std::string expectedJsonWithCustomAttribute = + "{\"type\": \"record\", \"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\", \"type\": \"long\", " + "\"arrayField\": \"[1]\", " + "\"booleanField\": \"true\", " + "\"mapField\": \"{\\\"key1\\\":\\\"value1\\\", \\\"key2\\\":\\\"value2\\\"}\", " + "\"nullField\": \"null\", " + "\"numberField\": \"1.23\", " + "\"stringField\": \"\\\"field value with \\\"double quotes\\\"\\\"\"" + "}]}"; + 
testNodeRecord(nodeRecordWithCustomAttribute, + expectedJsonWithCustomAttribute); + } + + // Create NodeRecord without custom attributes at field level + // validate json serialization + void checkNodeRecordWithoutCustomAttribute() { + Name recordName("Test"); + HasName nameConcept(recordName); + concepts::MultiAttribute fieldNames; + concepts::MultiAttribute fieldValues; + std::vector defaultValues; + + fieldNames.add("f1"); + fieldValues.add(NodePtr(new NodePrimitive(Type::AVRO_LONG))); + + NodeRecord nodeRecordWithoutCustomAttribute(nameConcept, fieldValues, + fieldNames, defaultValues); + std::string expectedJsonWithoutCustomAttribute = + "{\"type\": \"record\", \"name\": \"Test\",\"fields\": " + "[{\"name\": \"f1\", \"type\": \"long\"}]}"; + testNodeRecord(nodeRecordWithoutCustomAttribute, + expectedJsonWithoutCustomAttribute); + } + + void checkCustomAttributes_getAttribute() { + CustomAttributes cf; + cf.addAttribute("field1", std::string("1")); + + BOOST_CHECK_EQUAL(std::string("1"), *cf.getAttribute("field1")); + BOOST_CHECK_EQUAL(false, cf.getAttribute("not_existing").is_initialized()); + } + void test() { std::cout << "Before\n"; schema_.toJson(std::cout); @@ -408,6 +518,10 @@ struct TestSchema { readValidatedData(); createExampleSchema(); + + checkNodeRecordWithoutCustomAttribute(); + checkNodeRecordWithCustomAttribute(); + checkCustomAttributes_getAttribute(); } ValidSchema schema_; @@ -917,8 +1031,8 @@ struct TestResolution { }; void testNestedArraySchema() { - ArraySchema b0 = ArraySchema(NullSchema()); - ArraySchema a0 = ArraySchema(b0); + ArraySchema b0{NullSchema()}; + ArraySchema a0 = b0; avro::ValidSchema vs(a0); std::ostringstream actual; @@ -935,8 +1049,8 @@ void testNestedArraySchema() { } void testNestedMapSchema() { - MapSchema b0 = MapSchema(NullSchema()); - MapSchema a0 = MapSchema(b0); + MapSchema b0{NullSchema()}; + MapSchema a0 = b0; avro::ValidSchema vs(a0); std::ostringstream actual; diff --git a/lang/c/CMakeLists.txt 
b/lang/c/CMakeLists.txt index aa923e1829a..123676b3d86 100644 --- a/lang/c/CMakeLists.txt +++ b/lang/c/CMakeLists.txt @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. # -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(AvroC C) enable_testing() @@ -151,7 +151,7 @@ endif (ZLIB_FOUND) find_package(Snappy) if (SNAPPY_FOUND AND ZLIB_FOUND) # Snappy borrows crc32 from zlib - set(SNAPPY_PKG libsnappy) + set(SNAPPY_PKG snappy) add_definitions(-DSNAPPY_CODEC) include_directories(${SNAPPY_INCLUDE_DIRS}) message("Enabled snappy codec") @@ -176,7 +176,7 @@ else (LZMA_FOUND) endif (LZMA_FOUND) set(CODEC_LIBRARIES ${ZLIB_LIBRARIES} ${LZMA_LIBRARIES} ${SNAPPY_LIBRARIES}) -set(CODEC_PKG "@ZLIB_PKG@ @LZMA_PKG@ @SNAPPY_PKG@") +set(CODEC_PKG "${ZLIB_PKG} ${LZMA_PKG} ${SNAPPY_PKG}") # Jansson JSON library pkg_check_modules(JANSSON jansson>=2.3) diff --git a/lang/c/cmake_avrolib.bat b/lang/c/cmake_avrolib.bat index 76934bca205..40e8b39e3d7 100644 --- a/lang/c/cmake_avrolib.bat +++ b/lang/c/cmake_avrolib.bat @@ -1,48 +1,48 @@ -REM Licensed to the Apache Software Foundation (ASF) under one -REM or more contributor license agreements. See the NOTICE file -REM distributed with this work for additional information -REM regarding copyright ownership. The ASF licenses this file -REM to you under the Apache License, Version 2.0 (the -REM "License"); you may not use this file except in compliance -REM with the License. You may obtain a copy of the License at -REM -REM https://www.apache.org/licenses/LICENSE-2.0 -REM -REM Unless required by applicable law or agreed to in writing, -REM software distributed under the License is distributed on an -REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM KIND, either express or implied. See the License for the -REM specific language governing permissions and limitations -REM under the License. - -echo off - -REM Set up the solution file in Windows. 
- -set my_cmake_path="put_your_cmake_path_here" -set cmake_path_win7="C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" -set cmake_path_xp="C:\Program Files\CMake 2.8\bin\cmake.exe" - -if exist %my_cmake_path% ( - set cmake_path=%my_cmake_path% - goto RUN_CMAKE -) - -if exist %cmake_path_win7% ( - set cmake_path=%cmake_path_win7% - goto RUN_CMAKE -) - -if exist %cmake_path_xp% ( - set cmake_path=%cmake_path_xp% - goto RUN_CMAKE -) - -echo "Set the proper cmake path in the variable 'my_cmake_path' in cmake_windows.bat, and re-run" -goto EXIT_ERROR - -:RUN_CMAKE -%cmake_path% -G"Visual Studio 9 2008" -H. -Bbuild_win32 - - -:EXIT_ERROR +REM Licensed to the Apache Software Foundation (ASF) under one +REM or more contributor license agreements. See the NOTICE file +REM distributed with this work for additional information +REM regarding copyright ownership. The ASF licenses this file +REM to you under the Apache License, Version 2.0 (the +REM "License"); you may not use this file except in compliance +REM with the License. You may obtain a copy of the License at +REM +REM https://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, +REM software distributed under the License is distributed on an +REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +REM KIND, either express or implied. See the License for the +REM specific language governing permissions and limitations +REM under the License. + +echo off + +REM Set up the solution file in Windows. 
+ +set my_cmake_path="put_your_cmake_path_here" +set cmake_path_win7="C:\Program Files (x86)\CMake 2.8\bin\cmake.exe" +set cmake_path_xp="C:\Program Files\CMake 2.8\bin\cmake.exe" + +if exist %my_cmake_path% ( + set cmake_path=%my_cmake_path% + goto RUN_CMAKE +) + +if exist %cmake_path_win7% ( + set cmake_path=%cmake_path_win7% + goto RUN_CMAKE +) + +if exist %cmake_path_xp% ( + set cmake_path=%cmake_path_xp% + goto RUN_CMAKE +) + +echo "Set the proper cmake path in the variable 'my_cmake_path' in cmake_windows.bat, and re-run" +goto EXIT_ERROR + +:RUN_CMAKE +%cmake_path% -G"Visual Studio 9 2008" -H. -Bbuild_win32 + + +:EXIT_ERROR diff --git a/lang/c/docs/index.txt b/lang/c/docs/index.txt index a439a052631..86f67bc8f96 100644 --- a/lang/c/docs/index.txt +++ b/lang/c/docs/index.txt @@ -117,7 +117,7 @@ This section provides an overview of the methods that you can call on an interface, but not all of them make sense for all Avro schema types. For instance, you won't be able to call +avro_value_set_boolean+ on an Avro array value. If you try to call an inappropriate method, we'll -return an +EINVAL+ error code. +return an +EINVAL+/+AVRO_INVALID+ error code. Note that the functions in this section apply to _all_ Avro values, regardless of which value implementation is used under the covers. This @@ -178,7 +178,7 @@ different versions of the Avro library. That means that it's really only safe to use these hash values internally within the context of a single execution of a single application. -The +reset+ method “clears out” an +avro_value_t instance, making sure +The +reset+ method “clears out” an +avro_value_t+ instance, making sure that it's ready to accept the contents of a new value. For scalars, this is usually a no-op, since the new value will just overwrite the old one. 
For arrays and maps, this removes any existing elements from the diff --git a/lang/c/examples/quickstop.c b/lang/c/examples/quickstop.c index ff9e9700590..b26dad10c04 100644 --- a/lang/c/examples/quickstop.c +++ b/lang/c/examples/quickstop.c @@ -107,7 +107,7 @@ int print_person(avro_file_reader_t db, avro_schema_t reader_schema) if (rval == 0) { int64_t id; int32_t age; - int32_t *p; + const char *p; size_t size; avro_value_t id_value; avro_value_t first_value; diff --git a/lang/c/src/avro/basics.h b/lang/c/src/avro/basics.h index 368509b90c8..62c899c691a 100644 --- a/lang/c/src/avro/basics.h +++ b/lang/c/src/avro/basics.h @@ -24,6 +24,7 @@ extern "C" { #define CLOSE_EXTERN #endif +#include enum avro_type_t { AVRO_STRING, @@ -40,7 +41,8 @@ enum avro_type_t { AVRO_MAP, AVRO_ARRAY, AVRO_UNION, - AVRO_LINK + AVRO_LINK, + AVRO_INVALID = EINVAL, }; typedef enum avro_type_t avro_type_t; diff --git a/lang/c/src/avro/refcount.h b/lang/c/src/avro/refcount.h index 27369900ab9..fd431f3ccea 100644 --- a/lang/c/src/avro/refcount.h +++ b/lang/c/src/avro/refcount.h @@ -86,7 +86,10 @@ avro_refcount_dec(volatile int *refcount) * Mac OS X */ -#elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 +#elif __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200 + +/* macOS 10.12 deprecates OSAtomic* so we'll use the GCC/Clang branch below */ #include diff --git a/lang/c/src/consume-binary.c b/lang/c/src/consume-binary.c index 9f92799d888..5e1db20684f 100644 --- a/lang/c/src/consume-binary.c +++ b/lang/c/src/consume-binary.c @@ -322,6 +322,9 @@ avro_consume_binary(avro_reader_t reader, avro_consumer_t *consumer, void *ud) case AVRO_LINK: avro_set_error("Consumer can't consume a link schema directly"); return EINVAL; + case AVRO_INVALID: + avro_set_error("Consumer can't consume an invalid schema"); + return EINVAL; } return 0; diff --git a/lang/c/src/datum.c b/lang/c/src/datum.c index 2c427809090..5307c7a8385 100644 --- 
a/lang/c/src/datum.c +++ b/lang/c/src/datum.c @@ -1086,6 +1086,7 @@ static void avro_datum_free(avro_datum_t datum) } break; case AVRO_NULL: + case AVRO_INVALID: /* Nothing allocated */ break; @@ -1094,7 +1095,7 @@ static void avro_datum_free(avro_datum_t datum) record = avro_datum_to_record(datum); avro_schema_decref(record->schema); st_foreach(record->fields_byname, - HASH_FUNCTION_CAST char_datum_free_foreach, 0); + (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(record->field_order); st_free_table(record->fields_byname); avro_freet(struct avro_record_datum_t, record); @@ -1122,7 +1123,7 @@ static void avro_datum_free(avro_datum_t datum) struct avro_map_datum_t *map; map = avro_datum_to_map(datum); avro_schema_decref(map->schema); - st_foreach(map->map, HASH_FUNCTION_CAST char_datum_free_foreach, + st_foreach(map->map, (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(map->map); st_free_table(map->indices_by_key); @@ -1134,7 +1135,7 @@ static void avro_datum_free(avro_datum_t datum) struct avro_array_datum_t *array; array = avro_datum_to_array(datum); avro_schema_decref(array->schema); - st_foreach(array->els, HASH_FUNCTION_CAST array_free_foreach, 0); + st_foreach(array->els, (hash_function_foreach) array_free_foreach, 0); st_free_table(array->els); avro_freet(struct avro_array_datum_t, array); } @@ -1182,7 +1183,7 @@ avro_datum_reset(avro_datum_t datum) { struct avro_array_datum_t *array; array = avro_datum_to_array(datum); - st_foreach(array->els, HASH_FUNCTION_CAST array_free_foreach, 0); + st_foreach(array->els, (hash_function_foreach) array_free_foreach, 0); st_free_table(array->els); rval = avro_init_array(array); @@ -1197,7 +1198,7 @@ avro_datum_reset(avro_datum_t datum) { struct avro_map_datum_t *map; map = avro_datum_to_map(datum); - st_foreach(map->map, HASH_FUNCTION_CAST char_datum_free_foreach, 0); + st_foreach(map->map, (hash_function_foreach) char_datum_free_foreach, 0); st_free_table(map->map); 
st_free_table(map->indices_by_key); st_free_table(map->keys_by_index); @@ -1216,7 +1217,7 @@ avro_datum_reset(avro_datum_t datum) record = avro_datum_to_record(datum); rval = 0; st_foreach(record->fields_byname, - HASH_FUNCTION_CAST datum_reset_foreach, (st_data_t) &rval); + (hash_function_foreach) datum_reset_foreach, (st_data_t) &rval); return rval; } diff --git a/lang/c/src/datum_equal.c b/lang/c/src/datum_equal.c index 2ef750f9bf9..7e7c9b94099 100644 --- a/lang/c/src/datum_equal.c +++ b/lang/c/src/datum_equal.c @@ -78,7 +78,7 @@ static int map_equal(struct avro_map_datum_t *a, struct avro_map_datum_t *b) if (a->map->num_entries != b->map->num_entries) { return 0; } - st_foreach(a->map, HASH_FUNCTION_CAST st_equal_foreach, (st_data_t) & args); + st_foreach(a->map, (hash_function_foreach) st_equal_foreach, (st_data_t) & args); return args.rval; } @@ -93,7 +93,7 @@ static int record_equal(struct avro_record_datum_t *a, if (a->fields_byname->num_entries != b->fields_byname->num_entries) { return 0; } - st_foreach(a->fields_byname, HASH_FUNCTION_CAST st_equal_foreach, (st_data_t) & args); + st_foreach(a->fields_byname, (hash_function_foreach) st_equal_foreach, (st_data_t) & args); return args.rval; } @@ -181,6 +181,12 @@ int avro_datum_equal(const avro_datum_t a, const avro_datum_t b) * TODO */ return 0; + case AVRO_INVALID: + /* + * Invalid datums should not be compared and returning 0 + * matches the other error conditions + */ + return 0; } return 0; } diff --git a/lang/c/src/datum_size.c b/lang/c/src/datum_size.c index 770cb655f57..3877f31384c 100644 --- a/lang/c/src/datum_size.c +++ b/lang/c/src/datum_size.c @@ -126,7 +126,7 @@ size_map(avro_writer_t writer, const avro_encoding_t * enc, if (datum->map->num_entries) { size_accum(rval, size, enc->size_long(writer, datum->map->num_entries)); - st_foreach(datum->map, HASH_FUNCTION_CAST size_map_foreach, (st_data_t) & args); + st_foreach(datum->map, (hash_function_foreach) size_map_foreach, (st_data_t) & args); size 
+= args.size; } if (!args.rval) { @@ -271,6 +271,7 @@ static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc, avro_datum_to_union(datum)); case AVRO_LINK: + case AVRO_INVALID: break; } diff --git a/lang/c/src/datum_skip.c b/lang/c/src/datum_skip.c index aa51d793468..e0ce561642e 100644 --- a/lang/c/src/datum_skip.c +++ b/lang/c/src/datum_skip.c @@ -196,6 +196,9 @@ int avro_skip_data(avro_reader_t reader, avro_schema_t writers_schema) avro_skip_data(reader, (avro_schema_to_link(writers_schema))->to); break; + case AVRO_INVALID: + rval = EINVAL; + break; } return rval; diff --git a/lang/c/src/datum_validate.c b/lang/c/src/datum_validate.c index d15ebdddaad..e997d306761 100644 --- a/lang/c/src/datum_validate.c +++ b/lang/c/src/datum_validate.c @@ -123,7 +123,7 @@ avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum) { avro_schema_to_map(expected_schema)->values, 1 }; st_foreach(avro_datum_to_map(datum)->map, - HASH_FUNCTION_CAST schema_map_validate_foreach, + (hash_function_foreach) schema_map_validate_foreach, (st_data_t) & vst); return vst.rval; } @@ -188,6 +188,8 @@ avro_schema_datum_validate(avro_schema_t expected_schema, avro_datum_t datum) datum); } break; + case AVRO_INVALID: + return EINVAL; } return 0; } diff --git a/lang/c/src/datum_value.c b/lang/c/src/datum_value.c index a4fa55a0c9e..597d38c45bc 100644 --- a/lang/c/src/datum_value.c +++ b/lang/c/src/datum_value.c @@ -80,19 +80,7 @@ avro_datum_value_get_type(const avro_value_iface_t *iface, const void *vself) { AVRO_UNUSED(iface); const avro_datum_t self = (const avro_datum_t) vself; -#ifdef _WIN32 -#pragma message("#warning: Bug: EINVAL is not of type avro_type_t.") -#else -#warning "Bug: EINVAL is not of type avro_type_t." -#endif - /* We shouldn't use EINVAL as the return value to - * check_param(), because EINVAL (= 22) is not a valid enum - * avro_type_t. 
This is a structural issue -- we would need a - * different interface on all the get_type functions to fix - * this. For now, suppressing the error by casting EINVAL to - * (avro_type_t) so the code compiles under C++. - */ - check_param((avro_type_t) EINVAL, self, "datum instance"); + check_param(AVRO_INVALID, self, "datum instance"); return avro_typeof(self); } diff --git a/lang/c/src/memoize.c b/lang/c/src/memoize.c index 933fecbd043..e3602884d0d 100644 --- a/lang/c/src/memoize.c +++ b/lang/c/src/memoize.c @@ -52,8 +52,8 @@ avro_memoize_key_hash(avro_memoize_key_t *a) static struct st_hash_type avro_memoize_hash_type = { - HASH_FUNCTION_CAST avro_memoize_key_cmp, - HASH_FUNCTION_CAST avro_memoize_key_hash + (hash_function_compare) avro_memoize_key_cmp, + (hash_function_hash) avro_memoize_key_hash }; @@ -78,7 +78,7 @@ avro_memoize_free_key(avro_memoize_key_t *key, void *result, void *dummy) void avro_memoize_done(avro_memoize_t *mem) { - st_foreach((st_table *) mem->cache, HASH_FUNCTION_CAST avro_memoize_free_key, 0); + st_foreach((st_table *) mem->cache, (hash_function_foreach) avro_memoize_free_key, 0); st_free_table((st_table *) mem->cache); memset(mem, 0, sizeof(avro_memoize_t)); } diff --git a/lang/c/src/schema.c b/lang/c/src/schema.c index 7b389002b00..a4d8e9f898a 100644 --- a/lang/c/src/schema.c +++ b/lang/c/src/schema.c @@ -126,6 +126,7 @@ static void avro_schema_free(avro_schema_t schema) case AVRO_DOUBLE: case AVRO_BOOLEAN: case AVRO_NULL: + case AVRO_INVALID: /* no memory allocated for primitives */ return; @@ -136,7 +137,7 @@ static void avro_schema_free(avro_schema_t schema) if (record->space) { avro_str_free(record->space); } - st_foreach(record->fields, HASH_FUNCTION_CAST record_free_foreach, + st_foreach(record->fields, (hash_function_foreach) record_free_foreach, 0); st_free_table(record->fields_byname); st_free_table(record->fields); @@ -151,7 +152,7 @@ static void avro_schema_free(avro_schema_t schema) if (enump->space) { 
avro_str_free(enump->space); } - st_foreach(enump->symbols, HASH_FUNCTION_CAST enum_free_foreach, + st_foreach(enump->symbols, (hash_function_foreach) enum_free_foreach, 0); st_free_table(enump->symbols); st_free_table(enump->symbols_byname); @@ -188,7 +189,7 @@ static void avro_schema_free(avro_schema_t schema) case AVRO_UNION:{ struct avro_union_schema_t *unionp; unionp = avro_schema_to_union(schema); - st_foreach(unionp->branches, HASH_FUNCTION_CAST union_free_foreach, + st_foreach(unionp->branches, (hash_function_foreach) union_free_foreach, 0); st_free_table(unionp->branches); st_free_table(unionp->branches_byname); @@ -876,15 +877,7 @@ static int avro_schema_from_json_t(json_t *json, avro_schema_t *schema, st_table *named_schemas, const char *parent_namespace) { -#ifdef _WIN32 - #pragma message("#warning: Bug: '0' is not of type avro_type_t.") -#else - #warning "Bug: '0' is not of type avro_type_t." -#endif - /* We should really have an "AVRO_INVALID" type in - * avro_type_t. Suppress warning below in which we set type to 0. 
- */ - avro_type_t type = (avro_type_t) 0; + avro_type_t type = AVRO_INVALID; unsigned int i; avro_schema_t named_type = NULL; @@ -1246,7 +1239,7 @@ avro_schema_from_json_root(json_t *root, avro_schema_t *schema) /* json_dumpf(root, stderr, 0); */ rval = avro_schema_from_json_t(root, schema, named_schemas, NULL); json_decref(root); - st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach, 0); + st_foreach(named_schemas, (hash_function_foreach) named_schema_free_foreach, 0); st_free_table(named_schemas); return rval; } @@ -1462,7 +1455,7 @@ avro_schema_t avro_schema_copy(avro_schema_t schema) } new_schema = avro_schema_copy_root(schema, named_schemas); - st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach, 0); + st_foreach(named_schemas, (hash_function_foreach) named_schema_free_foreach, 0); st_free_table(named_schemas); return new_schema; } @@ -1882,6 +1875,8 @@ avro_schema_to_json2(const avro_schema_t schema, avro_writer_t out, return write_union(out, avro_schema_to_union(schema), parent_namespace); case AVRO_LINK: return write_link(out, avro_schema_to_link(schema), parent_namespace); + case AVRO_INVALID: + return EINVAL; } if (is_avro_primitive(schema)) { diff --git a/lang/c/src/st.c b/lang/c/src/st.c index 27578289ecb..8437777cb92 100644 --- a/lang/c/src/st.c +++ b/lang/c/src/st.c @@ -39,8 +39,8 @@ struct st_table_entry { static int numcmp(long, long); static int numhash(long); static struct st_hash_type type_numhash = { - HASH_FUNCTION_CAST numcmp, - HASH_FUNCTION_CAST numhash + (hash_function_compare) numcmp, + (hash_function_hash) numhash }; /* @@ -48,8 +48,8 @@ static struct st_hash_type type_numhash = { */ static int strhash(const char *); static struct st_hash_type type_strhash = { - HASH_FUNCTION_CAST strcmp, - HASH_FUNCTION_CAST strhash + (hash_function_compare) strcmp, + (hash_function_hash) strhash }; static void rehash(st_table *); @@ -212,7 +212,7 @@ void st_free_table(st_table *table) } #define PTR_NOT_EQUAL(table, 
ptr, hash_val, key) \ -((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key))) +((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (void*) (key), (void*) (ptr)->key))) #ifdef HASH_LOG #define COLLISION collision++ @@ -237,7 +237,7 @@ int st_lookup(st_table *table, register st_data_t key, st_data_t *value) unsigned int hash_val, bin_pos; register st_table_entry *ptr; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); FIND_ENTRY(table, ptr, hash_val, bin_pos); if (ptr == 0) { @@ -272,7 +272,7 @@ int st_insert(register st_table *table, register st_data_t key, st_data_t value) unsigned int hash_val, bin_pos; register st_table_entry *ptr; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); FIND_ENTRY(table, ptr, hash_val, bin_pos); if (ptr == 0) { @@ -288,7 +288,7 @@ void st_add_direct(st_table *table,st_data_t key,st_data_t value) { unsigned int hash_val, bin_pos; - hash_val = do_hash(key, table); + hash_val = do_hash((void*) key, table); bin_pos = hash_val % table->num_bins; ADD_DIRECT(table, key, value, hash_val, bin_pos); } @@ -363,7 +363,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) st_table_entry *tmp; register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); + hash_val = do_hash_bin((void*) *key, table); ptr = table->bins[hash_val]; if (ptr == 0) { @@ -372,7 +372,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) return 0; } - if (EQUAL(table, *key, ptr->key)) { + if (EQUAL(table, (void*) *key, (void*) ptr->key)) { table->bins[hash_val] = ptr->next; table->num_entries--; if (value != 0) @@ -383,7 +383,7 @@ int st_delete(register st_table *table,register st_data_t *key,st_data_t *value) } for (; ptr->next != 0; ptr = ptr->next) { - if (EQUAL(table, ptr->next->key, *key)) { + if (EQUAL(table, (void*) ptr->next->key, (void*) *key)) { tmp = ptr->next; ptr->next = ptr->next->next; table->num_entries--; 
@@ -403,7 +403,7 @@ int st_delete_safe(register st_table *table,register st_data_t *key,st_data_t *v unsigned int hash_val; register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); + hash_val = do_hash_bin((void*) *key, table); ptr = table->bins[hash_val]; if (ptr == 0) { @@ -413,7 +413,7 @@ int st_delete_safe(register st_table *table,register st_data_t *key,st_data_t *v } for (; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + if ((ptr->key != never) && EQUAL(table, (void*) ptr->key, (void*) *key)) { table->num_entries--; *key = ptr->key; if (value != 0) @@ -439,11 +439,11 @@ void st_cleanup_safe(st_table *table,st_data_t never) { int num_entries = table->num_entries; - st_foreach(table, HASH_FUNCTION_CAST delete_never, never); + st_foreach(table, (hash_function_foreach) delete_never, never); table->num_entries = num_entries; } -int st_foreach(st_table *table,int (*func) (ANYARGS),st_data_t arg) +int st_foreach(st_table *table,int (*func) (void*, void*, void*),st_data_t arg) { st_table_entry *ptr, *last, *tmp; enum st_retval retval; @@ -452,7 +452,9 @@ int st_foreach(st_table *table,int (*func) (ANYARGS),st_data_t arg) for (i = 0; i < table->num_bins; i++) { last = 0; for (ptr = table->bins[i]; ptr != 0;) { - retval = (enum st_retval) (*func) (ptr->key, ptr->record, arg); + retval = (enum st_retval) (*func) ((void*) ptr->key, + (void*) ptr->record, + (void*) arg); switch (retval) { case ST_CHECK: /* check if hash is modified during * iteration */ diff --git a/lang/c/src/st.h b/lang/c/src/st.h index cf8a2249169..93da018bd9b 100644 --- a/lang/c/src/st.h +++ b/lang/c/src/st.h @@ -20,26 +20,22 @@ extern "C" { #pragma GCC visibility push(hidden) -#ifndef ANYARGS - #ifdef __cplusplus - #define ANYARGS ... 
- #else - #define ANYARGS - #endif -#endif - #ifdef _WIN32 - #define HASH_FUNCTION_CAST (int (__cdecl *)(ANYARGS)) + typedef int (__cdecl *hash_function_compare)(void*, void*); + typedef int (__cdecl *hash_function_hash)(void*); + typedef int (__cdecl *hash_function_foreach)(void*, void*, void*); #else - #define HASH_FUNCTION_CAST + typedef int (*hash_function_compare)(void*, void*); + typedef int (*hash_function_hash)(void*); + typedef int (*hash_function_foreach)(void*, void*, void*); #endif typedef uintptr_t st_data_t; typedef struct st_table st_table; struct st_hash_type { - int (*compare) (ANYARGS); - int (*hash) (ANYARGS); + hash_function_compare compare; + hash_function_hash hash; }; struct st_table { @@ -67,7 +63,7 @@ int st_delete _((st_table *, st_data_t *, st_data_t *)); int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); int st_insert _((st_table *, st_data_t, st_data_t)); int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +int st_foreach _((st_table *, hash_function_foreach, st_data_t)); void st_add_direct _((st_table *, st_data_t, st_data_t)); void st_free_table _((st_table *)); void st_cleanup_safe _((st_table *, st_data_t)); diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt index 2e84a06a31c..3200164770d 100644 --- a/lang/c/tests/CMakeLists.txt +++ b/lang/c/tests/CMakeLists.txt @@ -64,7 +64,9 @@ add_avro_executable(test_interop_data) add_avro_test_checkmem(test_data_structures) add_avro_test_checkmem(test_avro_schema) +add_avro_test_checkmem(test_avro_commons_schema) add_avro_test_checkmem(test_avro_schema_names) +add_avro_test_checkmem(test_avro_type_collision) add_avro_test_checkmem(test_avro_values) add_avro_test_checkmem(test_avro_766) add_avro_test_checkmem(test_avro_968) diff --git a/lang/c/tests/test_avro_commons_schema.c b/lang/c/tests/test_avro_commons_schema.c new file mode 100644 index 00000000000..e3751e9836a --- /dev/null +++ 
b/lang/c/tests/test_avro_commons_schema.c @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "avro.h" +#include "avro_private.h" +#include +#include +#include +#include +#ifdef _WIN32 + #include "msdirent.h" +#else + #include +#endif + +avro_writer_t avro_stderr; + +static avro_schema_t read_common_schema_test(const char *dirpath) { + char schemafilepath[1024]; + char jsontext[4096]; + + avro_schema_t schema; + int n = snprintf(schemafilepath, sizeof(schemafilepath), "%s/schema.json", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s !\n", dirpath); + exit(EXIT_FAILURE); + } + FILE* fp = fopen(schemafilepath, "r"); + if (!fp) { + fprintf(stderr, "can't open file %s !\n", schemafilepath); + exit(EXIT_FAILURE); + } + int rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp); + fclose(fp); + jsontext[rval] = '\0'; + + int test_rval = avro_schema_from_json(jsontext, 0, &schema, NULL); + if (test_rval != 0) { + fprintf(stderr, "fail! Can' read schema from file %s\n", schemafilepath); + exit(EXIT_FAILURE); + } + return schema; +} + +static void create_writer(avro_schema_t schema, avro_file_writer_t* writer) +{ + // create / reset copy.avro file. 
+ FILE* copyFile = fopen("./copy.avro", "w"); + if (!copyFile) { + fprintf(stderr, "can't create file copy.avro !\n"); + exit(EXIT_FAILURE); + } + fclose(copyFile); + + // create avro writer on file. + if (avro_file_writer_create("./copy.avro", schema, writer)) { + fprintf(stdout, "\nThere was an error creating db: %s", avro_strerror()); + exit(EXIT_FAILURE); + } +} + +static void read_data(const char *dirpath, avro_schema_t schema) { + char datafilepath[1024]; + int n = snprintf(datafilepath, sizeof(datafilepath), "%s/data.avro", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s/data.avro !\n", dirpath); + exit(EXIT_FAILURE); + } + + avro_file_reader_t reader; + avro_datum_t datum; + int rval = avro_file_reader(datafilepath, &reader); + if (rval) { + exit(EXIT_FAILURE); + } + + avro_file_writer_t writer; + create_writer(schema, &writer); + + int records_read = 0; + while ((rval = avro_file_reader_read(reader, schema, &datum)) == 0) { + records_read++; + if (avro_file_writer_append(writer, datum)) { + fprintf(stdout, "\nCan't write record: %s\n", avro_strerror()); + exit(EXIT_FAILURE); + } + + avro_datum_decref(datum); + } + fprintf(stdout, "\nExit run test OK => %d records", records_read); + remove("./copy.avro"); + fflush(stdout); + avro_file_reader_close(reader); + avro_file_writer_close(writer); +} + +static void run_tests(const char *dirpath) +{ + fprintf(stdout, "\nRun test for path '%s'", dirpath); + avro_schema_t schema = read_common_schema_test(dirpath); + read_data(dirpath, schema); + avro_schema_decref(schema); +} + + + +int main(int argc, char *argv[]) +{ + char *srcdir = "../../../share/test/data/schemas"; + AVRO_UNUSED(argc); + AVRO_UNUSED(argv); + + avro_stderr = avro_writer_file(stderr); + + DIR* dir = opendir(srcdir); + if (dir == NULL) { + fprintf(stdout, "Unable to open '%s'\n", srcdir); + fflush(stdout); + exit(EXIT_FAILURE); + } + struct dirent *dent; + do { + dent = readdir(dir); + + if (dent && dent->d_name[0] != '.' 
&& dent->d_type == DT_DIR) { + char filepath[1024]; + snprintf(filepath, sizeof(filepath), "%s/%s", srcdir, dent->d_name); + run_tests(filepath); + } + } + while(dent != NULL); + closedir(dir); + + avro_writer_free(avro_stderr); + return EXIT_SUCCESS; +} diff --git a/lang/c/tests/test_avro_type_collision.c b/lang/c/tests/test_avro_type_collision.c new file mode 100644 index 00000000000..1dda590fd77 --- /dev/null +++ b/lang/c/tests/test_avro_type_collision.c @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#include "avro.h" + +#include +#include + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic error "-Wswitch" +#endif + +#define ASSERT_NOT_AVRO_INVALID(type) \ + if (type == AVRO_INVALID) { \ + fprintf(stderr, #type " collides with AVRO_INVALID\n"); \ + exit(EXIT_FAILURE); \ + } else { \ + break; \ + } + +#define CASE_ASSERTION(type) case type: ASSERT_NOT_AVRO_INVALID(type) + +int main(void) +{ + avro_schema_t null_schema = avro_schema_null(); + avro_type_t type = avro_typeof(null_schema); + avro_schema_decref(null_schema); + + switch (type) { + CASE_ASSERTION(AVRO_STRING) + CASE_ASSERTION(AVRO_BYTES) + CASE_ASSERTION(AVRO_INT32) + CASE_ASSERTION(AVRO_INT64) + CASE_ASSERTION(AVRO_FLOAT) + CASE_ASSERTION(AVRO_DOUBLE) + CASE_ASSERTION(AVRO_BOOLEAN) + CASE_ASSERTION(AVRO_NULL) + CASE_ASSERTION(AVRO_RECORD) + CASE_ASSERTION(AVRO_ENUM) + CASE_ASSERTION(AVRO_FIXED) + CASE_ASSERTION(AVRO_MAP) + CASE_ASSERTION(AVRO_ARRAY) + CASE_ASSERTION(AVRO_UNION) + CASE_ASSERTION(AVRO_LINK) + case AVRO_INVALID: + break; + } + + return EXIT_SUCCESS; +} diff --git a/lang/c/version.sh b/lang/c/version.sh index be90c0f6329..0481bcc23e5 100755 --- a/lang/c/version.sh +++ b/lang/c/version.sh @@ -34,9 +34,9 @@ # libavro_binary_age = 0 # libavro_interface_age = 0 # -libavro_micro_version=23 +libavro_micro_version=24 libavro_interface_age=0 -libavro_binary_age=0 +libavro_binary_age=1 # IGNORE EVERYTHING ELSE FROM HERE DOWN......... 
if test $# != 1; then diff --git a/lang/csharp/.gitignore b/lang/csharp/.gitignore index 80304575bd8..4218bd59d51 100644 --- a/lang/csharp/.gitignore +++ b/lang/csharp/.gitignore @@ -52,5 +52,7 @@ obj/ #Test results TestResult.xml +Coverage +TestResults .vs/ diff --git a/lang/csharp/Avro.sln b/lang/csharp/Avro.sln index 68036ebdd50..729235110a0 100644 --- a/lang/csharp/Avro.sln +++ b/lang/csharp/Avro.sln @@ -13,18 +13,39 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.msbuild", "src\apache\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.perf", "src\apache\perf\Avro.perf.csproj", "{AC4E1909-2594-4D01-9B2B-B832C07BAFE5}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Codecs", "Codecs", "{0FAEE4F6-D72F-4B18-869A-7A90BAC1280F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.BZip2", "src\apache\codec\Avro.File.BZip2\Avro.File.BZip2.csproj", "{FFA119B2-0D60-4090-B5A6-ECA718138812}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.BZip2.Test", "src\apache\codec\Avro.File.BZip2.Test\Avro.File.BZip2.Test.csproj", "{D5ED6642-3E33-493F-9217-FE00E4885699}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Snappy", "src\apache\codec\Avro.File.Snappy\Avro.File.Snappy.csproj", "{B15BEEDC-A371-46D0-BFF6-63FC8105B520}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Snappy.Test", "src\apache\codec\Avro.File.Snappy.Test\Avro.File.Snappy.Test.csproj", "{AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.XZ", "src\apache\codec\Avro.File.XZ\Avro.File.XZ.csproj", "{98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.XZ.Test", "src\apache\codec\Avro.File.XZ.Test\Avro.File.XZ.Test.csproj", "{99711F8E-C5C1-4864-A51F-3317E19CAD7B}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Avro.File.Zstandard", "src\apache\codec\Avro.File.Zstandard\Avro.File.Zstandard.csproj", "{8207A628-6285-4DDF-B846-C0C7ED3E3D16}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.File.Zstandard.Test", "src\apache\codec\Avro.File.Zstandard.Test\Avro.File.Zstandard.Test.csproj", "{04264DDD-C204-4F59-88D4-FB4C69BD80C3}" +EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{8A671DF3-BC71-4E1A-BB06-0A225799A274}" ProjectSection(SolutionItems) = preProject ..\..\.editorconfig = ..\..\.editorconfig .gitignore = .gitignore Avro.ruleset = Avro.ruleset - build.ps1 = build.ps1 build.sh = build.sh common.props = common.props + versions.props = versions.props + CodeAnalysis.src.globalconfig = CodeAnalysis.src.globalconfig + CodeAnalysis.test.globalconfig = CodeAnalysis.test.globalconfig README.md = README.md - stylecop.json = stylecop.json EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Avro.benchmark", "src\apache\benchmark\Avro.benchmark.csproj", "{29271A29-9E89-47B1-A0CA-DD6704C89570}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -87,10 +108,128 @@ Global {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|Mixed Platforms.Build.0 = Release|Any CPU {AC4E1909-2594-4D01-9B2B-B832C07BAFE5}.Release|x86.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Any CPU.Build.0 = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|x86.ActiveCfg = Debug|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Debug|x86.Build.0 = Debug|Any CPU + 
{29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Any CPU.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Any CPU.Build.0 = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|x86.ActiveCfg = Release|Any CPU + {29271A29-9E89-47B1-A0CA-DD6704C89570}.Release|x86.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|x86.ActiveCfg = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Debug|x86.Build.0 = Debug|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Any CPU.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|x86.ActiveCfg = Release|Any CPU + {FFA119B2-0D60-4090-B5A6-ECA718138812}.Release|x86.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|x86.ActiveCfg = Debug|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Debug|x86.Build.0 = Debug|Any CPU + 
{D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Any CPU.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|x86.ActiveCfg = Release|Any CPU + {D5ED6642-3E33-493F-9217-FE00E4885699}.Release|x86.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|x86.ActiveCfg = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Debug|x86.Build.0 = Debug|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Any CPU.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|x86.ActiveCfg = Release|Any CPU + {B15BEEDC-A371-46D0-BFF6-63FC8105B520}.Release|x86.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Any CPU.Build.0 = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|x86.ActiveCfg = Debug|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Debug|x86.Build.0 = Debug|Any CPU + 
{AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Any CPU.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Any CPU.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|x86.ActiveCfg = Release|Any CPU + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23}.Release|x86.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|x86.ActiveCfg = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Debug|x86.Build.0 = Debug|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Any CPU.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|x86.ActiveCfg = Release|Any CPU + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7}.Release|x86.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|x86.ActiveCfg = Debug|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Debug|x86.Build.0 = Debug|Any CPU + 
{99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Any CPU.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|x86.ActiveCfg = Release|Any CPU + {99711F8E-C5C1-4864-A51F-3317E19CAD7B}.Release|x86.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|x86.ActiveCfg = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Debug|x86.Build.0 = Debug|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Any CPU.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|x86.ActiveCfg = Release|Any CPU + {8207A628-6285-4DDF-B846-C0C7ED3E3D16}.Release|x86.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|x86.ActiveCfg = Debug|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Debug|x86.Build.0 = Debug|Any CPU + 
{04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Any CPU.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Any CPU.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|x86.ActiveCfg = Release|Any CPU + {04264DDD-C204-4F59-88D4-FB4C69BD80C3}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {FFA119B2-0D60-4090-B5A6-ECA718138812} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {D5ED6642-3E33-493F-9217-FE00E4885699} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {B15BEEDC-A371-46D0-BFF6-63FC8105B520} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {AA2CA9A3-71C0-4D16-B7E7-F6F50E400F23} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {98CE721F-10AF-4665-9B14-3EA2CDF8F4C7} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {99711F8E-C5C1-4864-A51F-3317E19CAD7B} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {8207A628-6285-4DDF-B846-C0C7ED3E3D16} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + {04264DDD-C204-4F59-88D4-FB4C69BD80C3} = {0FAEE4F6-D72F-4B18-869A-7A90BAC1280F} + EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {ACE75CE8-16B2-4C6E-A5BE-B6F6DB5FE095} EndGlobalSection diff --git a/lang/csharp/CODECOVERAGE.md b/lang/csharp/CODECOVERAGE.md new file mode 100644 index 00000000000..c06cc5af7f1 --- /dev/null +++ b/lang/csharp/CODECOVERAGE.md @@ -0,0 +1,31 @@ + +# C# Avro Code Coverage + +The following instructions should be followed in order to create a code coverage report locally. + +1. Open a command prompt +2. Install ReportGenerator globally\ + a. 
Run the following command line: `dotnet tool install --global dotnet-reportgenerator-globaltool --version 5.1.4 --add-source https://www.nuget.org/packages/`\ + b. The latest version can be found at [Nuget ReportGenerator](https://www.nuget.org/packages/dotnet-reportgenerator-globaltool/) +3. Navigate to the test project `avro\lang\csharp\src\apache\test` +4. Run the following test command `dotnet test --results-directory ./TestResults --collect:"XPlat Code Coverage"` +5. Generate the report with the following command `ReportGenerator "-reports:./TestResults/*/coverage.cobertura.xml" "-targetdir:./Coverage/" -reporttypes:HTML` +6. Open Report under `avro\lang\csharp\src\apache\test\Coverage\index.html` diff --git a/lang/csharp/CodeAnalysis.src.globalconfig b/lang/csharp/CodeAnalysis.src.globalconfig new file mode 100644 index 00000000000..2537599edcb --- /dev/null +++ b/lang/csharp/CodeAnalysis.src.globalconfig @@ -0,0 +1,1585 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !!! Original: https://github.com/dotnet/runtime/blob/main/eng/CodeAnalysis.src.globalconfig +# !!! 
Any overrides should be added to the end of the file + +is_global = true + +# AD0001: Analyzer threw an exception +dotnet_diagnostic.AD0001.severity = suggestion + +# BCL0001: Ensure minimum API surface is respected +dotnet_diagnostic.BCL0001.severity = warning + +# BCL0010: AppContext default value expected to be true +dotnet_diagnostic.BCL0010.severity = warning + +# BCL0011: AppContext default value defined in if statement with incorrect pattern +dotnet_diagnostic.BCL0011.severity = warning + +# BCL0012: AppContext default value defined in if statement at root of switch case +dotnet_diagnostic.BCL0012.severity = warning + +# BCL0015: Invalid P/Invoke call +dotnet_diagnostic.BCL0015.severity = none + +# BCL0020: Invalid SR.Format call +dotnet_diagnostic.BCL0020.severity = warning + +# CA1000: Do not declare static members on generic types +dotnet_diagnostic.CA1000.severity = none + +# CA1001: Types that own disposable fields should be disposable +dotnet_diagnostic.CA1001.severity = none + +# CA1002: Do not expose generic lists +dotnet_diagnostic.CA1002.severity = none + +# CA1003: Use generic event handler instances +dotnet_diagnostic.CA1003.severity = none + +# CA1005: Avoid excessive parameters on generic types +dotnet_diagnostic.CA1005.severity = none + +# CA1008: Enums should have zero value +dotnet_diagnostic.CA1008.severity = none + +# CA1010: Generic interface should also be implemented +dotnet_diagnostic.CA1010.severity = none + +# CA1012: Abstract types should not have public constructors +dotnet_diagnostic.CA1012.severity = none + +# CA1014: Mark assemblies with CLSCompliant +dotnet_diagnostic.CA1014.severity = none + +# CA1016: Mark assemblies with assembly version +dotnet_diagnostic.CA1016.severity = none + +# CA1017: Mark assemblies with ComVisible +dotnet_diagnostic.CA1017.severity = none + +# CA1018: Mark attributes with AttributeUsageAttribute +dotnet_diagnostic.CA1018.severity = warning + +# CA1019: Define accessors for attribute arguments 
+dotnet_diagnostic.CA1019.severity = none + +# CA1021: Avoid out parameters +dotnet_diagnostic.CA1021.severity = none + +# CA1024: Use properties where appropriate +dotnet_diagnostic.CA1024.severity = none + +# CA1027: Mark enums with FlagsAttribute +dotnet_diagnostic.CA1027.severity = none + +# CA1028: Enum Storage should be Int32 +dotnet_diagnostic.CA1028.severity = none + +# CA1030: Use events where appropriate +dotnet_diagnostic.CA1030.severity = none + +# CA1031: Do not catch general exception types +dotnet_diagnostic.CA1031.severity = none + +# CA1032: Implement standard exception constructors +dotnet_diagnostic.CA1032.severity = none + +# CA1033: Interface methods should be callable by child types +dotnet_diagnostic.CA1033.severity = none + +# CA1034: Nested types should not be visible +dotnet_diagnostic.CA1034.severity = none + +# CA1036: Override methods on comparable types +dotnet_diagnostic.CA1036.severity = none + +# CA1040: Avoid empty interfaces +dotnet_diagnostic.CA1040.severity = none + +# CA1041: Provide ObsoleteAttribute message +dotnet_diagnostic.CA1041.severity = none + +# CA1043: Use Integral Or String Argument For Indexers +dotnet_diagnostic.CA1043.severity = none + +# CA1044: Properties should not be write only +dotnet_diagnostic.CA1044.severity = none + +# CA1045: Do not pass types by reference +dotnet_diagnostic.CA1045.severity = none + +# CA1046: Do not overload equality operator on reference types +dotnet_diagnostic.CA1046.severity = none + +# CA1047: Do not declare protected member in sealed type +dotnet_diagnostic.CA1047.severity = warning + +# CA1050: Declare types in namespaces +dotnet_diagnostic.CA1050.severity = warning + +# CA1051: Do not declare visible instance fields +dotnet_diagnostic.CA1051.severity = none + +# CA1052: Static holder types should be Static or NotInheritable +dotnet_diagnostic.CA1052.severity = warning +dotnet_code_quality.CA1052.api_surface = private, internal + +# CA1054: URI-like parameters should not be 
strings +dotnet_diagnostic.CA1054.severity = none + +# CA1055: URI-like return values should not be strings +dotnet_diagnostic.CA1055.severity = none + +# CA1056: URI-like properties should not be strings +dotnet_diagnostic.CA1056.severity = none + +# CA1058: Types should not extend certain base types +dotnet_diagnostic.CA1058.severity = none + +# CA1060: Move pinvokes to native methods class +dotnet_diagnostic.CA1060.severity = none + +# CA1061: Do not hide base class methods +dotnet_diagnostic.CA1061.severity = none + +# CA1062: Validate arguments of public methods +dotnet_diagnostic.CA1062.severity = none + +# CA1063: Implement IDisposable Correctly +dotnet_diagnostic.CA1063.severity = none + +# CA1064: Exceptions should be public +dotnet_diagnostic.CA1064.severity = none + +# CA1065: Do not raise exceptions in unexpected locations +dotnet_diagnostic.CA1065.severity = none + +# CA1066: Implement IEquatable when overriding Object.Equals +dotnet_diagnostic.CA1066.severity = warning + +# CA1067: Override Object.Equals(object) when implementing IEquatable +dotnet_diagnostic.CA1067.severity = warning + +# CA1068: CancellationToken parameters must come last +dotnet_diagnostic.CA1068.severity = none + +# CA1069: Enums values should not be duplicated +dotnet_diagnostic.CA1069.severity = none + +# CA1070: Do not declare event fields as virtual +dotnet_diagnostic.CA1070.severity = suggestion + +# CA1200: Avoid using cref tags with a prefix +dotnet_diagnostic.CA1200.severity = suggestion + +# CA1303: Do not pass literals as localized parameters +dotnet_diagnostic.CA1303.severity = none + +# CA1304: Specify CultureInfo +dotnet_diagnostic.CA1304.severity = none + +# CA1305: Specify IFormatProvider +dotnet_diagnostic.CA1305.severity = none + +# CA1307: Specify StringComparison for clarity +dotnet_diagnostic.CA1307.severity = none + +# CA1308: Normalize strings to uppercase +dotnet_diagnostic.CA1308.severity = none + +# CA1309: Use ordinal string comparison 
+dotnet_diagnostic.CA1309.severity = none + +# CA1310: Specify StringComparison for correctness +dotnet_diagnostic.CA1310.severity = suggestion + +# CA1401: P/Invokes should not be visible +dotnet_diagnostic.CA1401.severity = warning + +# CA1416: Validate platform compatibility +dotnet_diagnostic.CA1416.severity = warning + +# CA1417: Do not use 'OutAttribute' on string parameters for P/Invokes +dotnet_diagnostic.CA1417.severity = warning + +# CA1418: Use valid platform string +dotnet_diagnostic.CA1418.severity = warning + +# CA1419: Provide a parameterless constructor that is as visible as the containing type for concrete types derived from 'System.Runtime.InteropServices.SafeHandle' +dotnet_diagnostic.CA1419.severity = warning + +# CA1501: Avoid excessive inheritance +dotnet_diagnostic.CA1501.severity = none + +# CA1502: Avoid excessive complexity +dotnet_diagnostic.CA1502.severity = none + +# CA1505: Avoid unmaintainable code +dotnet_diagnostic.CA1505.severity = none + +# CA1506: Avoid excessive class coupling +dotnet_diagnostic.CA1506.severity = none + +# CA1507: Use nameof to express symbol names +dotnet_diagnostic.CA1507.severity = warning + +# CA1508: Avoid dead conditional code +dotnet_diagnostic.CA1508.severity = none + +# CA1509: Invalid entry in code metrics rule specification file +dotnet_diagnostic.CA1509.severity = none + +# CA1700: Do not name enum values 'Reserved' +dotnet_diagnostic.CA1700.severity = none + +# CA1707: Identifiers should not contain underscores +dotnet_diagnostic.CA1707.severity = none + +# CA1708: Identifiers should differ by more than case +dotnet_diagnostic.CA1708.severity = none + +# CA1710: Identifiers should have correct suffix +dotnet_diagnostic.CA1710.severity = none + +# CA1711: Identifiers should not have incorrect suffix +dotnet_diagnostic.CA1711.severity = none + +# CA1712: Do not prefix enum values with type name +dotnet_diagnostic.CA1712.severity = none + +# CA1713: Events should not have 'Before' or 'After' prefix 
+dotnet_diagnostic.CA1713.severity = none + +# CA1715: Identifiers should have correct prefix +dotnet_diagnostic.CA1715.severity = none + +# CA1716: Identifiers should not match keywords +dotnet_diagnostic.CA1716.severity = none + +# CA1720: Identifier contains type name +dotnet_diagnostic.CA1720.severity = none + +# CA1721: Property names should not match get methods +dotnet_diagnostic.CA1721.severity = none + +# CA1724: Type names should not match namespaces +dotnet_diagnostic.CA1724.severity = none + +# CA1725: Parameter names should match base declaration +dotnet_diagnostic.CA1725.severity = suggestion + +# CA1727: Use PascalCase for named placeholders +dotnet_diagnostic.CA1727.severity = suggestion + +# CA1802: Use literals where appropriate +dotnet_diagnostic.CA1802.severity = warning +dotnet_code_quality.CA1802.api_surface = private, internal + +# CA1805: Do not initialize unnecessarily +dotnet_diagnostic.CA1805.severity = warning + +# CA1806: Do not ignore method results +dotnet_diagnostic.CA1806.severity = none + +# CA1810: Initialize reference type static fields inline +dotnet_diagnostic.CA1810.severity = warning + +# CA1812: Avoid uninstantiated internal classes +dotnet_diagnostic.CA1812.severity = none + +# CA1813: Avoid unsealed attributes +dotnet_diagnostic.CA1813.severity = none + +# CA1814: Prefer jagged arrays over multidimensional +dotnet_diagnostic.CA1814.severity = none + +# CA1815: Override equals and operator equals on value types +dotnet_diagnostic.CA1815.severity = none + +# CA1816: Dispose methods should call SuppressFinalize +dotnet_diagnostic.CA1816.severity = none + +# CA1819: Properties should not return arrays +dotnet_diagnostic.CA1819.severity = none + +# CA1820: Test for empty strings using string length +dotnet_diagnostic.CA1820.severity = none + +# CA1821: Remove empty Finalizers +dotnet_diagnostic.CA1821.severity = warning + +# CA1822: Mark members as static +dotnet_diagnostic.CA1822.severity = none + +# CA1823: Avoid unused 
private fields +dotnet_diagnostic.CA1823.severity = warning + +# CA1824: Mark assemblies with NeutralResourcesLanguageAttribute +dotnet_diagnostic.CA1824.severity = warning + +# CA1825: Avoid zero-length array allocations +dotnet_diagnostic.CA1825.severity = warning + +# CA1826: Do not use Enumerable methods on indexable collections +dotnet_diagnostic.CA1826.severity = warning + +# CA1827: Do not use Count() or LongCount() when Any() can be used +dotnet_diagnostic.CA1827.severity = warning + +# CA1828: Do not use CountAsync() or LongCountAsync() when AnyAsync() can be used +dotnet_diagnostic.CA1828.severity = warning + +# CA1829: Use Length/Count property instead of Count() when available +dotnet_diagnostic.CA1829.severity = warning + +# CA1830: Prefer strongly-typed Append and Insert method overloads on StringBuilder +dotnet_diagnostic.CA1830.severity = warning + +# CA1831: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1831.severity = warning + +# CA1832: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1832.severity = warning + +# CA1833: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1833.severity = warning + +# CA1834: Consider using 'StringBuilder.Append(char)' when applicable +dotnet_diagnostic.CA1834.severity = warning + +# CA1835: Prefer the 'Memory'-based overloads for 'ReadAsync' and 'WriteAsync' +dotnet_diagnostic.CA1835.severity = warning + +# CA1836: Prefer IsEmpty over Count +dotnet_diagnostic.CA1836.severity = warning + +# CA1837: Use 'Environment.ProcessId' +dotnet_diagnostic.CA1837.severity = warning + +# CA1838: Avoid 'StringBuilder' parameters for P/Invokes +dotnet_diagnostic.CA1838.severity = warning + +# CA1839: Use 'Environment.ProcessPath' +dotnet_diagnostic.CA1839.severity = warning + +# CA1840: Use 'Environment.CurrentManagedThreadId' +dotnet_diagnostic.CA1840.severity = warning + +# CA1841: Prefer 
Dictionary.Contains methods +dotnet_diagnostic.CA1841.severity = warning + +# CA1842: Do not use 'WhenAll' with a single task +dotnet_diagnostic.CA1842.severity = warning + +# CA1843: Do not use 'WaitAll' with a single task +dotnet_diagnostic.CA1843.severity = warning + +# CA1844: Provide memory-based overrides of async methods when subclassing 'Stream' +dotnet_diagnostic.CA1844.severity = warning + +# CA1845: Use span-based 'string.Concat' +dotnet_diagnostic.CA1845.severity = warning + +# CA1846: Prefer 'AsSpan' over 'Substring' +dotnet_diagnostic.CA1846.severity = warning + +# CA1847: Use char literal for a single character lookup +dotnet_diagnostic.CA1847.severity = warning + +# CA1848: Use the LoggerMessage delegates +dotnet_diagnostic.CA1848.severity = none + +# CA1849: Call async methods when in an async method +dotnet_diagnostic.CA1849.severity = suggestion + +# CA1850: Prefer static 'HashData' method over 'ComputeHash' +dotnet_diagnostic.CA1850.severity = warning + +# CA2000: Dispose objects before losing scope +dotnet_diagnostic.CA2000.severity = none + +# CA2002: Do not lock on objects with weak identity +dotnet_diagnostic.CA2002.severity = none + +# CA2007: Consider calling ConfigureAwait on the awaited task +dotnet_diagnostic.CA2007.severity = warning + +# CA2008: Do not create tasks without passing a TaskScheduler +dotnet_diagnostic.CA2008.severity = warning + +# CA2009: Do not call ToImmutableCollection on an ImmutableCollection value +dotnet_diagnostic.CA2009.severity = warning + +# CA2011: Avoid infinite recursion +dotnet_diagnostic.CA2011.severity = warning + +# CA2012: Use ValueTasks correctly +dotnet_diagnostic.CA2012.severity = warning + +# CA2013: Do not use ReferenceEquals with value types +dotnet_diagnostic.CA2013.severity = warning + +# CA2014: Do not use stackalloc in loops +dotnet_diagnostic.CA2014.severity = warning + +# CA2015: Do not define finalizers for types derived from MemoryManager +dotnet_diagnostic.CA2015.severity = warning + +# 
CA2016: Forward the 'CancellationToken' parameter to methods +dotnet_diagnostic.CA2016.severity = warning + +# CA2017: Parameter count mismatch +dotnet_diagnostic.CA2017.severity = warning + +# CA2018: 'Buffer.BlockCopy' expects the number of bytes to be copied for the 'count' argument +dotnet_diagnostic.CA2018.severity = warning + +# CA2100: Review SQL queries for security vulnerabilities +dotnet_diagnostic.CA2100.severity = none + +# CA2101: Specify marshaling for P/Invoke string arguments +dotnet_diagnostic.CA2101.severity = none + +# CA2109: Review visible event handlers +dotnet_diagnostic.CA2109.severity = none + +# CA2119: Seal methods that satisfy private interfaces +dotnet_diagnostic.CA2119.severity = none + +# CA2153: Do Not Catch Corrupted State Exceptions +dotnet_diagnostic.CA2153.severity = none + +# CA2200: Rethrow to preserve stack details +dotnet_diagnostic.CA2200.severity = warning + +# CA2201: Do not raise reserved exception types +dotnet_diagnostic.CA2201.severity = none + +# CA2207: Initialize value type static fields inline +dotnet_diagnostic.CA2207.severity = warning + +# CA2208: Instantiate argument exceptions correctly +dotnet_diagnostic.CA2208.severity = warning +dotnet_code_quality.CA2208.api_surface = public + +# CA2211: Non-constant fields should not be visible +dotnet_diagnostic.CA2211.severity = none + +# CA2213: Disposable fields should be disposed +dotnet_diagnostic.CA2213.severity = none + +# CA2214: Do not call overridable methods in constructors +dotnet_diagnostic.CA2214.severity = none + +# CA2215: Dispose methods should call base class dispose +dotnet_diagnostic.CA2215.severity = none + +# CA2216: Disposable types should declare finalizer +dotnet_diagnostic.CA2216.severity = none + +# CA2217: Do not mark enums with FlagsAttribute +dotnet_diagnostic.CA2217.severity = none + +# CA2218: Override GetHashCode on overriding Equals +dotnet_diagnostic.CA2218.severity = none + +# CA2219: Do not raise exceptions in finally clauses 
+dotnet_diagnostic.CA2219.severity = none + +# CA2224: Override Equals on overloading operator equals +dotnet_diagnostic.CA2224.severity = none + +# CA2225: Operator overloads have named alternates +dotnet_diagnostic.CA2225.severity = none + +# CA2226: Operators should have symmetrical overloads +dotnet_diagnostic.CA2226.severity = none + +# CA2227: Collection properties should be read only +dotnet_diagnostic.CA2227.severity = none + +# CA2229: Implement serialization constructors +dotnet_diagnostic.CA2229.severity = warning + +# CA2231: Overload operator equals on overriding value type Equals +dotnet_diagnostic.CA2231.severity = none + +# CA2234: Pass system uri objects instead of strings +dotnet_diagnostic.CA2234.severity = none + +# CA2235: Mark all non-serializable fields +dotnet_diagnostic.CA2235.severity = none + +# CA2237: Mark ISerializable types with serializable +dotnet_diagnostic.CA2237.severity = none + +# CA2241: Provide correct arguments to formatting methods +dotnet_diagnostic.CA2241.severity = warning + +# CA2242: Test for NaN correctly +dotnet_diagnostic.CA2242.severity = warning + +# CA2243: Attribute string literals should parse correctly +dotnet_diagnostic.CA2243.severity = warning + +# CA2244: Do not duplicate indexed element initializations +dotnet_diagnostic.CA2244.severity = warning + +# CA2245: Do not assign a property to itself +dotnet_diagnostic.CA2245.severity = warning + +# CA2246: Assigning symbol and its member in the same statement +dotnet_diagnostic.CA2246.severity = warning + +# CA2247: Argument passed to TaskCompletionSource constructor should be TaskCreationOptions enum instead of TaskContinuationOptions enum +dotnet_diagnostic.CA2247.severity = warning + +# CA2248: Provide correct 'enum' argument to 'Enum.HasFlag' +dotnet_diagnostic.CA2248.severity = warning + +# CA2249: Consider using 'string.Contains' instead of 'string.IndexOf' +dotnet_diagnostic.CA2249.severity = warning + +# CA2250: Use 'ThrowIfCancellationRequested' 
+dotnet_diagnostic.CA2250.severity = warning + +# CA2251: Use 'string.Equals' +dotnet_diagnostic.CA2251.severity = warning + +# CA2252: This API requires opting into preview features +dotnet_diagnostic.CA2252.severity = error + +# CA2253: Named placeholders should not be numeric values +dotnet_diagnostic.CA2253.severity = none + +# CA2254: Template should be a static expression +dotnet_diagnostic.CA2254.severity = none + +# CA2255: The 'ModuleInitializer' attribute should not be used in libraries +dotnet_diagnostic.CA2255.severity = warning + +# CA2256: All members declared in parent interfaces must have an implementation in a DynamicInterfaceCastableImplementation-attributed interface +dotnet_diagnostic.CA2256.severity = warning + +# CA2257: Members defined on an interface with the 'DynamicInterfaceCastableImplementationAttribute' should be 'static' +dotnet_diagnostic.CA2257.severity = warning + +# CA2258: Providing a 'DynamicInterfaceCastableImplementation' interface in Visual Basic is unsupported +dotnet_diagnostic.CA2258.severity = warning + +# CA2300: Do not use insecure deserializer BinaryFormatter +dotnet_diagnostic.CA2300.severity = none + +# CA2301: Do not call BinaryFormatter.Deserialize without first setting BinaryFormatter.Binder +dotnet_diagnostic.CA2301.severity = none + +# CA2302: Ensure BinaryFormatter.Binder is set before calling BinaryFormatter.Deserialize +dotnet_diagnostic.CA2302.severity = none + +# CA2305: Do not use insecure deserializer LosFormatter +dotnet_diagnostic.CA2305.severity = none + +# CA2310: Do not use insecure deserializer NetDataContractSerializer +dotnet_diagnostic.CA2310.severity = none + +# CA2311: Do not deserialize without first setting NetDataContractSerializer.Binder +dotnet_diagnostic.CA2311.severity = none + +# CA2312: Ensure NetDataContractSerializer.Binder is set before deserializing +dotnet_diagnostic.CA2312.severity = none + +# CA2315: Do not use insecure deserializer ObjectStateFormatter 
+dotnet_diagnostic.CA2315.severity = none + +# CA2321: Do not deserialize with JavaScriptSerializer using a SimpleTypeResolver +dotnet_diagnostic.CA2321.severity = none + +# CA2322: Ensure JavaScriptSerializer is not initialized with SimpleTypeResolver before deserializing +dotnet_diagnostic.CA2322.severity = none + +# CA2326: Do not use TypeNameHandling values other than None +dotnet_diagnostic.CA2326.severity = none + +# CA2327: Do not use insecure JsonSerializerSettings +dotnet_diagnostic.CA2327.severity = none + +# CA2328: Ensure that JsonSerializerSettings are secure +dotnet_diagnostic.CA2328.severity = none + +# CA2329: Do not deserialize with JsonSerializer using an insecure configuration +dotnet_diagnostic.CA2329.severity = none + +# CA2330: Ensure that JsonSerializer has a secure configuration when deserializing +dotnet_diagnostic.CA2330.severity = none + +# CA2350: Do not use DataTable.ReadXml() with untrusted data +dotnet_diagnostic.CA2350.severity = none + +# CA2351: Do not use DataSet.ReadXml() with untrusted data +dotnet_diagnostic.CA2351.severity = none + +# CA2352: Unsafe DataSet or DataTable in serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2352.severity = none + +# CA2353: Unsafe DataSet or DataTable in serializable type +dotnet_diagnostic.CA2353.severity = none + +# CA2354: Unsafe DataSet or DataTable in deserialized object graph can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2354.severity = none + +# CA2355: Unsafe DataSet or DataTable type found in deserializable object graph +dotnet_diagnostic.CA2355.severity = none + +# CA2356: Unsafe DataSet or DataTable type in web deserializable object graph +dotnet_diagnostic.CA2356.severity = none + +# CA2361: Ensure auto-generated class containing DataSet.ReadXml() is not used with untrusted data +dotnet_diagnostic.CA2361.severity = none + +# CA2362: Unsafe DataSet or DataTable in auto-generated serializable type can be vulnerable to 
remote code execution attacks +dotnet_diagnostic.CA2362.severity = none + +# CA3001: Review code for SQL injection vulnerabilities +dotnet_diagnostic.CA3001.severity = none + +# CA3002: Review code for XSS vulnerabilities +dotnet_diagnostic.CA3002.severity = none + +# CA3003: Review code for file path injection vulnerabilities +dotnet_diagnostic.CA3003.severity = none + +# CA3004: Review code for information disclosure vulnerabilities +dotnet_diagnostic.CA3004.severity = none + +# CA3005: Review code for LDAP injection vulnerabilities +dotnet_diagnostic.CA3005.severity = none + +# CA3006: Review code for process command injection vulnerabilities +dotnet_diagnostic.CA3006.severity = none + +# CA3007: Review code for open redirect vulnerabilities +dotnet_diagnostic.CA3007.severity = none + +# CA3008: Review code for XPath injection vulnerabilities +dotnet_diagnostic.CA3008.severity = none + +# CA3009: Review code for XML injection vulnerabilities +dotnet_diagnostic.CA3009.severity = none + +# CA3010: Review code for XAML injection vulnerabilities +dotnet_diagnostic.CA3010.severity = none + +# CA3011: Review code for DLL injection vulnerabilities +dotnet_diagnostic.CA3011.severity = none + +# CA3012: Review code for regex injection vulnerabilities +dotnet_diagnostic.CA3012.severity = none + +# CA3061: Do Not Add Schema By URL +dotnet_diagnostic.CA3061.severity = warning + +# CA3075: Insecure DTD processing in XML +dotnet_diagnostic.CA3075.severity = warning + +# CA3076: Insecure XSLT script processing. 
+dotnet_diagnostic.CA3076.severity = warning + +# CA3077: Insecure Processing in API Design, XmlDocument and XmlTextReader +dotnet_diagnostic.CA3077.severity = warning + +# CA3147: Mark Verb Handlers With Validate Antiforgery Token +dotnet_diagnostic.CA3147.severity = warning + +# CA5350: Do Not Use Weak Cryptographic Algorithms +dotnet_diagnostic.CA5350.severity = warning + +# CA5351: Do Not Use Broken Cryptographic Algorithms +dotnet_diagnostic.CA5351.severity = warning + +# CA5358: Review cipher mode usage with cryptography experts +dotnet_diagnostic.CA5358.severity = none + +# CA5359: Do Not Disable Certificate Validation +dotnet_diagnostic.CA5359.severity = warning + +# CA5360: Do Not Call Dangerous Methods In Deserialization +dotnet_diagnostic.CA5360.severity = warning + +# CA5361: Do Not Disable SChannel Use of Strong Crypto +dotnet_diagnostic.CA5361.severity = warning + +# CA5362: Potential reference cycle in deserialized object graph +dotnet_diagnostic.CA5362.severity = none + +# CA5363: Do Not Disable Request Validation +dotnet_diagnostic.CA5363.severity = warning + +# CA5364: Do Not Use Deprecated Security Protocols +dotnet_diagnostic.CA5364.severity = warning + +# CA5365: Do Not Disable HTTP Header Checking +dotnet_diagnostic.CA5365.severity = warning + +# CA5366: Use XmlReader for 'DataSet.ReadXml()' +dotnet_diagnostic.CA5366.severity = none + +# CA5367: Do Not Serialize Types With Pointer Fields +dotnet_diagnostic.CA5367.severity = none + +# CA5368: Set ViewStateUserKey For Classes Derived From Page +dotnet_diagnostic.CA5368.severity = warning + +# CA5369: Use XmlReader for 'XmlSerializer.Deserialize()' +dotnet_diagnostic.CA5369.severity = none + +# CA5370: Use XmlReader for XmlValidatingReader constructor +dotnet_diagnostic.CA5370.severity = warning + +# CA5371: Use XmlReader for 'XmlSchema.Read()' +dotnet_diagnostic.CA5371.severity = none + +# CA5372: Use XmlReader for XPathDocument constructor +dotnet_diagnostic.CA5372.severity = none + +# CA5373: 
Do not use obsolete key derivation function +dotnet_diagnostic.CA5373.severity = warning + +# CA5374: Do Not Use XslTransform +dotnet_diagnostic.CA5374.severity = warning + +# CA5375: Do Not Use Account Shared Access Signature +dotnet_diagnostic.CA5375.severity = none + +# CA5376: Use SharedAccessProtocol HttpsOnly +dotnet_diagnostic.CA5376.severity = warning + +# CA5377: Use Container Level Access Policy +dotnet_diagnostic.CA5377.severity = warning + +# CA5378: Do not disable ServicePointManagerSecurityProtocols +dotnet_diagnostic.CA5378.severity = warning + +# CA5379: Ensure Key Derivation Function algorithm is sufficiently strong +dotnet_diagnostic.CA5379.severity = warning + +# CA5380: Do Not Add Certificates To Root Store +dotnet_diagnostic.CA5380.severity = warning + +# CA5381: Ensure Certificates Are Not Added To Root Store +dotnet_diagnostic.CA5381.severity = warning + +# CA5382: Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5382.severity = none + +# CA5383: Ensure Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5383.severity = none + +# CA5384: Do Not Use Digital Signature Algorithm (DSA) +dotnet_diagnostic.CA5384.severity = warning + +# CA5385: Use Rivest-Shamir-Adleman (RSA) Algorithm With Sufficient Key Size +dotnet_diagnostic.CA5385.severity = warning + +# CA5386: Avoid hardcoding SecurityProtocolType value +dotnet_diagnostic.CA5386.severity = none + +# CA5387: Do Not Use Weak Key Derivation Function With Insufficient Iteration Count +dotnet_diagnostic.CA5387.severity = none + +# CA5388: Ensure Sufficient Iteration Count When Using Weak Key Derivation Function +dotnet_diagnostic.CA5388.severity = none + +# CA5389: Do Not Add Archive Item's Path To The Target File System Path +dotnet_diagnostic.CA5389.severity = none + +# CA5390: Do not hard-code encryption key +dotnet_diagnostic.CA5390.severity = none + +# CA5391: Use antiforgery tokens in ASP.NET Core MVC controllers +dotnet_diagnostic.CA5391.severity = none + +# CA5392: Use 
DefaultDllImportSearchPaths attribute for P/Invokes +dotnet_diagnostic.CA5392.severity = none + +# CA5393: Do not use unsafe DllImportSearchPath value +dotnet_diagnostic.CA5393.severity = none + +# CA5394: Do not use insecure randomness +dotnet_diagnostic.CA5394.severity = none + +# CA5395: Miss HttpVerb attribute for action methods +dotnet_diagnostic.CA5395.severity = none + +# CA5396: Set HttpOnly to true for HttpCookie +dotnet_diagnostic.CA5396.severity = none + +# CA5397: Do not use deprecated SslProtocols values +dotnet_diagnostic.CA5397.severity = none + +# CA5398: Avoid hardcoded SslProtocols values +dotnet_diagnostic.CA5398.severity = none + +# CA5399: HttpClients should enable certificate revocation list checks +dotnet_diagnostic.CA5399.severity = none + +# CA5400: Ensure HttpClient certificate revocation list check is not disabled +dotnet_diagnostic.CA5400.severity = none + +# CA5401: Do not use CreateEncryptor with non-default IV +dotnet_diagnostic.CA5401.severity = none + +# CA5402: Use CreateEncryptor with the default IV +dotnet_diagnostic.CA5402.severity = none + +# CA5403: Do not hard-code certificate +dotnet_diagnostic.CA5403.severity = none + +# CA5404: Do not disable token validation checks +dotnet_diagnostic.CA5404.severity = none + +# CA5405: Do not always skip token validation in delegates +dotnet_diagnostic.CA5405.severity = none + +# IL3000: Avoid accessing Assembly file path when publishing as a single file +dotnet_diagnostic.IL3000.severity = warning + +# IL3001: Avoid accessing Assembly file path when publishing as a single file +dotnet_diagnostic.IL3001.severity = warning + +# IL3002: Using member with RequiresAssemblyFilesAttribute can break functionality when embedded in a single-file app +dotnet_diagnostic.IL3002.severity = warning + +# SA0001: XML comments +dotnet_diagnostic.SA0001.severity = none + +# SA1000: Spacing around keywords +dotnet_diagnostic.SA1000.severity = warning + +# SA1001: Commas should not be preceded by 
whitespace +dotnet_diagnostic.SA1001.severity = warning + +# SA1002: Semicolons should not be preceded by a space +dotnet_diagnostic.SA1002.severity = none + +# SA1003: Operator should not appear at the end of a line +dotnet_diagnostic.SA1003.severity = none + +# SA1004: Documentation line should begin with a space +dotnet_diagnostic.SA1004.severity = none + +# SA1005: Single line comment should begin with a space +dotnet_diagnostic.SA1005.severity = none + +# SA1008: Opening parenthesis should not be preceded by a space +dotnet_diagnostic.SA1008.severity = none + +# SA1009: Closing parenthesis should not be followed by a space +dotnet_diagnostic.SA1009.severity = none + +# SA1010: Opening square brackets should not be preceded by a space +dotnet_diagnostic.SA1010.severity = none + +# SA1011: Closing square bracket should be followed by a space +dotnet_diagnostic.SA1011.severity = none + +# SA1012: Opening brace should be followed by a space +dotnet_diagnostic.SA1012.severity = none + +# SA1013: Closing brace should be preceded by a space +dotnet_diagnostic.SA1013.severity = none + +# SA1014: Opening generic brackets should not be preceded by a space +dotnet_diagnostic.SA1014.severity = warning + +# SA1015: Closing generic bracket should not be followed by a space +dotnet_diagnostic.SA1015.severity = none + +# SA1018: Nullable type symbol should not be preceded by a space +dotnet_diagnostic.SA1018.severity = warning + +# SA1020: Increment symbol should not be preceded by a space +dotnet_diagnostic.SA1020.severity = warning + +# SA1021: Negative sign should be preceded by a space +dotnet_diagnostic.SA1021.severity = none + +# SA1023: Dereference symbol '*' should not be preceded by a space.
+dotnet_diagnostic.SA1023.severity = none + +# SA1024: Colon should be followed by a space +dotnet_diagnostic.SA1024.severity = none + +# SA1025: Code should not contain multiple whitespace characters in a row +dotnet_diagnostic.SA1025.severity = none + +# SA1026: Keyword followed by span or blank line +dotnet_diagnostic.SA1026.severity = warning + +# SA1027: Tabs and spaces should be used correctly +dotnet_diagnostic.SA1027.severity = warning + +# SA1028: Code should not contain trailing whitespace +dotnet_diagnostic.SA1028.severity = warning + +# SA1100: Do not prefix calls with base unless local implementation exists +dotnet_diagnostic.SA1100.severity = none + +# SA1101: Prefix local calls with this +dotnet_diagnostic.SA1101.severity = none + +# SA1102: Query clause should follow previous clause +dotnet_diagnostic.SA1102.severity = warning + +# SA1105: Query clauses spanning multiple lines should begin on own line +dotnet_diagnostic.SA1105.severity = warning + +# SA1106: Code should not contain empty statements +dotnet_diagnostic.SA1106.severity = none + +# SA1107: Code should not contain multiple statements on one line +dotnet_diagnostic.SA1107.severity = none + +# SA1108: Block statements should not contain embedded comments +dotnet_diagnostic.SA1108.severity = none + +# SA1110: Opening parenthesis or bracket should be on declaration line +dotnet_diagnostic.SA1110.severity = none + +# SA1111: Closing parenthesis should be on line of last parameter +dotnet_diagnostic.SA1111.severity = none + +# SA1113: Comma should be on the same line as previous parameter +dotnet_diagnostic.SA1113.severity = warning + +# SA1114: Parameter list should follow declaration +dotnet_diagnostic.SA1114.severity = none + +# SA1115: Parameter should begin on the line after the previous parameter +dotnet_diagnostic.SA1115.severity = warning + +# SA1116: Split parameters should start on line after declaration +dotnet_diagnostic.SA1116.severity = none + +# SA1117: Parameters should be on 
same line or separate lines +dotnet_diagnostic.SA1117.severity = none + +# SA1118: Parameter should not span multiple lines +dotnet_diagnostic.SA1118.severity = none + +# SA1119: Statement should not use unnecessary parenthesis +dotnet_diagnostic.SA1119.severity = none + +# SA1120: Comments should contain text +dotnet_diagnostic.SA1120.severity = none + +# SA1121: Use built-in type alias +dotnet_diagnostic.SA1121.severity = warning + +# SA1122: Use string.Empty for empty strings +dotnet_diagnostic.SA1122.severity = none + +# SA1123: Region should not be located within a code element +dotnet_diagnostic.SA1123.severity = none + +# SA1124: Do not use regions +dotnet_diagnostic.SA1124.severity = none + +# SA1125: Use shorthand for nullable types +dotnet_diagnostic.SA1125.severity = none + +# SA1127: Generic type constraints should be on their own line +dotnet_diagnostic.SA1127.severity = none + +# SA1128: Put constructor initializers on their own line +dotnet_diagnostic.SA1128.severity = none + +# SA1129: Do not use default value type constructor +dotnet_diagnostic.SA1129.severity = warning + +# SA1130: Use lambda syntax +dotnet_diagnostic.SA1130.severity = none + +# SA1131: Constant values should appear on the right-hand side of comparisons +dotnet_diagnostic.SA1131.severity = none + +# SA1132: Do not combine fields +dotnet_diagnostic.SA1132.severity = none + +# SA1133: Do not combine attributes +dotnet_diagnostic.SA1133.severity = none + +# SA1134: Each attribute should be placed on its own line of code +dotnet_diagnostic.SA1134.severity = none + +# SA1135: Using directive should be qualified +dotnet_diagnostic.SA1135.severity = none + +# SA1136: Enum values should be on separate lines +dotnet_diagnostic.SA1136.severity = none + +# SA1137: Elements should have the same indentation +dotnet_diagnostic.SA1137.severity = none + +# SA1139: Use literal suffix notation instead of casting +dotnet_diagnostic.SA1139.severity = none + +# SA1141: Use tuple syntax 
+dotnet_diagnostic.SA1141.severity = warning + +# SA1142: Refer to tuple elements by name +dotnet_diagnostic.SA1142.severity = warning + +# SA1200: Using directive should appear within a namespace declaration +dotnet_diagnostic.SA1200.severity = none + +# SA1201: Elements should appear in the correct order +dotnet_diagnostic.SA1201.severity = none + +# SA1202: Elements should be ordered by access +dotnet_diagnostic.SA1202.severity = none + +# SA1203: Constants should appear before fields +dotnet_diagnostic.SA1203.severity = none + +# SA1204: Static elements should appear before instance elements +dotnet_diagnostic.SA1204.severity = none + +# SA1205: Partial elements should declare an access modifier +dotnet_diagnostic.SA1205.severity = warning + +# SA1206: Keyword ordering +dotnet_diagnostic.SA1206.severity = warning + +# SA1208: Using directive ordering +dotnet_diagnostic.SA1208.severity = none + +# SA1209: Using alias directives should be placed after all using namespace directives +dotnet_diagnostic.SA1209.severity = none + +# SA1210: Using directives should be ordered alphabetically by the namespaces +dotnet_diagnostic.SA1210.severity = none + +# SA1211: Using alias directive ordering +dotnet_diagnostic.SA1211.severity = none + +# SA1212: A get accessor appears after a set accessor within a property or indexer +dotnet_diagnostic.SA1212.severity = warning + +# SA1214: Readonly fields should appear before non-readonly fields +dotnet_diagnostic.SA1214.severity = none + +# SA1216: Using static directives should be placed at the correct location +dotnet_diagnostic.SA1216.severity = none + +# SA1300: Element should begin with an uppercase letter +dotnet_diagnostic.SA1300.severity = none + +# SA1302: Interface names should begin with I +dotnet_diagnostic.SA1302.severity = warning + +# SA1303: Const field names should begin with upper-case letter +dotnet_diagnostic.SA1303.severity = none + +# SA1304: Non-private readonly fields should begin with upper-case letter 
+dotnet_diagnostic.SA1304.severity = none + +# SA1306: Field should begin with lower-case letter +dotnet_diagnostic.SA1306.severity = none + +# SA1307: Field should begin with upper-case letter +dotnet_diagnostic.SA1307.severity = none + +# SA1308: Field should not begin with the prefix 's_' +dotnet_diagnostic.SA1308.severity = none + +# SA1309: Field names should not begin with underscore +dotnet_diagnostic.SA1309.severity = none + +# SA1310: Field should not contain an underscore +dotnet_diagnostic.SA1310.severity = none + +# SA1311: Static readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1311.severity = none + +# SA1312: Variable should begin with lower-case letter +dotnet_diagnostic.SA1312.severity = none + +# SA1313: Parameter should begin with lower-case letter +dotnet_diagnostic.SA1313.severity = none + +# SA1314: Type parameter names should begin with T +dotnet_diagnostic.SA1314.severity = none + +# SA1316: Tuple element names should use correct casing +dotnet_diagnostic.SA1316.severity = none + +# SA1400: Member should declare an access modifier +dotnet_diagnostic.SA1400.severity = warning + +# SA1401: Fields should be private +dotnet_diagnostic.SA1401.severity = none + +# SA1402: File may only contain a single type +dotnet_diagnostic.SA1402.severity = none + +# SA1403: File may only contain a single namespace +dotnet_diagnostic.SA1403.severity = none + +# SA1404: Code analysis suppression should have justification +dotnet_diagnostic.SA1404.severity = warning + +# SA1405: Debug.Assert should provide message text +dotnet_diagnostic.SA1405.severity = none + +# SA1407: Arithmetic expressions should declare precedence +dotnet_diagnostic.SA1407.severity = none + +# SA1408: Conditional expressions should declare precedence +dotnet_diagnostic.SA1408.severity = none + +# SA1410: Remove delegate parentheses when possible +dotnet_diagnostic.SA1410.severity = warning + +# SA1411: Attribute constructor shouldn't use unnecessary parenthesis 
+dotnet_diagnostic.SA1411.severity = warning + +# SA1413: Use trailing comma in multi-line initializers +dotnet_diagnostic.SA1413.severity = none + +# SA1414: Tuple types in signatures should have element names +dotnet_diagnostic.SA1414.severity = none + +# SA1500: Braces for multi-line statements should not share line +dotnet_diagnostic.SA1500.severity = none + +# SA1501: Statement should not be on a single line +dotnet_diagnostic.SA1501.severity = none + +# SA1502: Element should not be on a single line +dotnet_diagnostic.SA1502.severity = none + +# SA1503: Braces should not be omitted +dotnet_diagnostic.SA1503.severity = none + +# SA1504: All accessors should be single-line or multi-line +dotnet_diagnostic.SA1504.severity = none + +# SA1505: An opening brace should not be followed by a blank line +dotnet_diagnostic.SA1505.severity = none + +# SA1506: Element documentation headers should not be followed by blank line +dotnet_diagnostic.SA1506.severity = none + +# SA1507: Code should not contain multiple blank lines in a row +dotnet_diagnostic.SA1507.severity = none + +# SA1508: A closing brace should not be preceded by a blank line +dotnet_diagnostic.SA1508.severity = none + +# SA1509: Opening braces should not be preceded by blank line +dotnet_diagnostic.SA1509.severity = none + +# SA1510: 'else' statement should not be preceded by a blank line +dotnet_diagnostic.SA1510.severity = none + +# SA1512: Single-line comments should not be followed by blank line +dotnet_diagnostic.SA1512.severity = none + +# SA1513: Closing brace should be followed by blank line +dotnet_diagnostic.SA1513.severity = none + +# SA1514: Element documentation header should be preceded by blank line +dotnet_diagnostic.SA1514.severity = none + +# SA1515: Single-line comment should be preceded by blank line +dotnet_diagnostic.SA1515.severity = none + +# SA1516: Elements should be separated by blank line +dotnet_diagnostic.SA1516.severity = none + +# SA1517: Code should not contain blank lines 
at start of file +dotnet_diagnostic.SA1517.severity = warning + +# SA1518: Code should not contain blank lines at the end of the file +dotnet_diagnostic.SA1518.severity = warning + +# SA1519: Braces should not be omitted from multi-line child statement +dotnet_diagnostic.SA1519.severity = none + +# SA1520: Use braces consistently +dotnet_diagnostic.SA1520.severity = none + +# SA1600: Elements should be documented +dotnet_diagnostic.SA1600.severity = none + +# SA1601: Partial elements should be documented +dotnet_diagnostic.SA1601.severity = none + +# SA1602: Enumeration items should be documented +dotnet_diagnostic.SA1602.severity = none + +# SA1604: Element documentation should have summary +dotnet_diagnostic.SA1604.severity = none + +# SA1605: Partial element documentation should have summary +dotnet_diagnostic.SA1605.severity = none + +# SA1606: Element documentation should have summary text +dotnet_diagnostic.SA1606.severity = none + +# SA1608: Element documentation should not have default summary +dotnet_diagnostic.SA1608.severity = none + +# SA1610: Property documentation should have value text +dotnet_diagnostic.SA1610.severity = none + +# SA1611: The documentation for parameter 'message' is missing +dotnet_diagnostic.SA1611.severity = none + +# SA1612: The parameter documentation is at incorrect position +dotnet_diagnostic.SA1612.severity = none + +# SA1614: Element parameter documentation should have text +dotnet_diagnostic.SA1614.severity = none + +# SA1615: Element return value should be documented +dotnet_diagnostic.SA1615.severity = none + +# SA1616: Element return value documentation should have text +dotnet_diagnostic.SA1616.severity = none + +# SA1618: The documentation for type parameter is missing +dotnet_diagnostic.SA1618.severity = none + +# SA1619: The documentation for type parameter is missing +dotnet_diagnostic.SA1619.severity = none + +# SA1622: Generic type parameter documentation should have text +dotnet_diagnostic.SA1622.severity = none 
+ +# SA1623: Property documentation text +dotnet_diagnostic.SA1623.severity = none + +# SA1624: Because the property only contains a visible get accessor, the documentation summary text should begin with 'Gets' +dotnet_diagnostic.SA1624.severity = none + +# SA1625: Element documentation should not be copied and pasted +dotnet_diagnostic.SA1625.severity = none + +# SA1626: Single-line comments should not use documentation style slashes +dotnet_diagnostic.SA1626.severity = none + +# SA1627: The documentation text within the \'exception\' tag should not be empty +dotnet_diagnostic.SA1627.severity = none + +# SA1629: Documentation text should end with a period +dotnet_diagnostic.SA1629.severity = none + +# SA1633: File should have header +dotnet_diagnostic.SA1633.severity = none + +# SA1642: Constructor summary documentation should begin with standard text +dotnet_diagnostic.SA1642.severity = none + +# SA1643: Destructor summary documentation should begin with standard text +dotnet_diagnostic.SA1643.severity = none + +# SA1649: File name should match first type name +dotnet_diagnostic.SA1649.severity = none + +# IDE0001: Simplify name +dotnet_diagnostic.IDE0001.severity = suggestion + +# IDE0002: Simplify member access +dotnet_diagnostic.IDE0002.severity = suggestion + +# IDE0003: Remove this or Me qualification +dotnet_diagnostic.IDE0003.severity = suggestion + +# IDE0004: Remove Unnecessary Cast +dotnet_diagnostic.IDE0004.severity = suggestion + +# IDE0005: Using directive is unnecessary. 
+dotnet_diagnostic.IDE0005.severity = suggestion + +# IDE0007: Use implicit type +dotnet_diagnostic.IDE0007.severity = silent + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = suggestion + +# IDE0009: Add this or Me qualification +dotnet_diagnostic.IDE0009.severity = silent + +# IDE0010: Add missing cases +dotnet_diagnostic.IDE0010.severity = silent + +# IDE0011: Add braces +dotnet_diagnostic.IDE0011.severity = silent + +# IDE0016: Use 'throw' expression +dotnet_diagnostic.IDE0016.severity = silent + +# IDE0017: Simplify object initialization +dotnet_diagnostic.IDE0017.severity = suggestion + +# IDE0018: Inline variable declaration +dotnet_diagnostic.IDE0018.severity = suggestion + +# IDE0019: Use pattern matching to avoid as followed by a null check +dotnet_diagnostic.IDE0019.severity = suggestion + +# IDE0020: Use pattern matching to avoid is check followed by a cast (with variable) +dotnet_diagnostic.IDE0020.severity = suggestion + +# IDE0021: Use expression body for constructors +dotnet_diagnostic.IDE0021.severity = silent + +# IDE0022: Use expression body for methods +dotnet_diagnostic.IDE0022.severity = silent + +# IDE0023: Use expression body for conversion operators +dotnet_diagnostic.IDE0023.severity = silent + +# IDE0024: Use expression body for operators +dotnet_diagnostic.IDE0024.severity = silent + +# IDE0025: Use expression body for properties +dotnet_diagnostic.IDE0025.severity = silent + +# IDE0026: Use expression body for indexers +dotnet_diagnostic.IDE0026.severity = silent + +# IDE0027: Use expression body for accessors +dotnet_diagnostic.IDE0027.severity = silent + +# IDE0028: Simplify collection initialization +dotnet_diagnostic.IDE0028.severity = suggestion + +# IDE0029: Use coalesce expression +dotnet_diagnostic.IDE0029.severity = suggestion + +# IDE0030: Use coalesce expression +dotnet_diagnostic.IDE0030.severity = suggestion + +# IDE0031: Use null propagation +dotnet_diagnostic.IDE0031.severity = silent + +# IDE0032: Use auto 
property +dotnet_diagnostic.IDE0032.severity = silent + +# IDE0033: Use explicitly provided tuple name +dotnet_diagnostic.IDE0033.severity = suggestion + +# IDE0034: Simplify 'default' expression +dotnet_diagnostic.IDE0034.severity = suggestion + +# IDE0035: Remove unreachable code +dotnet_diagnostic.IDE0035.severity = suggestion + +# IDE0036: Order modifiers +dotnet_diagnostic.IDE0036.severity = suggestion + +# IDE0037: Use inferred member name +dotnet_diagnostic.IDE0037.severity = silent + +# IDE0038: Use pattern matching to avoid is check followed by a cast (without variable) +dotnet_diagnostic.IDE0038.severity = suggestion + +# IDE0039: Use local function +dotnet_diagnostic.IDE0039.severity = suggestion + +# IDE0040: Add accessibility modifiers +dotnet_diagnostic.IDE0040.severity = suggestion + +# IDE0041: Use 'is null' check +dotnet_diagnostic.IDE0041.severity = warning + +# IDE0042: Deconstruct variable declaration +dotnet_diagnostic.IDE0042.severity = silent + +# IDE0043: Invalid format string +dotnet_diagnostic.IDE0043.severity = warning + +# IDE0044: Add readonly modifier +dotnet_diagnostic.IDE0044.severity = suggestion + +# IDE0045: Use conditional expression for assignment +dotnet_diagnostic.IDE0045.severity = suggestion + +# IDE0046: Use conditional expression for return +dotnet_diagnostic.IDE0046.severity = suggestion + +# IDE0047: Remove unnecessary parentheses +dotnet_diagnostic.IDE0047.severity = silent + +# IDE0048: Add parentheses for clarity +dotnet_diagnostic.IDE0048.severity = silent + +# IDE0049: Use language keywords instead of framework type names for type references +dotnet_diagnostic.IDE0049.severity = warning + +# IDE0050: Convert anonymous type to tuple +dotnet_diagnostic.IDE0050.severity = suggestion + +# IDE0051: Remove unused private members +dotnet_diagnostic.IDE0051.severity = suggestion + +# IDE0052: Remove unread private members +dotnet_diagnostic.IDE0052.severity = suggestion + +# IDE0053: Use expression body for lambdas 
+dotnet_diagnostic.IDE0053.severity = silent + +# IDE0054: Use compound assignment +dotnet_diagnostic.IDE0054.severity = suggestion + +# IDE0055: Fix formatting +dotnet_diagnostic.IDE0055.severity = suggestion + +# IDE0056: Use index operator +dotnet_diagnostic.IDE0056.severity = suggestion + +# IDE0057: Use range operator +dotnet_diagnostic.IDE0057.severity = suggestion + +# IDE0058: Expression value is never used +dotnet_diagnostic.IDE0058.severity = silent + +# IDE0059: Unnecessary assignment of a value +dotnet_diagnostic.IDE0059.severity = warning + +# IDE0060: Remove unused parameter +dotnet_diagnostic.IDE0060.severity = silent + +# IDE0061: Use expression body for local functions +dotnet_diagnostic.IDE0061.severity = silent + +# IDE0062: Make local function 'static' +dotnet_diagnostic.IDE0062.severity = warning + +# IDE0063: Use simple 'using' statement +dotnet_diagnostic.IDE0063.severity = silent + +# IDE0064: Make readonly fields writable +dotnet_diagnostic.IDE0064.severity = silent + +# IDE0065: Misplaced using directive +dotnet_diagnostic.IDE0065.severity = suggestion + +# IDE0066: Convert switch statement to expression +dotnet_diagnostic.IDE0066.severity = suggestion + +# IDE0070: Use 'System.HashCode' +dotnet_diagnostic.IDE0070.severity = suggestion + +# IDE0071: Simplify interpolation +dotnet_diagnostic.IDE0071.severity = suggestion + +# IDE0072: Add missing cases +dotnet_diagnostic.IDE0072.severity = silent + +# IDE0073: The file header is missing or not located at the top of the file +dotnet_diagnostic.IDE0073.severity = warning + +# IDE0074: Use compound assignment +dotnet_diagnostic.IDE0074.severity = suggestion + +# IDE0075: Simplify conditional expression +dotnet_diagnostic.IDE0075.severity = silent + +# IDE0076: Invalid global 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0076.severity = warning + +# IDE0077: Avoid legacy format target in 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0077.severity = silent + +# IDE0078: Use pattern 
matching +dotnet_diagnostic.IDE0078.severity = suggestion + +# IDE0079: Remove unnecessary suppression +dotnet_diagnostic.IDE0079.severity = suggestion + +# IDE0080: Remove unnecessary suppression operator +dotnet_diagnostic.IDE0080.severity = warning + +# IDE0081: Remove ByVal +dotnet_diagnostic.IDE0081.severity = none + +# IDE0082: 'typeof' can be converted to 'nameof' +dotnet_diagnostic.IDE0082.severity = warning + +# IDE0083: Use pattern matching +dotnet_diagnostic.IDE0083.severity = silent + +# IDE0084: Use pattern matching (IsNot operator) +dotnet_diagnostic.IDE0084.severity = none + +# IDE0090: Use 'new(...)' +dotnet_diagnostic.IDE0090.severity = silent + +# IDE0100: Remove redundant equality +dotnet_diagnostic.IDE0100.severity = suggestion + +# IDE0110: Remove unnecessary discard +dotnet_diagnostic.IDE0110.severity = suggestion + +# IDE0120: Simplify LINQ expression +dotnet_diagnostic.IDE0120.severity = none + +# IDE0130: Namespace does not match folder structure +dotnet_diagnostic.IDE0130.severity = silent + +# IDE0140: Simplify object creation +dotnet_diagnostic.IDE0140.severity = none + +# IDE0150: Prefer 'null' check over type check +dotnet_diagnostic.IDE0150.severity = silent + +# IDE0160: Convert to block scoped namespace +dotnet_diagnostic.IDE0160.severity = silent + +# IDE0161: Convert to file-scoped namespace +dotnet_diagnostic.IDE0161.severity = silent + +# IDE1005: Delegate invocation can be simplified. 
+dotnet_diagnostic.IDE1005.severity = suggestion + +# IDE1006: Naming styles +dotnet_diagnostic.IDE1006.severity = silent + +# IDE2000: Allow multiple blank lines +dotnet_diagnostic.IDE2000.severity = silent + +# IDE2001: Embedded statements must be on their own line +dotnet_diagnostic.IDE2001.severity = silent + +# IDE2002: Consecutive braces must not have blank line between them +dotnet_diagnostic.IDE2002.severity = silent + +# IDE2003: Allow statement immediately after block +dotnet_diagnostic.IDE2003.severity = silent + +# IDE2004: Blank line not allowed after constructor initializer colon +dotnet_diagnostic.IDE2004.severity = silent + +# !!! OVERRIDES +# !!! Note: It is preferred to minimize the overrides if possible (just to follow the MS dotnet convention as much as possible) + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = none diff --git a/lang/csharp/CodeAnalysis.test.globalconfig b/lang/csharp/CodeAnalysis.test.globalconfig new file mode 100644 index 00000000000..ffb541fa360 --- /dev/null +++ b/lang/csharp/CodeAnalysis.test.globalconfig @@ -0,0 +1,1729 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !!! Original: https://github.com/dotnet/runtime/blob/main/eng/CodeAnalysis.test.globalconfig +# !!! 
Any overrides should be added to the end of the file + +is_global = true + +# AD0001: Analyzer threw an exception +dotnet_diagnostic.AD0001.severity = none + +# BCL0001: Ensure minimum API surface is respected +dotnet_diagnostic.BCL0001.severity = none + +# BCL0010: AppContext default value expected to be true +dotnet_diagnostic.BCL0010.severity = none + +# BCL0011: AppContext default value defined in if statement with incorrect pattern +dotnet_diagnostic.BCL0011.severity = none + +# BCL0012: AppContext default value defined in if statement at root of switch case +dotnet_diagnostic.BCL0012.severity = none + +# BCL0015: Invalid P/Invoke call +dotnet_diagnostic.BCL0015.severity = none + +# BCL0020: Invalid SR.Format call +dotnet_diagnostic.BCL0020.severity = none + +# CA1000: Do not declare static members on generic types +dotnet_diagnostic.CA1000.severity = none + +# CA1001: Types that own disposable fields should be disposable +dotnet_diagnostic.CA1001.severity = none + +# CA1002: Do not expose generic lists +dotnet_diagnostic.CA1002.severity = none + +# CA1003: Use generic event handler instances +dotnet_diagnostic.CA1003.severity = none + +# CA1005: Avoid excessive parameters on generic types +dotnet_diagnostic.CA1005.severity = none + +# CA1008: Enums should have zero value +dotnet_diagnostic.CA1008.severity = none + +# CA1010: Generic interface should also be implemented +dotnet_diagnostic.CA1010.severity = none + +# CA1012: Abstract types should not have public constructors +dotnet_diagnostic.CA1012.severity = none + +# CA1014: Mark assemblies with CLSCompliant +dotnet_diagnostic.CA1014.severity = none + +# CA1016: Mark assemblies with assembly version +dotnet_diagnostic.CA1016.severity = none + +# CA1017: Mark assemblies with ComVisible +dotnet_diagnostic.CA1017.severity = none + +# CA1018: Mark attributes with AttributeUsageAttribute +dotnet_diagnostic.CA1018.severity = none + +# CA1019: Define accessors for attribute arguments 
+dotnet_diagnostic.CA1019.severity = none + +# CA1021: Avoid out parameters +dotnet_diagnostic.CA1021.severity = none + +# CA1024: Use properties where appropriate +dotnet_diagnostic.CA1024.severity = none + +# CA1027: Mark enums with FlagsAttribute +dotnet_diagnostic.CA1027.severity = none + +# CA1028: Enum Storage should be Int32 +dotnet_diagnostic.CA1028.severity = none + +# CA1030: Use events where appropriate +dotnet_diagnostic.CA1030.severity = none + +# CA1031: Do not catch general exception types +dotnet_diagnostic.CA1031.severity = none + +# CA1032: Implement standard exception constructors +dotnet_diagnostic.CA1032.severity = none + +# CA1033: Interface methods should be callable by child types +dotnet_diagnostic.CA1033.severity = none + +# CA1034: Nested types should not be visible +dotnet_diagnostic.CA1034.severity = none + +# CA1036: Override methods on comparable types +dotnet_diagnostic.CA1036.severity = none + +# CA1040: Avoid empty interfaces +dotnet_diagnostic.CA1040.severity = none + +# CA1041: Provide ObsoleteAttribute message +dotnet_diagnostic.CA1041.severity = none + +# CA1043: Use Integral Or String Argument For Indexers +dotnet_diagnostic.CA1043.severity = none + +# CA1044: Properties should not be write only +dotnet_diagnostic.CA1044.severity = none + +# CA1045: Do not pass types by reference +dotnet_diagnostic.CA1045.severity = none + +# CA1046: Do not overload equality operator on reference types +dotnet_diagnostic.CA1046.severity = none + +# CA1047: Do not declare protected member in sealed type +dotnet_diagnostic.CA1047.severity = none + +# CA1050: Declare types in namespaces +dotnet_diagnostic.CA1050.severity = none + +# CA1051: Do not declare visible instance fields +dotnet_diagnostic.CA1051.severity = none + +# CA1052: Static holder types should be Static or NotInheritable +dotnet_diagnostic.CA1052.severity = none + +# CA1054: URI-like parameters should not be strings +dotnet_diagnostic.CA1054.severity = none + +# CA1055: URI-like 
return values should not be strings +dotnet_diagnostic.CA1055.severity = none + +# CA1056: URI-like properties should not be strings +dotnet_diagnostic.CA1056.severity = none + +# CA1058: Types should not extend certain base types +dotnet_diagnostic.CA1058.severity = none + +# CA1060: Move pinvokes to native methods class +dotnet_diagnostic.CA1060.severity = none + +# CA1061: Do not hide base class methods +dotnet_diagnostic.CA1061.severity = none + +# CA1062: Validate arguments of public methods +dotnet_diagnostic.CA1062.severity = none + +# CA1063: Implement IDisposable Correctly +dotnet_diagnostic.CA1063.severity = none + +# CA1064: Exceptions should be public +dotnet_diagnostic.CA1064.severity = none + +# CA1065: Do not raise exceptions in unexpected locations +dotnet_diagnostic.CA1065.severity = none + +# CA1066: Implement IEquatable when overriding Object.Equals +dotnet_diagnostic.CA1066.severity = none + +# CA1067: Override Object.Equals(object) when implementing IEquatable +dotnet_diagnostic.CA1067.severity = none + +# CA1068: CancellationToken parameters must come last +dotnet_diagnostic.CA1068.severity = none + +# CA1069: Enums values should not be duplicated +dotnet_diagnostic.CA1069.severity = none + +# CA1070: Do not declare event fields as virtual +dotnet_diagnostic.CA1070.severity = none + +# CA1200: Avoid using cref tags with a prefix +dotnet_diagnostic.CA1200.severity = none + +# CA1303: Do not pass literals as localized parameters +dotnet_diagnostic.CA1303.severity = none + +# CA1304: Specify CultureInfo +dotnet_diagnostic.CA1304.severity = none + +# CA1305: Specify IFormatProvider +dotnet_diagnostic.CA1305.severity = none + +# CA1307: Specify StringComparison for clarity +dotnet_diagnostic.CA1307.severity = none + +# CA1308: Normalize strings to uppercase +dotnet_diagnostic.CA1308.severity = none + +# CA1309: Use ordinal string comparison +dotnet_diagnostic.CA1309.severity = none + +# CA1310: Specify StringComparison for correctness 
+dotnet_diagnostic.CA1310.severity = none + +# CA1401: P/Invokes should not be visible +dotnet_diagnostic.CA1401.severity = none + +# CA1416: Validate platform compatibility +dotnet_diagnostic.CA1416.severity = none + +# CA1417: Do not use 'OutAttribute' on string parameters for P/Invokes +dotnet_diagnostic.CA1417.severity = none + +# CA1418: Use valid platform string +dotnet_diagnostic.CA1418.severity = none + +# CA1419: Provide a parameterless constructor that is as visible as the containing type for concrete types derived from 'System.Runtime.InteropServices.SafeHandle' +dotnet_diagnostic.CA1419.severity = none + +# CA1501: Avoid excessive inheritance +dotnet_diagnostic.CA1501.severity = none + +# CA1502: Avoid excessive complexity +dotnet_diagnostic.CA1502.severity = none + +# CA1505: Avoid unmaintainable code +dotnet_diagnostic.CA1505.severity = none + +# CA1506: Avoid excessive class coupling +dotnet_diagnostic.CA1506.severity = none + +# CA1507: Use nameof to express symbol names +dotnet_diagnostic.CA1507.severity = none + +# CA1508: Avoid dead conditional code +dotnet_diagnostic.CA1508.severity = none + +# CA1509: Invalid entry in code metrics rule specification file +dotnet_diagnostic.CA1509.severity = none + +# CA1700: Do not name enum values 'Reserved' +dotnet_diagnostic.CA1700.severity = none + +# CA1707: Identifiers should not contain underscores +dotnet_diagnostic.CA1707.severity = none + +# CA1708: Identifiers should differ by more than case +dotnet_diagnostic.CA1708.severity = none + +# CA1710: Identifiers should have correct suffix +dotnet_diagnostic.CA1710.severity = none + +# CA1711: Identifiers should not have incorrect suffix +dotnet_diagnostic.CA1711.severity = none + +# CA1712: Do not prefix enum values with type name +dotnet_diagnostic.CA1712.severity = none + +# CA1713: Events should not have 'Before' or 'After' prefix +dotnet_diagnostic.CA1713.severity = none + +# CA1715: Identifiers should have correct prefix 
+dotnet_diagnostic.CA1715.severity = none + +# CA1716: Identifiers should not match keywords +dotnet_diagnostic.CA1716.severity = none + +# CA1720: Identifier contains type name +dotnet_diagnostic.CA1720.severity = none + +# CA1721: Property names should not match get methods +dotnet_diagnostic.CA1721.severity = none + +# CA1724: Type names should not match namespaces +dotnet_diagnostic.CA1724.severity = none + +# CA1725: Parameter names should match base declaration +dotnet_diagnostic.CA1725.severity = none + +# CA1727: Use PascalCase for named placeholders +dotnet_diagnostic.CA1727.severity = none + +# CA1802: Use literals where appropriate +dotnet_diagnostic.CA1802.severity = none + +# CA1805: Do not initialize unnecessarily +dotnet_diagnostic.CA1805.severity = none + +# CA1806: Do not ignore method results +dotnet_diagnostic.CA1806.severity = none + +# CA1810: Initialize reference type static fields inline +dotnet_diagnostic.CA1810.severity = none + +# CA1812: Avoid uninstantiated internal classes +dotnet_diagnostic.CA1812.severity = none + +# CA1813: Avoid unsealed attributes +dotnet_diagnostic.CA1813.severity = none + +# CA1814: Prefer jagged arrays over multidimensional +dotnet_diagnostic.CA1814.severity = none + +# CA1815: Override equals and operator equals on value types +dotnet_diagnostic.CA1815.severity = none + +# CA1816: Dispose methods should call SuppressFinalize +dotnet_diagnostic.CA1816.severity = none + +# CA1819: Properties should not return arrays +dotnet_diagnostic.CA1819.severity = none + +# CA1820: Test for empty strings using string length +dotnet_diagnostic.CA1820.severity = none + +# CA1821: Remove empty Finalizers +dotnet_diagnostic.CA1821.severity = none + +# CA1822: Mark members as static +dotnet_diagnostic.CA1822.severity = none + +# CA1823: Avoid unused private fields +dotnet_diagnostic.CA1823.severity = none + +# CA1824: Mark assemblies with NeutralResourcesLanguageAttribute +dotnet_diagnostic.CA1824.severity = none + +# CA1825: 
Avoid zero-length array allocations. +dotnet_diagnostic.CA1825.severity = none + +# CA1826: Do not use Enumerable methods on indexable collections +dotnet_diagnostic.CA1826.severity = none + +# CA1827: Do not use Count() or LongCount() when Any() can be used +dotnet_diagnostic.CA1827.severity = none + +# CA1828: Do not use CountAsync() or LongCountAsync() when AnyAsync() can be used +dotnet_diagnostic.CA1828.severity = none + +# CA1829: Use Length/Count property instead of Count() when available +dotnet_diagnostic.CA1829.severity = none + +# CA1830: Prefer strongly-typed Append and Insert method overloads on StringBuilder +dotnet_diagnostic.CA1830.severity = none + +# CA1831: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1831.severity = none + +# CA1832: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1832.severity = none + +# CA1833: Use AsSpan or AsMemory instead of Range-based indexers when appropriate +dotnet_diagnostic.CA1833.severity = none + +# CA1834: Consider using 'StringBuilder.Append(char)' when applicable +dotnet_diagnostic.CA1834.severity = none + +# CA1835: Prefer the 'Memory'-based overloads for 'ReadAsync' and 'WriteAsync' +dotnet_diagnostic.CA1835.severity = none + +# CA1836: Prefer IsEmpty over Count +dotnet_diagnostic.CA1836.severity = none + +# CA1837: Use 'Environment.ProcessId' +dotnet_diagnostic.CA1837.severity = none + +# CA1838: Avoid 'StringBuilder' parameters for P/Invokes +dotnet_diagnostic.CA1838.severity = none + +# CA1839: Use 'Environment.ProcessPath' +dotnet_diagnostic.CA1839.severity = none + +# CA1840: Use 'Environment.CurrentManagedThreadId' +dotnet_diagnostic.CA1840.severity = none + +# CA1841: Prefer Dictionary.Contains methods +dotnet_diagnostic.CA1841.severity = none + +# CA1842: Do not use 'WhenAll' with a single task +dotnet_diagnostic.CA1842.severity = none + +# CA1843: Do not use 'WaitAll' with a single task 
+dotnet_diagnostic.CA1843.severity = none + +# CA1844: Provide memory-based overrides of async methods when subclassing 'Stream' +dotnet_diagnostic.CA1844.severity = none + +# CA1845: Use span-based 'string.Concat' +dotnet_diagnostic.CA1845.severity = none + +# CA1846: Prefer 'AsSpan' over 'Substring' +dotnet_diagnostic.CA1846.severity = none + +# CA1847: Use char literal for a single character lookup +dotnet_diagnostic.CA1847.severity = none + +# CA1848: Use the LoggerMessage delegates +dotnet_diagnostic.CA1848.severity = none + +# CA1849: Call async methods when in an async method +dotnet_diagnostic.CA1849.severity = none + +# CA1850: Prefer static 'HashData' method over 'ComputeHash' +dotnet_diagnostic.CA1850.severity = none + +# CA2000: Dispose objects before losing scope +dotnet_diagnostic.CA2000.severity = none + +# CA2002: Do not lock on objects with weak identity +dotnet_diagnostic.CA2002.severity = none + +# CA2007: Consider calling ConfigureAwait on the awaited task +dotnet_diagnostic.CA2007.severity = none + +# CA2008: Do not create tasks without passing a TaskScheduler +dotnet_diagnostic.CA2008.severity = none + +# CA2009: Do not call ToImmutableCollection on an ImmutableCollection value +dotnet_diagnostic.CA2009.severity = none + +# CA2011: Avoid infinite recursion +dotnet_diagnostic.CA2011.severity = none + +# CA2012: Use ValueTasks correctly +dotnet_diagnostic.CA2012.severity = none + +# CA2013: Do not use ReferenceEquals with value types +dotnet_diagnostic.CA2013.severity = none + +# CA2014: Do not use stackalloc in loops. 
+dotnet_diagnostic.CA2014.severity = none + +# CA2015: Do not define finalizers for types derived from MemoryManager +dotnet_diagnostic.CA2015.severity = none + +# CA2016: Forward the 'CancellationToken' parameter to methods +dotnet_diagnostic.CA2016.severity = none + +# CA2017: Parameter count mismatch +dotnet_diagnostic.CA2017.severity = warning + +# CA2018: 'Buffer.BlockCopy' expects the number of bytes to be copied for the 'count' argument +dotnet_diagnostic.CA2018.severity = warning + +# CA2100: Review SQL queries for security vulnerabilities +dotnet_diagnostic.CA2100.severity = none + +# CA2101: Specify marshaling for P/Invoke string arguments +dotnet_diagnostic.CA2101.severity = none + +# CA2109: Review visible event handlers +dotnet_diagnostic.CA2109.severity = none + +# CA2119: Seal methods that satisfy private interfaces +dotnet_diagnostic.CA2119.severity = none + +# CA2153: Do Not Catch Corrupted State Exceptions +dotnet_diagnostic.CA2153.severity = none + +# CA2200: Rethrow to preserve stack details +dotnet_diagnostic.CA2200.severity = none + +# CA2201: Do not raise reserved exception types +dotnet_diagnostic.CA2201.severity = none + +# CA2207: Initialize value type static fields inline +dotnet_diagnostic.CA2207.severity = none + +# CA2208: Instantiate argument exceptions correctly +dotnet_diagnostic.CA2208.severity = none + +# CA2211: Non-constant fields should not be visible +dotnet_diagnostic.CA2211.severity = none + +# CA2213: Disposable fields should be disposed +dotnet_diagnostic.CA2213.severity = none + +# CA2214: Do not call overridable methods in constructors +dotnet_diagnostic.CA2214.severity = none + +# CA2215: Dispose methods should call base class dispose +dotnet_diagnostic.CA2215.severity = none + +# CA2216: Disposable types should declare finalizer +dotnet_diagnostic.CA2216.severity = none + +# CA2217: Do not mark enums with FlagsAttribute +dotnet_diagnostic.CA2217.severity = none + +# CA2218: Override GetHashCode on overriding Equals 
+dotnet_diagnostic.CA2218.severity = none + +# CA2219: Do not raise exceptions in finally clauses +dotnet_diagnostic.CA2219.severity = none + +# CA2224: Override Equals on overloading operator equals +dotnet_diagnostic.CA2224.severity = none + +# CA2225: Operator overloads have named alternates +dotnet_diagnostic.CA2225.severity = none + +# CA2226: Operators should have symmetrical overloads +dotnet_diagnostic.CA2226.severity = none + +# CA2227: Collection properties should be read only +dotnet_diagnostic.CA2227.severity = none + +# CA2229: Implement serialization constructors +dotnet_diagnostic.CA2229.severity = none + +# CA2231: Overload operator equals on overriding value type Equals +dotnet_diagnostic.CA2231.severity = none + +# CA2234: Pass system uri objects instead of strings +dotnet_diagnostic.CA2234.severity = none + +# CA2235: Mark all non-serializable fields +dotnet_diagnostic.CA2235.severity = none + +# CA2237: Mark ISerializable types with serializable +dotnet_diagnostic.CA2237.severity = none + +# CA2241: Provide correct arguments to formatting methods +dotnet_diagnostic.CA2241.severity = none + +# CA2242: Test for NaN correctly +dotnet_diagnostic.CA2242.severity = none + +# CA2243: Attribute string literals should parse correctly +dotnet_diagnostic.CA2243.severity = none + +# CA2244: Do not duplicate indexed element initializations +dotnet_diagnostic.CA2244.severity = none + +# CA2245: Do not assign a property to itself +dotnet_diagnostic.CA2245.severity = none + +# CA2246: Assigning symbol and its member in the same statement +dotnet_diagnostic.CA2246.severity = none + +# CA2247: Argument passed to TaskCompletionSource constructor should be TaskCreationOptions enum instead of TaskContinuationOptions enum +dotnet_diagnostic.CA2247.severity = none + +# CA2248: Provide correct 'enum' argument to 'Enum.HasFlag' +dotnet_diagnostic.CA2248.severity = none + +# CA2249: Consider using 'string.Contains' instead of 'string.IndexOf' 
+dotnet_diagnostic.CA2249.severity = none + +# CA2250: Use 'ThrowIfCancellationRequested' +dotnet_diagnostic.CA2250.severity = none + +# CA2251: Use 'string.Equals' +dotnet_diagnostic.CA2251.severity = none + +# CA2252: This API requires opting into preview features +dotnet_diagnostic.CA2252.severity = error + +# CA2253: Named placeholders should not be numeric values +dotnet_diagnostic.CA2253.severity = none + +# CA2254: Template should be a static expression +dotnet_diagnostic.CA2254.severity = none + +# CA2255: The 'ModuleInitializer' attribute should not be used in libraries +dotnet_diagnostic.CA2255.severity = warning + +# CA2256: All members declared in parent interfaces must have an implementation in a DynamicInterfaceCastableImplementation-attributed interface +dotnet_diagnostic.CA2256.severity = warning + +# CA2257: Members defined on an interface with the 'DynamicInterfaceCastableImplementationAttribute' should be 'static' +dotnet_diagnostic.CA2257.severity = warning + +# CA2258: Providing a 'DynamicInterfaceCastableImplementation' interface in Visual Basic is unsupported +dotnet_diagnostic.CA2258.severity = warning + +# CA2300: Do not use insecure deserializer BinaryFormatter +dotnet_diagnostic.CA2300.severity = none + +# CA2301: Do not call BinaryFormatter.Deserialize without first setting BinaryFormatter.Binder +dotnet_diagnostic.CA2301.severity = none + +# CA2302: Ensure BinaryFormatter.Binder is set before calling BinaryFormatter.Deserialize +dotnet_diagnostic.CA2302.severity = none + +# CA2305: Do not use insecure deserializer LosFormatter +dotnet_diagnostic.CA2305.severity = none + +# CA2310: Do not use insecure deserializer NetDataContractSerializer +dotnet_diagnostic.CA2310.severity = none + +# CA2311: Do not deserialize without first setting NetDataContractSerializer.Binder +dotnet_diagnostic.CA2311.severity = none + +# CA2312: Ensure NetDataContractSerializer.Binder is set before deserializing +dotnet_diagnostic.CA2312.severity = none + +# 
CA2315: Do not use insecure deserializer ObjectStateFormatter +dotnet_diagnostic.CA2315.severity = none + +# CA2321: Do not deserialize with JavaScriptSerializer using a SimpleTypeResolver +dotnet_diagnostic.CA2321.severity = none + +# CA2322: Ensure JavaScriptSerializer is not initialized with SimpleTypeResolver before deserializing +dotnet_diagnostic.CA2322.severity = none + +# CA2326: Do not use TypeNameHandling values other than None +dotnet_diagnostic.CA2326.severity = none + +# CA2327: Do not use insecure JsonSerializerSettings +dotnet_diagnostic.CA2327.severity = none + +# CA2328: Ensure that JsonSerializerSettings are secure +dotnet_diagnostic.CA2328.severity = none + +# CA2329: Do not deserialize with JsonSerializer using an insecure configuration +dotnet_diagnostic.CA2329.severity = none + +# CA2330: Ensure that JsonSerializer has a secure configuration when deserializing +dotnet_diagnostic.CA2330.severity = none + +# CA2350: Do not use DataTable.ReadXml() with untrusted data +dotnet_diagnostic.CA2350.severity = none + +# CA2351: Do not use DataSet.ReadXml() with untrusted data +dotnet_diagnostic.CA2351.severity = none + +# CA2352: Unsafe DataSet or DataTable in serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2352.severity = none + +# CA2353: Unsafe DataSet or DataTable in serializable type +dotnet_diagnostic.CA2353.severity = none + +# CA2354: Unsafe DataSet or DataTable in deserialized object graph can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2354.severity = none + +# CA2355: Unsafe DataSet or DataTable type found in deserializable object graph +dotnet_diagnostic.CA2355.severity = none + +# CA2356: Unsafe DataSet or DataTable type in web deserializable object graph +dotnet_diagnostic.CA2356.severity = none + +# CA2361: Ensure auto-generated class containing DataSet.ReadXml() is not used with untrusted data +dotnet_diagnostic.CA2361.severity = none + +# CA2362: Unsafe DataSet or 
DataTable in auto-generated serializable type can be vulnerable to remote code execution attacks +dotnet_diagnostic.CA2362.severity = none + +# CA3001: Review code for SQL injection vulnerabilities +dotnet_diagnostic.CA3001.severity = none + +# CA3002: Review code for XSS vulnerabilities +dotnet_diagnostic.CA3002.severity = none + +# CA3003: Review code for file path injection vulnerabilities +dotnet_diagnostic.CA3003.severity = none + +# CA3004: Review code for information disclosure vulnerabilities +dotnet_diagnostic.CA3004.severity = none + +# CA3005: Review code for LDAP injection vulnerabilities +dotnet_diagnostic.CA3005.severity = none + +# CA3006: Review code for process command injection vulnerabilities +dotnet_diagnostic.CA3006.severity = none + +# CA3007: Review code for open redirect vulnerabilities +dotnet_diagnostic.CA3007.severity = none + +# CA3008: Review code for XPath injection vulnerabilities +dotnet_diagnostic.CA3008.severity = none + +# CA3009: Review code for XML injection vulnerabilities +dotnet_diagnostic.CA3009.severity = none + +# CA3010: Review code for XAML injection vulnerabilities +dotnet_diagnostic.CA3010.severity = none + +# CA3011: Review code for DLL injection vulnerabilities +dotnet_diagnostic.CA3011.severity = none + +# CA3012: Review code for regex injection vulnerabilities +dotnet_diagnostic.CA3012.severity = none + +# CA3061: Do Not Add Schema By URL +dotnet_diagnostic.CA3061.severity = none + +# CA3075: Insecure DTD processing in XML +dotnet_diagnostic.CA3075.severity = none + +# CA3076: Insecure XSLT script processing. 
+dotnet_diagnostic.CA3076.severity = none + +# CA3077: Insecure Processing in API Design, XmlDocument and XmlTextReader +dotnet_diagnostic.CA3077.severity = none + +# CA3147: Mark Verb Handlers With Validate Antiforgery Token +dotnet_diagnostic.CA3147.severity = none + +# CA5350: Do Not Use Weak Cryptographic Algorithms +dotnet_diagnostic.CA5350.severity = none + +# CA5351: Do Not Use Broken Cryptographic Algorithms +dotnet_diagnostic.CA5351.severity = none + +# CA5358: Review cipher mode usage with cryptography experts +dotnet_diagnostic.CA5358.severity = none + +# CA5359: Do Not Disable Certificate Validation +dotnet_diagnostic.CA5359.severity = none + +# CA5360: Do Not Call Dangerous Methods In Deserialization +dotnet_diagnostic.CA5360.severity = none + +# CA5361: Do Not Disable SChannel Use of Strong Crypto +dotnet_diagnostic.CA5361.severity = none + +# CA5362: Potential reference cycle in deserialized object graph +dotnet_diagnostic.CA5362.severity = none + +# CA5363: Do Not Disable Request Validation +dotnet_diagnostic.CA5363.severity = none + +# CA5364: Do Not Use Deprecated Security Protocols +dotnet_diagnostic.CA5364.severity = none + +# CA5365: Do Not Disable HTTP Header Checking +dotnet_diagnostic.CA5365.severity = none + +# CA5366: Use XmlReader for 'DataSet.ReadXml()' +dotnet_diagnostic.CA5366.severity = none + +# CA5367: Do Not Serialize Types With Pointer Fields +dotnet_diagnostic.CA5367.severity = none + +# CA5368: Set ViewStateUserKey For Classes Derived From Page +dotnet_diagnostic.CA5368.severity = none + +# CA5369: Use XmlReader for 'XmlSerializer.Deserialize()' +dotnet_diagnostic.CA5369.severity = none + +# CA5370: Use XmlReader for XmlValidatingReader constructor +dotnet_diagnostic.CA5370.severity = none + +# CA5371: Use XmlReader for 'XmlSchema.Read()' +dotnet_diagnostic.CA5371.severity = none + +# CA5372: Use XmlReader for XPathDocument constructor +dotnet_diagnostic.CA5372.severity = none + +# CA5373: Do not use obsolete key derivation 
function +dotnet_diagnostic.CA5373.severity = none + +# CA5374: Do Not Use XslTransform +dotnet_diagnostic.CA5374.severity = none + +# CA5375: Do Not Use Account Shared Access Signature +dotnet_diagnostic.CA5375.severity = none + +# CA5376: Use SharedAccessProtocol HttpsOnly +dotnet_diagnostic.CA5376.severity = none + +# CA5377: Use Container Level Access Policy +dotnet_diagnostic.CA5377.severity = none + +# CA5378: Do not disable ServicePointManagerSecurityProtocols +dotnet_diagnostic.CA5378.severity = none + +# CA5379: Ensure Key Derivation Function algorithm is sufficiently strong +dotnet_diagnostic.CA5379.severity = none + +# CA5380: Do Not Add Certificates To Root Store +dotnet_diagnostic.CA5380.severity = none + +# CA5381: Ensure Certificates Are Not Added To Root Store +dotnet_diagnostic.CA5381.severity = none + +# CA5382: Use Secure Cookies In ASP.Net Core +dotnet_diagnostic.CA5382.severity = none + +# CA5383: Ensure Use Secure Cookies In ASP.NET Core +dotnet_diagnostic.CA5383.severity = none + +# CA5384: Do Not Use Digital Signature Algorithm (DSA) +dotnet_diagnostic.CA5384.severity = none + +# CA5385: Use Rivest-Shamir-Adleman (RSA) Algorithm With Sufficient Key Size +dotnet_diagnostic.CA5385.severity = none + +# CA5386: Avoid hardcoding SecurityProtocolType value +dotnet_diagnostic.CA5386.severity = none + +# CA5387: Do Not Use Weak Key Derivation Function With Insufficient Iteration Count +dotnet_diagnostic.CA5387.severity = none + +# CA5388: Ensure Sufficient Iteration Count When Using Weak Key Derivation Function +dotnet_diagnostic.CA5388.severity = none + +# CA5389: Do Not Add Archive Item's Path To The Target File System Path +dotnet_diagnostic.CA5389.severity = none + +# CA5390: Do not hard-code encryption key +dotnet_diagnostic.CA5390.severity = none + +# CA5391: Use antiforgery tokens in ASP.NET Core MVC controllers +dotnet_diagnostic.CA5391.severity = none + +# CA5392: Use DefaultDllImportSearchPaths attribute for P/Invokes 
+dotnet_diagnostic.CA5392.severity = none + +# CA5393: Do not use unsafe DllImportSearchPath value +dotnet_diagnostic.CA5393.severity = none + +# CA5394: Do not use insecure randomness +dotnet_diagnostic.CA5394.severity = none + +# CA5395: Miss HttpVerb attribute for action methods +dotnet_diagnostic.CA5395.severity = none + +# CA5396: Set HttpOnly to true for HttpCookie +dotnet_diagnostic.CA5396.severity = none + +# CA5397: Do not use deprecated SslProtocols values +dotnet_diagnostic.CA5397.severity = none + +# CA5398: Avoid hardcoded SslProtocols values +dotnet_diagnostic.CA5398.severity = none + +# CA5399: HttpClients should enable certificate revocation list checks +dotnet_diagnostic.CA5399.severity = none + +# CA5400: Ensure HttpClient certificate revocation list check is not disabled +dotnet_diagnostic.CA5400.severity = none + +# CA5401: Do not use CreateEncryptor with non-default IV +dotnet_diagnostic.CA5401.severity = none + +# CA5402: Use CreateEncryptor with the default IV +dotnet_diagnostic.CA5402.severity = none + +# CA5403: Do not hard-code certificate +dotnet_diagnostic.CA5403.severity = none + +# CA5404: Do not disable token validation checks +dotnet_diagnostic.CA5404.severity = none + +# CA5405: Do not always skip token validation in delegates +dotnet_diagnostic.CA5405.severity = none + +# IL3000: Avoid accessing Assembly file path when publishing as a single file +dotnet_diagnostic.IL3000.severity = none + +# IL3001: Avoid accessing Assembly file path when publishing as a single file +dotnet_diagnostic.IL3001.severity = none + +# IL3002: Using member with RequiresAssemblyFilesAttribute can break functionality when embedded in a single-file app +dotnet_diagnostic.IL3002.severity = none + +# SA0001: XML comments +dotnet_diagnostic.SA0001.severity = none + +# SA1000: Spacing around keywords +dotnet_diagnostic.SA1000.severity = none + +# SA1001: Commas should not be preceded by whitespace +dotnet_diagnostic.SA1001.severity = none + +# 
SA1002: Semicolons should not be preceded by a space +dotnet_diagnostic.SA1002.severity = none + +# SA1003: Operator should not appear at the end of a line +dotnet_diagnostic.SA1003.severity = none + +# SA1004: Documentation line should begin with a space +dotnet_diagnostic.SA1004.severity = none + +# SA1005: Single line comment should begin with a space +dotnet_diagnostic.SA1005.severity = none + +# SA1008: Opening parenthesis should not be preceded by a space +dotnet_diagnostic.SA1008.severity = none + +# SA1009: Closing parenthesis should not be followed by a space +dotnet_diagnostic.SA1009.severity = none + +# SA1010: Opening square brackets should not be preceded by a space +dotnet_diagnostic.SA1010.severity = none + +# SA1011: Closing square bracket should be followed by a space +dotnet_diagnostic.SA1011.severity = none + +# SA1012: Opening brace should be followed by a space +dotnet_diagnostic.SA1012.severity = none + +# SA1013: Closing brace should be preceded by a space +dotnet_diagnostic.SA1013.severity = none + +# SA1014: Opening generic brackets should not be preceded by a space +dotnet_diagnostic.SA1014.severity = none + +# SA1015: Closing generic bracket should not be followed by a space +dotnet_diagnostic.SA1015.severity = none + +# SA1018: Nullable type symbol should not be preceded by a space +dotnet_diagnostic.SA1018.severity = none + +# SA1020: Increment symbol should not be preceded by a space +dotnet_diagnostic.SA1020.severity = none + +# SA1021: Negative sign should be preceded by a space +dotnet_diagnostic.SA1021.severity = none + +# SA1023: Dereference symbol '*' should not be preceded by a space 
+dotnet_diagnostic.SA1023.severity = none + +# SA1024: Colon should be followed by a space +dotnet_diagnostic.SA1024.severity = none + +# SA1025: Code should not contain multiple whitespace characters in a row +dotnet_diagnostic.SA1025.severity = none + +# SA1026: Keyword should not be followed by a space or blank line +dotnet_diagnostic.SA1026.severity = none + +# SA1027: Tabs and spaces should be used correctly +dotnet_diagnostic.SA1027.severity = none + +# SA1028: Code should not contain trailing whitespace +dotnet_diagnostic.SA1028.severity = none + +# SA1100: Do not prefix calls with base unless local implementation exists +dotnet_diagnostic.SA1100.severity = none + +# SA1101: Prefix local calls with this +dotnet_diagnostic.SA1101.severity = none + +# SA1102: Query clause should follow previous clause +dotnet_diagnostic.SA1102.severity = none + +# SA1105: Query clauses spanning multiple lines should begin on own line +dotnet_diagnostic.SA1105.severity = none + +# SA1106: Code should not contain empty statements +dotnet_diagnostic.SA1106.severity = none + +# SA1107: Code should not contain multiple statements on one line +dotnet_diagnostic.SA1107.severity = none + +# SA1108: Block statements should not contain embedded comments +dotnet_diagnostic.SA1108.severity = none + +# SA1110: Opening parenthesis or bracket should be on declaration line +dotnet_diagnostic.SA1110.severity = none + +# SA1111: Closing parenthesis should be on line of last parameter +dotnet_diagnostic.SA1111.severity = none + +# SA1113: Comma should be on the same line as previous parameter +dotnet_diagnostic.SA1113.severity = none + +# SA1114: Parameter list should follow declaration +dotnet_diagnostic.SA1114.severity = none + +# SA1115: Parameter should begin on the line after the previous parameter +dotnet_diagnostic.SA1115.severity = none + +# SA1116: Split parameters should start on line after declaration +dotnet_diagnostic.SA1116.severity = none + +# SA1117: Parameters should be on same line or separate 
lines +dotnet_diagnostic.SA1117.severity = none + +# SA1118: Parameter should not span multiple lines +dotnet_diagnostic.SA1118.severity = none + +# SA1119: Statement should not use unnecessary parenthesis +dotnet_diagnostic.SA1119.severity = none + +# SA1120: Comments should contain text +dotnet_diagnostic.SA1120.severity = none + +# SA1121: Use built-in type alias +dotnet_diagnostic.SA1121.severity = none + +# SA1122: Use string.Empty for empty strings +dotnet_diagnostic.SA1122.severity = none + +# SA1123: Region should not be located within a code element +dotnet_diagnostic.SA1123.severity = none + +# SA1124: Do not use regions +dotnet_diagnostic.SA1124.severity = none + +# SA1125: Use shorthand for nullable types +dotnet_diagnostic.SA1125.severity = none + +# SA1127: Generic type constraints should be on their own line +dotnet_diagnostic.SA1127.severity = none + +# SA1128: Put constructor initializers on their own line +dotnet_diagnostic.SA1128.severity = none + +# SA1129: Do not use default value type constructor +dotnet_diagnostic.SA1129.severity = none + +# SA1130: Use lambda syntax +dotnet_diagnostic.SA1130.severity = none + +# SA1131: Constant values should appear on the right-hand side of comparisons +dotnet_diagnostic.SA1131.severity = none + +# SA1132: Do not combine fields +dotnet_diagnostic.SA1132.severity = none + +# SA1133: Do not combine attributes +dotnet_diagnostic.SA1133.severity = none + +# SA1134: Each attribute should be placed on its own line of code +dotnet_diagnostic.SA1134.severity = none + +# SA1135: Using directive should be qualified +dotnet_diagnostic.SA1135.severity = none + +# SA1136: Enum values should be on separate lines +dotnet_diagnostic.SA1136.severity = none + +# SA1137: Elements should have the same indentation +dotnet_diagnostic.SA1137.severity = none + +# SA1139: Use literal suffix notation instead of casting +dotnet_diagnostic.SA1139.severity = none + +# SA1141: Use tuple syntax +dotnet_diagnostic.SA1141.severity = none + 
+# SA1142: Refer to tuple elements by name +dotnet_diagnostic.SA1142.severity = none + +# SA1200: Using directive should appear within a namespace declaration +dotnet_diagnostic.SA1200.severity = none + +# SA1201: Elements should appear in the correct order +dotnet_diagnostic.SA1201.severity = none + +# SA1202: Elements should be ordered by access +dotnet_diagnostic.SA1202.severity = none + +# SA1203: Constants should appear before fields +dotnet_diagnostic.SA1203.severity = none + +# SA1204: Static elements should appear before instance elements +dotnet_diagnostic.SA1204.severity = none + +# SA1205: Partial elements should declare an access modifier +dotnet_diagnostic.SA1205.severity = none + +# SA1206: Keyword ordering +dotnet_diagnostic.SA1206.severity = none + +# SA1208: Using directive ordering +dotnet_diagnostic.SA1208.severity = none + +# SA1209: Using alias directives should be placed after all using namespace directives +dotnet_diagnostic.SA1209.severity = none + +# SA1210: Using directives should be ordered alphabetically by the namespaces +dotnet_diagnostic.SA1210.severity = none + +# SA1211: Using alias directive ordering +dotnet_diagnostic.SA1211.severity = none + +# SA1212: A get accessor appears after a set accessor within a property or indexer +dotnet_diagnostic.SA1212.severity = none + +# SA1214: Readonly fields should appear before non-readonly fields +dotnet_diagnostic.SA1214.severity = none + +# SA1216: Using static directives should be placed at the correct location +dotnet_diagnostic.SA1216.severity = none + +# SA1300: Element should begin with an uppercase letter +dotnet_diagnostic.SA1300.severity = none + +# SA1302: Interface names should begin with I +dotnet_diagnostic.SA1302.severity = none + +# SA1303: Const field names should begin with upper-case letter +dotnet_diagnostic.SA1303.severity = none + +# SA1304: Non-private readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1304.severity = none + +# SA1306: Field should 
begin with lower-case letter +dotnet_diagnostic.SA1306.severity = none + +# SA1307: Field should begin with upper-case letter +dotnet_diagnostic.SA1307.severity = none + +# SA1308: Field should not begin with the prefix 's_' +dotnet_diagnostic.SA1308.severity = none + +# SA1309: Field names should not begin with underscore +dotnet_diagnostic.SA1309.severity = none + +# SA1310: Field should not contain an underscore +dotnet_diagnostic.SA1310.severity = none + +# SA1311: Static readonly fields should begin with upper-case letter +dotnet_diagnostic.SA1311.severity = none + +# SA1312: Variable should begin with lower-case letter +dotnet_diagnostic.SA1312.severity = none + +# SA1313: Parameter should begin with lower-case letter +dotnet_diagnostic.SA1313.severity = none + +# SA1314: Type parameter names should begin with T +dotnet_diagnostic.SA1314.severity = none + +# SA1316: Tuple element names should use correct casing +dotnet_diagnostic.SA1316.severity = none + +# SA1400: Member should declare an access modifier +dotnet_diagnostic.SA1400.severity = none + +# SA1401: Fields should be private +dotnet_diagnostic.SA1401.severity = none + +# SA1402: File may only contain a single type +dotnet_diagnostic.SA1402.severity = none + +# SA1403: File may only contain a single namespace +dotnet_diagnostic.SA1403.severity = none + +# SA1404: Code analysis suppression should have justification +dotnet_diagnostic.SA1404.severity = none + +# SA1405: Debug.Assert should provide message text +dotnet_diagnostic.SA1405.severity = none + +# SA1407: Arithmetic expressions should declare precedence +dotnet_diagnostic.SA1407.severity = none + +# SA1408: Conditional expressions should declare precedence +dotnet_diagnostic.SA1408.severity = none + +# SA1410: Remove delegate parentheses when possible +dotnet_diagnostic.SA1410.severity = none + +# SA1411: Attribute constructor shouldn't use unnecessary parenthesis +dotnet_diagnostic.SA1411.severity = none + +# SA1413: Use trailing comma in 
multi-line initializers +dotnet_diagnostic.SA1413.severity = none + +# SA1414: Tuple types in signatures should have element names +dotnet_diagnostic.SA1414.severity = none + +# SA1500: Braces for multi-line statements should not share line +dotnet_diagnostic.SA1500.severity = none + +# SA1501: Statement should not be on a single line +dotnet_diagnostic.SA1501.severity = none + +# SA1502: Element should not be on a single line +dotnet_diagnostic.SA1502.severity = none + +# SA1503: Braces should not be omitted +dotnet_diagnostic.SA1503.severity = none + +# SA1504: All accessors should be single-line or multi-line +dotnet_diagnostic.SA1504.severity = none + +# SA1505: An opening brace should not be followed by a blank line +dotnet_diagnostic.SA1505.severity = none + +# SA1506: Element documentation headers should not be followed by blank line +dotnet_diagnostic.SA1506.severity = none + +# SA1507: Code should not contain multiple blank lines in a row +dotnet_diagnostic.SA1507.severity = none + +# SA1508: A closing brace should not be preceded by a blank line +dotnet_diagnostic.SA1508.severity = none + +# SA1509: Opening braces should not be preceded by blank line +dotnet_diagnostic.SA1509.severity = none + +# SA1510: 'else' statement should not be preceded by a blank line +dotnet_diagnostic.SA1510.severity = none + +# SA1512: Single-line comments should not be followed by blank line +dotnet_diagnostic.SA1512.severity = none + +# SA1513: Closing brace should be followed by blank line +dotnet_diagnostic.SA1513.severity = none + +# SA1514: Element documentation header should be preceded by blank line +dotnet_diagnostic.SA1514.severity = none + +# SA1515: Single-line comment should be preceded by blank line +dotnet_diagnostic.SA1515.severity = none + +# SA1516: Elements should be separated by blank line +dotnet_diagnostic.SA1516.severity = none + +# SA1517: Code should not contain blank lines at start of file +dotnet_diagnostic.SA1517.severity = none + +# SA1518: Code 
should not contain blank lines at the end of the file +dotnet_diagnostic.SA1518.severity = none + +# SA1519: Braces should not be omitted from multi-line child statement +dotnet_diagnostic.SA1519.severity = none + +# SA1520: Use braces consistently +dotnet_diagnostic.SA1520.severity = none + +# SA1600: Elements should be documented +dotnet_diagnostic.SA1600.severity = none + +# SA1601: Partial elements should be documented +dotnet_diagnostic.SA1601.severity = none + +# SA1602: Enumeration items should be documented +dotnet_diagnostic.SA1602.severity = none + +# SA1604: Element documentation should have summary +dotnet_diagnostic.SA1604.severity = none + +# SA1605: Partial element documentation should have summary +dotnet_diagnostic.SA1605.severity = none + +# SA1606: Element documentation should have summary text +dotnet_diagnostic.SA1606.severity = none + +# SA1608: Element documentation should not have default summary +dotnet_diagnostic.SA1608.severity = none + +# SA1610: Property documentation should have value text +dotnet_diagnostic.SA1610.severity = none + +# SA1611: The documentation for parameter 'message' is missing +dotnet_diagnostic.SA1611.severity = none + +# SA1612: The parameter documentation is at incorrect position +dotnet_diagnostic.SA1612.severity = none + +# SA1614: Element parameter documentation should have text +dotnet_diagnostic.SA1614.severity = none + +# SA1615: Element return value should be documented +dotnet_diagnostic.SA1615.severity = none + +# SA1616: Element return value documentation should have text +dotnet_diagnostic.SA1616.severity = none + +# SA1618: The documentation for type parameter is missing +dotnet_diagnostic.SA1618.severity = none + +# SA1619: The documentation for type parameter is missing +dotnet_diagnostic.SA1619.severity = none + +# SA1622: Generic type parameter documentation should have text +dotnet_diagnostic.SA1622.severity = none + +# SA1623: Property documentation text +dotnet_diagnostic.SA1623.severity = none 
+ +# SA1624: Because the property only contains a visible get accessor, the documentation summary text should begin with 'Gets' +dotnet_diagnostic.SA1624.severity = none + +# SA1625: Element documentation should not be copied and pasted +dotnet_diagnostic.SA1625.severity = none + +# SA1626: Single-line comments should not use documentation style slashes +dotnet_diagnostic.SA1626.severity = none + +# SA1627: The documentation text within the \'exception\' tag should not be empty +dotnet_diagnostic.SA1627.severity = none + +# SA1629: Documentation text should end with a period +dotnet_diagnostic.SA1629.severity = none + +# SA1633: File should have header +dotnet_diagnostic.SA1633.severity = none + +# SA1642: Constructor summary documentation should begin with standard text +dotnet_diagnostic.SA1642.severity = none + +# SA1643: Destructor summary documentation should begin with standard text +dotnet_diagnostic.SA1643.severity = none + +# SA1649: File name should match first type name +dotnet_diagnostic.SA1649.severity = none + +# IDE0001: Simplify name +dotnet_diagnostic.IDE0001.severity = silent + +# IDE0002: Simplify member access +dotnet_diagnostic.IDE0002.severity = silent + +# IDE0003: Remove this or Me qualification +dotnet_diagnostic.IDE0003.severity = silent + +# IDE0004: Remove Unnecessary Cast +dotnet_diagnostic.IDE0004.severity = silent + +# IDE0005: Using directive is unnecessary. 
+dotnet_diagnostic.IDE0005.severity = silent + +# IDE0007: Use implicit type +dotnet_diagnostic.IDE0007.severity = silent + +# IDE0008: Use explicit type +dotnet_diagnostic.IDE0008.severity = silent + +# IDE0009: Add this or Me qualification +dotnet_diagnostic.IDE0009.severity = silent + +# IDE0010: Add missing cases +dotnet_diagnostic.IDE0010.severity = silent + +# IDE0011: Add braces +dotnet_diagnostic.IDE0011.severity = silent + +# IDE0016: Use 'throw' expression +dotnet_diagnostic.IDE0016.severity = silent + +# IDE0017: Simplify object initialization +dotnet_diagnostic.IDE0017.severity = silent + +# IDE0018: Inline variable declaration +dotnet_diagnostic.IDE0018.severity = silent + +# IDE0019: Use pattern matching to avoid as followed by a null check +dotnet_diagnostic.IDE0019.severity = silent + +# IDE0020: Use pattern matching to avoid is check followed by a cast (with variable) +dotnet_diagnostic.IDE0020.severity = silent + +# IDE0021: Use expression body for constructors +dotnet_diagnostic.IDE0021.severity = silent + +# IDE0022: Use expression body for methods +dotnet_diagnostic.IDE0022.severity = silent + +# IDE0023: Use expression body for operators +dotnet_diagnostic.IDE0023.severity = silent + +# IDE0024: Use expression body for operators +dotnet_diagnostic.IDE0024.severity = silent + +# IDE0025: Use expression body for properties +dotnet_diagnostic.IDE0025.severity = silent + +# IDE0026: Use expression body for indexers +dotnet_diagnostic.IDE0026.severity = silent + +# IDE0027: Use expression body for accessors +dotnet_diagnostic.IDE0027.severity = silent + +# IDE0028: Simplify collection initialization +dotnet_diagnostic.IDE0028.severity = silent + +# IDE0029: Use coalesce expression +dotnet_diagnostic.IDE0029.severity = silent + +# IDE0030: Use coalesce expression +dotnet_diagnostic.IDE0030.severity = silent + +# IDE0031: Use null propagation +dotnet_diagnostic.IDE0031.severity = silent + +# IDE0032: Use auto property 
+dotnet_diagnostic.IDE0032.severity = silent + +# IDE0033: Use explicitly provided tuple name +dotnet_diagnostic.IDE0033.severity = silent + +# IDE0034: Simplify 'default' expression +dotnet_diagnostic.IDE0034.severity = silent + +# IDE0035: Remove unreachable code +dotnet_diagnostic.IDE0035.severity = silent + +# IDE0036: Order modifiers +dotnet_diagnostic.IDE0036.severity = silent + +# IDE0037: Use inferred member name +dotnet_diagnostic.IDE0037.severity = silent + +# IDE0038: Use pattern matching to avoid is check followed by a cast (without variable) +dotnet_diagnostic.IDE0038.severity = silent + +# IDE0039: Use local function +dotnet_diagnostic.IDE0039.severity = silent + +# IDE0040: Add accessibility modifiers +dotnet_diagnostic.IDE0040.severity = silent + +# IDE0041: Use 'is null' check +dotnet_diagnostic.IDE0041.severity = silent + +# IDE0042: Deconstruct variable declaration +dotnet_diagnostic.IDE0042.severity = silent + +# IDE0043: Invalid format string +dotnet_diagnostic.IDE0043.severity = silent + +# IDE0044: Add readonly modifier +dotnet_diagnostic.IDE0044.severity = silent + +# IDE0045: Use conditional expression for assignment +dotnet_diagnostic.IDE0045.severity = silent + +# IDE0046: Use conditional expression for return +dotnet_diagnostic.IDE0046.severity = silent + +# IDE0047: Remove unnecessary parentheses +dotnet_diagnostic.IDE0047.severity = silent + +# IDE0048: Add parentheses for clarity +dotnet_diagnostic.IDE0048.severity = silent + +# IDE0049: Use language keywords instead of framework type names for type references +dotnet_diagnostic.IDE0049.severity = silent + +# IDE0050: Convert anonymous type to tuple +dotnet_diagnostic.IDE0050.severity = silent + +# IDE0051: Remove unused private members +dotnet_diagnostic.IDE0051.severity = silent + +# IDE0052: Remove unread private members +dotnet_diagnostic.IDE0052.severity = silent + +# IDE0053: Use expression body for lambdas +dotnet_diagnostic.IDE0053.severity = silent + +# IDE0054: Use compound 
assignment +dotnet_diagnostic.IDE0054.severity = silent + +# IDE0055: Fix formatting +dotnet_diagnostic.IDE0055.severity = silent + +# IDE0056: Use index operator +dotnet_diagnostic.IDE0056.severity = silent + +# IDE0057: Use range operator +dotnet_diagnostic.IDE0057.severity = silent + +# IDE0058: Expression value is never used +dotnet_diagnostic.IDE0058.severity = silent + +# IDE0059: Unnecessary assignment of a value +dotnet_diagnostic.IDE0059.severity = silent + +# IDE0060: Remove unused parameter +dotnet_diagnostic.IDE0060.severity = silent + +# IDE0061: Use expression body for local functions +dotnet_diagnostic.IDE0061.severity = silent + +# IDE0062: Make local function 'static' +dotnet_diagnostic.IDE0062.severity = silent + +# IDE0063: Use simple 'using' statement +dotnet_diagnostic.IDE0063.severity = silent + +# IDE0064: Make readonly fields writable +dotnet_diagnostic.IDE0064.severity = silent + +# IDE0065: Misplaced using directive +dotnet_diagnostic.IDE0065.severity = silent + +# IDE0066: Convert switch statement to expression +dotnet_diagnostic.IDE0066.severity = silent + +# IDE0070: Use 'System.HashCode' +dotnet_diagnostic.IDE0070.severity = silent + +# IDE0071: Simplify interpolation +dotnet_diagnostic.IDE0071.severity = silent + +# IDE0072: Add missing cases +dotnet_diagnostic.IDE0072.severity = silent + +# IDE0073: The file header is missing or not located at the top of the file +dotnet_diagnostic.IDE0073.severity = silent + +# IDE0074: Use compound assignment +dotnet_diagnostic.IDE0074.severity = silent + +# IDE0075: Simplify conditional expression +dotnet_diagnostic.IDE0075.severity = silent + +# IDE0076: Invalid global 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0076.severity = silent + +# IDE0077: Avoid legacy format target in 'SuppressMessageAttribute' +dotnet_diagnostic.IDE0077.severity = silent + +# IDE0078: Use pattern matching +dotnet_diagnostic.IDE0078.severity = silent + +# IDE0079: RemoveUnnecessarySuppression 
+dotnet_diagnostic.IDE0079.severity = silent + +# IDE0080: Remove unnecessary suppression operator +dotnet_diagnostic.IDE0080.severity = silent + +# IDE0081: RemoveUnnecessaryByVal +dotnet_diagnostic.IDE0081.severity = silent + +# IDE0082: 'typeof' can be converted to 'nameof' +dotnet_diagnostic.IDE0082.severity = silent + +# IDE0083: Use pattern matching +dotnet_diagnostic.IDE0083.severity = silent + +# IDE0084: Use pattern matching (IsNot operator) +dotnet_diagnostic.IDE0084.severity = silent + +# IDE0090: Use 'new(...)' +dotnet_diagnostic.IDE0090.severity = silent + +# IDE0100: Remove redundant equality +dotnet_diagnostic.IDE0100.severity = silent + +# IDE0110: Remove unnecessary discard +dotnet_diagnostic.IDE0110.severity = silent + +# IDE0120: Simplify LINQ expression +dotnet_diagnostic.IDE0120.severity = silent + +# IDE0130: Namespace does not match folder structure +dotnet_diagnostic.IDE0130.severity = silent + +# IDE0140: Simplify object creation +dotnet_diagnostic.IDE0140.severity = silent + +# IDE0150: Prefer 'null' check over type check +dotnet_diagnostic.IDE0150.severity = silent + +# IDE0160: Convert to block scoped namespace +dotnet_diagnostic.IDE0160.severity = silent + +# IDE0161: Convert to file-scoped namespace +dotnet_diagnostic.IDE0161.severity = silent + +# IDE1005: Delegate invocation can be simplified. 
+dotnet_diagnostic.IDE1005.severity = silent + +# IDE1006: Naming Styles +dotnet_diagnostic.IDE1006.severity = silent + +# IDE2000: C# +dotnet_diagnostic.IDE2000.severity = silent + +# IDE2001: Embedded statements must be on their own line +dotnet_diagnostic.IDE2001.severity = silent + +# IDE2002: Consecutive braces must not have blank line between them +dotnet_diagnostic.IDE2002.severity = silent + +# IDE2003: C# +dotnet_diagnostic.IDE2003.severity = silent + +# IDE2004: Blank line not allowed after constructor initializer colon +dotnet_diagnostic.IDE2004.severity = silent + +# xUnit1000: Test classes must be public +dotnet_diagnostic.xUnit1000.severity = warning + +# xUnit1001: Fact methods cannot have parameters +dotnet_diagnostic.xUnit1001.severity = warning + +# xUnit1002: Test methods cannot have multiple Fact or Theory attributes +dotnet_diagnostic.xUnit1002.severity = warning + +# xUnit1003: Theory methods must have test data +dotnet_diagnostic.xUnit1003.severity = warning + +# xUnit1004: Test methods should not be skipped +dotnet_diagnostic.xUnit1004.severity = warning + +# xUnit1005: Fact methods should not have test data +dotnet_diagnostic.xUnit1005.severity = warning + +# xUnit1006: Theory methods should have parameters +dotnet_diagnostic.xUnit1006.severity = warning + +# xUnit1007: ClassData must point at a valid class +dotnet_diagnostic.xUnit1007.severity = warning + +# xUnit1008: Test data attribute should only be used on a Theory +dotnet_diagnostic.xUnit1008.severity = warning + +# xUnit1009: InlineData must match the number of method parameters +dotnet_diagnostic.xUnit1009.severity = warning + +# xUnit1010: The value is not convertible to the method parameter type +dotnet_diagnostic.xUnit1010.severity = warning + +# xUnit1011: There is no matching method parameter +dotnet_diagnostic.xUnit1011.severity = warning + +# xUnit1012: Null should not be used for value type parameters +dotnet_diagnostic.xUnit1012.severity = warning + +# xUnit1013: Public 
methods should be marked as test +dotnet_diagnostic.xUnit1013.severity = warning + +# xUnit1014: MemberData should use nameof operator for member name +dotnet_diagnostic.xUnit1014.severity = warning + +# xUnit1015: MemberData must reference an existing member +dotnet_diagnostic.xUnit1015.severity = warning + +# xUnit1016: MemberData must reference a public member +dotnet_diagnostic.xUnit1016.severity = warning + +# xUnit1017: MemberData must reference a static member +dotnet_diagnostic.xUnit1017.severity = warning + +# xUnit1018: MemberData must reference a valid member kind +dotnet_diagnostic.xUnit1018.severity = warning + +# xUnit1019: MemberData must reference a member providing a valid data type +dotnet_diagnostic.xUnit1019.severity = warning + +# xUnit1020: MemberData must reference a property with a getter +dotnet_diagnostic.xUnit1020.severity = warning + +# xUnit1021: MemberData should not have parameters if the referenced member is not a method +dotnet_diagnostic.xUnit1021.severity = warning + +# xUnit1022: Theory methods cannot have a parameter array +dotnet_diagnostic.xUnit1022.severity = warning + +# xUnit1023: Theory methods cannot have default parameter values +dotnet_diagnostic.xUnit1023.severity = warning + +# xUnit1024: Test methods cannot have overloads +dotnet_diagnostic.xUnit1024.severity = warning + +# xUnit1025: InlineData should be unique within the Theory it belongs to +dotnet_diagnostic.xUnit1025.severity = warning + +# xUnit1026: Theory methods should use all of their parameters +dotnet_diagnostic.xUnit1026.severity = warning + +# xUnit2000: Constants and literals should be the expected argument +dotnet_diagnostic.xUnit2000.severity = warning + +# xUnit2001: Do not use invalid equality check +dotnet_diagnostic.xUnit2001.severity = warning + +# xUnit2002: Do not use null check on value type +dotnet_diagnostic.xUnit2002.severity = warning + +# xUnit2003: Do not use equality check to test for null value +dotnet_diagnostic.xUnit2003.severity = 
warning + +# xUnit2004: Do not use equality check to test for boolean conditions +dotnet_diagnostic.xUnit2004.severity = warning + +# xUnit2005: Do not use identity check on value type +dotnet_diagnostic.xUnit2005.severity = warning + +# xUnit2006: Do not use invalid string equality check +dotnet_diagnostic.xUnit2006.severity = warning + +# xUnit2007: Do not use typeof expression to check the type +dotnet_diagnostic.xUnit2007.severity = warning + +# xUnit2008: Do not use boolean check to match on regular expressions +dotnet_diagnostic.xUnit2008.severity = warning + +# xUnit2009: Do not use boolean check to check for substrings +dotnet_diagnostic.xUnit2009.severity = warning + +# xUnit2010: Do not use boolean check to check for string equality +dotnet_diagnostic.xUnit2010.severity = warning + +# xUnit2011: Do not use empty collection check +dotnet_diagnostic.xUnit2011.severity = warning + +# xUnit2012: Do not use Enumerable.Any() to check if a value exists in a collection +dotnet_diagnostic.xUnit2012.severity = warning + +# xUnit2013: Do not use equality check to check for collection size. 
+dotnet_diagnostic.xUnit2013.severity = none + +# xUnit2014: Do not use throws check to check for asynchronously thrown exception +dotnet_diagnostic.xUnit2014.severity = none + +# xUnit2015: Do not use typeof expression to check the exception type +dotnet_diagnostic.xUnit2015.severity = warning + +# xUnit2016: Keep precision in the allowed range when asserting equality of doubles or decimals +dotnet_diagnostic.xUnit2016.severity = warning + +# xUnit2017: Do not use Contains() to check if a value exists in a collection +dotnet_diagnostic.xUnit2017.severity = none + +# xUnit2018: Do not compare an object's exact type to an abstract class or interface +dotnet_diagnostic.xUnit2018.severity = warning + +# xUnit2019: Do not use obsolete throws check to check for asynchronously thrown exception +dotnet_diagnostic.xUnit2019.severity = warning + +# xUnit3000: Test case classes must derive directly or indirectly from Xunit.LongLivedMarshalByRefObject +dotnet_diagnostic.xUnit3000.severity = warning + +# xUnit3001: Classes that implement Xunit.Abstractions.IXunitSerializable must have a public parameterless constructor +dotnet_diagnostic.xUnit3001.severity = warning + +# !!! +dotnet_diagnostic.xUnit3001.severity = warning + +# !!! OVERRIDES +# !!! 
Note: It is preferred to minimize the overrides if possible (just to follow the MS dotnet convention as much as possible) diff --git a/lang/csharp/README.md b/lang/csharp/README.md index 39dee79bb0c..9f6a79c958c 100644 --- a/lang/csharp/README.md +++ b/lang/csharp/README.md @@ -1,4 +1,4 @@ -# Avro C# [![Build Status](https://travis-ci.org/apache/avro.svg?branch=master)](https://travis-ci.org/apache/avro) [![NuGet Package](https://img.shields.io/nuget/v/Apache.Avro.svg)](https://www.nuget.org/packages/Apache.Avro) +# Avro C# [![Test C#](https://github.com/apache/avro/actions/workflows/test-lang-csharp.yml/badge.svg)](https://github.com/apache/avro/actions/workflows/test-lang-csharp.yml) [![NuGet Package](https://img.shields.io/nuget/v/Apache.Avro.svg)](https://www.nuget.org/packages/Apache.Avro) [![Avro](https://avro.apache.org/images/avro-logo.png)](http://avro.apache.org/) @@ -12,9 +12,26 @@ Install-Package Apache.Avro ## Build & Test -1. Install [.NET SDK 5.0+](https://dotnet.microsoft.com/download/dotnet-core) +1. Install [.NET SDK 8.0+](https://dotnet.microsoft.com/download/dotnet-core) 2. 
`dotnet test` +## Project Target Frameworks + +| Project | Published to nuget.org | Type | .NET Standard 2.0 | .NET Standard 2.1 | .NET Core 3.1 | .NET 5.0 | .NET 6.0 | .NET 7.0 | .NET 8.0 | +|:-------------------:|:--------------------------:|:----------:|:------------------:|:-----------------:|:-------------:|:---------:|:---------:|:---------:|:---------:| +| Avro.main | Apache.Avro | Library | ✔️ | ✔️ | | | | | | +| Avro.File.Snappy | Apache.Avro.File.Snappy | Library | ✔️ | ✔️ | | | | | | +| Avro.File.BZip2 | Apache.Avro.File.BZip2 | Library | ✔️ | ✔️ | | | | | | +| Avro.File.XZ | Apache.Avro.File.XZ | Library | ✔️ | ✔️ | | | | | | +| Avro.File.Zstandard | Apache.Avro.File.Zstandard | Library | ✔️ | ✔️ | | | | | | +| Avro.codegen | Apache.Avro.Tools | Exe | | | ✔️ |✔️ |✔️ |✔️ |✔️ | +| Avro.ipc | | Library | ✔️ | ✔️ | | | | | | +| Avro.ipc.test | | Unit Tests | | | ✔️ |✔️ |✔️ |✔️ |✔️ | +| Avro.msbuild | | Library | ✔️ | ✔️ | | | | | | +| Avro.perf | | Exe | | | ✔️ |✔️ |✔️ |✔️ |✔️ | +| Avro.test | | Unit Tests | | | ✔️ |✔️ |✔️ |✔️ |✔️ | +| Avro.benchmark | | Exe | | | ✔️ |✔️ |✔️ |✔️ |✔️ | + ## Dependency package version strategy 1. Use [`versions.props`](./versions.props) to specify package versions. `PackageReference` elements in `.csproj` files should use only version properties defined in [`versions.props`](./versions.props). @@ -25,3 +42,7 @@ In short, we should only update the version of the dependencies in our libraries ## Notes The [LICENSE](./LICENSE) and [NOTICE](./NOTICE) files in the lang/csharp source directory are used to build the binary distribution. The [LICENSE.txt](../../LICENSE.txt) and [NOTICE.txt](../../NOTICE.txt) information for the Avro C# source distribution is in the root directory. + +## Styling Guidelines + +The C# styling guidelines can be found in [STYLING](./STYLING.md). 
diff --git a/lang/csharp/STYLING.md b/lang/csharp/STYLING.md new file mode 100644 index 00000000000..948eddaba8e --- /dev/null +++ b/lang/csharp/STYLING.md @@ -0,0 +1,1595 @@ + +# C# Styling Rules for Apache.Avro + +The following rules are currently used within the .editorconfig of the Avro solution. Any changes to this documentation should be reflected in the .editorconfig file and vice versa. + +Notes + - The examples shown are based on the current settings in .editorconfig + - :exclamation: Not defined :exclamation: means we have not set a preference + - There are cases where it is not explicitly defined in the .editorconfig, but there is a default option + - The project currently targets a framework that uses C# 7.3 + - When violating a formatting rule it may show up as an IDE0055 Fix formatting violation. + +## New line preferences + +### csharp_new_line_before_open_brace +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_open_brace) + +This rule concerns whether an open brace { should be placed on the same line as the preceding code, or on a new line. + +**Example** +``` +void MyMethod() +{ + if (...) + { + ... + } +} +``` +--- +### csharp_new_line_before_else +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_else) + +**Example** +``` +if (...) { + ... +} +else { + ... +} +``` +--- +### csharp_new_line_before_catch +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_catch) + +**Example** +``` +try { + ... +} +catch (Exception e) { + ... +} +``` +--- +### csharp_new_line_before_finally +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_finally) + +**Example** +``` +try { + ... +} +catch (Exception e) { + ... +} +finally { + ... 
+} +``` +--- +### csharp_new_line_before_members_in_object_initializers +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_members_in_object_initializers) + +**Example** +``` +var z = new B() +{ + A = 3, + B = 4 +} +``` +--- +### csharp_new_line_before_members_in_anonymous_types +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_before_members_in_anonymous_types) + +**Example** +``` +var z = new +{ + A = 3, + B = 4 +} +``` +--- +### csharp_new_line_between_query_expression_clauses +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_new_line_between_query_expression_clauses) + +**Example** +``` +var q = from a in e + from b in e + select a * b; +``` +--- + +## Indentation preferences + +### csharp_indent_case_contents +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_case_contents) + +**Example** +``` +switch(c) { + case Color.Red: + Console.WriteLine("The color is red"); + break; + case Color.Blue: + Console.WriteLine("The color is blue"); + break; + default: + Console.WriteLine("The color is unknown."); + break; +} +``` +--- +### csharp_indent_switch_labels +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_switch_labels) + +**Example** +``` +switch(c) { + case Color.Red: + Console.WriteLine("The color is red"); + break; + case Color.Blue: + Console.WriteLine("The color is blue"); + break; + default: + Console.WriteLine("The color is unknown."); + break; +} +``` +--- +### csharp_indent_labels +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_labels) + +Labels are placed at one less indent to the current context + +**Example** +``` 
+class C +{ + private string MyMethod(...) + { + if (...) { + goto error; + } + error: + throw new Exception(...); + } +} +``` +--- +### csharp_indent_block_contents +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_block_contents) + +**Example** +``` +static void Hello() +{ + Console.WriteLine("Hello"); +} +``` +--- +### csharp_indent_braces +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_braces) + +**Example** +``` +static void Hello() +{ + Console.WriteLine("Hello"); +} +``` +--- +### csharp_indent_case_contents_when_block +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_indent_case_contents_when_block) + +**Example** +``` +case 0: + { + Console.WriteLine("Hello"); + break; + } +``` +--- + +## Spacing Preferences + +### csharp_space_after_cast +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_space_after_cast) + +**Example** +``` +int y = (int)x; +``` +--- +### csharp_space_after_keywords_in_control_flow_statements +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#csharp_space_after_keywords_in_control_flow_statements) + +**Example** +``` +for (int i;i>, &, ^, |) precedence + +Default is always_for_clarity + +**Example** +``` +var v = a + (b * c); +``` +--- +### dotnet_style_parentheses_in_relational_binary_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_relational_binary_operators) + +Prefer parentheses to clarify relational operator (>, <, <=, >=, is, as, ==, !=) precedence + +Default is always_for_clarity + +**Example** +``` +var v = (a < b) == (c > d); +``` +--- +### dotnet_style_parentheses_in_other_binary_operators 
+[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_other_binary_operators) + +Prefer parentheses to clarify other binary operator (&&, ||, ??) precedence + +Default is always_for_clarity + +**Example** +``` +var v = a || (b && c); +``` +--- +### dotnet_style_parentheses_in_other_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0047-ide0048#dotnet_style_parentheses_in_other_operators) + +Prefer to not have parentheses when operator precedence is obvious + +Default is never_if_unnecessary + +**Example** +``` +var v = a.b.Length; +``` +--- + +## Expression-level preferences + +### dotnet_style_object_initializer +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0017#dotnet_style_object_initializer) + +Prefer objects to be initialized using object initializers when possible + +default is true + +**Example** +``` +var c = new Customer() { Age = 21 }; +``` +--- +### csharp_style_inlined_variable_declaration +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0018#csharp_style_inlined_variable_declaration) + +Prefer out variables to be declared inline in the argument list of a method call when possible + +**Example** +``` +if (int.TryParse(value, out int i)) {...} +``` +--- +### dotnet_style_collection_initializer +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0028#dotnet_style_collection_initializer) + +Prefer collections to be initialized using collection initializers when possible + +**Example** +``` +var list = new List<int> { 1, 2, 3 }; +``` +--- +### dotnet_style_prefer_auto_properties +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0032#dotnet_style_prefer_auto_properties) + +Prefer auto properties over properties with private backing fields + 
+**Example** +``` +private int Age { get; } +``` +--- +### dotnet_style_explicit_tuple_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0033#dotnet_style_explicit_tuple_names) + +Prefer tuple names to ItemX properties + +**Example** +``` +(string name, int age) customer = GetCustomer(); +var name = customer.name; +``` +--- +### csharp_prefer_simple_default_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0034#csharp_prefer_simple_default_expression) + +Prefer default over default(T) + +**Example** +``` +void DoWork(CancellationToken cancellationToken = default) { ... } +``` +--- +### dotnet_style_prefer_inferred_tuple_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0037#dotnet_style_prefer_inferred_tuple_names) + +Prefer inferred tuple element names + +**Example** +``` +var tuple = (age, name); +``` +--- +### dotnet_style_prefer_inferred_anonymous_type_member_names +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0037#dotnet_style_prefer_inferred_anonymous_type_member_names) + +Prefer inferred anonymous type member names + +**Example** +``` +var anon = new { age, name }; +``` +--- +### csharp_style_pattern_local_over_anonymous_function +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0039#csharp_style_pattern_local_over_anonymous_function) + +Prefer anonymous functions over local functions + +**Example** +``` +Func<int, int> fibonacci = null; +fibonacci = (int n) => +{ + return n <= 1 ? 
1 : fibonacci(n - 1) + fibonacci(n - 2); +}; +``` +--- +### csharp_style_deconstructed_variable_declaration +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0042#csharp_style_deconstructed_variable_declaration) + +Prefer deconstructed variable declaration + +default is true + +**Example** +``` +var (name, age) = GetPersonTuple(); +Console.WriteLine($"{name} {age}"); + +(int x, int y) = GetPointTuple(); +Console.WriteLine($"{x} {y}"); +``` +--- +### dotnet_style_prefer_conditional_expression_over_assignment +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0045#dotnet_style_prefer_conditional_expression_over_assignment) + +Prefer assignments with a ternary conditional over an if-else statement + +**Example** +``` +string s = expr ? "hello" : "world"; +``` +--- +### dotnet_style_prefer_conditional_expression_over_return +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0046#dotnet_style_prefer_conditional_expression_over_return) + +Prefer return statements to use a ternary conditional over an if-else statement + +**Example** +``` +return expr ? 
"hello" : "world" +``` +--- +### dotnet_style_prefer_compound_assignment +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0054-ide0074#dotnet_style_prefer_compound_assignment) + +Prefer compound assignment expressions + +default is true + +**Example** +``` +x += 1; +``` +--- +### dotnet_style_prefer_simplified_boolean_expressions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0075#dotnet_style_prefer_simplified_boolean_expressions) + +Prefer simplified conditional expressions + +default is true + +**Example** +``` +var result1 = M1() && M2(); +var result2 = M1() || M2(); +``` +--- +### csharp_style_implicit_object_creation_when_type_is_apparent +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0090#csharp_style_implicit_object_creation_when_type_is_apparent) + +Prefer target-typed new expressions when created type is apparent + +default is true + +**Example** +``` +C c = new(); +C c2 = new() { Field = 0 }; +``` +--- + +## Null-checking Preferences + +### csharp_style_throw_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0016#csharp_style_throw_expression) + +Prefer to use throw expressions instead of throw statements + +**Example** +``` +_s = s ?? throw new ArgumentNullException(nameof(s)); +``` +--- +### dotnet_style_coalesce_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0029-ide0030#dotnet_style_coalesce_expression) + +Prefer null coalescing expressions to ternary operator checking + +**Example** +``` +var v = x ?? 
y; +``` +--- +### dotnet_style_null_propagation +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0031#dotnet_style_null_propagation) + +Prefer to use the null-conditional operator when possible + +**Example** +``` +string v = o?.ToString(); +``` +--- +### dotnet_style_prefer_is_null_check_over_reference_equality_method +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0041#dotnet_style_prefer_is_null_check_over_reference_equality_method) + +Prefer is null check over reference equality method + +**Example** +``` +if (value is null) + return; +``` +--- +### csharp_style_conditional_delegate_call +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide1005#csharp_style_conditional_delegate_call) + +Prefer to use the null-conditional operator (?.) when invoking a delegate, instead of performing a null check + +**Example** +``` +func?.Invoke(args); +``` +--- + +## var Preferences + +### csharp_style_var_for_built_in_types +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_for_built_in_types) + +Prefer explicit type over var to declare variables with built-in system types such as int + +**Example** +``` +int x = 5; +``` +--- +### csharp_style_var_when_type_is_apparent +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_when_type_is_apparent) + +Prefer explicit type over var when the type is already mentioned on the right-hand side of a declaration expression + +**Example** +``` +Customer obj = new Customer(); +``` +--- +### csharp_style_var_elsewhere +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0007-ide0008#csharp_style_var_elsewhere) + +Prefer explicit type over var in all cases, unless overridden by another code style rule + 
+**Example** +``` +bool f = this.Init(); +``` +--- + +## Expression-bodied member Preferences + +### csharp_style_expression_bodied_constructors +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0021#csharp_style_expression_bodied_constructors) + +Prefer expression bodies for constructors + +**Example** +``` +public Customer(int age) => Age = age; +``` +--- +### csharp_style_expression_bodied_methods +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0022#csharp_style_expression_bodied_methods) + +Prefer expression bodies for methods + +**Example** +``` +public int GetAge() => this.Age; +``` +--- +### csharp_style_expression_bodied_operators +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0023-ide0024#csharp_style_expression_bodied_operators) + +Prefer expression bodies for operators + +**Example** +``` +public static ComplexNumber operator + (ComplexNumber c1, ComplexNumber c2) + => new ComplexNumber(c1.Real + c2.Real, c1.Imaginary + c2.Imaginary); +``` +--- +### csharp_style_expression_bodied_properties +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0025#csharp_style_expression_bodied_properties) + +Prefer expression bodies for properties + +**Example** +``` +public int Age => _age; +``` +--- +### csharp_style_expression_bodied_indexers +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0026#csharp_style_expression_bodied_indexers) + +Prefer expression bodies for indexers + +**Example** +``` +public T this[int i] => _values[i]; +``` +--- +### csharp_style_expression_bodied_accessors +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0027#csharp_style_expression_bodied_accessors) + +Prefer expression bodies for accessors + +**Example** +``` +public int Age { get => _age; set => _age = value; } 
+``` +--- +### csharp_style_expression_bodied_lambdas +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0053#csharp_style_expression_bodied_lambdas) + +Prefer expression bodies for lambdas + +**Example** +``` +Func<int, int> square = x => x * x; +``` +--- +### csharp_style_expression_bodied_local_functions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0061#csharp_style_expression_bodied_local_functions) + +Prefer expression bodies for local functions + +**Example** +``` +void M() +{ + Hello(); + void Hello() => Console.WriteLine("Hello"); +} +``` +--- + +## Pattern matching Preferences + +### csharp_style_pattern_matching_over_as_with_null_check +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0019#csharp_style_pattern_matching_over_as_with_null_check) + +Prefer pattern matching instead of as expressions with null checks to determine if something is of a particular type + +**Example** +``` +if (o is string s) {...} +``` +--- +### csharp_style_pattern_matching_over_is_with_cast_check +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0020-ide0038#csharp_style_pattern_matching_over_is_with_cast_check) + +Prefer pattern matching instead of is expressions with type casts + +**Example** +``` +if (o is int i) {...} +``` +--- +### csharp_style_prefer_switch_expression +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0066#csharp_style_prefer_switch_expression) + +Prefer to use a switch expression (introduced with C# 8.0) + +**Example** +``` +return x switch +{ + 1 => 1 * 1, + 2 => 2 * 2, + _ => 0, +}; +``` +--- +### csharp_style_prefer_not_pattern +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0083#csharp_style_prefer_not_pattern) + +Prefer to use 'not' pattern, when possible (introduced with C# 9.0) + 
+Default is true + +**Example** +``` +var y = o is not C c; +``` +--- + +## Code block Preferences + +### csharp_prefer_braces +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0011#csharp_prefer_braces) + +Prefer curly braces even for one line of code + +**Example** +``` +if (test) { this.Display(); } +``` +--- +### csharp_prefer_simple_using_statement +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0063#csharp_prefer_simple_using_statement) + +Don't prefer to use a simple using statement + +**Example** +``` +using (var a = b) { } +``` +--- + +## File Header Preferences + +### file_header_template +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0073#file_header_template) + +unset or empty string - Do not require file header. + +Default is unset + +**Example** +``` +namespace N2 +{ + class C2 { } +} +``` +--- + +## Naming Rules + +### Non-private static fields are PascalCase +**Example** +``` +public static string MyString = "value"; +protected static string MyString = "value"; +internal static string MyString = "value"; +protected internal static string MyString = "value"; +private protected static string MyString = "value"; +``` +--- +### Constants are PascalCase +**Example** +``` +public const string MyConstant = "value"; +``` +--- +### Static fields are camelCase and start with s_ +**Example** +``` +private static int s_myInt; +``` +--- +### Instance fields are camelCase and start with _ +**Example** +``` +private int _myInt; + +internal string _myString; +``` +--- +### Locals and parameters are camelCase +**Example** +``` +private static string GetText(string path, string filename) +{ + var reader = File.OpenText($"{AppendPathSeparator(path)}{filename}"); + var text = reader.ReadToEnd(); + return text; + + string AppendPathSeparator(string filepath) + { + return filepath.EndsWith(@"\") ? 
filepath : filepath + @"\"; + } +} +``` +--- +### Local functions are PascalCase +**Example** +``` +private static string GetText(string path, string filename) +{ + var reader = File.OpenText($"{AppendPathSeparator(path)}{filename}"); + var text = reader.ReadToEnd(); + return text; + + string AppendPathSeparator(string filepath) + { + return filepath.EndsWith(@"\") ? filepath : filepath + @"\"; + } +} +``` +--- +### By default, name items with PascalCase +**Example** +``` +public void MyMethod() { } +``` +--- + +## Formatting Rules + +### dotnet_sort_system_directives_first +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_sort_system_directives_first) + +Sort System.* using directives alphabetically, and place them before other using directives. + +**Example** +``` +using System.Collections.Generic; +using System.Threading.Tasks; +using Avro; +``` +--- +### dotnet_separate_import_directive_groups +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_separate_import_directive_groups) + +:exclamation: Not defined :exclamation: + +**Example** +``` +// dotnet_separate_import_directive_groups = true +using System.Collections.Generic; +using System.Threading.Tasks; + +using Avro; + +// dotnet_separate_import_directive_groups = false +using System.Collections.Generic; +using System.Threading.Tasks; +using Avro; +``` +--- +### dotnet_style_namespace_match_folder +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/formatting-rules#dotnet_style_namespace_match_folder) + +:exclamation: Not defined :exclamation: + +**Example** +``` +// dotnet_style_namespace_match_folder = true +// file path: Example/Convention/C.cs +using System; + +namespace Example.Convention +{ + class C + { + } +} + +// dotnet_style_namespace_match_folder = false +// file path: Example/Convention/C.cs +using System; + +namespace Example +{ + 
class C + { + } +} +``` +--- + +## Unnecessary Code Rules + +### Simplify name (IDE0001) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0001) +**Example** +``` +using System.IO; +class C +{ + // IDE0001: 'System.IO.FileInfo' can be simplified to 'FileInfo' + System.IO.FileInfo file; + + // Fixed code + FileInfo file; +} +``` +--- +### Simplify member access (IDE0002) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0002) +**Example** +``` +static void M1() { } +static void M2() +{ + // IDE0002: 'C.M1' can be simplified to 'M1' + C.M1(); + + // Fixed code + M1(); +} +``` +--- +### Remove unnecessary cast (IDE0004) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0004) +**Example** +``` +// Code with violations +int v = (int)0; + +// Fixed code +int v = 0; +``` +--- +### Remove unnecessary import (IDE0005) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0005) +**Example** +``` +// Code with violations +using System; +using System.IO; // IDE0005: Using directive is unnecessary +class C +{ + public static void M() + { + Console.WriteLine("Hello"); + } +} + +// Fixed code +using System; +class C +{ + public static void M() + { + Console.WriteLine("Hello"); + } +} +``` +--- +### Remove unreachable code (IDE0035) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0035) +**Example** +``` +// Code with violations +void M() +{ + throw new System.Exception(); + + // IDE0035: Remove unreachable code + int v = 0; +} + +// Fixed code +void M() +{ + throw new System.Exception(); +} +``` +--- +### Remove unused private member (IDE0051) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0051) +**Example** +``` +// Code with violations +class C +{ + // IDE0051: Remove unused private members + private readonly int 
_fieldPrivate; + private int PropertyPrivate => 1; + private int GetNumPrivate() => 1; + + // No IDE0051 + internal readonly int FieldInternal; + private readonly int _fieldPrivateUsed; + public int PropertyPublic => _fieldPrivateUsed; + private int GetNumPrivateUsed() => 1; + internal int GetNumInternal() => GetNumPrivateUsed(); + public int GetNumPublic() => GetNumPrivateUsed(); +} + +// Fixed code +class C +{ + // No IDE0051 + internal readonly int FieldInternal; + private readonly int _fieldPrivateUsed; + public int PropertyPublic => _fieldPrivateUsed; + private int GetNumPrivateUsed() => 1; + internal int GetNumInternal() => GetNumPrivateUsed(); + public int GetNumPublic() => GetNumPrivateUsed(); +} +``` +--- +### Remove unread private member (IDE0052) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0052) +**Example** +``` +class C +{ + // IDE0052: Remove unread private members + private readonly int _field1; + private int _field2; + private int Property { get; set; } + + public C() + { + _field1 = 0; + } + + public void SetMethod() + { + _field2 = 0; + Property = 0; + } +} + +// Fixed code +class C +{ + public C() + { + } + + public void SetMethod() + { + } +} +``` +--- +### csharp_style_unused_value_expression_statement_preference +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0058#csharp_style_unused_value_expression_statement_preference) + +Prefer to assign an unused expression to a discard + +Default is discard_variable + +**Example** +``` +_ = System.Convert.ToInt32("35"); +``` +--- +### csharp_style_unused_value_assignment_preference +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0059#csharp_style_unused_value_assignment_preference) + +Prefer to use a discard when assigning a value that's not used + +Default is discard_variable + +**Example** +``` +int GetCount(Dictionary wordCount, string searchWord) +{ + _ = 
wordCount.TryGetValue(searchWord, out var count); + return count; +} +``` +--- +### dotnet_code_quality_unused_parameters +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0060#dotnet_code_quality_unused_parameters) + +Flag methods with any accessibility that contain unused parameters + +Default is all + +**Example** +``` +public int GetNum1(int unusedParam) { return 1; } +internal int GetNum2(int unusedParam) { return 1; } +private int GetNum3(int unusedParam) { return 1; } +``` +--- +### dotnet_remove_unnecessary_suppression_exclusions +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0079#dotnet_remove_unnecessary_suppression_exclusions) + +`none` enables the rule for all rule IDs and rule categories + +Default is none + +**Example** +``` +using System.Diagnostics.CodeAnalysis; + +class C1 +{ + // 'dotnet_remove_unnecessary_suppression_exclusions = IDE0051' + + // Unnecessary pragma suppression, but not flagged by IDE0079 +#pragma warning disable IDE0051 // IDE0051: Remove unused member + private int UsedMethod() => 0; +#pragma warning restore IDE0051 + + public int PublicMethod() => UsedMethod(); +} +``` +--- +### Remove unnecessary suppression operator (IDE0080) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0080) +**Example** +``` +// Code with violations +if (o !is string) { } + +// Potential fixes: +// 1. +if (o is not string) { } + +// 2. +if (!(o is string)) { } + +// 3.
+if (o is string) { } +``` +--- +### Remove unnecessary equality operator (IDE0100) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0100) +**Example** +``` +// Code with violations +if (x == true) { } +if (M() != false) { } + +// Fixed code +if (x) { } +if (M()) { } +``` +--- +### Remove unnecessary discard (IDE0110) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0110) +**Example** +``` +// Code with violations +switch (o) +{ + case int _: + Console.WriteLine("Value was an int"); + break; + case string _: + Console.WriteLine("Value was a string"); + break; +} + +// Fixed code +switch (o) +{ + case int: + Console.WriteLine("Value was an int"); + break; + case string: + Console.WriteLine("Value was a string"); + break; +} +``` +--- + +## Miscellaneous Rules + +### Remove invalid global 'SuppressMessageAttribute' (IDE0076) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0076) +**Example** +``` +// IDE0076: Invalid target '~F:N.C.F2' - no matching field named 'F2' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F2")] +// IDE0076: Invalid scope 'property' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "property", Target = "~P:N.C.P")] + +// Fixed code +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F")] +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~P:N.C.P")] + +namespace N +{ + class C + { + public int F; + public int P { get; } + } +} +``` +--- +### Avoid legacy format target in global 'SuppressMessageAttribute' (IDE0077) +[Reference](https://docs.microsoft.com/en-us/dotnet/fundamentals/code-analysis/style-rules/ide0077) +**Example** +``` +// IDE0077: Legacy format target 
'N.C.#F' +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "N.C.#F")] + +// Fixed code +[assembly: System.Diagnostics.CodeAnalysis.SuppressMessage("Category", "Id: Title", Scope = "member", Target = "~F:N.C.F")] + +namespace N +{ + class C + { + public int F; + } +} +``` +--- diff --git a/lang/csharp/build.sh b/lang/csharp/build.sh index fefbe28e9ba..2efd4463174 100755 --- a/lang/csharp/build.sh +++ b/lang/csharp/build.sh @@ -35,14 +35,14 @@ do test) dotnet build --configuration Release Avro.sln - # AVRO-2442: Explictly set LANG to work around ICU bug in `dotnet test` - LANG=en_US.UTF-8 dotnet test --configuration Release --no-build \ + # AVRO-2442: Explicitly set LANG to work around ICU bug in `dotnet test` + LANG=en_US.UTF-8 dotnet test --configuration Release --no-build \ --filter "TestCategory!=Interop" Avro.sln ;; perf) pushd ./src/apache/perf/ - dotnet run --configuration Release --framework net5.0 + dotnet run --configuration Release --framework net8.0 ;; dist) @@ -50,18 +50,25 @@ do dotnet pack --configuration Release Avro.sln # add the binary LICENSE and NOTICE to the tarball - mkdir build/ + mkdir -p build/ cp LICENSE NOTICE build/ # add binaries to the tarball - mkdir build/main/ + mkdir -p build/main/ cp -R src/apache/main/bin/Release/* build/main/ - mkdir build/codegen/ + # add codec binaries to the tarball + for codec in Avro.File.Snappy Avro.File.BZip2 Avro.File.XZ Avro.File.Zstandard + do + mkdir -p build/codec/$codec/ + cp -R src/apache/codec/$codec/bin/Release/* build/codec/$codec/ + done + # add codegen binaries to the tarball + mkdir -p build/codegen/ cp -R src/apache/codegen/bin/Release/* build/codegen/ # build the tarball mkdir -p ${ROOT}/dist/csharp - (cd build; tar czf ${ROOT}/../dist/csharp/avro-csharp-${VERSION}.tar.gz main codegen LICENSE NOTICE) + (cd build; tar czf ${ROOT}/../dist/csharp/avro-csharp-${VERSION}.tar.gz main codegen codec LICENSE NOTICE) # build documentation 
doxygen Avro.dox @@ -70,15 +77,16 @@ do ;; interop-data-generate) - dotnet run --project src/apache/test/Avro.test.csproj --framework net5.0 ../../share/test/schemas/interop.avsc ../../build/interop/data + dotnet run --project src/apache/test/Avro.test.csproj --framework net8.0 ../../share/test/schemas/interop.avsc ../../build/interop/data ;; interop-data-test) - LANG=en_US.UTF-8 dotnet test --filter "TestCategory=Interop" --verbosity normal + LANG=en_US.UTF-8 dotnet test --filter "TestCategory=Interop" --logger "console;verbosity=normal;noprogress=true" src/apache/test/Avro.test.csproj ;; clean) - rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf}/{obj,bin} + rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf,benchmark}/{obj,bin} + rm -rf src/apache/codec/Avro.File.{BZip2,Snappy,XZ,Zstandard}{,.Test}/{obj,bin} rm -rf build rm -f TestResult.xml ;; diff --git a/lang/csharp/common.props b/lang/csharp/common.props index 3bae5bff4a8..569102df587 100644 --- a/lang/csharp/common.props +++ b/lang/csharp/common.props @@ -15,6 +15,8 @@ limitations under the License. --> + + $(MSBuildThisFileDirectory)/../../share/VERSION.txt @@ -33,11 +35,21 @@ $(MajorVersion).$(MinorVersion).$(BuildNumber).0 + + + netcoreapp3.1;net5.0;net6.0;net7.0;net8.0 + + netstandard2.0;netstandard2.1 + + $(DefaultExeTargetFrameworks) + + Copyright © 2019 The Apache Software Foundation.
- avro-logo.png + logo.png LICENSE + README.md https://avro.apache.org/ Avro;Apache;Serialization;Binary;Json;Schema https://github.com/apache/avro.git @@ -45,7 +57,33 @@ - + + + + + + + false + true + + + + false + true + + true + false + + + + + + + + + + + diff --git a/lang/csharp/src/apache/benchmark/.gitignore b/lang/csharp/src/apache/benchmark/.gitignore new file mode 100644 index 00000000000..43e05771fa4 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/.gitignore @@ -0,0 +1 @@ +BenchmarkDotNet.Artifacts/ \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj new file mode 100644 index 00000000000..b944de3c2d4 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj @@ -0,0 +1,52 @@ + + + + + + $(DefaultExeTargetFrameworks) + Exe + + + AnyCPU + pdbonly + true + true + true + Release + false + + + + + + $(NoWarn);CS8981 + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/Benchmarks.cs b/lang/csharp/src/apache/benchmark/Benchmarks.cs new file mode 100644 index 00000000000..4c7ec73be88 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Benchmarks.cs @@ -0,0 +1,254 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Collections.Generic; +using System.IO; +using BenchmarkDotNet.Attributes; +using Avro.Generic; +using Avro.IO; +using Avro.Specific; + +namespace Avro.Benchmark +{ + public class Benchmarks + { + private const int _numberOfRecordsInAvro = 100; + + private readonly string _schemaStrSmall; + private readonly string _schemaStrBig; + + private readonly RecordSchema _schemaSmall; + private readonly RecordSchema _schemaBig; + private readonly RecordSchema _schemaAddress; + + private readonly byte[] _avroGenericSmall; + private readonly byte[] _avroGenericBig; + private readonly byte[] _avroSpecificSmall; + private readonly byte[] _avroSpecificBig; + + public Benchmarks() + { + _schemaStrSmall = System.IO.File.ReadAllText("schema/small.avsc"); + _schemaStrBig = System.IO.File.ReadAllText("schema/big.avsc"); + + _schemaSmall = (RecordSchema)Schema.Parse(_schemaStrSmall); + _schemaBig = (RecordSchema)Schema.Parse(_schemaStrBig); + _schemaAddress = (RecordSchema)_schemaBig["address"].Schema; + + // Create avro for reading benchmarking + _avroGenericSmall = GenericRecordsToAvro(CreateGenericRecordSmall()); + _avroGenericBig = GenericRecordsToAvro(CreateGenericRecordBig()); + + _avroSpecificSmall = SpecificRecordsToAvro(CreateSpecificRecordSmall()); + _avroSpecificBig = SpecificRecordsToAvro(CreateSpecificRecordBig()); + } + + private byte[] GenericRecordsToAvro(GenericRecord record) + { + using (MemoryStream outputStream = new MemoryStream()) + { + GenericDatumWriter writer = new GenericDatumWriter(record.Schema); + BinaryEncoder encoder = new BinaryEncoder(outputStream); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + writer.Write(record, encoder); + } + + encoder.Flush(); + + return outputStream.ToArray(); + } + } + + private IList AvroToGenericRecordsToAvro(byte[] avro, RecordSchema schema) + { + using (MemoryStream inputStream = new 
MemoryStream(avro)) + { + GenericDatumReader reader = new GenericDatumReader(schema, schema); + BinaryDecoder decoder = new BinaryDecoder(inputStream); + List records = new List(); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + GenericRecord record = reader.Read(null, decoder); + if (record == null) + break; + records.Add(record); + } + + return records; + } + } + + private byte[] SpecificRecordsToAvro(T record) where T : ISpecificRecord + { + using (MemoryStream outputStream = new MemoryStream()) + { + SpecificDatumWriter writer = new SpecificDatumWriter(record.Schema); + BinaryEncoder encoder = new BinaryEncoder(outputStream); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + writer.Write(record, encoder); + } + + encoder.Flush(); + + return outputStream.ToArray(); + } + } + + private IList AvroToSpecificRecords(byte[] avro, RecordSchema schema) where T : ISpecificRecord + { + using (MemoryStream inputStream = new MemoryStream(avro)) + { + SpecificDatumReader reader = new SpecificDatumReader(schema, schema); + BinaryDecoder decoder = new BinaryDecoder(inputStream); + List records = new List(); + + for (int i = 0; i < _numberOfRecordsInAvro; i++) + { + T record = reader.Read(default, decoder); ; + if (record == null) + break; + records.Add(record); + } + + return records; + } + } + + [Benchmark] + public void ParseSchemaSmall() + { + Schema.Parse(_schemaStrSmall); + } + + [Benchmark] + public void ParseSchemaBig() + { + Schema.Parse(_schemaStrBig); + } + + [Benchmark] + public GenericRecord CreateGenericRecordSmall() + { + GenericRecord record = new GenericRecord(_schemaSmall); + record.Add("field", "foo"); + + return record; + } + + [Benchmark] + public GenericRecord CreateGenericRecordBig() + { + GenericRecord address = new GenericRecord(_schemaAddress); + address.Add("street", "street"); + address.Add("city", "city"); + address.Add("state_prov", "state_prov"); + address.Add("country", "country"); + address.Add("zip", "zip"); + + 
GenericRecord record = new GenericRecord(_schemaBig); + record.Add("username", "username"); + record.Add("age", 10); + record.Add("phone", "000000000"); + record.Add("housenum", "0000"); + record.Add("address", address); + + return record; + } + + [Benchmark] + public ISpecificRecord CreateSpecificRecordSmall() + { + return new org.apache.avro.benchmark.small.test() + { + field = "foo" + }; + } + + [Benchmark] + public ISpecificRecord CreateSpecificRecordBig() + { + return new org.apache.avro.benchmark.big.userInfo() + { + username = "username", + age = 10, + phone = "000000000", + housenum = "0000", + address = new org.apache.avro.benchmark.big.mailing_address() + { + street = "street", + city = "city", + state_prov = "state_prov", + country = "country", + zip = "zip" + } + }; + } + + [Benchmark] + public void GenericRecordsToAvroSmall() + { + GenericRecordsToAvro(CreateGenericRecordSmall()); + } + + [Benchmark] + public void GenericRecordsToAvroBig() + { + GenericRecordsToAvro(CreateGenericRecordBig()); + } + + [Benchmark] + public void AvroToGenericRecordsSmall() + { + AvroToGenericRecordsToAvro(_avroGenericSmall, _schemaSmall); + } + + [Benchmark] + public void AvroToGenericRecordsBig() + { + AvroToGenericRecordsToAvro(_avroGenericBig, _schemaBig); + } + + [Benchmark] + public void SpecificRecordsToAvroSmall() + { + SpecificRecordsToAvro(CreateSpecificRecordSmall()); + } + + [Benchmark] + public void SpecificRecordsToAvroBig() + { + SpecificRecordsToAvro(CreateSpecificRecordBig()); + } + + [Benchmark] + public void AvroToSpecificRecordsSmall() + { + AvroToSpecificRecords(_avroSpecificSmall, _schemaSmall); + } + + [Benchmark] + public void AvroToSpecificRecordsBig() + { + AvroToSpecificRecords(_avroSpecificBig, _schemaBig); + } + } +} diff --git a/lang/csharp/src/apache/benchmark/Program.cs b/lang/csharp/src/apache/benchmark/Program.cs new file mode 100644 index 00000000000..4359f378d85 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/Program.cs @@ -0,0 
+1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using BenchmarkDotNet.Running; + +namespace Avro.Benchmark +{ + public class Program + { + // dotnet run -c Release -f net8.0 + // dotnet run -c Release -f net8.0 --runtimes netcoreapp3.1 net5.0 net6.0 net7.0 net8.0 + public static void Main(string[] args) + { + BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs new file mode 100644 index 00000000000..10f003deb54 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/mailing_address.cs @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.big +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class mailing_address : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse(@"{""type"":""record"",""name"":""mailing_address"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""street"",""default"":""NONE"",""type"":""string""},{""name"":""city"",""default"":""NONE"",""type"":""string""},{""name"":""state_prov"",""default"":""NONE"",""type"":""string""},{""name"":""country"",""default"":""NONE"",""type"":""string""},{""name"":""zip"",""default"":""NONE"",""type"":""string""}]}"); + private string _street; + private string _city; + private string _state_prov; + private string _country; + private string _zip; + public virtual Schema Schema + { + get + { + return mailing_address._SCHEMA; + } + } + public string street + { + get + { + return this._street; + } + set + { + this._street = value; + } + } + public string city + { + get + { + return this._city; + } + set + { + this._city = value; + } + } + public string state_prov + { + get + { + return this._state_prov; + } + set + { + this._state_prov = value; + } + } + public string 
country + { + get + { + return this._country; + } + set + { + this._country = value; + } + } + public string zip + { + get + { + return this._zip; + } + set + { + this._zip = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.street; + case 1: return this.city; + case 2: return this.state_prov; + case 3: return this.country; + case 4: return this.zip; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.street = (System.String)fieldValue; break; + case 1: this.city = (System.String)fieldValue; break; + case 2: this.state_prov = (System.String)fieldValue; break; + case 3: this.country = (System.String)fieldValue; break; + case 4: this.zip = (System.String)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs new file mode 100644 index 00000000000..4ddbe0ac071 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/big/userInfo.cs @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.big +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class userInfo : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse(@"{""type"":""record"",""name"":""userInfo"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""username"",""default"":""NONE"",""type"":""string""},{""name"":""age"",""default"":-1,""type"":""int""},{""name"":""phone"",""default"":""NONE"",""type"":""string""},{""name"":""housenum"",""default"":""NONE"",""type"":""string""},{""name"":""address"",""default"":{},""type"":{""type"":""record"",""name"":""mailing_address"",""namespace"":""org.apache.avro.benchmark.big"",""fields"":[{""name"":""street"",""default"":""NONE"",""type"":""string""},{""name"":""city"",""default"":""NONE"",""type"":""string""},{""name"":""state_prov"",""default"":""NONE"",""type"":""string""},{""name"":""country"",""default"":""NONE"",""type"":""string""},{""name"":""zip"",""default"":""NONE"",""type"":""string""}]}}]}"); + private string _username; + private int _age; + private string _phone; + private string _housenum; + private org.apache.avro.benchmark.big.mailing_address _address; + 
public virtual Schema Schema + { + get + { + return userInfo._SCHEMA; + } + } + public string username + { + get + { + return this._username; + } + set + { + this._username = value; + } + } + public int age + { + get + { + return this._age; + } + set + { + this._age = value; + } + } + public string phone + { + get + { + return this._phone; + } + set + { + this._phone = value; + } + } + public string housenum + { + get + { + return this._housenum; + } + set + { + this._housenum = value; + } + } + public org.apache.avro.benchmark.big.mailing_address address + { + get + { + return this._address; + } + set + { + this._address = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.username; + case 1: return this.age; + case 2: return this.phone; + case 3: return this.housenum; + case 4: return this.address; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.username = (System.String)fieldValue; break; + case 1: this.age = (System.Int32)fieldValue; break; + case 2: this.phone = (System.String)fieldValue; break; + case 3: this.housenum = (System.String)fieldValue; break; + case 4: this.address = (org.apache.avro.benchmark.big.mailing_address)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs new file mode 100644 index 00000000000..b0553d1fe16 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/org/apache/avro/benchmark/small/test.cs @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace org.apache.avro.benchmark.small +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class test : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"test\",\"namespace\":\"org.apache.avro.benchmark.small\",\"fie" + + "lds\":[{\"name\":\"field\",\"type\":\"string\"}]}"); + private string _field; + public virtual Schema Schema + { + get + { + return test._SCHEMA; + } + } + public string field + { + get + { + return this._field; + } + set + { + this._field = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.field; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.field = (System.String)fieldValue; break; + default: throw new 
AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/benchmark/schema/big.avsc b/lang/csharp/src/apache/benchmark/schema/big.avsc new file mode 100644 index 00000000000..d9075306a72 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/schema/big.avsc @@ -0,0 +1,62 @@ +{ + "namespace": "org.apache.avro.benchmark.big", + "type": "record", + "name": "userInfo", + "fields": [ + { + "default": "NONE", + "type": "string", + "name": "username" + }, + { + "default": -1, + "type": "int", + "name": "age" + }, + { + "default": "NONE", + "type": "string", + "name": "phone" + }, + { + "default": "NONE", + "type": "string", + "name": "housenum" + }, + { + "default": {}, + "type": { + "fields": [ + { + "default": "NONE", + "type": "string", + "name": "street" + }, + { + "default": "NONE", + "type": "string", + "name": "city" + }, + { + "default": "NONE", + "type": "string", + "name": "state_prov" + }, + { + "default": "NONE", + "type": "string", + "name": "country" + }, + { + "default": "NONE", + "type": "string", + "name": "zip" + } + ], + "type": "record", + "name": "mailing_address" + }, + "name": "address" + } + ] +} \ No newline at end of file diff --git a/lang/csharp/src/apache/benchmark/schema/small.avsc b/lang/csharp/src/apache/benchmark/schema/small.avsc new file mode 100644 index 00000000000..ee320705c48 --- /dev/null +++ b/lang/csharp/src/apache/benchmark/schema/small.avsc @@ -0,0 +1,13 @@ +{ + "namespace": "org.apache.avro.benchmark.small", + "type": "record", + "name": "test", + "fields": [ + { + "type": { + "type": "string" + }, + "name": "field" + } + ] +} \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj new file mode 100644 index 00000000000..ace1db23c2c --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/Avro.File.BZip2.Test.csproj @@ -0,0 
+1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs new file mode 100644 index 00000000000..821cb4b7233 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2.Test/BZip2Tests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.BZip2.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + BZip2Codec codec = new BZip2Codec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] BZip2Level level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + BZip2Codec codec = new BZip2Codec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] BZip2Level level) + { + BZip2Codec codec = new BZip2Codec(level); + + Assert.AreEqual("bzip2", codec.GetName()); + Assert.AreEqual($"bzip2-{(int)level}", codec.ToString()); + } + + [Test] + public void DefaultLevel() + { + BZip2Codec codec = new BZip2Codec(); + + Assert.AreEqual(BZip2Level.Default, codec.Level); + } + + [Test] + public void Equal([Values] BZip2Level level) + { + BZip2Codec codec1 = new BZip2Codec(level); + BZip2Codec codec2 = new BZip2Codec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public 
void HashCode([Values] BZip2Level level) + { + BZip2Codec codec = new BZip2Codec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj b/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj new file mode 100644 index 00000000000..8dac7c9f1ed --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2/Avro.File.BZip2.csproj @@ -0,0 +1,47 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.BZip2 + true + ../../../../Avro.snk + + + + + Apache.Avro.File.BZip2 + BZip2 compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs b/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs new file mode 100644 index 00000000000..354c39cc89e --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.BZip2/BZip2.cs @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; + +namespace Avro.File.BZip2 +{ + ///

+ /// BZip2 Compression level + /// + public enum BZip2Level + { + Default = 9, + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9 + } + + /// + /// Implements BZip2 compression and decompression. + /// + public class BZip2Codec : Codec + { + public BZip2Level Level {get; private set;} + + public BZip2Codec() + : this(BZip2Level.Default) + { + } + + public BZip2Codec(BZip2Level level) + { + Level = level; + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream inputStream = new MemoryStream(uncompressedData)) + using (MemoryStream outputStream = new MemoryStream()) + { + Compress(inputStream, outputStream); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + outputStream.SetLength(0); + ICSharpCode.SharpZipLib.BZip2.BZip2.Compress(inputStream, outputStream, false, (int)Level); + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + using (MemoryStream inputStream = new MemoryStream(compressedData, 0, blockLength)) + using (MemoryStream outputStream = new MemoryStream()) + { + ICSharpCode.SharpZipLib.BZip2.BZip2.Decompress(inputStream, outputStream, false); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.BZip2Codec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}-{(int)Level}"; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj new file mode 100644 index 
00000000000..ab325b0fa64 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/Avro.File.Snappy.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs new file mode 100644 index 00000000000..148d493aae2 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy.Test/SnappyTests.cs @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.Snappy.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [TestCaseSource(nameof(_testLengths))] + public void CompressDecompress(int length) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + SnappyCodec codec = new SnappyCodec(); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [TestCaseSource(nameof(_testLengths))] + public void CompressDecompressStream(int length) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + SnappyCodec codec = new SnappyCodec(); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName() + { + SnappyCodec codec = new SnappyCodec(); + + Assert.AreEqual("snappy", codec.GetName()); + Assert.AreEqual("snappy", codec.ToString()); + } + + [Test] + public void Equal() + { + SnappyCodec codec1 = new SnappyCodec(); + SnappyCodec codec2 = new SnappyCodec(); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode() + { + SnappyCodec codec = new SnappyCodec(); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj b/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj new file mode 100644 index 
00000000000..71bc7968827 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Avro.File.Snappy.csproj @@ -0,0 +1,47 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.Snappy + true + ../../../../Avro.snk + + + + + Apache.Avro.File.Snappy + Snappy compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs new file mode 100644 index 00000000000..1b832e11e43 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Crc32.cs @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; + +namespace Avro.File.Snappy +{ + /// + /// Implements a 32-bit CRC hash algorithm. 
+ /// + internal static class Crc32 + { + private const uint DefaultPolynomial = 0xedb88320u; + private const uint DefaultSeed = 0xffffffffu; + + private static uint[] defaultTable; + + public static uint Compute(byte[] buffer) + { + return Compute(DefaultPolynomial, DefaultSeed, buffer); + } + + public static uint Compute(uint polynomial, uint seed, ReadOnlySpan buffer) + { + return ~CalculateHash(InitializeTable(polynomial), seed, buffer); + } + + private static uint[] InitializeTable(uint polynomial) + { + if (polynomial == DefaultPolynomial && defaultTable != null) + return defaultTable; + + uint[] createTable = new uint[256]; + for (int i = 0; i < 256; i++) + { + uint entry = (uint)i; + for (int j = 0; j < 8; j++) + if ((entry & 1) == 1) + entry = (entry >> 1) ^ polynomial; + else + entry >>= 1; + createTable[i] = entry; + } + + if (polynomial == DefaultPolynomial) + defaultTable = createTable; + + return createTable; + } + + private static uint CalculateHash(uint[] table, uint seed, ReadOnlySpan buffer) + { + uint hash = seed; + for (int i = 0; i < buffer.Length; i++) + hash = (hash >> 8) ^ table[buffer[i] ^ hash & 0xff]; + return hash; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs b/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs new file mode 100644 index 00000000000..a73f8209c0d --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Snappy/Snappy.cs @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.IO; + +namespace Avro.File.Snappy +{ + /// + /// Implements Snappy compression and decompression. + /// + public class SnappyCodec : Codec + { + /// + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream outputStream = new MemoryStream()) + { + byte[] compressedData = IronSnappy.Snappy.Encode(uncompressedData); + outputStream.Write(compressedData, 0, compressedData.Length); + + var crc = ByteSwap(Crc32.Compute(uncompressedData)); + outputStream.Write(BitConverter.GetBytes(crc), 0, 4); + + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + + byte[] uncompressedData = inputStream.ToArray(); + byte[] compressedData = IronSnappy.Snappy.Encode(uncompressedData); + + outputStream.SetLength(0); + + outputStream.Write(compressedData, 0, compressedData.Length); + + var crc = ByteSwap(Crc32.Compute(uncompressedData)); + outputStream.Write(BitConverter.GetBytes(crc), 0, 4); + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + byte[] uncompressedData = IronSnappy.Snappy.Decode(compressedData.AsSpan(0, blockLength - 4)); + + return ByteSwap(Crc32.Compute(uncompressedData)) == BitConverter.ToUInt32(compressedData, blockLength - 4) ? 
+ uncompressedData : + throw new IOException("Checksum failure"); + } + + private static uint ByteSwap(uint word) + { + return ((word >> 24) & 0x000000FF) | ((word >> 8) & 0x0000FF00) | ((word << 8) & 0x00FF0000) | ((word << 24) & 0xFF000000); + } + + /// + public override string GetName() + { + return DataFileConstants.SnappyCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj new file mode 100644 index 00000000000..354c6a51c26 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/Avro.File.XZ.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs new file mode 100644 index 00000000000..27f38dc2bf2 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ.Test/XZTests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.XZ.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] XZLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + XZCodec codec = new XZCodec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] XZLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + XZCodec codec = new XZCodec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] XZLevel level) + { + XZCodec codec = new XZCodec(level); + + Assert.AreEqual("xz", codec.GetName()); + Assert.AreEqual($"xz-{(int)level}", codec.ToString()); + } + + [Test] + public void DefaultLevel() + { + XZCodec codec = new XZCodec(); + + Assert.AreEqual(XZLevel.Default, codec.Level); + } + + [Test] + public void Equal([Values] XZLevel level) + { + XZCodec codec1 = new XZCodec(level); + XZCodec codec2 = new XZCodec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + 
Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode([Values] XZLevel level) + { + XZCodec codec = new XZCodec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj b/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj new file mode 100644 index 00000000000..034bb99dcce --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ/Avro.File.XZ.csproj @@ -0,0 +1,48 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.XZ + true + ../../../../Avro.snk + CS8002 + + + + + Apache.Avro.File.XZ + XZ compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs b/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs new file mode 100644 index 00000000000..84d3742f6cc --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.XZ/XZ.cs @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.InteropServices; +using Joveler.Compression.XZ; + +namespace Avro.File.XZ +{ + /// + /// XZ Compression level + /// + public enum XZLevel + { + Level0 = 0, + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9, + Default = Level6, + Minimum = Level0, + Maximum = Level9 + } + + /// + /// Implements XZ compression and decompression. + /// + public class XZCodec : Codec + { + public XZLevel Level {get; private set;} + public bool Extreme {get; private set;} + public int Threads {get; private set;} + + public XZCodec() + : this(XZLevel.Default) + { + } + + public XZCodec(XZLevel level) + : this(level, false) + { + } + + public XZCodec(XZLevel level, bool extreme) + : this(level, extreme, 0) + { + } + + public XZCodec(XZLevel level, bool extreme, int numOfThreads) + { + Level = level; + Extreme = extreme; + Threads = numOfThreads; + } + + static XZCodec() + { + Initialize(); // One time initialization + } + + private static void Initialize() + { + string arch = RuntimeInformation.OSArchitecture.ToString().ToLower(); + string foundLibPath = string.Empty; + string libPath; + string rid; + string libName; + + // Determine Platform (needed for proper Runtime ID) + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + rid = $"win-{arch}"; + libName = "liblzma.dll"; + } + else + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + rid = $"linux-{arch}"; + libName = "liblzma.so"; + } + else + if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX)) + { + rid = $"osx-{arch}"; + libName = "liblzma.dylib"; + } + else + { + // Unknown platform + throw new PlatformNotSupportedException("Unknown runtime platform!"); + } + + // Try to search for the lib in the working directory and the application binary directory + foreach (var relPath in new List { ".", AppDomain.CurrentDomain.BaseDirectory }) + { + // Try 
first the lib name directly + libPath = Path.Combine(relPath, libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + + // Try the runtimes/RID/native location + // This is the default location for netstandard native libs + libPath = Path.Combine(relPath, "runtimes", rid, "native", libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + } + + // Try the OS search path if nothing is found yet + if (string.IsNullOrEmpty(foundLibPath)) + { + var values = Environment.GetEnvironmentVariable("PATH"); + foreach (string path in values.Split(Path.PathSeparator)) + { + libPath = Path.Combine(path, libName); + if (System.IO.File.Exists(libPath)) + { + foundLibPath = libPath; + break; + } + } + } + + if (string.IsNullOrEmpty(foundLibPath)) + throw new PlatformNotSupportedException($"Unable to find {libName}"); + + // Initialize XZ library + XZInit.GlobalInit(foundLibPath); + } + + public static void Uninitialize() + { + XZInit.GlobalCleanup(); + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (MemoryStream inputStream = new MemoryStream(uncompressedData)) + using (MemoryStream outputStream = new MemoryStream()) + { + Compress(inputStream, outputStream); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + XZCompressOptions compOpts = new XZCompressOptions + { + Level = (LzmaCompLevel)(int)Level, + ExtremeFlag = Extreme, + LeaveOpen = true + }; + + XZThreadedCompressOptions threadOpts = new XZThreadedCompressOptions + { + Threads = Threads, + }; + + inputStream.Position = 0; + outputStream.SetLength(0); + + using (XZStream xzStream = new XZStream(outputStream, compOpts, threadOpts)) + { + inputStream.CopyTo(xzStream); + xzStream.Flush(); + } + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + XZDecompressOptions decompOpts = new 
XZDecompressOptions(); + + using (MemoryStream inputStream = new MemoryStream(compressedData, 0, blockLength)) + using (MemoryStream outputStream = new MemoryStream()) + using (XZStream xzStream = new XZStream(inputStream, decompOpts)) + { + xzStream.CopyTo(outputStream); + xzStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.XZCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}-{(int)Level}"; + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj new file mode 100644 index 00000000000..651fabded68 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/Avro.File.Zstandard.Test.csproj @@ -0,0 +1,42 @@ + + + + + + + $(DefaultUnitTestTargetFrameworks) + false + + + + true + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs new file mode 100644 index 00000000000..e360ee576c8 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard.Test/ZstandardTests.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.Linq; +using NUnit.Framework; + +namespace Avro.File.Zstandard.Test +{ + public class Tests + { + private static readonly int[] _testLengths = new int[] { 0, 1000, 64 * 1024, 100000 }; + + [Test, Combinatorial] + public void CompressDecompress([ValueSource(nameof(_testLengths))] int length, [Values] ZstandardLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + ZstandardCodec codec = new ZstandardCodec(level); + + byte[] compressed = codec.Compress(data); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + + [Test, Combinatorial] + public void CompressDecompressStream([ValueSource(nameof(_testLengths))] int length, [Values] ZstandardLevel level) + { + byte[] data = Enumerable.Range(0, length).Select(x => (byte)x).ToArray(); + + ZstandardCodec codec = new ZstandardCodec(level); + + using (MemoryStream inputStream = new MemoryStream(data)) + using (MemoryStream outputStream = new MemoryStream()) + { + codec.Compress(inputStream, outputStream); + + byte[] compressed = outputStream.ToArray(); + byte[] uncompressed = codec.Decompress(compressed, compressed.Length); + + Assert.IsTrue(Enumerable.SequenceEqual(data, uncompressed)); + } + } + + [Test] + public void ToStringAndName([Values] ZstandardLevel level) + { + ZstandardCodec codec = new ZstandardCodec(level); + + Assert.AreEqual("zstandard", codec.GetName()); + Assert.AreEqual($"zstandard[{(int)level}]", codec.ToString()); + } + + 
[Test] + public void DefaultLevel() + { + ZstandardCodec codec = new ZstandardCodec(); + + Assert.AreEqual(ZstandardLevel.Default, codec.Level); + } + + [Test] + public void Equal([Values] ZstandardLevel level) + { + ZstandardCodec codec1 = new ZstandardCodec(level); + ZstandardCodec codec2 = new ZstandardCodec(level); + + Assert.IsTrue(codec1.Equals(codec1)); + Assert.IsTrue(codec2.Equals(codec2)); + Assert.IsTrue(codec1.Equals(codec2)); + Assert.IsTrue(codec2.Equals(codec1)); + } + + [Test] + public void HashCode([Values] ZstandardLevel level) + { + ZstandardCodec codec = new ZstandardCodec(level); + + Assert.AreNotEqual(0, codec.GetHashCode()); + } + } +} diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj new file mode 100644 index 00000000000..17f9f9f00f5 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Avro.File.Zstandard.csproj @@ -0,0 +1,48 @@ + + + + + + + $(DefaultLibraryTargetFrameworks) + Avro.File.Zstandard + true + ../../../../Avro.snk + CS8002 + + + + + Apache.Avro.File.Zstandard + Zstandard compression library for Apache.Avro + + + + true + + + + + + + + + + + + diff --git a/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs new file mode 100644 index 00000000000..5adfb441799 --- /dev/null +++ b/lang/csharp/src/apache/codec/Avro.File.Zstandard/Zstandard.cs @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.IO; +using System.IO.Compression; +using Zstandard.Net; + +namespace Avro.File.Zstandard +{ + /// + /// Zstandard Compression level + /// + public enum ZstandardLevel + { + Level1 = 1, + Level2 = 2, + Level3 = 3, + Level4 = 4, + Level5 = 5, + Level6 = 6, + Level7 = 7, + Level8 = 8, + Level9 = 9, + Level10 = 10, + Level11 = 11, + Level12 = 12, + Level13 = 13, + Level14 = 14, + Level15 = 15, + Level16 = 16, + Level17 = 17, + Level18 = 18, + Level19 = 19, + Default = Level3, + Minimum = Level1, + Maximum = Level19 + } + + /// + /// Implements Zstandard compression and decompression. 
+ /// + public class ZstandardCodec : Codec + { + public ZstandardLevel Level {get; private set;} + + public ZstandardCodec() + : this(ZstandardLevel.Default) + { + } + + public ZstandardCodec(ZstandardLevel level) + { + Level = level; + } + + /// + public override byte[] Compress(byte[] uncompressedData) + { + using (var outputStream = new MemoryStream()) + using (var compressionStream = new ZstandardStream(outputStream, CompressionMode.Compress)) + { + compressionStream.CompressionLevel = (int)Level; + compressionStream.Write(uncompressedData, 0, uncompressedData.Length); + compressionStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + inputStream.Position = 0; + outputStream.SetLength(0); + + using (var compressionStream = new ZstandardStream(outputStream, CompressionMode.Compress, true)) + { + compressionStream.CompressionLevel = (int)Level; + inputStream.CopyTo(compressionStream); + compressionStream.Flush(); + } + } + + /// + public override byte[] Decompress(byte[] compressedData, int blockLength) + { + using (var memoryStream = new MemoryStream(compressedData, 0, blockLength)) + using (var outputStream = new MemoryStream()) + using (var compressionStream = new ZstandardStream(memoryStream, CompressionMode.Decompress)) + { + compressionStream.CopyTo(outputStream); + compressionStream.Flush(); + return outputStream.ToArray(); + } + } + + /// + public override string GetName() + { + return DataFileConstants.ZstandardCodec; + } + + /// + public override bool Equals(object other) + { + return this == other || GetType().Name == other.GetType().Name; + } + + /// + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + /// + public override string ToString() + { + return $"{GetName()}[{(int)Level}]"; + } + } +} diff --git a/lang/csharp/src/apache/codegen/Avro.codegen.csproj b/lang/csharp/src/apache/codegen/Avro.codegen.csproj index 
055a20f781f..94aa8123119 100644 --- a/lang/csharp/src/apache/codegen/Avro.codegen.csproj +++ b/lang/csharp/src/apache/codegen/Avro.codegen.csproj @@ -17,20 +17,18 @@ - Exe - netcoreapp2.1;netcoreapp3.1;net5.0 + $(DefaultExeTargetFrameworks) avrogen Avro.codegen - false true ..\..\..\Avro.snk @@ -51,20 +49,19 @@ + + + Major + + true - - - - - - - - - diff --git a/lang/csharp/src/apache/codegen/AvroGen.cs b/lang/csharp/src/apache/codegen/AvroGen.cs index 5f4ffd24db8..3b07ca59fdc 100644 --- a/lang/csharp/src/apache/codegen/AvroGen.cs +++ b/lang/csharp/src/apache/codegen/AvroGen.cs @@ -1,4 +1,4 @@ -īģŋ/** +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,25 +17,43 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; +using System.Reflection; namespace Avro { - class AvroGen + public class AvroGenTool { - static int Main(string[] args) + public static int Main(string[] args) { - // Print usage if no arguments provided or help requested - if (args.Length == 0 || args[0] == "-h" || args[0] == "--help") + // Print usage if no arguments provided + if (args.Length == 0) { Usage(); return 1; } + // Print usage if help requested + if (args.Contains("-h") || args.Contains("--help")) + { + Usage(); + return 0; + } + + if (args.Contains("--version") || args.Contains("-V")) + { + // Print version information + // Note: Use InformationalVersion attribute + // It is capable to include semver prerelease information label (if prerelease), e.g. 1.x.y-beta.z + Console.WriteLine(typeof(AvroGenTool).Assembly.GetCustomAttribute().InformationalVersion); + return 0; + } + // Parse command line arguments bool? 
isProtocol = null; string inputFile = null; string outputDir = null; + bool skipDirectoriesCreation = false; var namespaceMapping = new Dictionary(); for (int i = 0; i < args.Length; ++i) { @@ -82,6 +100,10 @@ static int Main(string[] args) namespaceMapping[parts[0]] = parts[1]; } + else if (args[i] == "--skip-directories") + { + skipDirectoriesCreation = true; + } else if (outputDir == null) { outputDir = args[i]; @@ -116,7 +138,7 @@ static int Main(string[] args) else if (isProtocol.Value) rc = GenProtocol(inputFile, outputDir, namespaceMapping); else - rc = GenSchema(inputFile, outputDir, namespaceMapping); + rc = GenSchema(inputFile, outputDir, namespaceMapping, skipDirectoriesCreation); return rc; } @@ -128,26 +150,24 @@ static void Usage() " avrogen -p [--namespace ]\n" + " avrogen -s [--namespace ]\n\n" + "Options:\n" + - " -h --help Show this screen.\n" + - " --namespace Map an Avro schema/protocol namespace to a C# namespace.\n" + - " The format is \"my.avro.namespace:my.csharp.namespace\".\n" + - " May be specified multiple times to map multiple namespaces.\n", + " -h --help Show this screen.\n" + + " -V --version Show version.\n" + + " --namespace Map an Avro schema/protocol namespace to a C# namespace.\n" + + " The format is \"my.avro.namespace:my.csharp.namespace\".\n" + + " May be specified multiple times to map multiple namespaces.\n" + + " --skip-directories Skip creation of namespace directories. 
It will generate classes right inside output directory\n", AppDomain.CurrentDomain.FriendlyName); - return; } - static int GenProtocol(string infile, string outdir, + + public static int GenProtocol(string infile, string outdir, IEnumerable> namespaceMapping) { try { string text = System.IO.File.ReadAllText(infile); - Protocol protocol = Protocol.Parse(text); CodeGen codegen = new CodeGen(); - codegen.AddProtocol(protocol); - - foreach (var entry in namespaceMapping) - codegen.NamespaceMapping[entry.Key] = entry.Value; + codegen.AddProtocol(text, namespaceMapping); codegen.GenerateCode(); codegen.WriteTypes(outdir); @@ -160,22 +180,18 @@ static int GenProtocol(string infile, string outdir, return 0; } - static int GenSchema(string infile, string outdir, - IEnumerable> namespaceMapping) + + public static int GenSchema(string infile, string outdir, + IEnumerable> namespaceMapping, bool skipDirectories) { try { string text = System.IO.File.ReadAllText(infile); - Schema schema = Schema.Parse(text); - CodeGen codegen = new CodeGen(); - codegen.AddSchema(schema); - - foreach (var entry in namespaceMapping) - codegen.NamespaceMapping[entry.Key] = entry.Value; + codegen.AddSchema(text, namespaceMapping); codegen.GenerateCode(); - codegen.WriteTypes(outdir); + codegen.WriteTypes(outdir, skipDirectories); } catch (Exception ex) { diff --git a/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj b/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj index 47eeba2e5bd..dfa5faa4cfe 100644 --- a/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj +++ b/lang/csharp/src/apache/ipc.test/Avro.ipc.test.csproj @@ -16,8 +16,11 @@ --> + + + - net40 + $(DefaultUnitTestTargetFrameworks) Avro.ipc.test Avro.ipc.test false @@ -30,23 +33,10 @@ - - 3.10.1 - - - 3.10.0 - - - 3.9.0 - - - - - - - - - + + + + diff --git a/lang/csharp/src/apache/ipc/Avro.ipc.csproj b/lang/csharp/src/apache/ipc/Avro.ipc.csproj index b72fe9fe589..8e082b67c76 100644 --- a/lang/csharp/src/apache/ipc/Avro.ipc.csproj 
+++ b/lang/csharp/src/apache/ipc/Avro.ipc.csproj @@ -16,8 +16,11 @@ --> + + + - net40;netstandard2.0 + $(DefaultLibraryTargetFrameworks) Avro.ipc Avro.ipc false @@ -27,7 +30,7 @@ - + diff --git a/lang/csharp/src/apache/ipc/Responder.cs b/lang/csharp/src/apache/ipc/Responder.cs index 875977462d5..10c40670a05 100644 --- a/lang/csharp/src/apache/ipc/Responder.cs +++ b/lang/csharp/src/apache/ipc/Responder.cs @@ -181,14 +181,14 @@ public IList Respond(IList buffers, WriteResponse(m.Response, response, output); else { - try - { - WriteError(m.SupportedErrors, error, output); - } - catch (Exception) - { - // Presumably no match on the exception, throw the original - throw error; + try + { + WriteError(m.SupportedErrors, error, output); + } + catch (Exception) + { + // Presumably no match on the exception, throw the original + throw error; } } } diff --git a/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs b/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs index b14ceff4fed..4ef8916d9b5 100644 --- a/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs +++ b/lang/csharp/src/apache/ipc/Specific/SpecificRequestor.cs @@ -1,4 +1,4 @@ -īģŋ/** +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -87,8 +87,8 @@ public override object ReadResponse(Schema writer, Schema reader, Decoder decode public override Exception ReadError(Schema writer, Schema reader, Decoder decoder) { - var response = new SpecificReader(writer, reader).Read(null, decoder); - + var response = new SpecificReader(writer, reader).Read(null, decoder); + var error = response as Exception; if(error != null) return error; diff --git a/lang/csharp/src/apache/main/AssemblyInfo.cs b/lang/csharp/src/apache/main/AssemblyInfo.cs new file mode 100644 index 00000000000..53eacc1f9df --- /dev/null +++ b/lang/csharp/src/apache/main/AssemblyInfo.cs @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +using System.Runtime.CompilerServices; + +[assembly: InternalsVisibleTo("Avro.test, PublicKey=00240000048000009400000006020000002400005253413100040000010001001145636d1b96168c2781abfd60478f45d010fe83dd0f318404cbf67252bca8cd827f24648d47ff682f35e60307c05d3cd89f0b063729cf8d2ebe6510b9e7d295dec6707ec91719d859458981f7ca1cbbea79b702b2fb64d1dbf0881887315345b70fa50fcf91b59e6a937c8d23919d409ee2f1f234cc4c8dbf5a29d3d670f3c9")] diff --git a/lang/csharp/src/apache/main/Avro.main.csproj b/lang/csharp/src/apache/main/Avro.main.csproj index 15cfeadde71..beb1dff4d58 100644 --- a/lang/csharp/src/apache/main/Avro.main.csproj +++ b/lang/csharp/src/apache/main/Avro.main.csproj @@ -17,10 +17,9 @@ - - netstandard2.0;netstandard2.1;netcoreapp2.1 + $(DefaultLibraryTargetFrameworks) Avro Avro true @@ -48,28 +47,10 @@ - - - - - - - all - runtime; build; native; contentfiles; analyzers - - - all - runtime; build; native; contentfiles; analyzers; buildtransitive - - - - - - - + diff --git a/lang/csharp/src/apache/main/AvroDecimal.cs b/lang/csharp/src/apache/main/AvroDecimal.cs index 17d11204764..98de9b1b13b 100644 --- a/lang/csharp/src/apache/main/AvroDecimal.cs +++ b/lang/csharp/src/apache/main/AvroDecimal.cs @@ -25,12 +25,10 @@ namespace Avro /// /// Represents a big decimal. /// - #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member - #pragma warning disable CA2225 // Operator overloads have named alternates public struct AvroDecimal : IConvertible, IFormattable, IComparable, IComparable, IEquatable { /// - /// Initializes a new instance of the class from a given double. + /// Initializes a new instance of the struct from a given double. /// /// The double value. public AvroDecimal(double value) @@ -39,7 +37,7 @@ public AvroDecimal(double value) } /// - /// Initializes a new instance of the class from a given float. + /// Initializes a new instance of the struct from a given float. /// /// The float value. 
public AvroDecimal(float value) @@ -48,7 +46,7 @@ public AvroDecimal(float value) } /// - /// Initializes a new instance of the class from a given decimal. + /// Initializes a new instance of the struct from a given decimal. /// /// The decimal value. public AvroDecimal(decimal value) @@ -62,42 +60,52 @@ public AvroDecimal(decimal value) var scale = bytes[14]; if (bytes[15] == 128) + { unscaledValue *= BigInteger.MinusOne; + } UnscaledValue = unscaledValue; Scale = scale; } /// - /// Initializes a new instance of the class from a given int. + /// Initializes a new instance of the struct from a given int. /// /// The int value. public AvroDecimal(int value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given long. + /// Initializes a new instance of the struct from a given long. /// /// The long value. public AvroDecimal(long value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given unsigned int. + /// Initializes a new instance of the struct from a given unsigned int. /// /// The unsigned int value. public AvroDecimal(uint value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given unsigned long. + /// Initializes a new instance of the struct from a given unsigned long. /// /// The unsigned long value. public AvroDecimal(ulong value) - : this(new BigInteger(value), 0) { } + : this(new BigInteger(value), 0) + { + } /// - /// Initializes a new instance of the class from a given + /// Initializes a new instance of the struct from a given /// and a scale. /// /// The double value. @@ -109,302 +117,567 @@ public AvroDecimal(BigInteger unscaledValue, int scale) } /// - /// Gets the unscaled integer value represented by the current . 
+ /// Gets the unscaled integer value represented by the current . /// + /// + /// The unscaled value. + /// public BigInteger UnscaledValue { get; } /// - /// Gets the scale of the current . + /// Gets the scale of the current . /// + /// + /// The scale. + /// public int Scale { get; } /// - /// Gets the sign of the current . + /// Gets the sign of the current . /// + /// + /// The sign. + /// internal int Sign { get { return UnscaledValue.Sign; } } /// - /// Converts the current to a string. + /// Converts the current to a string. /// - /// A string representation of the numeric value. + /// + /// A string representation of the numeric value. + /// public override string ToString() { var number = UnscaledValue.ToString($"D{Scale + 1}", CultureInfo.CurrentCulture); if (Scale > 0) + { return number.Insert(number.Length - Scale, CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator); + } return number; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(AvroDecimal left, AvroDecimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator !=(AvroDecimal left, AvroDecimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. 
+ /// public static bool operator <(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(AvroDecimal left, AvroDecimal right) { return left.CompareTo(right) <= 0; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(AvroDecimal left, decimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator !=(AvroDecimal left, decimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(AvroDecimal left, decimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(AvroDecimal left, decimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <(AvroDecimal left, decimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(AvroDecimal left, decimal right) { return left.CompareTo(right) <= 0; } + /// + /// Implements the operator ==. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator ==(decimal left, AvroDecimal right) { return left.Equals(right); } + /// + /// Implements the operator !=. + /// + /// The left. + /// The right. 
+ /// + /// The result of the operator. + /// public static bool operator !=(decimal left, AvroDecimal right) { return !left.Equals(right); } + /// + /// Implements the operator >. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >(decimal left, AvroDecimal right) { return left.CompareTo(right) > 0; } + /// + /// Implements the operator >=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator >=(decimal left, AvroDecimal right) { return left.CompareTo(right) >= 0; } + /// + /// Implements the operator <. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <(decimal left, AvroDecimal right) { return left.CompareTo(right) < 0; } + /// + /// Implements the operator <=. + /// + /// The left. + /// The right. + /// + /// The result of the operator. + /// public static bool operator <=(decimal left, AvroDecimal right) { return left.CompareTo(right) <= 0; } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator byte(AvroDecimal value) { return ToByte(value); } /// - /// Creates a byte from a given . + /// Creates a from a given . /// - /// The . - /// A byte. + /// The . + /// + /// A . + /// public static byte ToByte(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator sbyte(AvroDecimal value) { return ToSByte(value); } /// - /// Creates a signed byte from a given . + /// Creates a from a given . /// - /// The . - /// A signed byte. + /// The . + /// + /// A . + /// public static sbyte ToSByte(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . 
+ /// public static explicit operator short(AvroDecimal value) { return ToInt16(value); } /// - /// Creates a short from a given . + /// Creates a short from a given . /// - /// The . - /// A short. + /// The . + /// + /// A . + /// public static short ToInt16(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator int(AvroDecimal value) { return ToInt32(value); } /// - /// Creates an int from a given . + /// Creates an int from a given . /// - /// The . - /// An int. + /// The . + /// + /// An . + /// public static int ToInt32(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator long(AvroDecimal value) { return ToInt64(value); } /// - /// Creates a long from a given . + /// Creates a from a given . /// - /// The . - /// A long. + /// The . + /// + /// A . + /// public static long ToInt64(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator ushort(AvroDecimal value) { return ToUInt16(value); } /// - /// Creates an unsigned short from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned short. + /// The . + /// + /// An . + /// public static ushort ToUInt16(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . + /// public static explicit operator uint(AvroDecimal value) { return ToUInt32(value); } /// - /// Creates an unsigned int from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned int. + /// The . + /// + /// An . + /// public static uint ToUInt32(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// An . 
+ /// public static explicit operator ulong(AvroDecimal value) { return ToUInt64(value); } /// - /// Creates an unsigned long from a given . + /// Creates an from a given . /// - /// The . - /// An unsigned long. + /// The . + /// + /// An . + /// public static ulong ToUInt64(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator float(AvroDecimal value) { return ToSingle(value); } /// - /// Creates a double from a given . + /// Creates a from a given . /// - /// The . - /// A double. + /// The . + /// + /// A . + /// public static float ToSingle(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator double(AvroDecimal value) { return ToDouble(value); } /// - /// Creates a double from a given . + /// Creates a from a given . /// - /// The . - /// A double. + /// The . + /// + /// A . + /// public static double ToDouble(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator decimal(AvroDecimal value) { return ToDecimal(value); } /// - /// Creates a decimal from a given . + /// Creates a from a given . /// - /// The . - /// A decimal. + /// The . + /// + /// A . + /// public static decimal ToDecimal(AvroDecimal value) { return value.ToType(); } + /// + /// Performs an explicit conversion from to . + /// + /// The . + /// + /// A . + /// public static explicit operator BigInteger(AvroDecimal value) { return ToBigInteger(value); } /// - /// Creates a from a given . + /// Creates a from a given . /// - /// The . - /// A . + /// The . + /// + /// A . 
+ /// public static BigInteger ToBigInteger(AvroDecimal value) { var scaleDivisor = BigInteger.Pow(new BigInteger(10), value.Scale); @@ -412,71 +685,157 @@ public static BigInteger ToBigInteger(AvroDecimal value) return scaledValue; } + /// + /// Performs an implicit conversion from to . + /// + /// The byte . + /// + /// An . + /// public static implicit operator AvroDecimal(byte value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(sbyte value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(short value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(int value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(long value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(ushort value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(uint value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(ulong value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(float value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . 
+ /// + /// An . + /// public static implicit operator AvroDecimal(double value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(decimal value) { return new AvroDecimal(value); } + /// + /// Performs an implicit conversion from to . + /// + /// The . + /// + /// An . + /// public static implicit operator AvroDecimal(BigInteger value) { return new AvroDecimal(value, 0); - } + } /// - /// Converts the numeric value of the current to a given type. + /// Converts the numeric value of the current to a given type. /// - /// The type to which the value of the current should be converted. - /// A value of type converted from the current . + /// The type to which the value of the current should be converted. + /// + /// A value of type converted from the current . + /// public T ToType() where T : struct { @@ -484,19 +843,24 @@ public T ToType() } /// - /// Converts the numeric value of the current to a given type. + /// Converts the numeric value of the current to a given type. /// - /// The type to which the value of the current should be converted. + /// The type to which the value of the current should be converted. /// An System.IFormatProvider interface implementation that supplies culture-specific formatting information. - /// + /// + /// An instance of type conversionType whose value is equivalent to the value of this instance. + /// + /// The value {UnscaledValue} cannot fit into {conversionType.Name}. 
object IConvertible.ToType(Type conversionType, IFormatProvider provider) { var scaleDivisor = BigInteger.Pow(new BigInteger(10), Scale); var remainder = BigInteger.Remainder(UnscaledValue, scaleDivisor); var scaledValue = BigInteger.Divide(UnscaledValue, scaleDivisor); - if (scaledValue > new BigInteger(Decimal.MaxValue)) - throw new ArgumentOutOfRangeException("value", "The value " + UnscaledValue + " cannot fit into " + conversionType.Name + "."); + if (scaledValue > new BigInteger(decimal.MaxValue)) + { + throw new OverflowException($"The value {UnscaledValue} cannot fit into {conversionType.Name}."); + } var leftOfDecimal = (decimal)scaledValue; var rightOfDecimal = ((decimal)remainder) / ((decimal)scaleDivisor); @@ -506,220 +870,268 @@ object IConvertible.ToType(Type conversionType, IFormatProvider provider) } /// - /// Returns a value that indicates whether the current and a specified object + /// Returns a value that indicates whether the current and a specified object /// have the same value. /// /// The object to compare. - /// true if the obj argument is an object, and its value - /// is equal to the value of the current instance; otherwise false. + /// + /// true if the obj argument is an object, and its value + /// is equal to the value of the current instance; otherwise false. /// public override bool Equals(object obj) { - return (obj is AvroDecimal) && Equals((AvroDecimal)obj); + return (obj is AvroDecimal @decimal) && Equals(@decimal); } /// - /// Returns the hash code for the current . + /// Returns the hash code for the current . /// - /// The hash code. + /// + /// The hash code. + /// public override int GetHashCode() { return UnscaledValue.GetHashCode() ^ Scale.GetHashCode(); } /// - /// Returns the for the current . + /// Returns the for the current . /// - /// . + /// + /// The enumerated constant that is the of the class or value type that implements this interface. 
+ /// TypeCode IConvertible.GetTypeCode() { return TypeCode.Object; } /// - /// Converts the current to a boolean. + /// Converts the current to a boolean. /// /// The format provider. - /// true or false, which reflects the value of the current . + /// + /// true or false, which reflects the value of the current . + /// bool IConvertible.ToBoolean(IFormatProvider provider) { return Convert.ToBoolean(this, provider); } /// - /// Converts the current to a byte. + /// Converts the current to a byte. /// /// The format provider. - /// A byte. + /// + /// A . + /// byte IConvertible.ToByte(IFormatProvider provider) { return Convert.ToByte(this, provider); } /// - /// Converts the current to a char. + /// Converts the current to a char. /// /// The format provider. - /// This method always throws an . + /// + /// This method always throws an . + /// + /// Cannot cast BigDecimal to Char. char IConvertible.ToChar(IFormatProvider provider) { throw new InvalidCastException("Cannot cast BigDecimal to Char"); } /// - /// Converts the current to a . + /// Converts the current to a . /// /// The format provider. - /// This method always throws an . + /// + /// This method always throws an . + /// + /// Cannot cast BigDecimal to DateTime. DateTime IConvertible.ToDateTime(IFormatProvider provider) { throw new InvalidCastException("Cannot cast BigDecimal to DateTime"); } /// - /// Converts the current to a decimal. + /// Converts the current to a decimal. /// /// The format provider. - /// A decimal. + /// + /// A . + /// decimal IConvertible.ToDecimal(IFormatProvider provider) { return Convert.ToDecimal(this, provider); } /// - /// Converts the current to a double. + /// Converts the current to a double. /// /// The format provider. - /// A double. + /// + /// A . + /// double IConvertible.ToDouble(IFormatProvider provider) { return Convert.ToDouble(this, provider); } /// - /// Converts the current to a short. + /// Converts the current to a short. /// /// The format provider. 
- /// A short. + /// + /// A . + /// short IConvertible.ToInt16(IFormatProvider provider) { return Convert.ToInt16(this, provider); } /// - /// Converts the current to an int. + /// Converts the current to an int. /// /// The format provider. - /// An int. + /// + /// An . + /// int IConvertible.ToInt32(IFormatProvider provider) { return Convert.ToInt32(this, provider); } /// - /// Converts the current to a long. + /// Converts the current to a long. /// /// The format provider. - /// A long. + /// + /// A . + /// long IConvertible.ToInt64(IFormatProvider provider) { return Convert.ToInt64(this, provider); } /// - /// Converts the current to a signed byte. + /// Converts the current to a signed byte. /// /// The format provider. - /// A signed byte. + /// + /// A . + /// sbyte IConvertible.ToSByte(IFormatProvider provider) { return Convert.ToSByte(this, provider); } /// - /// Converts the current to a float. + /// Converts the current to a float. /// /// The format provider. - /// A float. + /// + /// A . + /// float IConvertible.ToSingle(IFormatProvider provider) { return Convert.ToSingle(this, provider); } /// - /// Converts the current to a string. + /// Converts the current to a string. /// /// The format provider. - /// A string. + /// + /// A . + /// string IConvertible.ToString(IFormatProvider provider) { return Convert.ToString(this, provider); } /// - /// Converts the current to an unsigned short. + /// Converts the current to an unsigned short. /// /// The format provider. - /// An unsigned short. + /// + /// An . + /// ushort IConvertible.ToUInt16(IFormatProvider provider) { return Convert.ToUInt16(this, provider); } /// - /// Converts the current to an unsigned int. + /// Converts the current to an unsigned int. /// /// The format provider. - /// An unsigned int. + /// + /// An . + /// uint IConvertible.ToUInt32(IFormatProvider provider) { return Convert.ToUInt32(this, provider); } /// - /// Converts the current to an unsigned long. 
+ /// Converts the current to an unsigned long. /// /// The format provider. - /// An unsigned long. + /// + /// An . + /// ulong IConvertible.ToUInt64(IFormatProvider provider) { return Convert.ToUInt64(this, provider); } /// - /// Converts the current to a string. + /// Converts the current to a string. /// - /// + /// The format. /// The format provider. - /// A string representation of the numeric value. + /// + /// A string representation of the numeric value. + /// public string ToString(string format, IFormatProvider formatProvider) { return ToString(); } /// - /// Compares the value of the current to the value of another object. + /// Compares the value of the current to the value of another object. /// /// The object to compare. - /// A value that indicates the relative order of the objects being compared. + /// + /// A value that indicates the relative order of the objects being compared. + /// + /// Compare to object must be a BigDecimal - obj. public int CompareTo(object obj) { if (obj == null) + { return 1; + } if (!(obj is AvroDecimal)) + { throw new ArgumentException("Compare to object must be a BigDecimal", nameof(obj)); + } return CompareTo((AvroDecimal)obj); } /// - /// Compares the value of the current to the value of another - /// . + /// Compares the value of the current to the value of another + /// . /// - /// The to compare. - /// A value that indicates the relative order of the - /// instances being compared. + /// The to compare. + /// + /// A value that indicates the relative order of the + /// instances being compared. 
+ /// public int CompareTo(AvroDecimal other) { var unscaledValueCompare = UnscaledValue.CompareTo(other.UnscaledValue); @@ -727,11 +1139,15 @@ public int CompareTo(AvroDecimal other) // if both are the same value, return the value if (unscaledValueCompare == scaleCompare) + { return unscaledValueCompare; + } // if the scales are both the same return unscaled value if (scaleCompare == 0) + { return unscaledValueCompare; + } var scaledValue = BigInteger.Divide(UnscaledValue, BigInteger.Pow(new BigInteger(10), Scale)); var otherScaledValue = BigInteger.Divide(other.UnscaledValue, BigInteger.Pow(new BigInteger(10), other.Scale)); @@ -740,17 +1156,26 @@ public int CompareTo(AvroDecimal other) } /// - /// Returns a value that indicates whether the current has the same - /// value as another . + /// Returns a value that indicates whether the current has the same + /// value as another . /// - /// The to compare. - /// true if the current has the same value as ; - /// otherwise false. + /// The to compare. + /// + /// true if the current has the same value as ; + /// otherwise false. + /// public bool Equals(AvroDecimal other) { return Scale == other.Scale && UnscaledValue == other.UnscaledValue; } + /// + /// Gets the bytes from decimal. + /// + /// The . + /// + /// A byte array. 
+ /// private static byte[] GetBytesFromDecimal(decimal d) { byte[] bytes = new byte[16]; @@ -781,6 +1206,4 @@ private static byte[] GetBytesFromDecimal(decimal d) return bytes; } } - #pragma warning restore CA2225 // Operator overloads have named alternates - #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member } diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs index 70ab5bddc74..73b95852d7b 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs @@ -21,8 +21,10 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.Linq; using System.Reflection; using System.Text; +using System.Text.RegularExpressions; using Microsoft.CSharp; namespace Avro @@ -33,120 +35,180 @@ namespace Avro public class CodeGen { /// - /// Object that contains all the generated types + /// Gets object that contains all the generated types. /// + /// + /// The code compile unit. + /// public CodeCompileUnit CompileUnit { get; private set; } /// - /// List of schemas to generate code for + /// Gets list of schemas to generate code for. /// + /// + /// The schemas. + /// public IList Schemas { get; private set; } /// - /// List of protocols to generate code for + /// Gets list of protocols to generate code for. /// + /// + /// The protocols. + /// public IList Protocols { get; private set; } /// - /// Mapping of Avro namespaces to C# namespaces + /// Gets mapping of Avro namespaces to C# namespaces. /// + /// + /// The namespace mapping. + /// + [Obsolete("NamespaceMapping is not used, use AddProtocol(string ...) or AddSchema(string ...) instead!")] public IDictionary NamespaceMapping { get; private set; } /// - /// List of generated namespaces + /// Gets list of generated namespaces. /// - [Obsolete("Use NamespaceLookup instead. 
This will be removed from the public API in a future version.")] - protected Dictionary namespaceLookup = new Dictionary(StringComparer.Ordinal); + /// + /// The namespace lookup. + /// + protected Dictionary NamespaceLookup { get; private set; } /// - /// List of generated namespaces. + /// Initializes a new instance of the class. /// - protected Dictionary NamespaceLookup + public CodeGen() { -#pragma warning disable CS0618 // Type or member is obsolete - get => namespaceLookup; - set => namespaceLookup = value; -#pragma warning restore CS0618 // Type or member is obsolete + Schemas = new List(); + Protocols = new List(); + NamespaceLookup = new Dictionary(StringComparer.Ordinal); } /// - /// Default constructor + /// Initializes a new instance of the class. /// - public CodeGen() + /// The namespace lookup. + public CodeGen(Dictionary namespaceLookup) + : this() { - this.Schemas = new List(); - this.Protocols = new List(); - this.NamespaceMapping = new Dictionary(); + NamespaceLookup = namespaceLookup; } /// - /// Adds a protocol object to generate code for + /// Adds a protocol object to generate code for. /// - /// protocol object + /// The protocol. public virtual void AddProtocol(Protocol protocol) { Protocols.Add(protocol); } /// - /// Adds a schema object to generate code for + /// Parses and adds a protocol object to generate code for. /// - /// schema object + /// The protocol. + /// namespace mapping key value pairs. + public virtual void AddProtocol(string protocolText, IEnumerable> namespaceMapping = null) + { + // Map namespaces + protocolText = ReplaceMappedNamespacesInSchema(protocolText, namespaceMapping); + Protocol protocol = Protocol.Parse(protocolText); + Protocols.Add(protocol); + } + + /// + /// Adds a schema object to generate code for. + /// + /// schema object. 
public virtual void AddSchema(Schema schema) { Schemas.Add(schema); } /// - /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet + /// Parses and adds a schema object to generate code for. /// - /// name of namespace - /// - protected virtual CodeNamespace addNamespace(string name) + /// schema object. + /// namespace mapping key value pairs. + public virtual void AddSchema(string schemaText, IEnumerable> namespaceMapping = null) + { + // Map namespaces + schemaText = ReplaceMappedNamespacesInSchema(schemaText, namespaceMapping); + Schema schema = Schema.Parse(schemaText); + Schemas.Add(schema); + } + + /// + /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet. + /// + /// name of namespace. + /// + /// Code Namespace. + /// + /// name - name cannot be null. + protected virtual CodeNamespace AddNamespace(string name) { if (string.IsNullOrEmpty(name)) + { throw new ArgumentNullException(nameof(name), "name cannot be null."); + } - CodeNamespace ns = null; - - if (!NamespaceLookup.TryGetValue(name, out ns)) + if (!NamespaceLookup.TryGetValue(name, out CodeNamespace ns)) { - string csharpNamespace; - ns = NamespaceMapping.TryGetValue(name, out csharpNamespace) - ? new CodeNamespace(csharpNamespace) - : new CodeNamespace(CodeGenUtil.Instance.Mangle(name)); + ns = new CodeNamespace(CodeGenUtil.Instance.Mangle(name)); foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { ns.Imports.Add(nci); + } CompileUnit.Namespaces.Add(ns); NamespaceLookup.Add(name, ns); } + return ns; } /// - /// Generates code for the given protocol and schema objects + /// Adds a namespace object for the given name into the dictionary if it doesn't exist yet. + /// + /// name of namespace. + /// + /// Code Namespace. + /// + /// name - name cannot be null. + [Obsolete("This method is deprecated and it will be removed in a future release! 
Please change call to AddNamespace(string name).")] + protected virtual CodeNamespace addNamespace(string name) + { + return AddNamespace(name); + } + + /// + /// Generates code for the given protocol and schema objects. /// - /// CodeCompileUnit object + /// + /// CodeCompileUnit object. + /// public virtual CodeCompileUnit GenerateCode() { CompileUnit = new CodeCompileUnit(); - processSchemas(); - processProtocols(); + ProcessSchemas(); + ProcessProtocols(); return CompileUnit; } /// - /// Generates code for the schema objects + /// Generates code for the schema objects. /// - protected virtual void processSchemas() + /// Names in schema should only be of type NamedSchema, type found " + sn.Value.Tag. + protected virtual void ProcessSchemas() { - foreach (Schema schema in this.Schemas) + foreach (Schema schema in Schemas) { - SchemaNames names = generateNames(schema); + SchemaNames names = GenerateNames(schema); foreach (KeyValuePair sn in names) { switch (sn.Value.Tag) @@ -163,13 +225,24 @@ protected virtual void processSchemas() } /// - /// Generates code for the protocol objects + /// Generates code for the schema objects. /// - protected virtual void processProtocols() + /// Names in schema should only be of type NamedSchema, type found " + sn.Value.Tag. + [Obsolete("This method is deprecated and it will be removed in a future release! Please change call to ProcessSchemas().")] + protected virtual void processSchemas() + { + ProcessSchemas(); + } + + /// + /// Generates code for the protocol objects. 
+ /// + /// Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag} + protected virtual void ProcessProtocols() { foreach (Protocol protocol in Protocols) { - SchemaNames names = generateNames(protocol); + SchemaNames names = GenerateNames(protocol); foreach (KeyValuePair sn in names) { switch (sn.Value.Tag) @@ -179,7 +252,7 @@ protected virtual void processProtocols() case Schema.Type.Record: processRecord(sn.Value); break; case Schema.Type.Error: processRecord(sn.Value); break; default: - throw new CodeGenException("Names in protocol should only be of type NamedSchema, type found " + sn.Value.Tag); + throw new CodeGenException($"Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag}"); } } @@ -188,24 +261,74 @@ protected virtual void processProtocols() } /// - /// Generate list of named schemas from given protocol + /// Generates code for the protocol objects. /// - /// protocol to process - /// + /// Names in protocol should only be of type NamedSchema, type found {sn.Value.Tag} + [Obsolete("This method is deprecated and it will be removed in a future release! Please change call to ProcessProtocols().")] + protected virtual void processProtocols() + { + ProcessProtocols(); + } + + /// + /// Generate list of named schemas from given protocol. + /// + /// protocol to process. + /// + /// List of named schemas. + /// + /// protocol - Protocol can not be null. + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GenerateNames() instead.")] protected virtual SchemaNames generateNames(Protocol protocol) { + return GenerateNames(protocol); + } + + /// + /// Generate list of named schemas from given protocol. + /// + /// protocol to process. + /// + /// List of named schemas. + /// + /// protocol - Protocol can not be null. 
+ protected virtual SchemaNames GenerateNames(Protocol protocol) + { + if (protocol == null) + { + throw new ArgumentNullException(nameof(protocol), "Protocol can not be null"); + } + var names = new SchemaNames(); foreach (Schema schema in protocol.Types) + { addName(schema, names); + } + return names; } /// - /// Generate list of named schemas from given schema + /// Generate list of named schemas from given schema. /// - /// schema to process - /// + /// schema to process. + /// + /// List of named schemas. + /// + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GenerateNames() instead.")] protected virtual SchemaNames generateNames(Schema schema) + { + return GenerateNames(schema); + } + + /// + /// Generate list of named schemas from given schema. + /// + /// schema to process. + /// + /// List of named schemas. + /// + protected virtual SchemaNames GenerateNames(Schema schema) { var names = new SchemaNames(); addName(schema, names); @@ -213,14 +336,18 @@ protected virtual SchemaNames generateNames(Schema schema) } /// - /// Recursively search the given schema for named schemas and adds them to the given container + /// Recursively search the given schema for named schemas and adds them to the given container. /// - /// schema object to search - /// list of named schemas + /// schema object to search. + /// list of named schemas. + /// Unable to add name for " + schema.Name + " type " + schema.Tag. 
protected virtual void addName(Schema schema, SchemaNames names) { NamedSchema ns = schema as NamedSchema; - if (null != ns) if (names.Contains(ns.SchemaName)) return; + if (ns != null && names.Contains(ns.SchemaName)) + { + return; + } switch (schema.Tag) { @@ -245,7 +372,10 @@ protected virtual void addName(Schema schema, SchemaNames names) var rs = schema as RecordSchema; names.Add(rs); foreach (Field field in rs.Fields) + { addName(field.Schema, names); + } + break; case Schema.Type.Array: @@ -261,7 +391,10 @@ protected virtual void addName(Schema schema, SchemaNames names) case Schema.Type.Union: var us = schema as UnionSchema; foreach (Schema usc in us.Schemas) + { addName(usc, names); + } + break; default: @@ -270,13 +403,21 @@ protected virtual void addName(Schema schema, SchemaNames names) } /// - /// Creates a class declaration for fixed schema + /// Creates a class declaration for fixed schema. /// - /// fixed schema + /// fixed schema. + /// + /// Unable to cast schema into a fixed + /// or + /// Namespace required for enum schema " + fixedSchema.Name. 
+ /// protected virtual void processFixed(Schema schema) { FixedSchema fixedSchema = schema as FixedSchema; - if (null == fixedSchema) throw new CodeGenException("Unable to cast schema into a fixed"); + if (fixedSchema == null) + { + throw new CodeGenException("Unable to cast schema into a fixed"); + } CodeTypeDeclaration ctd = new CodeTypeDeclaration(); ctd.Name = CodeGenUtil.Instance.Mangle(fixedSchema.Name); @@ -284,6 +425,7 @@ protected virtual void processFixed(Schema schema) ctd.IsPartial = true; ctd.Attributes = MemberAttributes.Public; ctd.BaseTypes.Add("SpecificFixed"); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); if (fixedSchema.Documentation != null) { @@ -317,23 +459,35 @@ protected virtual void processFixed(Schema schema) string nspace = fixedSchema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + fixedSchema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); } /// - /// Creates an enum declaration + /// Creates an enum declaration. /// - /// enum schema + /// enum schema. + /// + /// Unable to cast schema into an enum + /// or + /// Namespace required for enum schema " + enumschema.Name. 
+ /// protected virtual void processEnum(Schema schema) { EnumSchema enumschema = schema as EnumSchema; - if (null == enumschema) throw new CodeGenException("Unable to cast schema into an enum"); + if (enumschema == null) + { + throw new CodeGenException("Unable to cast schema into an enum"); + } CodeTypeDeclaration ctd = new CodeTypeDeclaration(CodeGenUtil.Instance.Mangle(enumschema.Name)); ctd.IsEnum = true; ctd.Attributes = MemberAttributes.Public; + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); if (enumschema.Documentation != null) { @@ -342,16 +496,17 @@ protected virtual void processEnum(Schema schema) foreach (string symbol in enumschema.Symbols) { - if (CodeGenUtil.Instance.ReservedKeywords.Contains(symbol)) - throw new CodeGenException("Enum symbol " + symbol + " is a C# reserved keyword"); CodeMemberField field = new CodeMemberField(typeof(int), symbol); ctd.Members.Add(field); } string nspace = enumschema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + enumschema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); } @@ -360,6 +515,7 @@ protected virtual void processEnum(Schema schema) /// Generates code for an individual protocol. /// /// Protocol to generate code for. + /// Namespace required for enum schema " + nspace. 
protected virtual void processInterface(Protocol protocol) { // Create abstract class @@ -369,6 +525,7 @@ protected virtual void processInterface(Protocol protocol) ctd.TypeAttributes = TypeAttributes.Abstract | TypeAttributes.Public; ctd.IsClass = true; ctd.BaseTypes.Add("Avro.Specific.ISpecificProtocol"); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); AddProtocolDocumentation(protocol, ctd); @@ -394,15 +551,14 @@ protected virtual void processInterface(Protocol protocol) property.Type = new CodeTypeReference("Avro.Protocol"); property.HasGet = true; - property.GetStatements.Add(new CodeTypeReferenceExpression("return protocol")); ctd.Members.Add(property); - //var requestMethod = CreateRequestMethod(); - //ctd.Members.Add(requestMethod); - + // var requestMethod = CreateRequestMethod(); + // ctd.Members.Add(requestMethod); var requestMethod = CreateRequestMethod(); - //requestMethod.Attributes |= MemberAttributes.Override; + + // requestMethod.Attributes |= MemberAttributes.Override; var builder = new StringBuilder(); if (protocol.Messages.Count > 0) @@ -425,6 +581,7 @@ protected virtual void processInterface(Protocol protocol) builder.Append("\t\t\t}"); } + var cseGet = new CodeSnippetExpression(builder.ToString()); requestMethod.Statements.Add(cseGet); @@ -434,8 +591,11 @@ protected virtual void processInterface(Protocol protocol) string nspace = protocol.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for enum schema " + nspace); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); @@ -444,11 +604,9 @@ protected virtual void processInterface(Protocol protocol) ctd.TypeAttributes = TypeAttributes.Abstract | TypeAttributes.Public; ctd.IsClass = true; ctd.BaseTypes.Add(protocolNameMangled); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); // Need to override - - - AddProtocolDocumentation(protocol, 
ctd); AddMethods(protocol, true, ctd); @@ -456,29 +614,40 @@ protected virtual void processInterface(Protocol protocol) codens.Types.Add(ctd); } + /// + /// Creates the request method. + /// + /// A declaration for a method of a type. private static CodeMemberMethod CreateRequestMethod() { var requestMethod = new CodeMemberMethod(); requestMethod.Attributes = MemberAttributes.Public | MemberAttributes.Final; requestMethod.Name = "Request"; - requestMethod.ReturnType = new CodeTypeReference(typeof (void)); + requestMethod.ReturnType = new CodeTypeReference(typeof(void)); { - var requestor = new CodeParameterDeclarationExpression(typeof (Avro.Specific.ICallbackRequestor), + var requestor = new CodeParameterDeclarationExpression(typeof(Specific.ICallbackRequestor), "requestor"); requestMethod.Parameters.Add(requestor); - var messageName = new CodeParameterDeclarationExpression(typeof (string), "messageName"); + var messageName = new CodeParameterDeclarationExpression(typeof(string), "messageName"); requestMethod.Parameters.Add(messageName); - var args = new CodeParameterDeclarationExpression(typeof (object[]), "args"); + var args = new CodeParameterDeclarationExpression(typeof(object[]), "args"); requestMethod.Parameters.Add(args); - var callback = new CodeParameterDeclarationExpression(typeof (object), "callback"); + var callback = new CodeParameterDeclarationExpression(typeof(object), "callback"); requestMethod.Parameters.Add(callback); } + return requestMethod; } + /// + /// Adds the methods. + /// + /// The protocol. + /// if set to true [generate callback]. + /// The CTD. 
private static void AddMethods(Protocol protocol, bool generateCallback, CodeTypeDeclaration ctd) { foreach (var e in protocol.Messages) @@ -488,18 +657,22 @@ private static void AddMethods(Protocol protocol, bool generateCallback, CodeTyp var response = message.Response; if (generateCallback && message.Oneway.GetValueOrDefault()) + { continue; + } var messageMember = new CodeMemberMethod(); messageMember.Name = CodeGenUtil.Instance.Mangle(name); messageMember.Attributes = MemberAttributes.Public | MemberAttributes.Abstract; - if (message.Doc!= null && message.Doc.Trim() != string.Empty) + if (message.Doc != null && message.Doc.Trim() != string.Empty) + { messageMember.Comments.Add(new CodeCommentStatement(message.Doc)); + } if (message.Oneway.GetValueOrDefault() || generateCallback) { - messageMember.ReturnType = new CodeTypeReference(typeof (void)); + messageMember.ReturnType = new CodeTypeReference(typeof(void)); } else { @@ -528,11 +701,15 @@ private static void AddMethods(Protocol protocol, bool generateCallback, CodeTyp messageMember.Parameters.Add(parameter); } - ctd.Members.Add(messageMember); } } + /// + /// Adds the protocol documentation. + /// + /// The protocol. + /// The CTD. private void AddProtocolDocumentation(Protocol protocol, CodeTypeDeclaration ctd) { // Add interface documentation @@ -540,25 +717,41 @@ private void AddProtocolDocumentation(Protocol protocol, CodeTypeDeclaration ctd { var interfaceDoc = createDocComment(protocol.Doc); if (interfaceDoc != null) + { ctd.Comments.Add(interfaceDoc); + } } } /// - /// Creates a class declaration + /// Creates a class declaration. /// - /// record schema - /// A new class code type declaration + /// record schema. + /// + /// A new class code type declaration. + /// + /// + /// Unable to cast schema into a record + /// or + /// Namespace required for record schema " + recordSchema.Name. 
+ /// protected virtual CodeTypeDeclaration processRecord(Schema schema) { RecordSchema recordSchema = schema as RecordSchema; - if (null == recordSchema) throw new CodeGenException("Unable to cast schema into a record"); + if (recordSchema == null) + { + throw new CodeGenException("Unable to cast schema into a record"); + } bool isError = recordSchema.Tag == Schema.Type.Error; // declare the class var ctd = new CodeTypeDeclaration(CodeGenUtil.Instance.Mangle(recordSchema.Name)); - ctd.BaseTypes.Add(isError ? "SpecificException" : "ISpecificRecord"); + var baseTypeReference = new CodeTypeReference( + isError ? typeof(Specific.SpecificException) : typeof(Specific.ISpecificRecord), + CodeTypeReferenceOptions.GlobalReference); + ctd.BaseTypes.Add(baseTypeReference); + ctd.CustomAttributes.Add(CodeGenUtil.Instance.GeneratedCodeAttribute); ctd.Attributes = MemberAttributes.Public; ctd.IsClass = true; @@ -609,7 +802,7 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) codeField.Attributes = MemberAttributes.Private; if (field.Schema is EnumSchema es && es.Default != null) { - codeField.InitExpression = new CodeTypeReferenceExpression($"{es.Name}.{es.Default}"); + codeField.InitExpression = new CodeTypeReferenceExpression($"{es.Namespace}.{es.Name}.{es.Default}"); } // Process field documentation if it exist and add to the field @@ -617,8 +810,10 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) if (!string.IsNullOrEmpty(field.Documentation)) { propertyComment = createDocComment(field.Documentation); - if (null != propertyComment) + if (propertyComment != null) + { codeField.Comments.Add(propertyComment); + } } // Add field to class @@ -635,8 +830,10 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) property.Type = ctrfield; property.GetStatements.Add(new CodeMethodReturnStatement(fieldRef)); property.SetStatements.Add(new CodeAssignStatement(fieldRef, new CodePropertySetValueReferenceExpression())); - if (null != 
propertyComment) + if (propertyComment != null) + { property.Comments.Add(propertyComment); + } // Add field property to class ctd.Members.Add(property); @@ -675,14 +872,14 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) } // end switch block for Get() - getFieldStmt.AppendLine("\t\t\tdefault: throw new AvroRuntimeException(\"Bad index \" + fieldPos + \" in Get()\");") + getFieldStmt.AppendLine("\t\t\tdefault: throw new global::Avro.AvroRuntimeException(\"Bad index \" + fieldPos + \" in Get()\");") .Append("\t\t\t}"); var cseGet = new CodeSnippetExpression(getFieldStmt.ToString()); cmmGet.Statements.Add(cseGet); ctd.Members.Add(cmmGet); // end switch block for Put() - putFieldStmt.AppendLine("\t\t\tdefault: throw new AvroRuntimeException(\"Bad index \" + fieldPos + \" in Put()\");") + putFieldStmt.AppendLine("\t\t\tdefault: throw new global::Avro.AvroRuntimeException(\"Bad index \" + fieldPos + \" in Put()\");") .Append("\t\t\t}"); var csePut = new CodeSnippetExpression(putFieldStmt.ToString()); cmmPut.Statements.Add(csePut); @@ -690,8 +887,11 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) string nspace = recordSchema.Namespace; if (string.IsNullOrEmpty(nspace)) + { throw new CodeGenException("Namespace required for record schema " + recordSchema.Name); - CodeNamespace codens = addNamespace(nspace); + } + + CodeNamespace codens = AddNamespace(nspace); codens.Types.Add(ctd); @@ -699,15 +899,30 @@ protected virtual CodeTypeDeclaration processRecord(Schema schema) } /// - /// Gets the string representation of the schema's data type + /// Gets the string representation of the schema's data type. /// - /// schema - /// flag to indicate union with null - /// - /// This method sets this value to indicate whether the enum is nullable. True indicates - /// that it is nullable. False indicates that it is not nullable. - /// - /// Name of the schema's C# type representation. + /// schema. + /// flag to indicate union with null. 
+ /// This method sets this value to indicate whether the enum is nullable. True indicates + /// that it is nullable. False indicates that it is not nullable. + /// + /// Name of the schema's C# type representation. + /// + /// + /// Unable to cast schema into a named schema + /// or + /// Unable to cast schema into a named schema + /// or + /// Unable to cast schema into an array schema + /// or + /// Unable to cast schema into a map schema + /// or + /// Unable to cast schema into a union schema + /// or + /// Unable to cast schema into a logical schema + /// or + /// Unable to generate CodeTypeReference for " + schema.Name + " type " + schema.Tag. + /// internal static string getType(Schema schema, bool nullible, ref bool nullibleEnum) { switch (schema.Tag) @@ -715,20 +930,19 @@ internal static string getType(Schema schema, bool nullible, ref bool nullibleEn case Schema.Type.Null: return typeof(object).ToString(); case Schema.Type.Boolean: - if (nullible) return $"System.Nullable<{typeof(bool)}>"; - else return typeof(bool).ToString(); + return nullible ? $"System.Nullable<{typeof(bool)}>" : typeof(bool).ToString(); + case Schema.Type.Int: - if (nullible) return $"System.Nullable<{typeof(int)}>"; - else return typeof(int).ToString(); + return nullible ? $"System.Nullable<{typeof(int)}>" : typeof(int).ToString(); + case Schema.Type.Long: - if (nullible) return $"System.Nullable<{typeof(long)}>"; - else return typeof(long).ToString(); + return nullible ? $"System.Nullable<{typeof(long)}>" : typeof(long).ToString(); + case Schema.Type.Float: - if (nullible) return $"System.Nullable<{typeof(float)}>"; - else return typeof(float).ToString(); + return nullible ? $"System.Nullable<{typeof(float)}>" : typeof(float).ToString(); + case Schema.Type.Double: - if (nullible) return $"System.Nullable<{typeof(double)}>"; - else return typeof(double).ToString(); + return nullible ? 
$"System.Nullable<{typeof(double)}>" : typeof(double).ToString(); case Schema.Type.Bytes: return typeof(byte[]).ToString(); @@ -737,108 +951,132 @@ internal static string getType(Schema schema, bool nullible, ref bool nullibleEn case Schema.Type.Enumeration: var namedSchema = schema as NamedSchema; - if (null == namedSchema) + if (namedSchema == null) + { throw new CodeGenException("Unable to cast schema into a named schema"); + } + if (nullible) { nullibleEnum = true; return "System.Nullable<" + CodeGenUtil.Instance.Mangle(namedSchema.Fullname) + ">"; } - else return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); + else + { + return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); + } case Schema.Type.Fixed: case Schema.Type.Record: case Schema.Type.Error: namedSchema = schema as NamedSchema; - if (null == namedSchema) + if (namedSchema == null) + { throw new CodeGenException("Unable to cast schema into a named schema"); + } + return CodeGenUtil.Instance.Mangle(namedSchema.Fullname); case Schema.Type.Array: var arraySchema = schema as ArraySchema; - if (null == arraySchema) + if (arraySchema == null) + { throw new CodeGenException("Unable to cast schema into an array schema"); + } return "IList<" + getType(arraySchema.ItemSchema, false, ref nullibleEnum) + ">"; case Schema.Type.Map: var mapSchema = schema as MapSchema; - if (null == mapSchema) + if (mapSchema == null) + { throw new CodeGenException("Unable to cast schema into a map schema"); + } + return "IDictionary"; case Schema.Type.Union: var unionSchema = schema as UnionSchema; - if (null == unionSchema) + if (unionSchema == null) + { throw new CodeGenException("Unable to cast schema into a union schema"); - Schema nullibleType = getNullableType(unionSchema); - if (null == nullibleType) - return CodeGenUtil.Object; - else - return getType(nullibleType, true, ref nullibleEnum); + } + + Schema nullibleType = GetNullableType(unionSchema); + + return nullibleType == null ? 
CodeGenUtil.Object : getType(nullibleType, true, ref nullibleEnum); case Schema.Type.Logical: var logicalSchema = schema as LogicalSchema; - if (null == logicalSchema) - throw new CodeGenException("Unable to cast schema into a logical schema"); - var csharpType = logicalSchema.LogicalType.GetCSharpType(nullible); - if (csharpType.IsGenericType && csharpType.GetGenericTypeDefinition() == typeof(Nullable<>)) + if (logicalSchema == null) { - return $"System.Nullable<{csharpType.GetGenericArguments()[0]}>"; - } - else - { - return csharpType.ToString(); + throw new CodeGenException("Unable to cast schema into a logical schema"); } + var csharpType = logicalSchema.LogicalType.GetCSharpType(nullible); + return csharpType.IsGenericType && csharpType.GetGenericTypeDefinition() == typeof(Nullable<>) + ? $"System.Nullable<{csharpType.GetGenericArguments()[0]}>" : csharpType.ToString(); } + throw new CodeGenException("Unable to generate CodeTypeReference for " + schema.Name + " type " + schema.Tag); } /// - /// Gets the schema of a union with null + /// Gets the schema of a union with null. /// - /// union schema - /// schema that is nullible + /// union schema. + /// + /// schema that is nullable. + /// + /// schema - UnionSchema can not be null. + [Obsolete("This method is deprecated and it will be removed in a future release! Please use GetNullableType() instead.")] public static Schema getNullableType(UnionSchema schema) { - Schema ret = null; - if (schema.Count == 2) + return GetNullableType(schema); + } + + /// + /// Gets the schema of a union with null. + /// + /// union schema. + /// + /// schema that is nullable. + /// + /// schema - UnionSchema can not be null. 
+ public static Schema GetNullableType(UnionSchema schema) + { + if (schema == null) { - bool nullable = false; - foreach (Schema childSchema in schema.Schemas) - { - if (childSchema.Tag == Schema.Type.Null) - nullable = true; - else - ret = childSchema; - } - if (!nullable) - ret = null; + throw new ArgumentNullException(nameof(schema), "UnionSchema can not be null"); } - return ret; + + if (schema.Count == 2 && !schema.Schemas.All(x => x.Tag != Schema.Type.Null)) + { + return schema.Schemas.FirstOrDefault(x => x.Tag != Schema.Type.Null); + } + + return null; } /// - /// Creates the static schema field for class types + /// Creates the static schema field for class types. /// - /// schema - /// CodeTypeDeclaration for the class - /// - /// Indicates whether we should add the to the - /// generated property. - /// + /// schema. + /// CodeTypeDeclaration for the class. + /// Indicates whether we should add the to the + /// generated property. protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, bool overrideFlag) { // create schema field - var ctrfield = new CodeTypeReference("Schema"); + var ctrfield = new CodeTypeReference(typeof(Schema), CodeTypeReferenceOptions.GlobalReference); string schemaFname = "_SCHEMA"; var codeField = new CodeMemberField(ctrfield, schemaFname); codeField.Attributes = MemberAttributes.Public | MemberAttributes.Static; + // create function call Schema.Parse(json) var cpe = new CodePrimitiveExpression(schema.ToString()); var cmie = new CodeMethodInvokeExpression( - new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(typeof(Schema)), "Parse"), + new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(ctrfield), "Parse"), new CodeExpression[] { cpe }); codeField.InitExpression = cmie; ctd.Members.Add(codeField); @@ -846,7 +1084,11 @@ protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, // create property to get static schema field var property = new 
CodeMemberProperty(); property.Attributes = MemberAttributes.Public; - if (overrideFlag) property.Attributes |= MemberAttributes.Override; + if (overrideFlag) + { + property.Attributes |= MemberAttributes.Override; + } + property.Name = "Schema"; property.Type = ctrfield; @@ -855,10 +1097,12 @@ protected virtual void createSchemaField(Schema schema, CodeTypeDeclaration ctd, } /// - /// Creates an XML documentation for the given comment + /// Creates an XML documentation for the given comment. /// - /// comment - /// CodeCommentStatement object + /// comment. + /// + /// a statement consisting of a single comment. + /// protected virtual CodeCommentStatement createDocComment(string comment) { string text = string.Format(CultureInfo.InvariantCulture, @@ -867,9 +1111,9 @@ protected virtual CodeCommentStatement createDocComment(string comment) } /// - /// Writes the generated compile unit into one file + /// Writes the generated compile unit into one file. /// - /// name of output file to write to + /// name of output file to write to. 
public virtual void WriteCompileUnit(string outputFile) { var cscp = new CSharpCodeProvider(); @@ -886,10 +1130,55 @@ public virtual void WriteCompileUnit(string outputFile) } /// - /// Writes each types in each namespaces into individual files + /// Gets names and generated code of the schema(s) types /// - /// name of directory to write to - public virtual void WriteTypes(string outputdir) + /// + public virtual IDictionary GetTypes() + { + using (var cscp = new CSharpCodeProvider()) + { + var opts = new CodeGeneratorOptions + { + BracingStyle = "C", IndentString = "\t", BlankLinesBetweenMembers = false + }; + CodeNamespaceCollection nsc = CompileUnit.Namespaces; + + var sourceCodeByName = new Dictionary(); + for (int i = 0; i < nsc.Count; i++) + { + var ns = nsc[i]; + + var new_ns = new CodeNamespace(ns.Name); + new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); + foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { + new_ns.Imports.Add(nci); + } + + var types = ns.Types; + for (int j = 0; j < types.Count; j++) + { + var ctd = types[j]; + using (var writer = new StringWriter()) + { + new_ns.Types.Add(ctd); + cscp.GenerateCodeFromNamespace(new_ns, writer, opts); + new_ns.Types.Remove(ctd); + sourceCodeByName[ctd.Name] = writer.ToString(); + } + } + } + + return sourceCodeByName; + } + } + + /// + /// Writes each types in each namespaces into individual files. + /// + /// name of directory to write to. 
+ /// skip creation of directories based on schema namespace + public virtual void WriteTypes(string outputdir, bool skipDirectories = false) { var cscp = new CSharpCodeProvider(); @@ -904,16 +1193,21 @@ public virtual void WriteTypes(string outputdir) var ns = nsc[i]; string dir = outputdir; - foreach (string name in CodeGenUtil.Instance.UnMangle(ns.Name).Split('.')) + if (skipDirectories != true) { - dir = Path.Combine(dir, name); + foreach (string name in CodeGenUtil.Instance.UnMangle(ns.Name).Split('.')) + { + dir = Path.Combine(dir, name); + } } Directory.CreateDirectory(dir); var new_ns = new CodeNamespace(ns.Name); new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { new_ns.Imports.Add(nci); + } var types = ns.Types; for (int j = 0; j < types.Count; j++) @@ -929,5 +1223,48 @@ public virtual void WriteTypes(string outputdir) } } } + + /// + /// Replace namespace(s) in schema or protocol definition. + /// + /// input schema or protocol definition. + /// namespace mappings object. 
+ private static string ReplaceMappedNamespacesInSchema(string input, IEnumerable> namespaceMapping) + { + if (namespaceMapping == null || input == null) + return input; + + // Replace namespace in "namespace" definitions: + // "namespace": "originalnamespace" -> "namespace": "mappednamespace" + // "namespace": "originalnamespace.whatever" -> "namespace": "mappednamespace.whatever" + // Note: It keeps the original whitespaces + return Regex.Replace(input, @"""namespace""(\s*):(\s*)""([^""]*)""", m => + { + // m.Groups[1]: whitespaces before ':' + // m.Groups[2]: whitespaces after ':' + // m.Groups[3]: the namespace + + string ns = m.Groups[3].Value; + + foreach (var mapping in namespaceMapping) + { + // Full match + if (mapping.Key == ns) + { + ns = mapping.Value; + break; + } + else + // Partial match + if (ns.StartsWith($"{mapping.Key}.")) + { + ns = $"{mapping.Value}.{ns.Substring(mapping.Key.Length + 1)}"; + break; + } + } + + return $@"""namespace""{m.Groups[1].Value}:{m.Groups[2].Value}""{ns}"""; + }); + } } } diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs b/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs index 485646fd033..1601a1e285d 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGenException.cs @@ -19,17 +19,34 @@ namespace Avro { - class CodeGenException : AvroException + /// + /// CodeGen Exception. + /// + /// + public class CodeGenException : AvroException { + /// + /// Initializes a new instance of the class. + /// public CodeGenException() { } + /// + /// Initializes a new instance of the class. + /// + /// The message that describes the error. public CodeGenException(string s) : base(s) { } + /// + /// Initializes a new instance of the class. + /// + /// The message that describes the error. + /// The exception that is the cause of the current exception, or a null reference + /// if no inner exception is specified. 
public CodeGenException(string s, Exception inner) : base(s, inner) { diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs b/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs index 54de067923d..1a720e16533 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGenUtil.cs @@ -15,64 +15,94 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +using System.CodeDom; +using System.CodeDom.Compiler; using System.Collections.Generic; +using System.Reflection; using System.Text; -using System.CodeDom; namespace Avro { /// - /// A singleton class containing data used by codegen + /// A singleton class containing data used by codegen. /// public sealed class CodeGenUtil { /// - /// Singleton instance of this class. + /// Gets singleton instance of this class. /// + /// + /// The instance. + /// public static CodeGenUtil Instance { get; } = new CodeGenUtil(); /// - /// Namespaces to import in generated code. + /// Gets namespaces to import in generated code. /// + /// + /// The namespace imports. + /// public CodeNamespaceImport[] NamespaceImports { get; private set; } /// - /// Comment included at the top of each generated code file. + /// Gets comment included at the top of each generated code file. /// + /// + /// The file comment. + /// public CodeCommentStatement FileComment { get; private set; } /// - /// Reserved keywords in the C# language. + /// Gets reserved keywords in the C# language. /// + /// + /// The reserved keywords. + /// public HashSet ReservedKeywords { get; private set; } + /// + /// Gets the generated code attribute. + /// + /// + /// The generated code attribute. + /// + public CodeAttributeDeclaration GeneratedCodeAttribute { get; private set; } + private const char At = '@'; private const char Dot = '.'; + private readonly string _assemblyInformationVersion = GetInformationalVersion(); /// - /// Fully-qualified name of a type. 
+ /// Fully-qualified name of a type. /// public const string Object = "System.Object"; + /// + /// Prevents a default instance of the class from being created. + /// private CodeGenUtil() { NamespaceImports = new CodeNamespaceImport[] { new CodeNamespaceImport("System"), new CodeNamespaceImport("System.Collections.Generic"), new CodeNamespaceImport("System.Text"), - new CodeNamespaceImport("Avro"), - new CodeNamespaceImport("Avro.Specific") }; + new CodeNamespaceImport("global::Avro"), + new CodeNamespaceImport("global::Avro.Specific") }; FileComment = new CodeCommentStatement( @"------------------------------------------------------------------------------ - Generated by " + System.AppDomain.CurrentDomain.FriendlyName + ", version " + System.Reflection.Assembly.GetExecutingAssembly().GetName().Version + @" + Generated by " + System.AppDomain.CurrentDomain.FriendlyName + ", version " + _assemblyInformationVersion + @" Changes to this file may cause incorrect behavior and will be lost if code is regenerated ------------------------------------------------------------------------------"); // Visual Studio 2010 https://msdn.microsoft.com/en-us/library/x53a06bb.aspx + // Note: + // 1. Contextual keywords are not reserved keywords e.g. value, partial + // 2. 
__arglist, __makeref, __reftype, __refvalue are undocumented keywords, but recognized by the C# compiler ReservedKeywords = new HashSet() { "abstract","as", "base", "bool", "break", "byte", "case", "catch", "char", "checked", "class", "const", "continue", "decimal", "default", "delegate", "do", "double", "else", "enum", "event", @@ -81,14 +111,17 @@ is regenerated "null", "object", "operator", "out", "override", "params", "private", "protected", "public", "readonly", "ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc", "static", "string", "struct", "switch", "this", "throw", "true", "try", "typeof", "uint", "ulong", - "unchecked", "unsafe", "ushort", "using", "virtual", "void", "volatile", "while", "value", "partial" }; + "unchecked", "unsafe", "ushort", "using", "virtual", "void", "volatile", "while", + "__arglist", "__makeref", "__reftype", "__refvalue" }; + + GeneratedCodeAttribute = GetGeneratedCodeAttribute(); } /// - /// Append @ to all reserved keywords that appear on the given name + /// Append @ to all reserved keywords that appear on the given name. /// - /// - /// + /// The name. + /// updated string. public string Mangle(string name) { var builder = new StringBuilder(); @@ -105,10 +138,10 @@ public string Mangle(string name) } /// - /// Remove all the @ + /// Remove all the @. /// - /// - /// + /// The name. + /// updated string. 
public string UnMangle(string name) { var builder = new StringBuilder(name.Length); @@ -117,5 +150,32 @@ public string UnMangle(string name) builder.Append(name[i]); return builder.ToString(); } + + private CodeAttributeDeclaration GetGeneratedCodeAttribute() + { + GeneratedCodeAttribute generatedCodeAttribute = + new GeneratedCodeAttribute(System.AppDomain.CurrentDomain.FriendlyName, + _assemblyInformationVersion); + + CodePrimitiveExpression tool = new CodePrimitiveExpression(generatedCodeAttribute.Tool); + CodePrimitiveExpression version = new CodePrimitiveExpression(generatedCodeAttribute.Version); + + CodeAttributeDeclaration codeAttributeDeclaration = + new CodeAttributeDeclaration($"global::{generatedCodeAttribute.GetType().FullName}", + new CodeAttributeArgument(tool), + new CodeAttributeArgument(version)); + + return codeAttributeDeclaration; + } + + private static string GetInformationalVersion() + { + System.Reflection.AssemblyInformationalVersionAttribute attribute = + (System.Reflection.AssemblyInformationalVersionAttribute) + System.Reflection.Assembly.GetExecutingAssembly() + .GetCustomAttribute(typeof(System.Reflection.AssemblyInformationalVersionAttribute)); + + return attribute.InformationalVersion; + } } } diff --git a/lang/csharp/src/apache/main/File/Codec.cs b/lang/csharp/src/apache/main/File/Codec.cs index f8667f7cccf..46191997a1d 100644 --- a/lang/csharp/src/apache/main/File/Codec.cs +++ b/lang/csharp/src/apache/main/File/Codec.cs @@ -16,7 +16,10 @@ * limitations under the License. */ +using System; +using System.Collections.Generic; using System.IO; +using System.Reflection; namespace Avro.File { @@ -27,102 +30,199 @@ namespace Avro.File public abstract class Codec { /// - /// Compress data using implemented codec + /// Compress data using implemented codec. /// - /// - /// - abstract public byte[] Compress(byte[] uncompressedData); + /// The uncompressed data. + /// + /// byte array. 
+ /// + public abstract byte[] Compress(byte[] uncompressedData); /// - /// Compress data using implemented codec + /// Compress data using implemented codec. /// - /// The stream which contains the data to be compressed + /// The stream which contains the data to be compressed. /// A reusable stream which will hold the compressed data. That stream should be empty. - abstract public void Compress(MemoryStream inputStream, MemoryStream outputStream); + public abstract void Compress(MemoryStream inputStream, MemoryStream outputStream); + + /// + /// Decompress data using implemented codec. + /// + /// The buffer holding data to decompress. + /// A byte array holding the decompressed data. + [Obsolete] + public virtual byte[] Decompress(byte[] compressedData) + { + return Decompress(compressedData, compressedData.Length); + } /// /// Decompress data using implemented codec /// - /// - /// - abstract public byte[] Decompress(byte[] compressedData); + /// The buffer holding data to decompress. + /// The actual length of bytes to decompress from the buffer. + /// A byte array holding the decompressed data. + public abstract byte[] Decompress(byte[] compressedData, int length); /// - /// Name of this codec type + /// Name of this codec type. /// - /// - abstract public string GetName(); + /// The codec name. + public abstract string GetName(); /// - /// Codecs must implement an equals() method + /// Codecs must implement an equals() method. /// - /// - /// - abstract public override bool Equals(object other); + /// The to compare with this instance. + /// + /// true if the specified is equal to this instance; otherwise, false. + /// + public abstract override bool Equals(object other); /// /// Codecs must implement a HashCode() method that is - /// consistent with Equals + /// consistent with Equals. 
/// - /// - abstract public override int GetHashCode(); + /// + /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table. + /// + public abstract override int GetHashCode(); /// - /// Codec types + /// Codec types. /// public enum Type { /// - /// Codec type that implments the "deflate" compression algorithm. + /// Codec type that implements the "deflate" compression algorithm. /// Deflate, - //Snappy - /// /// Codec that does not perform any compression. /// - Null - }; + Null, + + /// + /// Codec type that implements the "Snappy" compression algorithm. + /// + Snappy, + + /// + /// Codec type that implements the "BZip2" compression algorithm. + /// + BZip2, + + /// + /// Codec type that implements the "XZ" compression algorithm. + /// + XZ, + + /// + /// Codec type that implements the "Zstandard" compression algorithm. + /// + Zstandard + } + + /// + /// Represents a function capable of resolving a codec string + /// with a matching codec implementation a reader can use to decompress data. + /// + /// The codec string + public delegate Codec CodecResolver(string codecMetaString); + + /// + /// The codec resolvers + /// + private static readonly List _codecResolvers = new List(); + + /// + /// Registers a function that will attempt to resolve a codec identifying string + /// with a matching codec implementation when reading compressed Avro data. + /// + /// A function that is able to find a codec implementation for a given codec string + public static void RegisterResolver(CodecResolver resolver) + { + _codecResolvers.Add(resolver); + } /// - /// Factory method to return child - /// codec instance based on Codec.Type + /// Factory method to return child codec instance based on Codec.Type. /// - /// - /// + /// Type of the codec. + /// + /// Codec based on type. 
+ /// public static Codec CreateCodec(Type codecType) { switch (codecType) { case Type.Deflate: return new DeflateCodec(); - default: + case Type.Null: return new NullCodec(); + case Type.Snappy: + case Type.BZip2: + case Type.XZ: + case Type.Zstandard: + { + // Create codec dynamically from "Avro.File.CODECNAME" assembly + Assembly assembly = Assembly.Load($"Avro.File.{codecType}"); + return assembly.CreateInstance($"Avro.File.{codecType}.{codecType}Codec") as Codec; + } } + + throw new AvroRuntimeException($"Unrecognized codec: {codecType}"); } /// - /// Factory method to return child - /// codec instance based on string type + /// Factory method to return child codec instance based on string type. /// - /// - /// + /// Type of the codec. + /// Codec based on type. public static Codec CreateCodecFromString(string codecType) { + if (codecType == null) + { + // If codec is absent, it is assumed to be "null" + // https://avro.apache.org/docs/current/spec.html + return CreateCodec(Type.Null); + } + + foreach (var resolver in _codecResolvers) + { + var candidateCodec = resolver(codecType); + if (candidateCodec != null) + { + return candidateCodec; + } + } + switch (codecType) { case DataFileConstants.DeflateCodec: - return new DeflateCodec(); - default: - return new NullCodec(); + return CreateCodec(Type.Deflate); + case DataFileConstants.NullCodec: + return CreateCodec(Type.Null); + case DataFileConstants.SnappyCodec: + return CreateCodec(Type.Snappy); + case DataFileConstants.BZip2Codec: + return CreateCodec(Type.BZip2); + case DataFileConstants.XZCodec: + return CreateCodec(Type.XZ); + case DataFileConstants.ZstandardCodec: + return CreateCodec(Type.Zstandard); } + + throw new AvroRuntimeException($"Unrecognized codec: {codecType}"); } /// - /// Returns name of codec + /// Returns name of codec. /// - /// + /// + /// A that represents this instance. 
+ /// public override string ToString() { return GetName(); diff --git a/lang/csharp/src/apache/main/File/DataBlock.cs b/lang/csharp/src/apache/main/File/DataBlock.cs deleted file mode 100644 index 7fd85e7bb15..00000000000 --- a/lang/csharp/src/apache/main/File/DataBlock.cs +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -using System; -using System.IO; - -namespace Avro.File -{ - /// - /// Encapsulates a block of data read by the . - /// We will remove this class from the public API in a future version because it is only meant - /// to be used internally. - /// - [Obsolete("This will be removed from the public API in a future version.")] - public class DataBlock - { - /// - /// Raw bytes within this block. - /// - public byte[] Data { get; set; } - - /// - /// Number of entries in this block. - /// - public long NumberOfEntries { get; set; } - - /// - /// Size of this block in bytes. - /// - public long BlockSize { get; set; } - - /// - /// Initializes a new instance of the class. - /// - /// Number of entries in this block. - /// Size of this block in bytes. 
- public DataBlock(long numberOfEntries, long blockSize) - { - NumberOfEntries = numberOfEntries; - BlockSize = blockSize; - Data = new byte[blockSize]; - } - - internal Stream GetDataAsStream() - { - return new MemoryStream(Data); - } - } -} diff --git a/lang/csharp/src/apache/main/File/DataFileConstants.cs b/lang/csharp/src/apache/main/File/DataFileConstants.cs index 27b034e7ac2..438caeca0e4 100644 --- a/lang/csharp/src/apache/main/File/DataFileConstants.cs +++ b/lang/csharp/src/apache/main/File/DataFileConstants.cs @@ -21,9 +21,6 @@ namespace Avro.File /// /// Constants used in data files. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Design", - "CA1052:Static holder types should be Static or NotInheritable", - Justification = "Maintain public API")] public class DataFileConstants { /// @@ -51,6 +48,26 @@ public class DataFileConstants /// public const string DeflateCodec = "deflate"; + /// + /// Identifier for the Snappy codec. + /// + public const string SnappyCodec = "snappy"; + + /// + /// Identifier for the BZip2 codec. + /// + public const string BZip2Codec = "bzip2"; + + /// + /// Identifier for the XZ codec. + /// + public const string XZCodec = "xz"; + + /// + /// Identifier for the Zstandard codec. + /// + public const string ZstandardCodec = "zstandard"; + /// /// Reserved 'avro' metadata key. /// @@ -64,10 +81,13 @@ public class DataFileConstants /// /// Magic bytes at the beginning of an Avro data file. /// - public static byte[] Magic = { (byte)'O', - (byte)'b', - (byte)'j', - Version }; + public static readonly byte[] Magic = + { + (byte)'O', + (byte)'b', + (byte)'j', + Version, + }; /// /// Hash code for the null codec. 
diff --git a/lang/csharp/src/apache/main/File/DataFileReader.cs b/lang/csharp/src/apache/main/File/DataFileReader.cs index f065dbb9fa7..dff13e05885 100644 --- a/lang/csharp/src/apache/main/File/DataFileReader.cs +++ b/lang/csharp/src/apache/main/File/DataFileReader.cs @@ -27,18 +27,21 @@ namespace Avro.File { /// - /// Provides access to Avro data written using the . + /// Provides access to Avro data written using the . /// /// Type to deserialze data objects to. + /// public class DataFileReader : IFileReader { /// - /// Defines the signature for a function that returns a new + /// Defines the signature for a function that returns a new /// given a writer and reader schema. /// /// Schema used to write the datum. /// Schema used to read the datum. - /// A datum reader. + /// + /// A datum reader. + /// public delegate DatumReader CreateDatumReader(Schema writerSchema, Schema readerSchema); private DatumReader _reader; @@ -52,101 +55,124 @@ public class DataFileReader : IFileReader private byte[] _syncBuffer; private long _blockStart; private Stream _stream; - private bool _leaveOpen; - private Schema _readerSchema; + private readonly bool _leaveOpen; + private readonly Schema _readerSchema; private readonly CreateDatumReader _datumReaderFactory; /// - /// Open a reader for a file using path + /// Open a reader for a file using path. /// - /// - /// + /// The path. + /// + /// File Reader. + /// public static IFileReader OpenReader(string path) { return OpenReader(new FileStream(path, FileMode.Open), null); } /// - /// Open a reader for a file using path and the reader's schema + /// Open a reader for a file using path and the reader's schema. /// - /// Path to the file - /// Schema used to read data from the file - /// A new file reader + /// Path to the file. + /// Schema used to read data from the file. + /// + /// A new file reader. 
+ /// public static IFileReader OpenReader(string path, Schema readerSchema) { return OpenReader(new FileStream(path, FileMode.Open), readerSchema); } /// - /// Open a reader for a stream + /// Open a reader for a stream. /// - /// - /// + /// The in stream. + /// + /// File Reader. + /// public static IFileReader OpenReader(Stream inStream) { return OpenReader(inStream, null); } /// - /// Open a reader for a stream + /// Open a reader for a stream. /// - /// - /// Leave the stream open after disposing the object - /// + /// The in stream. + /// Leave the stream open after disposing the object. + /// + /// File Reader. + /// public static IFileReader OpenReader(Stream inStream, bool leaveOpen) { return OpenReader(inStream, null, leaveOpen); } /// - /// Open a reader for a stream using the reader's schema + /// Open a reader for a stream using the reader's schema. /// - /// Stream containing the file contents - /// Schema used to read the file - /// A new file reader + /// Stream containing the file contents. + /// Schema used to read the file. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema) { return OpenReader(inStream, readerSchema, CreateDefaultReader); } /// - /// Open a reader for a stream using the reader's schema + /// Open a reader for a stream using the reader's schema. /// - /// Stream containing the file contents - /// Schema used to read the file - /// Leave the stream open after disposing the object - /// A new file reader + /// Stream containing the file contents. + /// Schema used to read the file. + /// Leave the stream open after disposing the object. + /// + /// A new file reader. 
+ /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, bool leaveOpen) { return OpenReader(inStream, readerSchema, CreateDefaultReader, leaveOpen); } - + /// - /// Open a reader for a stream using the reader's schema and a custom DatumReader + /// Open a reader for a stream using the reader's schema and a custom DatumReader. /// - /// Stream of file contents - /// Schema used to read the file - /// Factory to create datum readers given a reader an writer schema - /// A new file reader + /// Stream of file contents. + /// Schema used to read the file. + /// Factory to create datum readers given a reader an writer schema. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, CreateDatumReader datumReaderFactory) { return new DataFileReader(inStream, readerSchema, datumReaderFactory, false); // (not supporting 1.2 or below, format) } /// - /// Open a reader for a stream using the reader's schema and a custom DatumReader + /// Open a reader for a stream using the reader's schema and a custom DatumReader. /// - /// Stream of file contents - /// Schema used to read the file - /// Factory to create datum readers given a reader an writer schema - /// Leave the stream open after disposing the object - /// A new file reader + /// Stream of file contents. + /// Schema used to read the file. + /// Factory to create datum readers given a reader an writer schema. + /// Leave the stream open after disposing the object. + /// + /// A new file reader. + /// public static IFileReader OpenReader(Stream inStream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) { return new DataFileReader(inStream, readerSchema, datumReaderFactory, leaveOpen); // (not supporting 1.2 or below, format) } - DataFileReader(Stream stream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) + /// + /// Initializes a new instance of the class. + /// + /// The stream. 
+ /// The reader schema. + /// The datum reader factory. + /// if set to true [leave open]. + private DataFileReader(Stream stream, Schema readerSchema, CreateDatumReader datumReaderFactory, bool leaveOpen) { _readerSchema = readerSchema; _datumReaderFactory = datumReaderFactory; @@ -228,6 +254,7 @@ public void Seek(long position) public void Sync(long position) { Seek(position); + // work around an issue where 1.5.4 C stored sync in metadata if ((position == 0) && (GetMeta(DataFileConstants.MetaDataSync) != null)) { @@ -307,7 +334,7 @@ public bool HasNext() if (HasNextBlock()) { _currentBlock = NextRawBlock(_currentBlock); - _currentBlock.Data = _codec.Decompress(_currentBlock.Data); + _currentBlock.Data = _codec.Decompress(_currentBlock.Data, (int)_blockSize); _datumDecoder = new BinaryDecoder(_currentBlock.GetDataAsStream()); } } @@ -350,6 +377,15 @@ protected virtual void Dispose(bool disposing) _stream.Dispose(); } + /// + /// Initializes the specified stream. + /// + /// The stream. + /// + /// Not a valid data file! + /// or + /// Not a valid data file!. + /// private void Init(Stream stream) { _stream = stream; @@ -394,6 +430,14 @@ private void Init(Stream stream) _codec = ResolveCodec(); } + /// + /// Creates the default reader. + /// + /// The writer schema. + /// The reader schema. + /// + /// Datum Reader. + /// private static DatumReader CreateDefaultReader(Schema writerSchema, Schema readerSchema) { DatumReader reader = null; @@ -410,9 +454,21 @@ private static DatumReader CreateDefaultReader(Schema writerSchema, Schema re return reader; } + /// + /// Resolves the codec. + /// + /// + /// Resolved codec. 
+ /// private Codec ResolveCodec() { - return Codec.CreateCodecFromString(GetMetaString(DataFileConstants.MetaDataCodec)); + string codec = GetMetaString(DataFileConstants.MetaDataCodec); + + // If codec is absent, it is assumed to be "null" + if (codec == null) + return Codec.CreateCodec(Codec.Type.Null); + + return Codec.CreateCodecFromString(codec); } /// @@ -421,6 +477,15 @@ public T Next() return Next(default(T)); } + /// + /// Reads the next datum from the file. + /// + /// The reuse. + /// Next deserialized data entry. + /// No more datum objects remaining in block! + /// or + /// Error fetching next object from block: {0}. + /// private T Next(T reuse) { try @@ -442,12 +507,25 @@ private T Next(T reuse) } } + /// + /// Ends the stream for the block. + /// private void BlockFinished() { if (_stream.CanSeek) _blockStart = _stream.Position; } + /// + /// Reads the Next block from the file. + /// + /// The reuse. + /// Data Block. + /// + /// No data remaining in block! + /// or + /// Invalid sync!. + /// private DataBlock NextRawBlock(DataBlock reuse) { if (!HasNextBlock()) @@ -473,6 +551,10 @@ private DataBlock NextRawBlock(DataBlock reuse) return reuse; } + /// + /// Evaluates if there is data left in the stream. + /// + /// True if there is data left in the stream, otherwise false. private bool DataLeft() { long currentPosition = _stream.Position; @@ -484,6 +566,17 @@ private bool DataLeft() return true; } + /// + /// Determines whether [has next block]. + /// + /// + /// true if [has next block]; otherwise, false. + /// + /// + /// Block size invalid or too large for this implementation: " + _blockSize + /// or + /// Error ascertaining if data has next block: {0}. 
+ /// private bool HasNextBlock() { try @@ -508,14 +601,14 @@ private bool HasNextBlock() { _blockRemaining = _decoder.ReadLong(); // read block count } - catch(AvroException) + catch (AvroException) { return false; } } _blockSize = _decoder.ReadLong(); // read block size - if (_blockSize > System.Int32.MaxValue || _blockSize < 0) + if (_blockSize > int.MaxValue || _blockSize < 0) { throw new AvroRuntimeException("Block size invalid or too large for this " + "implementation: " + _blockSize); @@ -531,27 +624,37 @@ private bool HasNextBlock() } /// - /// Encapsulates a block of data read by the . + /// Encapsulates a block of data read by the . /// + /// private class DataBlock { /// - /// Raw bytes within this block. + /// Gets or sets raw bytes within this block. /// - public byte[] Data { get; set; } + /// + /// The data. + /// + public byte[] Data { get; set; } /// - /// Number of entries in this block. + /// Gets or sets number of entries in this block. /// + /// + /// The number of entries. + /// public long NumberOfEntries { get; set; } /// - /// Size of this block in bytes. + /// Gets or sets size of this block in bytes. /// + /// + /// The size of the block. + /// public long BlockSize { get; set; } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// /// Number of entries in this block. /// Size of this block in bytes. @@ -562,6 +665,10 @@ public DataBlock(long numberOfEntries, long blockSize) Data = new byte[blockSize]; } + /// + /// Gets the data as stream. + /// + /// A stream. 
internal Stream GetDataAsStream() { return new MemoryStream(Data); diff --git a/lang/csharp/src/apache/main/File/DataFileWriter.cs b/lang/csharp/src/apache/main/File/DataFileWriter.cs index 2a1dd7125de..82ac3a9da8b 100644 --- a/lang/csharp/src/apache/main/File/DataFileWriter.cs +++ b/lang/csharp/src/apache/main/File/DataFileWriter.cs @@ -27,11 +27,12 @@ namespace Avro.File /// /// Stores in a file a sequence of data conforming to a schema. The schema is stored in the file /// with the data. Each datum in a file is of the same schema. Data is written with a - /// . Data is grouped into blocks. A synchronization marker is + /// . Data is grouped into blocks. A synchronization marker is /// written between blocks, so that files may be split. Blocks may be compressed. Extensible /// metadata is stored at the end of the file. Files may be appended to. /// /// Type of datum to write to the file. + /// public class DataFileWriter : IFileWriter { private Schema _schema; @@ -56,7 +57,9 @@ public class DataFileWriter : IFileWriter /// /// Datum writer to use. /// Path to the file. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, string path) { return OpenWriter(writer, new FileStream(path, FileMode.Create), Codec.CreateCodec(Codec.Type.Null)); @@ -68,7 +71,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, string path) /// /// Datum writer to use. /// Stream to write to. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream) { return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null)); @@ -81,7 +86,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream) /// Datum writer to use. /// Stream to write to. /// Leave the stream open after disposing the object - /// A new file writer. + /// + /// A new file writer. 
+ /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, bool leaveOpen) { return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null), leaveOpen); @@ -94,7 +101,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// Datum writer to use. /// Path to the file. /// Codec to use when writing. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, string path, Codec codec) { return OpenWriter(writer, new FileStream(path, FileMode.Create), codec); @@ -107,7 +116,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, string path, Code /// Datum writer to use. /// Stream to write to. /// Codec to use when writing. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, Codec codec) { return new DataFileWriter(writer).Create(writer.Schema, outStream, codec, false); @@ -121,7 +132,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// Stream to write to. /// Codec to use when writing. /// Leave the stream open after disposing the object - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, Codec codec, bool leaveOpen) { return new DataFileWriter(writer).Create(writer.Schema, outStream, codec, leaveOpen); @@ -132,7 +145,9 @@ public static IFileWriter OpenWriter(DatumWriter writer, Stream outStream, /// /// Datum writer to use. /// Path to the file. - /// A new file writer. + /// + /// A new file writer. + /// public static IFileWriter OpenAppendWriter(DatumWriter writer, string path) { return new DataFileWriter(writer).AppendTo(path); @@ -145,7 +160,14 @@ public static IFileWriter OpenAppendWriter(DatumWriter writer, string path /// Datum writer to use. /// reading the existing file. /// stream to write to, positioned at the end of the existing file. 
- /// A new file writer. + /// + /// A new file writer. + /// + /// + /// {nameof(inStream)} must have Read access + /// or + /// {nameof(outStream)} must have Write access + /// public static IFileWriter OpenAppendWriter(DatumWriter writer, Stream inStream, Stream outStream) { if (!inStream.CanRead) @@ -161,6 +183,10 @@ public static IFileWriter OpenAppendWriter(DatumWriter writer, Stream inSt return new DataFileWriter(writer).AppendTo(inStream, outStream); } + /// + /// Initializes a new instance of the class. + /// + /// The writer. private DataFileWriter(DatumWriter writer) { _writer = writer; @@ -240,6 +266,11 @@ public void Append(T datum) WriteIfBlockFull(); } + /// + /// Appends to file. + /// + /// The path. + /// a file writer private IFileWriter AppendTo(string path) { using (var inStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) @@ -252,6 +283,12 @@ private IFileWriter AppendTo(string path) // of this writer. } + /// + /// Appends to stream. + /// + /// The in stream. + /// The out stream. + /// private IFileWriter AppendTo(Stream inStream, Stream outStream) { using (var dataFileReader = DataFileReader.OpenReader(inStream)) @@ -281,6 +318,9 @@ private IFileWriter AppendTo(Stream inStream, Stream outStream) return this; } + /// + /// Ensures the header. + /// private void EnsureHeader() { if (!_headerWritten) @@ -304,6 +344,9 @@ public long Sync() return _stream.Position; } + /// + /// Synchronizes the internal. + /// private void SyncInternal() { AssertOpen(); @@ -323,6 +366,9 @@ public void Close() _isOpen = false; } + /// + /// Writes the header. + /// private void WriteHeader() { _encoder.WriteFixed(DataFileConstants.Magic); @@ -330,6 +376,9 @@ private void WriteHeader() WriteSyncData(); } + /// + /// Initializes this instance. + /// private void Init() { _blockCount = 0; @@ -344,6 +393,10 @@ private void Init() _isOpen = true; } + /// + /// Asserts the open. 
+ /// + /// Cannot complete operation: avro file/stream not open private void AssertOpen() { if (!_isOpen) throw new AvroRuntimeException("Cannot complete operation: avro file/stream not open"); @@ -362,6 +415,9 @@ private IFileWriter Create(Schema schema, Stream outStream, Codec codec, bool return this; } + /// + /// Writes the meta data. + /// private void WriteMetaData() { // Add sync, code & schema to metadata @@ -382,17 +438,29 @@ private void WriteMetaData() _encoder.WriteMapEnd(); } + /// + /// Writes if block full. + /// private void WriteIfBlockFull() { if (BufferInUse() >= _syncInterval) WriteBlock(); } + /// + /// Buffers the in use. + /// + /// + /// Position of block stream + /// private long BufferInUse() { return _blockStream.Position; } + /// + /// Writes the block. + /// private void WriteBlock() { if (_blockCount > 0) @@ -413,11 +481,17 @@ private void WriteBlock() } } + /// + /// Writes the synchronize data. + /// private void WriteSyncData() { _encoder.WriteFixed(_syncData); } + /// + /// Generates the synchronize data. + /// private void GenerateSyncData() { _syncData = new byte[16]; @@ -426,11 +500,21 @@ private void GenerateSyncData() random.NextBytes(_syncData); } + /// + /// Sets the meta internal. + /// + /// The key. + /// The value. private void SetMetaInternal(string key, byte[] value) { _metaData.Add(key, value); } + /// + /// Gets the byte value. + /// + /// The value. + /// byte array of string value private byte[] GetByteValue(string value) { return System.Text.Encoding.UTF8.GetBytes(value); diff --git a/lang/csharp/src/apache/main/File/DeflateCodec.cs b/lang/csharp/src/apache/main/File/DeflateCodec.cs index 1a4d9a6cbc3..0ce37adb092 100644 --- a/lang/csharp/src/apache/main/File/DeflateCodec.cs +++ b/lang/csharp/src/apache/main/File/DeflateCodec.cs @@ -23,7 +23,8 @@ namespace Avro.File /// /// Implements deflate compression and decompression. 
/// - /// + /// + /// public class DeflateCodec : Codec { /// @@ -37,6 +38,7 @@ public override byte[] Compress(byte[] uncompressedData) { Compress.Write(uncompressedData, 0, uncompressedData.Length); } + return outStream.ToArray(); } @@ -54,27 +56,16 @@ public override void Compress(MemoryStream inputStream, MemoryStream outputStrea } /// - public override byte[] Decompress(byte[] compressedData) - { - MemoryStream inStream = new MemoryStream(compressedData); - MemoryStream outStream = new MemoryStream(); - - using (DeflateStream Decompress = - new DeflateStream(inStream, - CompressionMode.Decompress)) - { - CopyTo(Decompress, outStream); - } - return outStream.ToArray(); - } - - private static void CopyTo(Stream from, Stream to) + public override byte[] Decompress(byte[] compressedData, int length) { - byte[] buffer = new byte[4096]; - int read; - while((read = from.Read(buffer, 0, buffer.Length)) != 0) + using (MemoryStream inStream = new MemoryStream(compressedData, 0, length)) + using (MemoryStream outStream = new MemoryStream()) { - to.Write(buffer, 0, read); + using (DeflateStream decompress = new DeflateStream(inStream, CompressionMode.Decompress)) + { + decompress.CopyTo(outStream); + } + return outStream.ToArray(); } } @@ -87,9 +78,7 @@ public override string GetName() /// public override bool Equals(object other) { - if (this == other) - return true; - return this.GetType().Name == other.GetType().Name; + return this == other || GetType().Name == other.GetType().Name; } /// diff --git a/lang/csharp/src/apache/main/File/Header.cs b/lang/csharp/src/apache/main/File/Header.cs index 1ad22168ead..5cdcfd3717a 100644 --- a/lang/csharp/src/apache/main/File/Header.cs +++ b/lang/csharp/src/apache/main/File/Header.cs @@ -25,22 +25,33 @@ namespace Avro.File public class Header { /// - /// Metadata in this header. + /// Gets the metadata in this header. /// + /// + /// The metadata. + /// public IDictionary MetaData { get; } + /// - /// Sync token. 
+ /// Gets the synchronize token. /// + /// + /// The synchronize token. + /// public byte[] SyncData { get; } + /// - /// Avro schema. + /// Gets or sets the schema. /// + /// + /// The schema. + /// public Schema Schema { get; set; } /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. /// public Header() { diff --git a/lang/csharp/src/apache/main/File/IFileReader.cs b/lang/csharp/src/apache/main/File/IFileReader.cs index a4defd2a1e3..c482f7752da 100644 --- a/lang/csharp/src/apache/main/File/IFileReader.cs +++ b/lang/csharp/src/apache/main/File/IFileReader.cs @@ -101,7 +101,7 @@ public interface IFileReader : IDisposable /// /// Position to test. /// - /// True if pasth the next synchronization point after , false + /// True if past the next synchronization point after , false /// otherwise. /// bool PastSync(long position); diff --git a/lang/csharp/src/apache/main/File/NullCodec.cs b/lang/csharp/src/apache/main/File/NullCodec.cs index 12559411315..295d33ae3d7 100644 --- a/lang/csharp/src/apache/main/File/NullCodec.cs +++ b/lang/csharp/src/apache/main/File/NullCodec.cs @@ -27,7 +27,7 @@ namespace Avro.File public class NullCodec : Codec { /// - /// Initializes a new instance of the class. + /// Initializes a new instance of the class. 
/// public NullCodec() { } @@ -45,7 +45,7 @@ public override void Compress(MemoryStream inputStream, MemoryStream outputStrea } /// - public override byte[] Decompress(byte[] compressedData) + public override byte[] Decompress(byte[] compressedData, int length) { return compressedData; } @@ -59,9 +59,7 @@ public override string GetName() /// public override bool Equals(object other) { - if (this == other) - return true; - return this.GetType().Name == other.GetType().Name; + return this == other || GetType().Name == other.GetType().Name; } /// diff --git a/lang/csharp/src/apache/main/Generic/DatumWriter.cs b/lang/csharp/src/apache/main/Generic/DatumWriter.cs index be6836537a2..0b732d9d760 100644 --- a/lang/csharp/src/apache/main/Generic/DatumWriter.cs +++ b/lang/csharp/src/apache/main/Generic/DatumWriter.cs @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + using Avro.IO; namespace Avro.Generic @@ -23,8 +24,6 @@ namespace Avro.Generic /// Defines the interface for an object that writes data of a schema. /// /// Type of the in-memory data representation. 
- [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface DatumWriter { /// diff --git a/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs b/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs index 76a95b94ead..1ec126b3aa4 100644 --- a/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericDatumReader.cs @@ -98,16 +98,16 @@ public GenericEnumAccess(EnumSchema schema) public object CreateEnum(object reuse, int ordinal) { - if (reuse is GenericEnum) + if (reuse is GenericEnum ge) { - var ge = (GenericEnum) reuse; - if (ge.Schema.Equals(this.schema)) + if (ge.Schema.Equals(schema)) { - ge.Value = this.schema[ordinal]; + ge.Value = schema[ordinal]; return ge; } } - return new GenericEnum(this.schema, this.schema[ordinal]); + + return new GenericEnum(schema, schema[ordinal]); } } @@ -204,12 +204,12 @@ class GenericMapAccess : MapAccess { public object Create(object reuse) { - if (reuse is IDictionary) + if (reuse is IDictionary result) { - var result = (IDictionary)reuse; result.Clear(); return result; } + return new Dictionary(); } diff --git a/lang/csharp/src/apache/main/Generic/GenericEnum.cs b/lang/csharp/src/apache/main/Generic/GenericEnum.cs index 168b5552e77..27bba94ce15 100644 --- a/lang/csharp/src/apache/main/Generic/GenericEnum.cs +++ b/lang/csharp/src/apache/main/Generic/GenericEnum.cs @@ -28,29 +28,30 @@ public class GenericEnum /// public EnumSchema Schema { get; private set; } - private string value; + private string _value; /// /// Value of the enum. 
/// - public string Value { - get { return value; } + public string Value + { + get { return _value; } set { if (!Schema.Contains(value)) { if (!string.IsNullOrEmpty(Schema.Default)) { - this.value = Schema.Default; + _value = Schema.Default; } else { - throw new AvroException("Unknown value for enum: " + value + "(" + Schema + ")"); + throw new AvroException($"Unknown value for enum: {value}({Schema})"); } } else { - this.value = value; + _value = value; } } } @@ -62,31 +63,33 @@ public string Value { /// Value of the enum. public GenericEnum(EnumSchema schema, string value) { - this.Schema = schema; - this.Value = value; + Schema = schema; + Value = value; } /// public override bool Equals(object obj) { - if (obj == this) return true; - return (obj != null && obj is GenericEnum) - ? Value.Equals((obj as GenericEnum).Value, System.StringComparison.Ordinal) - : false; + if (obj == this) + { + return true; + } + + return obj != null + && obj.GetType() == typeof(GenericEnum) + && Value.Equals(((GenericEnum)obj).Value, System.StringComparison.Ordinal); } /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return 17 * Value.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison } /// public override string ToString() { - return "Schema: " + Schema + ", value: " + Value; + return $"Schema: {Schema}, value: {Value}"; } } } diff --git a/lang/csharp/src/apache/main/Generic/GenericReader.cs b/lang/csharp/src/apache/main/Generic/GenericReader.cs index f42e572d010..0b945b9ff5e 100644 --- a/lang/csharp/src/apache/main/Generic/GenericReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericReader.cs @@ -19,6 +19,7 @@ using System.Collections.Generic; using Avro.IO; using System.IO; +using System.Linq; namespace Avro.Generic { @@ -75,7 +76,7 @@ public GenericReader(DefaultReader reader) /// Reads an object off the stream. 
/// /// - /// If not null, the implemenation will try to use to return the object + /// If not null, the implementation will try to use to return the object /// /// Decoder to read from. /// Object we read from the decoder. @@ -88,7 +89,7 @@ public T Read(T reuse, Decoder d) /// /// The default implementation for the generic reader. It constructs new .NET objects for avro objects on the /// stream and returns the .NET object. Users can directly use this class or, if they want to customize the - /// object types for differnt Avro schema types, can derive from this class. There are enough hooks in this + /// object types for different Avro schema types, can derive from this class. There are enough hooks in this /// class to allow customization. /// /// @@ -113,7 +114,7 @@ public class DefaultReader /// /// Constructs the default reader for the given schemas using the DefaultReader. If the /// reader's and writer's schemas are different this class performs the resolution. - /// This default implemenation maps Avro types to .NET types as follows: + /// This default implementation maps Avro types to .NET types as follows: /// /// The schema used while generating the data /// The schema desired by the reader @@ -121,6 +122,8 @@ public DefaultReader(Schema writerSchema, Schema readerSchema) { this.ReaderSchema = readerSchema; this.WriterSchema = writerSchema; + if (!ReaderSchema.CanRead(WriterSchema)) + throw new AvroException("Schema mismatch. Reader: " + ReaderSchema + ", writer: " + WriterSchema); } /// @@ -129,14 +132,11 @@ public DefaultReader(Schema writerSchema, Schema readerSchema) /// The type of object to read. A single schema typically returns an object of a single .NET class. /// The only exception is UnionSchema, which can return a object of different types based on the branch selected. 
/// - /// If not null, the implemenation will try to use to return the object + /// If not null, the implementation will try to use to return the object /// The decoder for deserialization /// Object read from the decoder. public T Read(T reuse, Decoder decoder) { - if (!ReaderSchema.CanRead(WriterSchema)) - throw new AvroException("Schema mismatch. Reader: " + ReaderSchema + ", writer: " + WriterSchema); - return (T)Read(reuse, WriterSchema, ReaderSchema, decoder); } @@ -144,7 +144,7 @@ public T Read(T reuse, Decoder decoder) /// Reads an object off the stream. /// /// - /// If not null, the implemenation will try to use to return the object. + /// If not null, the implementation will try to use to return the object. /// /// Schema used to write the data. /// Schema to use when reading the data. @@ -291,21 +291,21 @@ protected virtual object ReadRecord(object reuse, RecordSchema writerSchema, Sch } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; - - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading - - object obj = null; - TryGetField(rec, rf.Name, rf.Pos, out obj); - AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs.Fields.Where(rf => !writerSchema.Contains(rf.Name))) + { + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading + + object obj = null; + TryGetField(rec, 
rf.Name, rf.Pos, out obj); + AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; @@ -357,7 +357,7 @@ protected virtual void AddField(object record, string fieldName, int fieldPos, o /// /// Deserializes a enum. Uses CreateEnum to construct the new enum object. /// - /// If appropirate, uses this instead of creating a new enum object. + /// If appropriate, uses this instead of creating a new enum object. /// The schema the writer used while writing the enum /// The schema the reader is using /// The decoder for deserialization. @@ -373,7 +373,7 @@ protected virtual object ReadEnum(object reuse, EnumSchema writerSchema, Schema /// If appropriate, use this enum object instead of a new one. /// The enum schema used by the reader. /// The symbol that needs to be used. - /// The default implemenation returns a GenericEnum. + /// The default implementation returns a GenericEnum. protected virtual object CreateEnum(object reuse, EnumSchema es, string symbol) { if (reuse is GenericEnum) @@ -431,7 +431,7 @@ protected virtual object CreateArray(object reuse, ArraySchema rs) /// /// Returns the size of the given array object. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The size of the array protected virtual int GetArraySize(object array) @@ -442,7 +442,7 @@ protected virtual int GetArraySize(object array) /// /// Resizes the array to the new value. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. 
protected virtual void ResizeArray(ref object array, int n) @@ -455,7 +455,7 @@ protected virtual void ResizeArray(ref object array, int n) /// /// Assigns a new value to the object at the given index /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The index to reassign to. /// The value to assign. @@ -468,7 +468,7 @@ protected virtual void SetArrayElement(object array, int index, object value) /// /// Returns the element at the given index. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The index to look into. /// The object the given index. Null if no object has been assigned to that index. @@ -478,7 +478,7 @@ protected virtual object GetArrayElement(object array, int index) } /// - /// Deserialized an avro map. The default implemenation creats a new map using CreateMap() and then + /// Deserialized an avro map. The default implementation creates a new map using CreateMap() and then /// adds elements to the map using AddMapEntry(). /// /// If appropriate, use this instead of creating a new map object. @@ -503,7 +503,7 @@ protected virtual object ReadMap(object reuse, MapSchema writerSchema, Schema re /// /// Used by the default implementation of ReadMap() to create a fresh map object. The default - /// implementaion of this method returns a IDictionary<string, map>. + /// implementation of this method returns a IDictionary<string, map>. /// /// If appropriate, use this map object instead of creating a new one. /// Map schema to use when creating the object. 
@@ -531,7 +531,7 @@ protected virtual void AddMapEntry(object map, string key, object value) } /// - /// Deserialized an object based on the writer's uninon schema. + /// Deserialized an object based on the writer's union schema. /// /// If appropriate, uses this object instead of creating a new one. /// The UnionSchema that the writer used. @@ -574,10 +574,10 @@ protected virtual object ReadLogical(object reuse, LogicalSchema writerSchema, S /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected virtual object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = (FixedSchema)readerSchema; diff --git a/lang/csharp/src/apache/main/Generic/GenericWriter.cs b/lang/csharp/src/apache/main/Generic/GenericWriter.cs index 79ff3b20867..b29cb68bfbc 100644 --- a/lang/csharp/src/apache/main/Generic/GenericWriter.cs +++ b/lang/csharp/src/apache/main/Generic/GenericWriter.cs @@ -75,7 +75,7 @@ public void Write(T value, Encoder encoder) /// A General purpose writer for serializing objects into a Stream using /// Avro. This class implements a default way of serializing objects. But /// one can derive a class from this and override different methods to - /// acheive results that are different from the default implementation. + /// achieve results that are different from the default implementation. 
/// public class DefaultWriter { @@ -177,6 +177,7 @@ public virtual void Write(Schema schema, object value, Encoder encoder) protected virtual void WriteNull(object value, Encoder encoder) { if (value != null) throw TypeMismatch(value, "null", "null"); + encoder.WriteNull(); } /// @@ -246,7 +247,7 @@ protected virtual object GetField(object value, string fieldName, int fieldPos) } /// - /// Serializes an enumeration. The default implementation expectes the value to be string whose + /// Serializes an enumeration. The default implementation expects the value to be string whose /// value is the name of the enumeration. /// /// The EnumSchema for serialization @@ -293,8 +294,8 @@ protected virtual void EnsureArrayObject(object value) /// /// Returns the length of an array. The default implementation requires the object - /// to be an array of objects and returns its length. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// to be an array of objects and returns its length. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The object whose array length is required @@ -306,8 +307,8 @@ protected virtual long GetArrayLength(object value) /// /// Returns the element at the given index from the given array object. The default implementation - /// requires that the value is an object array and returns the element in that array. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// requires that the value is an object array and returns the element in that array. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The array object @@ -351,7 +352,7 @@ protected virtual void EnsureMapObject(object value) } /// - /// Returns the size of the map object. 
The default implementation gurantees that EnsureMapObject has been + /// Returns the size of the map object. The default implementation guarantees that EnsureMapObject has been /// successfully called with the given value. The default implementation requires the value /// to be an IDictionary<string, object> and returns the number of elements in it. /// @@ -364,7 +365,7 @@ protected virtual long GetMapSize(object value) /// /// Returns the contents of the given map object. The default implementation guarantees that EnsureMapObject - /// has been called with the given value. The defualt implementation of this method requires that + /// has been called with the given value. The default implementation of this method requires that /// the value is an IDictionary<string, object> and returns its contents. /// /// The map object whose size is desired @@ -437,7 +438,7 @@ protected virtual void WriteFixed(FixedSchema es, object value, Encoder encoder) /// /// Creates a new and uses the provided parameters to build an - /// exception message indicathing there was a type mismatch. + /// exception message indicating there was a type mismatch. /// /// Object whose type does not the expected type /// Schema that we tried to write against diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs index a4b4aa832fb..53270faecdb 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs @@ -198,7 +198,7 @@ private ReadItem ResolveEnum(EnumSchema writerSchema, EnumSchema readerSchema) var readerDefaultOrdinal = null != readerSchema.Default ? 
readerSchema.Ordinal(readerSchema.Default) : -1; foreach (var symbol in writerSchema.Symbols) - { + { var writerOrdinal = writerSchema.Ordinal(symbol); if (readerSchema.Contains(symbol)) { @@ -274,27 +274,29 @@ private ReadItem ResolveRecord(RecordSchema writerSchema, RecordSchema readerSch { if (writerSchema.Contains(rf.Name)) continue; - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); + using (var defaultStream = new MemoryStream()) + { + var defaultEncoder = new BinaryEncoder(defaultStream); - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - var defaultBytes = defaultStream.ToArray(); + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + var defaultBytes = defaultStream.ToArray(); - var readItem = ResolveReader(rf.Schema, rf.Schema); + var readItem = ResolveReader(rf.Schema, rf.Schema); - var rfInstance = rf; - if(IsReusable(rf.Schema.Tag)) - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), - new BinaryDecoder(new MemoryStream( defaultBytes))))); - } - else - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + var rfInstance = rf; + if (IsReusable(rf.Schema.Tag)) + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), + new BinaryDecoder(new MemoryStream(defaultBytes))))); + } + else + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + } } } @@ -319,15 +321,14 @@ private ReadItem 
ResolveUnion(UnionSchema writerSchema, Schema readerSchema) for (int i = 0; i < writerSchema.Count; i++) { - var writerBranch = writerSchema[i]; + Schema writerBranch = writerSchema[i]; - if (readerSchema is UnionSchema) + if (readerSchema is UnionSchema unionReader) { - var unionReader = (UnionSchema) readerSchema; - var readerBranch = unionReader.MatchingBranch(writerBranch); + int readerBranch = unionReader.MatchingBranch(writerBranch); if (readerBranch == -1) { - lookup[i] = (r, d) => { throw new AvroException( "No matching schema for " + writerBranch + " in " + unionReader ); }; + lookup[i] = (r, d) => { throw new AvroException("No matching schema for " + writerBranch + " in " + unionReader); }; } else { @@ -338,7 +339,7 @@ private ReadItem ResolveUnion(UnionSchema writerSchema, Schema readerSchema) { if (!readerSchema.CanRead(writerBranch)) { - lookup[i] = (r, d) => { throw new AvroException( "Schema mismatch Reader: " + ReaderSchema + ", writer: " + WriterSchema ); }; + lookup[i] = (r, d) => { throw new AvroException("Schema mismatch Reader: " + ReaderSchema + ", writer: " + WriterSchema); }; } else { @@ -619,7 +620,7 @@ protected interface ArrayAccess /// Hint that the array should be able to handle at least targetSize elements. The array /// is not required to be resized /// - /// Array object who needs to support targetSize elements. This is guaranteed to be somthing returned by + /// Array object who needs to support targetSize elements. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. void EnsureSize(ref object array, int targetSize); @@ -627,7 +628,7 @@ protected interface ArrayAccess /// /// Resizes the array to the new value. /// - /// Array object whose size is required. This is guaranteed to be somthing returned by + /// Array object whose size is required. This is guaranteed to be something returned by /// a previous call to CreateArray(). /// The new size. 
void Resize(ref object array, int targetSize); diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs index a90ac34349b..dd21f62ed80 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumWriter.cs @@ -114,6 +114,7 @@ private WriteItem ResolveWriter( Schema schema ) protected void WriteNull(object value, Encoder encoder) { if (value != null) throw TypeMismatch(value, "null", "null"); + encoder.WriteNull(); } /// @@ -332,7 +333,7 @@ protected int ResolveUnion(UnionSchema us, Schema[] branchSchemas, object obj) /// /// Creates a new and uses the provided parameters to build an - /// exception message indicathing there was a type mismatch. + /// exception message indicating there was a type mismatch. /// /// Object whose type does not the expected type /// Schema that we tried to write against @@ -383,8 +384,8 @@ protected interface ArrayAccess /// /// Returns the length of an array. The default implementation requires the object - /// to be an array of objects and returns its length. The defaul implementation - /// gurantees that EnsureArrayObject() has been called on the value before this + /// to be an array of objects and returns its length. The default implementation + /// guarantees that EnsureArrayObject() has been called on the value before this /// function is called. /// /// The object whose array length is required @@ -416,7 +417,7 @@ protected interface MapAccess void EnsureMapObject(object value); /// - /// Returns the size of the map object. The default implementation gurantees that EnsureMapObject has been + /// Returns the size of the map object. The default implementation guarantees that EnsureMapObject has been /// successfully called with the given value. The default implementation requires the value /// to be an IDictionary<string, object> and returns the number of elements in it. 
/// diff --git a/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs b/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs index 91afeb57e8e..a37d6fa6c84 100644 --- a/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs +++ b/lang/csharp/src/apache/main/IO/BinaryDecoder.netstandard2.0.cs @@ -16,6 +16,8 @@ * limitations under the License. */ using System; +using System.IO; +using System.Text; namespace Avro.IO { @@ -24,10 +26,15 @@ namespace Avro.IO /// public partial class BinaryDecoder { + /// + /// It is hard to find documentation about the real maximum array length in .NET Framework 4.6.1, but this seems to work :-/ + /// + private const int MaxDotNetArrayLength = 0x3FFFFFFF; + /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public float ReadFloat() @@ -49,7 +56,7 @@ public float ReadFloat() /// /// A double is written as 8 bytes. /// The double is converted into a 64-bit integer using a method equivalent to - /// Java's doubleToLongBits and then encoded in little-endian format. + /// Java's doubleToRawLongBits and then encoded in little-endian format. /// /// A double value. 
public double ReadDouble() @@ -72,10 +79,28 @@ public double ReadDouble() public string ReadString() { int length = ReadInt(); - byte[] buffer = new byte[length]; - //TODO: Fix this because it's lame; - ReadFixed(buffer); - return System.Text.Encoding.UTF8.GetString(buffer); + + if (length < 0) + { + throw new AvroException("Can not deserialize a string with negative length!"); + } + + if (length > MaxDotNetArrayLength) + { + throw new AvroException("String length is not supported!"); + } + + using (var binaryReader = new BinaryReader(stream, Encoding.UTF8, true)) + { + var bytes = binaryReader.ReadBytes(length); + + if (bytes.Length != length) + { + throw new AvroException("Could not read as many bytes from stream as expected!"); + } + + return Encoding.UTF8.GetString(bytes); + } } private void Read(byte[] buffer, int start, int len) diff --git a/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs b/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs index 17bd8415a96..c4a0dfaaf31 100644 --- a/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs +++ b/lang/csharp/src/apache/main/IO/BinaryDecoder.notnetstandard2.0.cs @@ -18,6 +18,7 @@ using System; using System.Buffers; using System.Buffers.Binary; +using System.IO; using System.Text; namespace Avro.IO @@ -28,11 +29,13 @@ namespace Avro.IO public partial class BinaryDecoder { private const int StackallocThreshold = 256; + private const int MaxFastReadLength = 4096; + private const int MaxDotNetArrayLength = 0x7FFFFFC7; /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public float ReadFloat() @@ -46,7 +49,7 @@ public float ReadFloat() /// /// A double is written as 8 bytes. 
/// The double is converted into a 64-bit integer using a method equivalent to - /// Java's doubleToLongBits and then encoded in little-endian format. + /// Java's doubleToRawLongBits and then encoded in little-endian format. /// /// A double value. public double ReadDouble() @@ -63,23 +66,54 @@ public double ReadDouble() /// String read from the stream. public string ReadString() { - byte[] bufferArray = null; - int length = ReadInt(); - Span buffer = length <= StackallocThreshold ? - stackalloc byte[length] : - (bufferArray = ArrayPool.Shared.Rent(length)).AsSpan(0, length); - - Read(buffer); - string result = Encoding.UTF8.GetString(buffer); + if (length < 0) + { + throw new AvroException("Can not deserialize a string with negative length!"); + } - if (bufferArray != null) + if (length <= MaxFastReadLength) { - ArrayPool.Shared.Return(bufferArray); + byte[] bufferArray = null; + + try + { + Span buffer = length <= StackallocThreshold ? + stackalloc byte[length] : + (bufferArray = ArrayPool.Shared.Rent(length)).AsSpan(0, length); + + Read(buffer); + + return Encoding.UTF8.GetString(buffer); + } + finally + { + if (bufferArray != null) + { + ArrayPool.Shared.Return(bufferArray); + } + } } + else + { + if (length > MaxDotNetArrayLength) + { + throw new AvroException("String length is not supported!"); + } - return result; + using (var binaryReader = new BinaryReader(stream, Encoding.UTF8, true)) + { + var bytes = binaryReader.ReadBytes(length); + + if (bytes.Length != length) + { + throw new AvroException("Could not read as many bytes from stream as expected!"); + } + + return Encoding.UTF8.GetString(bytes); + } + } } private void Read(byte[] buffer, int start, int len) diff --git a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs index 30100bf31d6..91eb0e5553b 100644 --- a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs +++ b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs @@ -25,7 +25,7 @@ namespace Avro.IO /// 
public class BinaryEncoder : Encoder { - private readonly Stream Stream; + private readonly Stream stream; /// /// Initializes a new instance of the class without a backing @@ -42,7 +42,7 @@ public BinaryEncoder() : this(null) /// Stream to write to. public BinaryEncoder(Stream stream) { - this.Stream = stream; + this.stream = stream; } /// @@ -87,7 +87,7 @@ public void WriteLong(long value) /// /// A float is written as 4 bytes. /// The float is converted into a 32-bit integer using a method equivalent to - /// Java's floatToIntBits and then encoded in little-endian format. + /// Java's floatToRawIntBits and then encoded in little-endian format. /// /// public void WriteFloat(float value) @@ -99,7 +99,7 @@ public void WriteFloat(float value) /// ///A double is written as 8 bytes. ///The double is converted into a 64-bit integer using a method equivalent to - ///Java's doubleToLongBits and then encoded in little-endian format. + ///Java's doubleToRawLongBits and then encoded in little-endian format. 
/// /// public void WriteDouble(double value) @@ -203,22 +203,22 @@ public void WriteFixed(byte[] data) /// public void WriteFixed(byte[] data, int start, int len) { - Stream.Write(data, start, len); + stream.Write(data, start, len); } private void writeBytes(byte[] bytes) { - Stream.Write(bytes, 0, bytes.Length); + stream.Write(bytes, 0, bytes.Length); } private void writeBytes(byte[] bytes, int offset, int length) { - Stream.Write(bytes, offset, length); + stream.Write(bytes, offset, length); } private void writeByte(byte b) { - Stream.WriteByte(b); + stream.WriteByte(b); } /// @@ -226,7 +226,7 @@ private void writeByte(byte b) /// public void Flush() { - Stream.Flush(); + stream.Flush(); } } } diff --git a/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs b/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs index b077bfd3f18..278bc59b76e 100644 --- a/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs +++ b/lang/csharp/src/apache/main/IO/ByteBufferInputStream.cs @@ -83,7 +83,7 @@ private MemoryStream GetNextNonEmptyBuffer() /// Throws a . /// /// - /// Always thows. + /// Always throws. /// public override long Length { diff --git a/lang/csharp/src/apache/main/IO/Decoder.cs b/lang/csharp/src/apache/main/IO/Decoder.cs index 536c1e93956..1476b0e1a51 100644 --- a/lang/csharp/src/apache/main/IO/Decoder.cs +++ b/lang/csharp/src/apache/main/IO/Decoder.cs @@ -22,8 +22,6 @@ namespace Avro.IO /// Decoder is used to decode Avro data on a stream. There are methods to read the Avro types on the stream. There are also /// methods to skip items, which are usually more efficient than reading, on the stream. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface Decoder { /// @@ -102,7 +100,7 @@ public interface Decoder /// /// Starts reading the map Avro type. This, together with ReadMapNext() is used to read the /// entries from Avro map. 
This returns the number of entries in the initial chunk. After consuming - /// the chunk, the client should call ReadMapNext() to get the number of entriess in the next + /// the chunk, the client should call ReadMapNext() to get the number of entries in the next /// chunk. The client should repeat the procedure until there are no more entries in the array. /// for (int n = decoder.ReadMapStart(); n > 0; n = decoder.ReadMapNext()) /// { diff --git a/lang/csharp/src/apache/main/IO/Encoder.cs b/lang/csharp/src/apache/main/IO/Encoder.cs index 000a06eed8b..0c1712af430 100644 --- a/lang/csharp/src/apache/main/IO/Encoder.cs +++ b/lang/csharp/src/apache/main/IO/Encoder.cs @@ -19,11 +19,9 @@ namespace Avro.IO { /// - /// Defines the interface for a class that provies low-level support for serializing Avro + /// Defines the interface for a class that provides low-level support for serializing Avro /// values. /// - [System.Diagnostics.CodeAnalysis.SuppressMessage("Naming", - "CA1715:Identifiers should have correct prefix", Justification = "Maintain public API")] public interface Encoder { /// @@ -189,5 +187,10 @@ public interface Encoder /// Position within data where the contents start. /// Number of bytes to write. void WriteFixed(byte[] data, int start, int len); + + /// + /// Flushes the encoder. + /// + void Flush(); } } diff --git a/lang/csharp/src/apache/main/IO/JsonDecoder.cs b/lang/csharp/src/apache/main/IO/JsonDecoder.cs new file mode 100644 index 00000000000..48d726e3083 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/JsonDecoder.cs @@ -0,0 +1,765 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using Avro.IO.Parsing; +using Newtonsoft.Json; + +namespace Avro.IO +{ + /// + /// A for Avro's JSON data encoding. + /// + /// JsonDecoder is not thread-safe. + /// + public class JsonDecoder : ParsingDecoder + { + private JsonReader reader; + private readonly Stack reorderBuffers = new Stack(); + private ReorderBuffer currentReorderBuffer; + + private class ReorderBuffer + { + public readonly IDictionary> SavedFields = + new Dictionary>(); + + public JsonReader OrigParser { get; set; } + } + + private JsonDecoder(Symbol root, Stream stream) : base(root) + { + Configure(stream); + } + + private JsonDecoder(Symbol root, string str) : base(root) + { + Configure(str); + } + + /// + /// Initializes a new instance of the class. + /// + public JsonDecoder(Schema schema, Stream stream) : this(GetSymbol(schema), stream) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonDecoder(Schema schema, string str) : this(GetSymbol(schema), str) + { + } + + private static Symbol GetSymbol(Schema schema) + { + return (new JsonGrammarGenerator()).Generate(schema); + } + + /// + /// Reconfigures this JsonDecoder to use the InputStream provided. + /// Otherwise, this JsonDecoder will reset its state and then reconfigure its + /// input. + /// + /// The InputStream to read from. Cannot be null. 
+ public void Configure(Stream stream) + { + Parser.Reset(); + reorderBuffers.Clear(); + currentReorderBuffer = null; + reader = new JsonTextReader(new StreamReader(stream)); + reader.Read(); + } + + /// + /// Reconfigures this JsonDecoder to use the String provided for input. + /// Otherwise, this JsonDecoder will reset its state and then reconfigure its + /// input. + /// + /// The String to read from. Cannot be null. + public void Configure(string str) + { + Parser.Reset(); + reorderBuffers.Clear(); + currentReorderBuffer = null; + reader = new JsonTextReader(new StringReader(str)); + reader.Read(); + } + + private void Advance(Symbol symbol) + { + Parser.ProcessTrailingImplicitActions(); + Parser.Advance(symbol); + } + + /// + public override void ReadNull() + { + Advance(Symbol.Null); + if (reader.TokenType == JsonToken.Null) + { + reader.Read(); + } + else + { + throw TypeError("null"); + } + } + + /// + public override bool ReadBoolean() + { + Advance(Symbol.Boolean); + if (reader.TokenType == JsonToken.Boolean) + { + bool result = Convert.ToBoolean(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("boolean"); + } + } + + /// + public override int ReadInt() + { + Advance(Symbol.Int); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + int result = Convert.ToInt32(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("int"); + } + } + + /// + public override long ReadLong() + { + Advance(Symbol.Long); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + long result = Convert.ToInt64(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("long"); + } + } + + /// + public override float ReadFloat() + { + Advance(Symbol.Float); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + float result = (float)Convert.ToDouble(reader.Value); + reader.Read(); + return result; 
+ } + else + { + throw TypeError("float"); + } + } + + /// + public override double ReadDouble() + { + Advance(Symbol.Double); + if (reader.TokenType == JsonToken.Integer || reader.TokenType == JsonToken.Float) + { + double result = Convert.ToDouble(reader.Value); + reader.Read(); + return result; + } + else + { + throw TypeError("double"); + } + } + + /// + public override string ReadString() + { + Advance(Symbol.String); + if (Parser.TopSymbol() == Symbol.MapKeyMarker) + { + Parser.Advance(Symbol.MapKeyMarker); + if (reader.TokenType != JsonToken.PropertyName) + { + throw TypeError("map-key"); + } + } + else + { + if (reader.TokenType != JsonToken.String) + { + throw TypeError("string"); + } + } + + string result = Convert.ToString(reader.Value); + reader.Read(); + return result; + } + + /// + public override void SkipString() + { + Advance(Symbol.String); + if (Parser.TopSymbol() == Symbol.MapKeyMarker) + { + Parser.Advance(Symbol.MapKeyMarker); + if (reader.TokenType != JsonToken.PropertyName) + { + throw TypeError("map-key"); + } + } + else + { + if (reader.TokenType != JsonToken.String) + { + throw TypeError("string"); + } + } + + reader.Read(); + } + + /// + public override byte[] ReadBytes() + { + Advance(Symbol.Bytes); + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + return result; + } + else + { + throw TypeError("bytes"); + } + } + + private byte[] ReadByteArray() + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + byte[] result = iso.GetBytes(Convert.ToString(reader.Value)); + return result; + } + + /// + public override void SkipBytes() + { + Advance(Symbol.Bytes); + if (reader.TokenType == JsonToken.String) + { + reader.Read(); + } + else + { + throw TypeError("bytes"); + } + } + + private void CheckFixed(int size) + { + Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)Parser.PopSymbol(); + if (size != top.Size) + { + throw new AvroTypeException("Incorrect length 
for fixed binary: expected " + top.Size + + " but received " + size + " bytes."); + } + } + + /// + public override void ReadFixed(byte[] bytes) + { + ReadFixed(bytes, 0, bytes.Length); + } + + /// + public override void ReadFixed(byte[] bytes, int start, int len) + { + CheckFixed(len); + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + if (result.Length != len) + { + throw new AvroTypeException("Expected fixed length " + len + ", but got" + result.Length); + } + + Array.Copy(result, 0, bytes, start, len); + } + else + { + throw TypeError("fixed"); + } + } + + /// + public override void SkipFixed(int length) + { + CheckFixed(length); + DoSkipFixed(length); + } + + private void DoSkipFixed(int length) + { + if (reader.TokenType == JsonToken.String) + { + byte[] result = ReadByteArray(); + reader.Read(); + if (result.Length != length) + { + throw new AvroTypeException("Expected fixed length " + length + ", but got" + result.Length); + } + } + else + { + throw TypeError("fixed"); + } + } + + /// + protected override void SkipFixed() + { + Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)Parser.PopSymbol(); + DoSkipFixed(top.Size); + } + + /// + public override int ReadEnum() + { + Advance(Symbol.Enum); + Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction)Parser.PopSymbol(); + if (reader.TokenType == JsonToken.String) + { + string label = Convert.ToString(reader.Value); + int n = top.FindLabel(label); + if (n >= 0) + { + reader.Read(); + return n; + } + + throw new AvroTypeException("Unknown symbol in enum " + label); + } + else + { + throw TypeError("fixed"); + } + } + + /// + public override long ReadArrayStart() + { + Advance(Symbol.ArrayStart); + if (reader.TokenType == JsonToken.StartArray) + { + reader.Read(); + return DoArrayNext(); + } + else + { + throw TypeError("array-start"); + } + } + + /// + public override long ReadArrayNext() + { + Advance(Symbol.ItemEnd); + return 
DoArrayNext(); + } + + private long DoArrayNext() + { + if (reader.TokenType == JsonToken.EndArray) + { + Parser.Advance(Symbol.ArrayEnd); + reader.Read(); + return 0; + } + else + { + return 1; + } + } + + /// + public override void SkipArray() + { + Advance(Symbol.ArrayStart); + if (reader.TokenType == JsonToken.StartArray) + { + reader.Skip(); + reader.Read(); + Advance(Symbol.ArrayEnd); + } + else + { + throw TypeError("array-start"); + } + } + + /// + public override long ReadMapStart() + { + Advance(Symbol.MapStart); + if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + return DoMapNext(); + } + else + { + throw TypeError("map-start"); + } + } + + /// + public override long ReadMapNext() + { + Advance(Symbol.ItemEnd); + return DoMapNext(); + } + + private long DoMapNext() + { + if (reader.TokenType == JsonToken.EndObject) + { + reader.Read(); + Advance(Symbol.MapEnd); + return 0; + } + else + { + return 1; + } + } + + /// + public override void SkipMap() + { + Advance(Symbol.MapStart); + if (reader.TokenType == JsonToken.StartObject) + { + reader.Skip(); + reader.Read(); + Advance(Symbol.MapEnd); + } + else + { + throw TypeError("map-start"); + } + } + + /// + public override int ReadUnionIndex() + { + Advance(Symbol.Union); + Symbol.Alternative a = (Symbol.Alternative)Parser.PopSymbol(); + + string label; + if (reader.TokenType == JsonToken.Null) + { + label = "null"; + } + else if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + if (reader.TokenType == JsonToken.PropertyName) + { + label = Convert.ToString(reader.Value); + reader.Read(); + Parser.PushSymbol(Symbol.UnionEnd); + } + else + { + throw TypeError("start-union"); + } + } + else + { + throw TypeError("start-union"); + } + + int n = a.FindLabel(label); + if (n < 0) + { + throw new AvroTypeException("Unknown union branch " + label); + } + + Parser.PushSymbol(a.GetSymbol(n)); + return n; + } + + /// + public override void SkipNull() + { + ReadNull(); + } + + /// + 
public override void SkipBoolean() + { + ReadBoolean(); + } + + /// + public override void SkipInt() + { + ReadInt(); + } + + /// + public override void SkipLong() + { + ReadLong(); + } + + /// + public override void SkipFloat() + { + ReadFloat(); + } + + /// + public override void SkipDouble() + { + ReadDouble(); + } + + /// + public override void SkipEnum() + { + ReadEnum(); + } + + /// + public override void SkipUnionIndex() + { + ReadUnionIndex(); + } + + /// + public override Symbol DoAction(Symbol input, Symbol top) + { + if (top is Symbol.FieldAdjustAction) + { + Symbol.FieldAdjustAction fa = (Symbol.FieldAdjustAction)top; + string name = fa.FName; + if (currentReorderBuffer != null) + { + IList node = currentReorderBuffer.SavedFields[name]; + if (node != null) + { + currentReorderBuffer.SavedFields.Remove(name); + currentReorderBuffer.OrigParser = reader; + reader = MakeParser(node); + return null; + } + } + + if (reader.TokenType == JsonToken.PropertyName) + { + do + { + string fn = Convert.ToString(reader.Value); + reader.Read(); + if (name.Equals(fn) || (fa.Aliases != null && fa.Aliases.Contains(fn))) + { + return null; + } + else + { + if (currentReorderBuffer == null) + { + currentReorderBuffer = new ReorderBuffer(); + } + + currentReorderBuffer.SavedFields[fn] = GetValueAsTree(reader); + } + } while (reader.TokenType == JsonToken.PropertyName); + + throw new AvroTypeException("Expected field name not found: " + fa.FName); + } + } + else if (top == Symbol.FieldEnd) + { + if (currentReorderBuffer != null && currentReorderBuffer.OrigParser != null) + { + reader = currentReorderBuffer.OrigParser; + currentReorderBuffer.OrigParser = null; + } + } + else if (top == Symbol.RecordStart) + { + if (reader.TokenType == JsonToken.StartObject) + { + reader.Read(); + reorderBuffers.Push(currentReorderBuffer); + currentReorderBuffer = null; + } + else + { + throw TypeError("record-start"); + } + } + else if (top == Symbol.RecordEnd || top == Symbol.UnionEnd) + { + 
// AVRO-2034 advance to the end of our object + while (reader.TokenType != JsonToken.EndObject) + { + reader.Read(); + } + + if (top == Symbol.RecordEnd) + { + if (currentReorderBuffer != null && currentReorderBuffer.SavedFields.Count > 0) + { + throw TypeError("Unknown fields: " + currentReorderBuffer.SavedFields.Keys + .Aggregate((x, y) => x + ", " + y )); + } + + currentReorderBuffer = reorderBuffers.Pop(); + } + + // AVRO-2034 advance beyond the end object for the next record. + reader.Read(); + } + else + { + throw new AvroTypeException("Unknown action symbol " + top); + } + + return null; + } + + + private class JsonElement + { + private readonly JsonToken token; + public JsonToken Token => token; + private readonly object value; + public object Value => value; + + public JsonElement(JsonToken t, object value) + { + token = t; + this.value = value; + } + + public JsonElement(JsonToken t) : this(t, null) + { + } + } + + private static IList GetValueAsTree(JsonReader reader) + { + int level = 0; + IList result = new List(); + do + { + JsonToken t = reader.TokenType; + switch (t) + { + case JsonToken.StartObject: + case JsonToken.StartArray: + level++; + result.Add(new JsonElement(t)); + break; + case JsonToken.EndObject: + case JsonToken.EndArray: + level--; + result.Add(new JsonElement(t)); + break; + case JsonToken.PropertyName: + case JsonToken.String: + case JsonToken.Integer: + case JsonToken.Float: + case JsonToken.Boolean: + case JsonToken.Null: + result.Add(new JsonElement(t, reader.Value)); + break; + } + + reader.Read(); + } while (level != 0); + + result.Add(new JsonElement(JsonToken.None)); + return result; + } + + private JsonReader MakeParser(in IList elements) + { + return new JsonElementReader(elements); + } + + private class JsonElementReader : JsonReader + { + private readonly IList elements; + + public JsonElementReader(IList elements) + { + this.elements = elements; + pos = 0; + } + + private int pos; + + public override object Value + { + 
get { return elements[pos].Value; } + } + + public override JsonToken TokenType + { + get { return elements[pos].Token; } + } + + public override bool Read() + { + pos++; + return true; + } + } + + private AvroTypeException TypeError(string type) + { + return new AvroTypeException("Expected " + type + ". Got " + reader.TokenType); + } + } +} diff --git a/lang/csharp/src/apache/main/IO/JsonEncoder.cs b/lang/csharp/src/apache/main/IO/JsonEncoder.cs new file mode 100644 index 00000000000..c159a013e8c --- /dev/null +++ b/lang/csharp/src/apache/main/IO/JsonEncoder.cs @@ -0,0 +1,352 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Avro.IO.Parsing; +using System.Collections; +using System.IO; +using System.Text; +using Newtonsoft.Json; + +namespace Avro.IO +{ + /// + /// An for Avro's JSON data encoding. + /// + /// JsonEncoder buffers output, and data may not appear on the output until + /// is called. + /// + /// JsonEncoder is not thread-safe. + /// + public class JsonEncoder : ParsingEncoder, Parser.IActionHandler + { + private readonly Parser parser; + private JsonWriter writer; + private bool includeNamespace = true; + + // Has anything been written into the collections? 
+ private readonly BitArray isEmpty = new BitArray(64); + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream) : this(sc, GetJsonWriter(stream, false)) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, Stream stream, bool pretty) : this(sc, GetJsonWriter(stream, pretty)) + { + } + + /// + /// Initializes a new instance of the class. + /// + public JsonEncoder(Schema sc, JsonWriter writer) + { + Configure(writer); + parser = new Parser((new JsonGrammarGenerator()).Generate(sc), this); + } + + /// + public override void Flush() + { + parser.ProcessImplicitActions(); + if (writer != null) + { + writer.Flush(); + } + } + + // by default, one object per line. + // with pretty option use default pretty printer with root line separator. + private static JsonWriter GetJsonWriter(Stream stream, bool pretty) + { + JsonWriter writer = new JsonTextWriter(new StreamWriter(stream)); + if (pretty) + { + writer.Formatting = Formatting.Indented; + } + + return writer; + } + + /// + /// Whether to include a union label when generating JSON. + /// + public virtual bool IncludeNamespace + { + get { return includeNamespace; } + set { includeNamespace = value; } + } + + + /// + /// Reconfigures this JsonEncoder to use the output stream provided. + /// Otherwise, this JsonEncoder will flush its current output and then + /// reconfigure its output to use a default UTF8 JsonWriter that writes to the + /// provided Stream. + /// + /// The Stream to direct output to. Cannot be null. + public void Configure(Stream stream) + { + Configure(GetJsonWriter(stream, false)); + } + + /// + /// Reconfigures this JsonEncoder to output to the JsonWriter provided. + /// Otherwise, this JsonEncoder will flush its current output and then + /// reconfigure its output to use the provided JsonWriter. + /// + /// The JsonWriter to direct output to. Cannot be null. 
+ public void Configure(JsonWriter jsonWriter) + { + if (null != parser) + { + Flush(); + } + + writer = jsonWriter; + } + + /// + public override void WriteNull() + { + parser.Advance(Symbol.Null); + writer.WriteNull(); + } + + /// + public override void WriteBoolean(bool b) + { + parser.Advance(Symbol.Boolean); + writer.WriteValue(b); + } + + /// + public override void WriteInt(int n) + { + parser.Advance(Symbol.Int); + writer.WriteValue(n); + } + + /// + public override void WriteLong(long n) + { + parser.Advance(Symbol.Long); + writer.WriteValue(n); + } + + /// + public override void WriteFloat(float f) + { + parser.Advance(Symbol.Float); + writer.WriteValue(f); + } + + /// + public override void WriteDouble(double d) + { + parser.Advance(Symbol.Double); + writer.WriteValue(d); + } + + /// + public override void WriteString(string str) + { + parser.Advance(Symbol.String); + if (parser.TopSymbol() == Symbol.MapKeyMarker) + { + parser.Advance(Symbol.MapKeyMarker); + writer.WritePropertyName(str); + } + else + { + writer.WriteValue(str); + } + } + + /// + public override void WriteBytes(byte[] bytes) + { + WriteBytes(bytes, 0, bytes.Length); + } + + /// + public override void WriteBytes(byte[] bytes, int start, int len) + { + parser.Advance(Symbol.Bytes); + WriteByteArray(bytes, start, len); + } + + private void WriteByteArray(byte[] bytes, int start, int len) + { + Encoding iso = Encoding.GetEncoding("ISO-8859-1"); + writer.WriteValue(iso.GetString(bytes, start, len)); + } + + /// + public override void WriteFixed(byte[] bytes) + { + WriteFixed(bytes, 0, bytes.Length); + } + + /// + public override void WriteFixed(byte[] bytes, int start, int len) + { + parser.Advance(Symbol.Fixed); + Symbol.IntCheckAction top = (Symbol.IntCheckAction)parser.PopSymbol(); + if (len != top.Size) + { + throw new AvroTypeException("Incorrect length for fixed binary: expected " + top.Size + + " but received " + len + " bytes."); + } + + WriteByteArray(bytes, start, len); + } + + /// + 
public override void WriteEnum(int e)
{
    parser.Advance(Symbol.Enum);

    // The symbol popped after Enum carries the labels and their count.
    var labels = (Symbol.EnumLabelsAction)parser.PopSymbol();
    bool inRange = e >= 0 && e < labels.Size;
    if (!inRange)
    {
        throw new AvroTypeException("Enumeration out of range: max is " + labels.Size + " but received " + e);
    }

    writer.WriteValue(labels.GetLabel(e));
}

/// <inheritdoc/>
public override void WriteArrayStart()
{
    parser.Advance(Symbol.ArrayStart);
    writer.WriteStartArray();
    Push();
    MarkCurrentLevel(true);
}

/// <inheritdoc/>
public override void WriteArrayEnd()
{
    // ItemEnd is advanced only when the flag at Pos is clear.
    if (!isEmpty.Get(Pos))
    {
        parser.Advance(Symbol.ItemEnd);
    }

    Pop();
    parser.Advance(Symbol.ArrayEnd);
    writer.WriteEndArray();
}

/// <inheritdoc/>
public override void WriteMapStart()
{
    Push();
    MarkCurrentLevel(true);

    parser.Advance(Symbol.MapStart);
    writer.WriteStartObject();
}

/// <inheritdoc/>
public override void WriteMapEnd()
{
    // ItemEnd is advanced only when the flag at Pos is clear.
    if (!isEmpty.Get(Pos))
    {
        parser.Advance(Symbol.ItemEnd);
    }

    Pop();

    parser.Advance(Symbol.MapEnd);
    writer.WriteEndObject();
}

/// <inheritdoc/>
public override void StartItem()
{
    // When the flag at Pos is clear, the previous item is closed first.
    if (!isEmpty.Get(Pos))
    {
        parser.Advance(Symbol.ItemEnd);
    }

    base.StartItem();
    MarkCurrentLevel(false);
}

/// <inheritdoc/>
public override void WriteUnionIndex(int unionIndex)
{
    parser.Advance(Symbol.Union);
    var union = (Symbol.Alternative)parser.PopSymbol();
    Symbol branch = union.GetSymbol(unionIndex);

    // Non-null branches are wrapped in an object keyed by the branch label when
    // includeNamespace is set; UnionEnd is pushed so the object gets closed later.
    if (branch != Symbol.Null && includeNamespace)
    {
        writer.WriteStartObject();
        writer.WritePropertyName(union.GetLabel(unionIndex));
        parser.PushSymbol(Symbol.UnionEnd);
    }

    parser.PushSymbol(branch);
}

// Stores the flag for the current depth, doubling the bit storage first
// when the depth index does not fit.
private void MarkCurrentLevel(bool empty)
{
    int depth = Depth();
    if (depth >= isEmpty.Length)
    {
        isEmpty.Length += isEmpty.Length;
    }

    isEmpty.Set(depth, empty);
}

///
/// Perform an action based on the given input.
+ /// + public virtual Symbol DoAction(Symbol input, Symbol top) + { + if (top is Symbol.FieldAdjustAction) + { + Symbol.FieldAdjustAction fa = (Symbol.FieldAdjustAction)top; + writer.WritePropertyName(fa.FName); + } + else if (top == Symbol.RecordStart) + { + writer.WriteStartObject(); + } + else if (top == Symbol.RecordEnd || top == Symbol.UnionEnd) + { + writer.WriteEndObject(); + } + else if (top != Symbol.FieldEnd) + { + throw new AvroTypeException("Unknown action symbol " + top); + } + + return null; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs new file mode 100644 index 00000000000..508ea264b83 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/JsonGrammarGenerator.cs @@ -0,0 +1,105 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Avro.IO.Parsing +{ + /// + /// The class that generates a grammar suitable to parse Avro data in JSON + /// format. 
+ /// + public class JsonGrammarGenerator : ValidatingGrammarGenerator + { + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// grammar for the given schema schema. + /// + public override Symbol Generate(Schema schema) + { + return Symbol.NewRoot(Generate(schema, new Dictionary())); + } + + /// + /// Returns the non-terminal that is the start symbol for grammar of the given + /// schema sc. If there is already an entry for the given schema in the + /// given map seen then that entry is returned. Otherwise a new symbol + /// is generated and an entry is inserted into the map. + /// + /// The schema for which the start symbol is required + /// A map of schema to symbol mapping done so far. + /// The start symbol for the schema + protected override Symbol Generate(Schema sc, IDictionary seen) + { + switch (sc.Tag) + { + case Schema.Type.Null: + case Schema.Type.Boolean: + case Schema.Type.Int: + case Schema.Type.Long: + case Schema.Type.Float: + case Schema.Type.Double: + case Schema.Type.String: + case Schema.Type.Bytes: + case Schema.Type.Fixed: + case Schema.Type.Union: + return base.Generate(sc, seen); + case Schema.Type.Enumeration: + return Symbol.NewSeq(new Symbol.EnumLabelsAction(((EnumSchema)sc).Symbols), Symbol.Enum); + case Schema.Type.Array: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.ArrayEnd, Symbol.ItemEnd, Generate(((ArraySchema)sc).ItemSchema, seen)), + Symbol.ArrayStart); + case Schema.Type.Map: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.MapEnd, Symbol.ItemEnd, Generate(((MapSchema)sc).ValueSchema, seen), + Symbol.MapKeyMarker, Symbol.String), Symbol.MapStart); + case Schema.Type.Record: + { + LitS wsc = new LitS(sc); + if (!seen.TryGetValue(wsc, out Symbol rresult)) + { + Symbol[] production = new Symbol[((RecordSchema)sc).Fields.Count * 3 + 2]; + rresult = Symbol.NewSeq(production); + seen[wsc] = rresult; + + int i = production.Length; + int n = 0; + production[--i] = Symbol.RecordStart; + 
foreach (Field f in ((RecordSchema)sc).Fields) + { + production[--i] = new Symbol.FieldAdjustAction(n, f.Name, f.Aliases); + production[--i] = Generate(f.Schema, seen); + production[--i] = Symbol.FieldEnd; + n++; + } + + production[i - 1] = Symbol.RecordEnd; + } + + return rresult; + } + case Schema.Type.Logical: + return Generate((sc as LogicalSchema).BaseSchema, seen); + default: + throw new Exception("Unexpected schema type"); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/Parser.cs b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs new file mode 100644 index 00000000000..ae788ede05d --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/Parser.cs @@ -0,0 +1,229 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.IO.Parsing +{ + /// + /// Parser is the class that maintains the stack for parsing. This class is used + /// by encoders, which are not required to skip. + /// + public class Parser + { + /// + /// The parser knows how to handle the terminal and non-terminal symbols. But it + /// needs help from outside to handle implicit and explicit actions. The clients + /// implement this interface to provide this help. 
///
public interface IActionHandler
{
    /// <summary>
    /// Handles the action symbol <paramref name="top"/> when
    /// <paramref name="input"/> is sought to be taken off the stack.
    /// </summary>
    /// <param name="input">The input symbol from the caller of Advance.</param>
    /// <param name="top">The symbol at the top of the stack.</param>
    /// <returns>null if Advance() is to continue processing the stack; otherwise
    /// the value that Advance() will return.</returns>
    Symbol DoAction(Symbol input, Symbol top);
}

private readonly IActionHandler symbolHandler;

/// <summary>
/// Stack of symbols.
/// </summary>
protected Symbol[] Stack;

/// <summary>
/// Position of the stack: the index of the next free slot.
/// </summary>
protected int Pos;

/// <summary>
/// Initializes a new instance of the class with <paramref name="root"/> as the
/// only element on the stack.
/// </summary>
/// <param name="root">The symbol placed at the bottom of the stack.</param>
/// <param name="symbolHandler">The handler invoked for action symbols.</param>
public Parser(Symbol root, IActionHandler symbolHandler)
{
    // Start small to make sure expansion code works
    Stack = new Symbol[5];
    Stack[0] = root;
    Pos = 1;

    this.symbolHandler = symbolHandler;
}

/// <summary>
/// Grows the stack when there is not sufficient room: the length increases by
/// the current length or by 1024, whichever is larger.
/// </summary>
private void ExpandStack()
{
    int growth = Math.Max(Stack.Length, 1024);
    Array.Resize(ref Stack, Stack.Length + growth);
}

/// <summary>
/// Recursively replaces the symbol at the top of the stack with its production,
/// until the top is a terminal. Then checks if the top symbol matches the
/// terminal symbol supplied as <paramref name="input"/>.
/// </summary>
/// <param name="input">The symbol to match against the terminal at the top of the
/// stack.</param>
/// <returns>The terminal symbol at the top of the stack unless an implicit action
/// resulted in another symbol, in which case that symbol is returned.</returns>
public Symbol Advance(Symbol input)
{
    while (true)
    {
        Symbol top = Stack[--Pos];
        if (top == input)
        {
            return top; // A common case
        }

        switch (top.SymKind)
        {
            case Symbol.Kind.ImplicitAction:
            {
                // A non-null handler result ends the loop; null keeps unwinding the stack.
                Symbol handled = symbolHandler.DoAction(input, top);
                if (handled != null)
                {
                    return handled;
                }

                break;
            }

            case Symbol.Kind.Terminal:
                throw new AvroTypeException("Attempt to process a " + input + " when a " + top + " was expected.");

            case Symbol.Kind.Repeater when input == ((Symbol.Repeater)top).End:
                // The caller asked for the repeater's end symbol.
                return input;

            default:
                // Any other symbol, including a repeater whose end did not match,
                // is replaced by its production.
                PushProduction(top);
                break;
        }
    }
}

/// <summary>
/// Performs any implicit actions at the top of the stack, expanding any production
/// (other than the root) that may be encountered. This method will fail if there
/// are any repeaters on the stack.
/// </summary>
public void ProcessImplicitActions()
{
    while (Pos > 1)
    {
        Symbol top = Stack[Pos - 1];
        Symbol.Kind kind = top.SymKind;
        if (kind == Symbol.Kind.Terminal)
        {
            // A terminal stays on the stack and ends the processing.
            return;
        }

        Pos--;
        if (kind == Symbol.Kind.ImplicitAction)
        {
            symbolHandler.DoAction(null, top);
        }
        else
        {
            PushProduction(top);
        }
    }
}

/// <summary>
/// Performs any "trailing" implicit actions at the top of the stack.
/// </summary>
public void ProcessTrailingImplicitActions()
{
    while (Pos >= 1)
    {
        Symbol top = Stack[Pos - 1];
        bool trailingAction = top.SymKind == Symbol.Kind.ImplicitAction
            && ((Symbol.ImplicitAction)top).IsTrailing;
        if (!trailingAction)
        {
            return;
        }

        Pos--;
        symbolHandler.DoAction(null, top);
    }
}

/// <summary>
/// Pushes the production for the given symbol <paramref name="sym"/>: the whole
/// production array is copied onto the stack, which is expanded first for as
/// long as it lacks room.
/// </summary>
/// <param name="sym">The symbol whose production is pushed.</param>
public void PushProduction(Symbol sym)
{
    Symbol[] production = sym.Production;
    int newPos = Pos + production.Length;
    while (newPos > Stack.Length)
    {
        ExpandStack();
    }

    Array.Copy(production, 0, Stack, Pos, production.Length);
    Pos = newPos;
}

///
/// Pops and returns the top symbol from the stack.
+ /// + public virtual Symbol PopSymbol() + { + return Stack[--Pos]; + } + + /// + /// Returns the top symbol from the stack. + /// + public virtual Symbol TopSymbol() + { + return Stack[Pos - 1]; + } + + /// + /// Pushes sym on to the stack. + /// + public virtual void PushSymbol(Symbol sym) + { + if (Pos == Stack.Length) + { + ExpandStack(); + } + + Stack[Pos++] = sym; + } + + /// + /// Returns the depth of the stack. + /// + public virtual int Depth() + { + return Pos; + } + + /// + /// Resets the stack. + /// + public virtual void Reset() + { + Pos = 1; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs b/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs new file mode 100644 index 00000000000..4679215cbc2 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/SkipParser.cs @@ -0,0 +1,107 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Diagnostics; + +namespace Avro.IO.Parsing +{ + /// + /// A parser that capable of skipping as well read and write. This class is used + /// by decoders who (unlike encoders) are required to implement methods to skip. 
+ /// + public class SkipParser : Parser + { + /// + /// The clients implement this interface to skip symbols and actions. + /// + public interface ISkipHandler + { + /// + /// Skips the action at the top of the stack. + /// + void SkipAction(); + + /// + /// Skips the symbol at the top of the stack. + /// + void SkipTopSymbol(); + } + + private readonly ISkipHandler skipHandler; + + /// + /// Initializes a new instance of the class. + /// + public SkipParser(Symbol root, IActionHandler symbolHandler, ISkipHandler skipHandler) : base(root, symbolHandler) + { + this.skipHandler = skipHandler; + } + + /// + /// Skips data by calling skipXyz or readXyz methods on + /// this, until the parser stack reaches the target level. + /// + public void SkipTo(int target) + { + while (target < Pos) + { + Symbol top = Stack[Pos - 1]; + while (top.SymKind != Symbol.Kind.Terminal) + { + if (top.SymKind == Symbol.Kind.ImplicitAction || top.SymKind == Symbol.Kind.ExplicitAction) + { + skipHandler.SkipAction(); + } + else + { + --Pos; + PushProduction(top); + } + + goto outerContinue; + } + + skipHandler.SkipTopSymbol(); + outerContinue: ; + } + } + + /// + /// Skips the repeater at the top the stack. + /// + public void SkipRepeater() + { + int target = Pos; + Symbol repeater = Stack[--Pos]; + Debug.Assert(repeater.SymKind == Symbol.Kind.Repeater); + PushProduction(repeater); + SkipTo(target); + } + + /// + /// Pushes the given symbol on to the skip and skips it. + /// + /// The symbol that should be skipped. 
+ public void SkipSymbol(Symbol symToSkip) + { + int target = Pos; + PushSymbol(symToSkip); + SkipTo(target); + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs new file mode 100644 index 00000000000..d5f4ee09c43 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/Symbol.cs @@ -0,0 +1,984 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Avro.IO.Parsing +{ + /// + /// Symbol is the base of all symbols (terminals and non-terminals) of the + /// grammar. + /// + public abstract class Symbol + { + /// + /// The type of symbol. 
///
public enum Kind
{
    /// <summary>
    /// terminal symbols which have no productions
    /// </summary>
    Terminal,

    /// <summary>
    /// Start symbol for some grammar
    /// </summary>
    Root,

    /// <summary>
    /// non-terminal symbol which is a sequence of one or more other symbols
    /// </summary>
    Sequence,

    /// <summary>
    /// non-terminal to represent the contents of an array or map
    /// </summary>
    Repeater,

    /// <summary>
    /// non-terminal to represent the union
    /// </summary>
    Alternative,

    /// <summary>
    /// non-terminal action symbol which is automatically consumed
    /// </summary>
    ImplicitAction,

    /// <summary>
    /// non-terminal action symbol which is explicitly consumed
    /// </summary>
    ExplicitAction
}

/// <summary>The kind of this symbol.</summary>
public Kind SymKind { get; private set; }

/// <summary>
/// The production for this symbol. If this symbol is a terminal this is
/// null. Otherwise this holds the sequence of the symbols that forms the
/// production for this symbol. The sequence is in the reverse order of
/// production, which makes it easy to copy onto the parsing stack.
///
/// The setter is private, so the production for a symbol should be known before
/// that symbol is constructed. This requirement cannot be met for symbols which
/// are recursive (e.g. a record that holds a union, a branch of which is the
/// record itself). To resolve this problem, such a symbol is initialized with an
/// array of nulls and the entries are filled in later. Not clean, but it works;
/// keeping the array reference fixed keeps the symbol close to immutable. See
/// the various generators for how records are generated.
/// </summary>
public Symbol[] Production { get; private set; }

/// <summary>
/// Constructs a new symbol of the given kind, with no production.
/// </summary>
/// <param name="kind">The kind of the symbol.</param>
protected Symbol(Kind kind)
{
    SymKind = kind;
    Production = null;
}

/// <summary>
/// Constructs a new symbol of the given kind and production.
/// </summary>
/// <param name="kind">The kind of the symbol.</param>
/// <param name="production">The production array; stored as given, not copied.</param>
protected Symbol(Kind kind, Symbol[] production)
{
    SymKind = kind;
    Production = production;
}

///
/// A convenience method to construct a root symbol.
///
public static Symbol NewRoot(params Symbol[] symbols) => new Root(symbols);

/// <summary>
/// A convenience method to construct a sequence.
/// </summary>
/// <param name="production">The constituent symbols of the sequence.</param>
public static Symbol NewSeq(params Symbol[] production) => new Sequence(production);

/// <summary>
/// A convenience method to construct a repeater.
/// </summary>
/// <param name="endSymbol">The end symbol.</param>
/// <param name="symsToRepeat">The symbols to repeat in the repeater.</param>
public static Symbol NewRepeat(Symbol endSymbol, params Symbol[] symsToRepeat) =>
    new Repeater(endSymbol, symsToRepeat);

/// <summary>
/// A convenience method to construct a union.
/// </summary>
/// <param name="symbols">The symbols of the union branches.</param>
/// <param name="labels">The labels of the union branches.</param>
public static Symbol NewAlt(Symbol[] symbols, string[] labels) => new Alternative(symbols, labels);

/// <summary>
/// A convenience method to construct an ErrorAction.
/// </summary>
/// <param name="e">The error message handed to the ErrorAction.</param>
protected static Symbol Error(string e) => new ErrorAction(e);

/// <summary>
/// A convenience method to construct a ResolvingAction.
/// </summary>
/// <param name="w">The writer symbol</param>
/// <param name="r">The reader symbol</param>
protected static Symbol Resolve(Symbol w, Symbol r) => new ResolvingAction(w, r);

/// <summary>
/// Fixup symbol: records that the array <see cref="Symbols"/> must be patched,
/// starting at <see cref="Pos"/>, once the production it is waiting for is known.
/// </summary>
/// <remarks>
/// The fix-up deliberately keeps a reference to the target array rather than a
/// copy. Sequence.Flatten applies a fix-up by copying into <see cref="Symbols"/>,
/// and CopyFixups recognises a fix-up by comparing <see cref="Symbols"/> with an
/// array by reference; both only work on the original array instance.
/// </remarks>
protected class Fixup
{
    /// <summary>
    /// The array to patch. This is the instance passed to the constructor;
    /// writes through it are visible to every other holder of that array.
    /// </summary>
    public Symbol[] Symbols { get; }

    /// <summary>
    /// The position in <see cref="Symbols"/> where the patch starts.
    /// </summary>
    public int Pos { get; private set; }

    /// <summary>
    /// Initializes a new instance of the class.
    /// </summary>
    /// <param name="symbols">The array to patch; stored by reference, not copied.</param>
    /// <param name="pos">The position in <paramref name="symbols"/> where the patch starts.</param>
    /// <exception cref="ArgumentNullException"><paramref name="symbols"/> is null.</exception>
    public Fixup(Symbol[] symbols, int pos)
    {
        // Fail fast on null, as the previous implementation did implicitly.
        Symbols = symbols ?? throw new ArgumentNullException(nameof(symbols));
        Pos = pos;
    }
}

/// <summary>
/// Returns the flattened form of this symbol. The base implementation returns
/// the symbol itself; symbols that contain other symbols override it.
/// </summary>
/// <param name="map">A map of sequences which have already been flattened.</param>
/// <param name="map2">A map from a sequence to the list of fix-ups recorded for it.</param>
/// <returns>The flattened symbol.</returns>
protected virtual Symbol Flatten(IDictionary<Sequence, Sequence> map,
    IDictionary<Sequence, IList<Fixup>> map2) => this;

/// <summary>
/// Returns the flattened size. The base implementation returns 1.
/// </summary>
public virtual int FlattenedSize() => 1;

///
/// Flattens the given sub-array of symbols into an sub-array of symbols. Every
/// Sequence in the input are replaced by its production recursively.
/// For non-Sequence symbols that internally hold other symbols, those
/// internal symbols also get flattened. When flattening is done, the only place
/// there might be Sequence symbols is in the productions of a Repeater,
/// Alternative, or the symToParse and symToSkip in a UnionAdjustAction or
/// SkipAction.
///
/// Why is this done? We want our parsers to be fast. If we left the grammars
/// unflattened, then the parser would be constantly copying the contents of
/// nested Sequence productions onto the parsing stack. Instead, because of
/// flattening, we have a long top-level production with no Sequences unless the
/// Sequence is absolutely needed, e.g., in the case of a Repeater or an
/// Alternative.
///
/// Well, this is not exactly true when recursion is involved. Where there is a
/// recursive record, that record will be "inlined" once, but any internal (ie,
/// recursive) references to that record will be a Sequence for the record. That
/// Sequence will not further inline itself -- it will refer to itself as a
/// Sequence. The same is true for any records nested in this outer recursive
/// record. Recursion is rare, and we want things to be fast in the typical case,
/// which is why we do the flattening optimization.
///
///
/// The algorithm does a few tricks to handle recursive symbol definitions. In
/// order to avoid infinite recursion with recursive symbols, we have a map of
/// Symbol->Symbol. Before fully constructing a flattened symbol for a
/// Sequence we insert an empty output symbol into the map and then
/// start filling the production for the Sequence. If the same
/// Sequence is encountered due to recursion, we simply return the
/// (empty) output Sequence from the map. Then we actually fill out
/// the production for the Sequence. As part of the flattening process
/// we copy the production of Sequences into larger arrays. If the
/// original Sequence has not been fully constructed yet, we copy a
/// bunch of nulls. Fix-up remembers all those null patches.
/// The fix-ups finally get filled when we know the symbols to occupy those
/// patches.
///
/// <param name="input">The array of input symbols to flatten</param>
/// <param name="start">The position where the input sub-array starts.</param>
/// <param name="output">The output that receives the flattened list of symbols. The
/// output array should have sufficient space to receive the
/// expanded sub-array of symbols.</param>
/// <param name="skip">The position where the output sub-array starts.</param>
/// <param name="map">A map of symbols which have already been expanded. Useful for
/// handling recursive definitions and for caching.</param>
/// <param name="map2">A map to store the list of fix-ups.</param>
protected static void Flatten(Symbol[] input, int start, Symbol[] output, int skip,
    IDictionary<Sequence, Sequence> map, IDictionary<Sequence, IList<Fixup>> map2)
{
    int writeAt = skip;
    for (int readAt = start; readAt < input.Length; readAt++)
    {
        Symbol flattened = input[readAt].Flatten(map, map2);
        if (!(flattened is Sequence))
        {
            // Anything that is not a sequence occupies exactly one output slot.
            output[writeAt++] = flattened;
            continue;
        }

        Sequence sequence = (Sequence)flattened;
        Symbol[] production = flattened.Production;
        if (map2.TryGetValue(sequence, out IList<Fixup> pending))
        {
            // The sequence still has a fix-up list registered, so its production
            // is not final yet: remember where it has to be copied later.
            pending.Add(new Fixup(output, writeAt));
        }
        else
        {
            Array.Copy(production, 0, output, writeAt, production.Length);

            // Copy any fixups that will be applied to the production to add missing symbols
            foreach (IList<Fixup> fixups in map2.Values)
            {
                CopyFixups(fixups, output, writeAt, production);
            }
        }

        // Room for the production is reserved in both cases.
        writeAt += production.Length;
    }
}

// For every fix-up in the list that targets toCopy, appends a matching fix-up
// that targets output, shifted by outPos.
private static void CopyFixups(IList<Fixup> fixups, Symbol[] output, int outPos, Symbol[] toCopy)
{
    // Only the entries present on entry are examined; fix-ups appended below
    // are not revisited.
    int initialCount = fixups.Count;
    for (int idx = 0; idx < initialCount; idx++)
    {
        Fixup candidate = fixups[idx];

        // Reference comparison: matches only when Fixup.Symbols yields the same
        // array instance as toCopy.
        if (candidate.Symbols != toCopy)
        {
            continue;
        }

        fixups.Add(new Fixup(output, candidate.Pos + outPos));
    }
}

/// <summary>
/// Returns the amount of space required to flatten the given sub-array of
/// symbols.
/// </summary>
/// <param name="symbols">The array of input symbols.</param>
/// <param name="start">The index where the subarray starts.</param>
/// <returns>The number of symbols that will be produced if one expands the given
/// input.</returns>
+ protected static int FlattenedSize(Symbol[] symbols, int start) + { + int result = 0; + for (int i = start; i < symbols.Length; i++) + { + if (symbols[i] is Sequence) + { + Sequence s = (Sequence)symbols[i]; + result += s.FlattenedSize(); + } + else + { + result += 1; + } + } + + return result; + } + + /// + /// Terminal symbol. + /// + protected class Terminal : Symbol + { + /// + /// Printable name. + /// + public string PrintName { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Terminal(string printName) : base(Kind.Terminal) + { + PrintName = printName; + } + + /// + public override string ToString() => PrintName; + } + + /// + /// Implicit action. + /// + public class ImplicitAction : Symbol + { + /// + /// Set to true if and only if this implicit action is a trailing + /// action. That is, it is an action that follows real symbol. E.g + /// . + /// + public bool IsTrailing { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public ImplicitAction() : this(false) + { + } + + /// + /// Initializes a new instance of the class. + /// + public ImplicitAction(bool isTrailing) : base(Kind.ImplicitAction) + { + IsTrailing = isTrailing; + } + } + + /// + /// Root symbol. + /// + protected class Root : Symbol + { + /// + /// Initializes a new instance of the class. + /// + public Root(params Symbol[] symbols) : base(Kind.Root, MakeProduction(symbols)) + { + Production[0] = this; + } + + private static Symbol[] MakeProduction(Symbol[] symbols) + { + Symbol[] result = new Symbol[FlattenedSize(symbols, 0) + 1]; + Flatten(symbols, 0, result, 1, new Dictionary(), + new Dictionary>()); + return result; + } + } + + /// + /// Sequence symbol. + /// + protected class Sequence : Symbol, IEnumerable + { + /// + /// Initializes a new instance of the class. + /// + public Sequence(Symbol[] productions) : base(Kind.Sequence, productions) + { + } + + /// + /// Get the symbol at the given index. 
+ /// + public virtual Symbol this[int index] => Production[index]; + + /// + /// Get the symbol at the given index. + /// + public virtual Symbol Get(int index) => Production[index]; + + /// + /// Returns the number of symbols. + /// + public virtual int Size() => Production.Length; + + /// + public IEnumerator GetEnumerator() => Enumerable.Reverse(Production).GetEnumerator(); + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + if (!map.TryGetValue(this, out Sequence result)) + { + result = new Sequence(new Symbol[FlattenedSize()]); + map[this] = result; + IList l = new List(); + map2[result] = l; + + Flatten(Production, 0, result.Production, 0, map, map2); + foreach (Fixup f in l) + { + Array.Copy(result.Production, 0, f.Symbols, f.Pos, result.Production.Length); + } + + map2.Remove(result); + } + + return result; + } + + /// + public override int FlattenedSize() => FlattenedSize(Production, 0); + } + + /// + /// Repeater symbol. + /// + public class Repeater : Symbol + { + /// + /// The end symbol. + /// + public Symbol End { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Repeater(Symbol end, params Symbol[] sequenceToRepeat) : base(Kind.Repeater, + MakeProduction(sequenceToRepeat)) + { + End = end; + Production[0] = this; + } + + private static Symbol[] MakeProduction(Symbol[] p) + { + Symbol[] result = new Symbol[p.Length + 1]; + Array.Copy(p, 0, result, 1, p.Length); + return result; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + Repeater result = new Repeater(End, new Symbol[FlattenedSize(Production, 1)]); + Flatten(Production, 1, result.Production, 1, map, map2); + return result; + } + } + + /// + /// Returns true if the Parser contains any Error symbol, indicating that it may + /// fail for some inputs. 
+ /// + private static bool HasErrors(Symbol symbol) + { + return HasErrors(symbol, new HashSet()); + } + + private static bool HasErrors(Symbol symbol, ISet visited) + { + // avoid infinite recursion + if (visited.Contains(symbol)) + { + return false; + } + + visited.Add(symbol); + + switch (symbol.SymKind) + { + case Kind.Alternative: + return HasErrors(symbol, ((Alternative)symbol).Symbols, visited); + case Kind.ExplicitAction: + return false; + case Kind.ImplicitAction: + if (symbol is ErrorAction) + { + return true; + } + + if (symbol is UnionAdjustAction) + { + return HasErrors(((UnionAdjustAction)symbol).SymToParse, visited); + } + + return false; + case Kind.Repeater: + Repeater r = (Repeater)symbol; + return HasErrors(r.End, visited) || HasErrors(symbol, r.Production, visited); + case Kind.Root: + case Kind.Sequence: + return HasErrors(symbol, symbol.Production, visited); + case Kind.Terminal: + return false; + default: + throw new Exception("unknown symbol kind: " + symbol.SymKind); + } + } + + private static bool HasErrors(Symbol root, Symbol[] symbols, ISet visited) + { + if (null != symbols) + { + foreach (Symbol s in symbols) + { + if (s == root) + { + continue; + } + + if (HasErrors(s, visited)) + { + return true; + } + } + } + + return false; + } + + /// + /// Alternative symbol. + /// + public class Alternative : Symbol + { + /// + /// The symbols. + /// + public Symbol[] Symbols { get; private set; } + + /// + /// The labels. + /// + public string[] Labels { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public Alternative(Symbol[] symbols, string[] labels) : base(Kind.Alternative) + { + Symbols = symbols; + Labels = labels; + } + + /// + /// Returns the symbol at the given index. + /// + public virtual Symbol GetSymbol(int index) + { + return Symbols[index]; + } + + /// + /// Returns the label at the given index. 
+ /// + public virtual string GetLabel(int index) + { + return Labels[index]; + } + + /// + /// Returns the size. + /// + public virtual int Size() + { + return Symbols.Length; + } + + /// + /// Returns the index of the given label. + /// + public virtual int FindLabel(string label) + { + if (label != null) + { + for (int i = 0; i < Labels.Length; i++) + { + if (label.Equals(Labels[i])) + { + return i; + } + } + } + + return -1; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + Symbol[] ss = new Symbol[Symbols.Length]; + for (int i = 0; i < ss.Length; i++) + { + ss[i] = Symbols[i].Flatten(map, map2); + } + + return new Alternative(ss, Labels); + } + } + + /// + /// The error action. + /// + public class ErrorAction : ImplicitAction + { + /// + /// The error message. + /// + public string Msg { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public ErrorAction(string msg) + { + Msg = msg; + } + } + + /// + /// Int check action. + /// + public class IntCheckAction : Symbol + { + /// + /// The size. + /// + public int Size { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public IntCheckAction(int size) : base(Kind.ExplicitAction) + { + Size = size; + } + } + + /// + /// The writer union action. + /// + public class WriterUnionAction : ImplicitAction + { + } + + /// + /// The resolving action. + /// + public class ResolvingAction : ImplicitAction + { + /// + /// The writer. + /// + public Symbol Writer { get; private set; } + + /// + /// The reader. + /// + public Symbol Reader { get; private set; } + + /// + /// Initializes a new instance of the class. 
+ /// + public ResolvingAction(Symbol writer, Symbol reader) + { + Writer = writer; + Reader = reader; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new ResolvingAction(Writer.Flatten(map, map2), Reader.Flatten(map, map2)); + } + } + + /// + /// The skip action. + /// + public class SkipAction : ImplicitAction + { + /// + /// The symbol to skip. + /// + public Symbol SymToSkip { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public SkipAction(Symbol symToSkip) : base(true) + { + SymToSkip = symToSkip; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new SkipAction(SymToSkip.Flatten(map, map2)); + } + } + + /// + /// The field adjust action. + /// + public class FieldAdjustAction : ImplicitAction + { + /// + /// The index. + /// + public int RIndex { get; private set; } + + /// + /// The field name. + /// + public string FName { get; private set; } + + /// + /// The field aliases. + /// + public IList Aliases { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public FieldAdjustAction(int rindex, string fname, IList aliases) + { + RIndex = rindex; + FName = fname; + Aliases = aliases; + } + } + + /// + /// THe field order action. + /// + public sealed class FieldOrderAction : ImplicitAction + { + /// + /// Whether no reorder is needed. + /// + public bool NoReorder { get; private set; } + + /// + /// The fields. + /// + public Field[] Fields { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public FieldOrderAction(Field[] fields) + { + Fields = fields; + bool noReorder = true; + for (int i = 0; noReorder && i < fields.Length; i++) + { + noReorder &= (i == fields[i].Pos); + } + + NoReorder = noReorder; + } + } + + /// + /// The default start action. + /// + public class DefaultStartAction : ImplicitAction + { + /// + /// The contents. 
+ /// + public byte[] Contents { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public DefaultStartAction(byte[] contents) + { + Contents = contents; + } + } + + /// + /// The union adjust action. + /// + public class UnionAdjustAction : ImplicitAction + { + /// + /// The index. + /// + public int RIndex { get; private set; } + + /// + /// The symbol to parse. + /// + public Symbol SymToParse { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public UnionAdjustAction(int rindex, Symbol symToParse) + { + RIndex = rindex; + SymToParse = symToParse; + } + + /// + protected override Symbol Flatten(IDictionary map, + IDictionary> map2) + { + return new UnionAdjustAction(RIndex, SymToParse.Flatten(map, map2)); + } + } + + /// + /// The enum labels action. + /// + public class EnumLabelsAction : IntCheckAction + { + /// + /// The symbols. + /// + public IList Symbols { get; private set; } + + /// + /// Initializes a new instance of the class. + /// + public EnumLabelsAction(IList symbols) : base(symbols.Count) + { + Symbols = symbols; + } + + /// + /// Returns the label at the given index. + /// + public virtual string GetLabel(int n) + { + return Symbols[n]; + } + + /// + /// Returns the index of the given label, or -1 if the label is null or not found. + /// + public virtual int FindLabel(string label) + { + if (label != null) + { + for (int i = 0; i < Symbols.Count; i++) + { + if (label.Equals(Symbols[i])) + { + return i; + } + } + } + + return -1; + } + } + + /// + /// The terminal symbols for the grammar.
+ /// + public static Symbol Null { get; } = new Terminal("null"); + + /// + /// Boolean + /// + public static Symbol Boolean { get; } = new Terminal("boolean"); + + /// + /// Int + /// + public static Symbol Int { get; } = new Terminal("int"); + /// + /// Long + /// + public static Symbol Long { get; } = new Terminal("long"); + /// + /// Float + /// + public static Symbol Float { get; } = new Terminal("float"); + /// + /// Double + /// + public static Symbol Double { get; } = new Terminal("double"); + /// + /// String + /// + public static Symbol String { get; } = new Terminal("string"); + /// + /// Bytes + /// + public static Symbol Bytes { get; } = new Terminal("bytes"); + /// + /// Fixed + /// + public static Symbol Fixed { get; } = new Terminal("fixed"); + /// + /// Enum + /// + public static Symbol Enum { get; } = new Terminal("enum"); + /// + /// Union + /// + public static Symbol Union { get; } = new Terminal("union"); + + /// + /// ArrayStart + /// + public static Symbol ArrayStart { get; } = new Terminal("array-start"); + /// + /// ArrayEnd + /// + public static Symbol ArrayEnd { get; } = new Terminal("array-end"); + /// + /// MapStart + /// + public static Symbol MapStart { get; } = new Terminal("map-start"); + /// + /// MapEnd + /// + public static Symbol MapEnd { get; } = new Terminal("map-end"); + /// + /// ItemEnd + /// + public static Symbol ItemEnd { get; } = new Terminal("item-end"); + + /// + /// WriterUnion + /// + public static Symbol WriterUnion { get; } = new WriterUnionAction(); + + /// + /// FieldAction - a pseudo terminal used by parsers + /// + public static Symbol FieldAction { get; } = new Terminal("field-action"); + + /// + /// RecordStart + /// + public static Symbol RecordStart { get; } = new ImplicitAction(false); + /// + /// RecordEnd + /// + public static Symbol RecordEnd { get; } = new ImplicitAction(true); + /// + /// UnionEnd + /// + public static Symbol UnionEnd { get; } = new ImplicitAction(true); + /// + /// FieldEnd + /// + 
public static Symbol FieldEnd { get; } = new ImplicitAction(true); + + /// + /// DefaultEndAction + /// + public static Symbol DefaultEndAction { get; } = new ImplicitAction(true); + /// + /// MapKeyMarker + /// + public static Symbol MapKeyMarker { get; } = new Terminal("map-key-marker"); + } +} diff --git a/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs new file mode 100644 index 00000000000..7d109660671 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/Parsing/ValidatingGrammarGenerator.cs @@ -0,0 +1,170 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Avro.Generic; + +namespace Avro.IO.Parsing +{ + /// + /// The class that generates validating grammar. + /// + public class ValidatingGrammarGenerator + { + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// given schema sc. + /// + public virtual Symbol Generate(Schema schema) + { + return Symbol.NewRoot(Generate(schema, new Dictionary())); + } + + /// + /// Returns the non-terminal that is the start symbol for the grammar for the + /// given schema sc. 
If there is already an entry for the given schema + /// in the given map seen then that entry is returned. Otherwise a new + /// symbol is generated and an entry is inserted into the map. + /// + /// The schema for which the start symbol is required + /// A map of schema to symbol mapping done so far. + /// The start symbol for the schema + protected virtual Symbol Generate(Schema sc, IDictionary seen) + { + switch (sc.Tag) + { + case Schema.Type.Null: + return Symbol.Null; + case Schema.Type.Boolean: + return Symbol.Boolean; + case Schema.Type.Int: + return Symbol.Int; + case Schema.Type.Long: + return Symbol.Long; + case Schema.Type.Float: + return Symbol.Float; + case Schema.Type.Double: + return Symbol.Double; + case Schema.Type.String: + return Symbol.String; + case Schema.Type.Bytes: + return Symbol.Bytes; + case Schema.Type.Fixed: + return Symbol.NewSeq(new Symbol.IntCheckAction(((FixedSchema)sc).Size), Symbol.Fixed); + case Schema.Type.Enumeration: + return Symbol.NewSeq(new Symbol.IntCheckAction(((EnumSchema)sc).Symbols.Count), Symbol.Enum); + case Schema.Type.Array: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.ArrayEnd, Generate(((ArraySchema)sc).ItemSchema, seen)), + Symbol.ArrayStart); + case Schema.Type.Map: + return Symbol.NewSeq( + Symbol.NewRepeat(Symbol.MapEnd, Generate(((MapSchema)sc).ValueSchema, seen), Symbol.String), + Symbol.MapStart); + case Schema.Type.Record: + { + LitS wsc = new LitS(sc); + if (!seen.TryGetValue(wsc, out Symbol rresult)) + { + Symbol[] production = new Symbol[((RecordSchema)sc).Fields.Count]; + + // We construct a symbol without filling the array. Please see + // for the reason. 
+ rresult = Symbol.NewSeq(production); + seen[wsc] = rresult; + + int j = production.Length; + foreach (Field f in ((RecordSchema)sc).Fields) + { + production[--j] = Generate(f.Schema, seen); + } + } + + return rresult; + } + case Schema.Type.Union: + IList subs = ((UnionSchema)sc).Schemas; + Symbol[] symbols = new Symbol[subs.Count]; + string[] labels = new string[subs.Count]; + + int i = 0; + foreach (Schema b in ((UnionSchema)sc).Schemas) + { + symbols[i] = Generate(b, seen); + labels[i] = b.Fullname; + i++; + } + + return Symbol.NewSeq(Symbol.NewAlt(symbols, labels), Symbol.Union); + case Schema.Type.Logical: + return Generate((sc as LogicalSchema).BaseSchema, seen); + default: + throw new Exception("Unexpected schema type"); + } + } + + /// + /// A wrapper around Schema that does "==" equality. + /// + protected class LitS + { + private readonly Schema actual; + + /// + /// Initializes a new instance of the class. + /// + public LitS(Schema actual) + { + this.actual = actual; + } + + /// + /// Two LitS are equal if and only if their underlying schema is the same (not + /// merely equal). + /// + public override bool Equals(object o) + { + if (o is null) + { + return false; + } + + if (Object.ReferenceEquals(this, o)) + { + return true; + } + + if (GetType() != o.GetType()) + { + return false; + } + + return actual.Equals(((LitS)o).actual); + } + + /// + /// Returns the hash code for the current . + /// + public override int GetHashCode() + { + return actual.GetHashCode(); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/ParsingDecoder.cs b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs new file mode 100644 index 00000000000..ce327613306 --- /dev/null +++ b/lang/csharp/src/apache/main/IO/ParsingDecoder.cs @@ -0,0 +1,205 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Avro.IO.Parsing; + +namespace Avro.IO +{ + /// + /// Base class for a -based + /// s. + /// + public abstract class ParsingDecoder : Decoder, Parser.IActionHandler, SkipParser.ISkipHandler + { + /// + public abstract void ReadNull(); + + /// + public abstract bool ReadBoolean(); + + /// + public abstract int ReadInt(); + + /// + public abstract long ReadLong(); + + /// + public abstract float ReadFloat(); + + /// + public abstract double ReadDouble(); + + /// + public abstract byte[] ReadBytes(); + + /// + public abstract string ReadString(); + + /// + public abstract int ReadEnum(); + + /// + public abstract long ReadArrayStart(); + + /// + public abstract long ReadArrayNext(); + + /// + public abstract long ReadMapStart(); + + /// + public abstract long ReadMapNext(); + + /// + public abstract int ReadUnionIndex(); + + /// + public abstract void ReadFixed(byte[] buffer); + + /// + public abstract void ReadFixed(byte[] buffer, int start, int length); + + /// + public abstract void SkipNull(); + + /// + public abstract void SkipBoolean(); + + /// + public abstract void SkipInt(); + + /// + public abstract void SkipLong(); + + /// + public abstract void SkipFloat(); + + /// + public abstract void SkipDouble(); + + /// + public abstract void SkipBytes(); + + /// + public abstract void SkipString(); + + /// + public abstract void SkipEnum(); + + /// + public abstract void 
SkipUnionIndex(); + + /// + public abstract void SkipFixed(int len); + + /// + /// Skips an array on the stream. + /// + public abstract void SkipArray(); + + /// + /// Skips a map on the stream. + /// + public abstract void SkipMap(); + + /// + public abstract Symbol DoAction(Symbol input, Symbol top); + + /// + /// The parser. + /// + protected readonly SkipParser Parser; + + /// + /// Initializes a new instance of the class. + /// + protected ParsingDecoder(Symbol root) + { + Parser = new SkipParser(root, this, this); + } + + /// + /// Skips a fixed type on the stream. + /// + protected abstract void SkipFixed(); + + /// + public virtual void SkipAction() + { + Parser.PopSymbol(); + } + + /// + public virtual void SkipTopSymbol() + { + Symbol top = Parser.TopSymbol(); + if (top == Symbol.Null) + { + ReadNull(); + } + else if (top == Symbol.Boolean) + { + ReadBoolean(); + } + else if (top == Symbol.Int) + { + ReadInt(); + } + else if (top == Symbol.Long) + { + ReadLong(); + } + else if (top == Symbol.Float) + { + ReadFloat(); + } + else if (top == Symbol.Double) + { + ReadDouble(); + } + else if (top == Symbol.String) + { + SkipString(); + } + else if (top == Symbol.Bytes) + { + SkipBytes(); + } + else if (top == Symbol.Enum) + { + ReadEnum(); + } + else if (top == Symbol.Fixed) + { + SkipFixed(); + } + else if (top == Symbol.Union) + { + ReadUnionIndex(); + } + else if (top == Symbol.ArrayStart) + { + SkipArray(); + } + else if (top == Symbol.MapStart) + { + SkipMap(); + } + } + } +} diff --git a/lang/csharp/src/apache/main/IO/ParsingEncoder.cs b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs new file mode 100644 index 00000000000..637a6e3465a --- /dev/null +++ b/lang/csharp/src/apache/main/IO/ParsingEncoder.cs @@ -0,0 +1,146 @@ +īģŋ/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.IO +{ + /// + /// Base class for a -based + /// s. + /// + public abstract class ParsingEncoder : Encoder + { + /// + /// Tracks the number of items that remain to be written in the collections + /// (array or map). + /// + private long[] counts = new long[10]; + + /// + /// Position into the counts stack. + /// + protected int Pos = -1; + + /// + public abstract void WriteNull(); + + /// + public abstract void WriteBoolean(bool value); + + /// + public abstract void WriteInt(int value); + + /// + public abstract void WriteLong(long value); + + /// + public abstract void WriteFloat(float value); + + /// + public abstract void WriteDouble(double value); + + /// + public abstract void WriteBytes(byte[] value); + + /// + public abstract void WriteBytes(byte[] value, int offset, int length); + + /// + public abstract void WriteString(string value); + + /// + public abstract void WriteEnum(int value); + + /// + public abstract void WriteArrayStart(); + + /// + public abstract void WriteArrayEnd(); + + /// + public abstract void WriteMapStart(); + + /// + public abstract void WriteMapEnd(); + + /// + public abstract void WriteUnionIndex(int value); + + /// + public abstract void WriteFixed(byte[] data); + + /// + public abstract void WriteFixed(byte[] data, int start, int len); + + /// + public abstract void Flush(); + + /// + public virtual void 
SetItemCount(long value) + { + if (counts[Pos] != 0) + { + throw new AvroTypeException("Incorrect number of items written. " + counts[Pos] + + " more required."); + } + + counts[Pos] = value; + } + + /// + public virtual void StartItem() + { + counts[Pos]--; + } + + /// + /// Push a new collection on to the stack, growing the counts array when it is full. + /// + protected void Push() + { + if (++Pos == counts.Length) + { + Array.Resize(ref counts, Pos + 10); + } + + counts[Pos] = 0; + } + + /// + /// Pops the current collection off the stack; throws AvroTypeException if items remain to be written. + /// + protected void Pop() + { + if (counts[Pos] != 0) + { + throw new AvroTypeException("Incorrect number of items written. " + counts[Pos] + " more required."); + } + + Pos--; + } + + /// + /// Returns the position into the stack. + /// + protected int Depth() + { + return Pos; + } + } +} diff --git a/lang/csharp/src/apache/main/IO/Resolver.cs b/lang/csharp/src/apache/main/IO/Resolver.cs index c77aca7698d..60d7966cfea 100644 --- a/lang/csharp/src/apache/main/IO/Resolver.cs +++ b/lang/csharp/src/apache/main/IO/Resolver.cs @@ -158,6 +158,10 @@ public static void EncodeDefaultValue(Encoder enc, Schema schema, JToken jtok) EncodeDefaultValue(enc, (schema as UnionSchema).Schemas[0], jtok); break; + case Schema.Type.Logical: + EncodeDefaultValue(enc, (schema as LogicalSchema).BaseSchema, jtok); + break; + default: throw new AvroException("Unsupported schema type " + schema.Tag); } diff --git a/lang/csharp/src/apache/main/Protocol/Message.cs b/lang/csharp/src/apache/main/Protocol/Message.cs index 732438c9370..19cc61c84fe 100644 --- a/lang/csharp/src/apache/main/Protocol/Message.cs +++ b/lang/csharp/src/apache/main/Protocol/Message.cs @@ -198,12 +198,10 @@ public override bool Equals(Object obj) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return Name.GetHashCode() + -#pragma warning restore CA1307 // Specify StringComparison - Request.GetHashCode() + - (Response == null ? 0 : Response.GetHashCode()) + - (Error == null ?
0 : Error.GetHashCode()); + Request.GetHashCode() + + (Response == null ? 0 : Response.GetHashCode()) + + (Error == null ? 0 : Error.GetHashCode()); } /// diff --git a/lang/csharp/src/apache/main/Protocol/Protocol.cs b/lang/csharp/src/apache/main/Protocol/Protocol.cs index 1f5b5410605..94ae1268a0d 100644 --- a/lang/csharp/src/apache/main/Protocol/Protocol.cs +++ b/lang/csharp/src/apache/main/Protocol/Protocol.cs @@ -269,10 +269,8 @@ private bool MessagesEquals(IDictionary that) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return Name.GetHashCode() + Namespace.GetHashCode() + -#pragma warning restore CA1307 // Specify StringComparison - GetTypesHashCode() + GetMessagesHashCode(); + GetTypesHashCode() + GetMessagesHashCode(); } /// @@ -295,9 +293,10 @@ private int GetMessagesHashCode() { int hash = Messages.Count; foreach (KeyValuePair pair in Messages) -#pragma warning disable CA1307 // Specify StringComparison + { hash += pair.Key.GetHashCode() + pair.Value.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison + } + return hash; } } diff --git a/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs b/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs index 33ae5f55682..10466cc8ae6 100644 --- a/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs +++ b/lang/csharp/src/apache/main/Reflect/ArraySchemaExtensions.cs @@ -27,7 +27,7 @@ public static class ArraySchemaExtensions /// Return the name of the array helper /// /// this - /// value of the helper metadata - null if it isnt present + /// value of the helper metadata - null if it isn't present public static string GetHelper(this ArraySchema ars) { string s = null; diff --git a/lang/csharp/src/apache/main/Reflect/ClassCache.cs b/lang/csharp/src/apache/main/Reflect/ClassCache.cs index 430efffbf03..a64e06d2609 100644 --- a/lang/csharp/src/apache/main/Reflect/ClassCache.cs +++ 
b/lang/csharp/src/apache/main/Reflect/ClassCache.cs @@ -76,7 +76,7 @@ public static void AddDefaultConverter(Func /// /// - /// The first matching converter - null if there isnt one + /// The first matching converter - null if there isn't one public IAvroFieldConverter GetDefaultConverter(Avro.Schema.Type tag, Type propType) { Type avroType; @@ -159,7 +159,7 @@ public void AddArrayHelper(string name, Type helperType) public ArrayHelper GetArrayHelper(ArraySchema schema, IEnumerable enumerable) { Type h; - // note ArraySchema is unamed and doesnt have a FulllName, use "helper" metadata + // note ArraySchema is unnamed and doesn't have a FulllName, use "helper" metadata // metadata is json string, strip quotes string s = null; s = schema.GetHelper(); @@ -263,17 +263,27 @@ public void LoadClassCache(Type objType, Schema s) EnumCache.AddEnumNameMapItem(ns, objType); break; case UnionSchema us: - if (us.Schemas.Count == 2 && (us.Schemas[0].Tag == Schema.Type.Null || us.Schemas[1].Tag == Schema.Type.Null) && objType.IsClass) + if (us.Schemas.Count == 2 && (us.Schemas[0].Tag == Schema.Type.Null || us.Schemas[1].Tag == Schema.Type.Null)) { // in this case objType will match the non null type in the union foreach (var o in us.Schemas) { - if (o.Tag != Schema.Type.Null) + if (o.Tag == Schema.Type.Null) + { + continue; + } + + if (objType.IsClass) { LoadClassCache(objType, o); } - } + var innerType = Nullable.GetUnderlyingType(objType); + if (innerType != null && innerType.IsEnum) + { + LoadClassCache(innerType, o); + } + } } else { diff --git a/lang/csharp/src/apache/main/Reflect/DotnetClass.cs b/lang/csharp/src/apache/main/Reflect/DotnetClass.cs index 5bef040f631..78eaca52224 100644 --- a/lang/csharp/src/apache/main/Reflect/DotnetClass.cs +++ b/lang/csharp/src/apache/main/Reflect/DotnetClass.cs @@ -52,14 +52,14 @@ public DotnetClass(Type t, RecordSchema r, ClassCache cache) if (avroAttr != null) { hasAttribute = true; - _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, 
f.Schema.Tag, avroAttr.Converter, cache)); + _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema, avroAttr.Converter, cache)); break; } } if (!hasAttribute) { - _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema.Tag, cache)); + _propertyMap.TryAdd(f.Name, new DotnetProperty(prop, f.Schema, cache)); } } } @@ -83,7 +83,7 @@ private PropertyInfo GetPropertyInfo(Field f) } } - throw new AvroException($"Class {_type.Name} doesnt contain property {f.Name}"); + throw new AvroException($"Class {_type.Name} doesn't contain property {f.Name}"); } /// @@ -97,7 +97,7 @@ public object GetValue(object o, Field f) DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } return p.GetValue(o, f.Schema); @@ -108,13 +108,13 @@ public object GetValue(object o, Field f) /// /// the object /// field schema - /// value for the proprty referenced by the field schema + /// value for the property referenced by the field schema public void SetValue(object o, Field f, object v) { DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } p.SetValue(o, v, f.Schema); @@ -139,7 +139,7 @@ public Type GetPropertyType(Field f) DotnetProperty p; if (!_propertyMap.TryGetValue(f.Name, out p)) { - throw new AvroException($"ByPosClass doesnt contain property {f.Name}"); + throw new AvroException($"ByPosClass doesn't contain property {f.Name}"); } return p.GetPropertyType(); diff --git a/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs b/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs index 4ddcdc69df0..42ae766bd23 100644 --- a/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs +++ b/lang/csharp/src/apache/main/Reflect/DotnetProperty.cs @@ -28,9 +28,10 
@@ internal class DotnetProperty public IAvroFieldConverter Converter { get; set; } - private bool IsPropertyCompatible(Avro.Schema.Type schemaTag) + private bool IsPropertyCompatible(Avro.Schema schema) { Type propType; + var schemaTag = schema.Tag; if (Converter == null) { @@ -74,21 +75,25 @@ private bool IsPropertyCompatible(Avro.Schema.Type schemaTag) return propType == typeof(byte[]); case Avro.Schema.Type.Error: return propType.IsClass; + case Avro.Schema.Type.Logical: + var logicalSchema = (LogicalSchema)schema; + var type = logicalSchema.LogicalType.GetCSharpType(false); + return type == propType; } return false; } - public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, IAvroFieldConverter converter, ClassCache cache) + public DotnetProperty(PropertyInfo property, Avro.Schema schema, IAvroFieldConverter converter, ClassCache cache) { _property = property; Converter = converter; - if (!IsPropertyCompatible(schemaTag)) + if (!IsPropertyCompatible(schema)) { if (Converter == null) { - var c = cache.GetDefaultConverter(schemaTag, _property.PropertyType); + var c = cache.GetDefaultConverter(schema.Tag, _property.PropertyType); if (c != null) { Converter = c; @@ -96,12 +101,12 @@ public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, IAvroF } } - throw new AvroException($"Property {property.Name} in object {property.DeclaringType} isn't compatible with Avro schema type {schemaTag}"); + throw new AvroException($"Property {property.Name} in object {property.DeclaringType} isn't compatible with Avro schema type {schema.Tag}"); } } - public DotnetProperty(PropertyInfo property, Avro.Schema.Type schemaTag, ClassCache cache) - : this(property, schemaTag, null, cache) + public DotnetProperty(PropertyInfo property, Avro.Schema schema, ClassCache cache) + : this(property, schema, null, cache) { } diff --git a/lang/csharp/src/apache/main/Reflect/EnumCache.cs b/lang/csharp/src/apache/main/Reflect/EnumCache.cs index 
7fbfc998d12..463758915f4 100644 --- a/lang/csharp/src/apache/main/Reflect/EnumCache.cs +++ b/lang/csharp/src/apache/main/Reflect/EnumCache.cs @@ -48,7 +48,7 @@ public static Type GetEnumeration(NamedSchema schema) Type t; if (!_nameEnumMap.TryGetValue(schema.Fullname, out t)) { - throw new AvroException($"Couldnt find enumeration for avro fullname: {schema.Fullname}"); + throw new AvroException($"Couldn't find enumeration for avro fullname: {schema.Fullname}"); } return t; diff --git a/lang/csharp/src/apache/main/Reflect/README.md b/lang/csharp/src/apache/main/Reflect/README.md index 3573c6a309e..e3cb2e4cc48 100644 --- a/lang/csharp/src/apache/main/Reflect/README.md +++ b/lang/csharp/src/apache/main/Reflect/README.md @@ -1,12 +1,12 @@ # Namespace Avro.Reflect -This namespace contains classes that implement Avro serialization and deserialization for plain C# objects. The classes use .net reflection to implement the serializers. The interface is similar to the Generic and Specific serialiation classes. +This namespace contains classes that implement Avro serialization and deserialization for plain C# objects. The classes use .net reflection to implement the serializers. The interface is similar to the Generic and Specific serialization classes. ## Serialization The approach starts with the schema and iterates both the schema and the dotnet type together in a depth first manner per the specification. Serialization is the same as the Generic serializer except where the serializer encounters: - *A fixed type*: if the corresponding dotnet object type is a byte[] of the correct length then the object is serialized, otherwise an exception is thrown. 
-- *A record type*: the serializer matches the schema property name to the dotnet object property name and then reursively serializes the schema property and the dotnet object property +- *A record type*: the serializer matches the schema property name to the dotnet object property name and then recursively serializes the schema property and the dotnet object property - *An array type*: See array serialization/deserialization. Basic serialization is performed as in the following example: @@ -37,7 +37,7 @@ You might want to do this if your class contains interfaces and/or if you use an See the section on Arrays. The ArrayHelper specifies the type of object created when an array is deserialized. The default is List\. -The type created for Map objects is specified by the Deserializer property MapType. *This must be a two (or more) parameter generic type where the first type paramater is string and the second is undefined* e.g. Dictionary. +The type created for Map objects is specified by the Deserializer property MapType. *This must be a two (or more) parameter generic type where the first type parameter is string and the second is undefined* e.g. Dictionary. ```csharp public Type MapType { get; set; } ``` diff --git a/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs b/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs index 676d9f39d7b..034cb89f88e 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectDefaultReader.cs @@ -50,7 +50,7 @@ public class ReflectDefaultReader : SpecificDefaultReader /// /// Delegate to a factory method to create objects of type x. If you are deserializing to interfaces - /// you could use an IoC container factory insread of the default. Default is Activator.CreateInstance() + /// you could use an IoC container factory instead of the default. 
Default is Activator.CreateInstance() /// /// public Func RecordFactory { get => _recordFactory; set => _recordFactory = value; } @@ -176,7 +176,7 @@ internal Type GetTypeFromSchema(Schema schema, bool nullable) throw new Exception("Unable to cast schema into a union schema"); } - Schema nullibleType = CodeGen.getNullableType(unionSchema); + Schema nullibleType = CodeGen.GetNullableType(unionSchema); if (nullibleType == null) { return typeof(object); @@ -373,7 +373,7 @@ public object GetDefaultValue(Schema s, JToken defaultValue) /// /// Deserializes a enum. Uses CreateEnum to construct the new enum object. /// - /// If appropirate, uses this instead of creating a new enum object. + /// If appropriate, uses this instead of creating a new enum object. /// The schema the writer used while writing the enum /// The schema the reader is using /// The decoder for deserialization. @@ -450,10 +450,10 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected override object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = readerSchema as FixedSchema; diff --git a/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs b/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs index a6397c65001..e5ef4a2124c 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectDefaultWriter.cs @@ -199,6 +199,8 @@ protected override bool Matches(Schema sc, object obj) return false; // Union directly within another union not allowed! 
case Schema.Type.Fixed: return obj is byte[]; + case Schema.Type.Logical: + return ((LogicalSchema)sc).LogicalType.IsInstanceOfLogicalType(obj); default: throw new AvroException("Unknown schema type: " + sc.Tag); } diff --git a/lang/csharp/src/apache/main/Reflect/ReflectReader.cs b/lang/csharp/src/apache/main/Reflect/ReflectReader.cs index 0c2df58a9a5..e39e30d32cf 100644 --- a/lang/csharp/src/apache/main/Reflect/ReflectReader.cs +++ b/lang/csharp/src/apache/main/Reflect/ReflectReader.cs @@ -72,7 +72,7 @@ public ReflectReader(ReflectDefaultReader reader) /// Generic read function /// /// object to store data read - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(T reuse, Decoder dec) { @@ -82,7 +82,7 @@ public T Read(T reuse, Decoder dec) /// /// Generic read function /// - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(Decoder dec) { diff --git a/lang/csharp/src/apache/main/Schema/Aliases.cs b/lang/csharp/src/apache/main/Schema/Aliases.cs new file mode 100644 index 00000000000..6574e3163d6 --- /dev/null +++ b/lang/csharp/src/apache/main/Schema/Aliases.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +using System.Collections.Generic; +using System.Linq; + +namespace Avro +{ + internal static class Aliases + { + internal static IList GetSchemaNames(IEnumerable aliases, string enclosingTypeName, string enclosingTypeNamespace) + { + if (aliases == null) + { + return null; + } + + SchemaName enclosingSchemaName = new SchemaName(enclosingTypeName, enclosingTypeNamespace, null, null); + return aliases.Select(alias => new SchemaName(alias, enclosingSchemaName.Namespace, null, null)).ToList(); + } + } +} diff --git a/lang/csharp/src/apache/main/Schema/ArraySchema.cs b/lang/csharp/src/apache/main/Schema/ArraySchema.cs index 5b4e6a434e0..7c4d8e1a9c8 100644 --- a/lang/csharp/src/apache/main/Schema/ArraySchema.cs +++ b/lang/csharp/src/apache/main/Schema/ArraySchema.cs @@ -29,7 +29,7 @@ public class ArraySchema : UnnamedSchema /// /// Schema for the array 'type' attribute /// - public Schema ItemSchema { get; set; } + public Schema ItemSchema { get; set; } /// /// Static class to return a new instance of ArraySchema @@ -48,14 +48,25 @@ internal static ArraySchema NewInstance(JToken jtok, PropertyMap props, SchemaNa } /// - /// Constructor + /// Creates a new /// - /// schema for the array items type - /// dictionary that provides access to custom properties - private ArraySchema(Schema items, PropertyMap props) : base(Type.Array, props) + /// Schema for the array items type + /// Dictionary that provides access to custom properties + /// + public static ArraySchema Create(Schema items, PropertyMap customAttributes = null) + { + return new ArraySchema(items, customAttributes); + } + + /// + /// Initializes a new instance of the class. + /// + /// Schema for the array items type + /// Dictionary that provides access to custom properties + private ArraySchema(Schema items, PropertyMap customAttributes) + : base(Type.Array, customAttributes) { - if (null == items) throw new ArgumentNullException(nameof(items)); - this.ItemSchema = items; + ItemSchema = items ?? 
throw new ArgumentNullException(nameof(items)); } /// diff --git a/lang/csharp/src/apache/main/Schema/EnumSchema.cs b/lang/csharp/src/apache/main/Schema/EnumSchema.cs index 3fd14500414..225780310a6 100644 --- a/lang/csharp/src/apache/main/Schema/EnumSchema.cs +++ b/lang/csharp/src/apache/main/Schema/EnumSchema.cs @@ -17,7 +17,8 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; +using System.Text.RegularExpressions; using Newtonsoft.Json.Linq; namespace Avro @@ -30,7 +31,7 @@ public class EnumSchema : NamedSchema /// /// List of strings representing the enum symbols /// - public IList Symbols { get; private set; } + public IList Symbols { get; private set; } /// /// The default token to use when deserializing an enum when the provided token is not found @@ -47,6 +48,34 @@ public class EnumSchema : NamedSchema /// public int Count { get { return Symbols.Count; } } + /// + /// Initializes a new instance of the class. + /// + /// Name of enum + /// Namespace of enum + /// List of aliases for the name + /// List of enum symbols + /// Custom properties on this schema + /// Documentation for this named schema + /// + public static EnumSchema Create(string name, + IEnumerable symbols, + string space = null, + IEnumerable aliases = null, + PropertyMap customProperties = null, + string doc = null, + string defaultSymbol = null) + { + return new EnumSchema(new SchemaName(name, space, null, doc), + Aliases.GetSchemaNames(aliases, name, space), + symbols.ToList(), + CreateSymbolsMap(symbols), + customProperties, + new SchemaNames(), + doc, + defaultSymbol); + } + /// /// Static function to return new instance of EnumSchema /// @@ -81,7 +110,7 @@ internal static EnumSchema NewInstance(JToken jtok, PropertyMap props, SchemaNam return new EnumSchema(name, aliases, symbols, symbolMap, props, names, JsonHelper.GetOptionalString(jtok, "doc"), JsonHelper.GetOptionalString(jtok, "default")); } - catch (SchemaParseException e) + catch 
(AvroException e) { throw new SchemaParseException($"{e.Message} at '{jtok.Path}'", e); } @@ -103,15 +132,49 @@ private EnumSchema(SchemaName name, IList aliases, List symb string doc, string defaultSymbol) : base(Type.Enumeration, name, aliases, props, names, doc) { - if (null == name.Name) throw new SchemaParseException("name cannot be null for enum schema."); + if (null == name.Name) throw new AvroException("name cannot be null for enum schema."); this.Symbols = symbols; this.symbolMap = symbolMap; if (null != defaultSymbol && !symbolMap.ContainsKey(defaultSymbol)) - throw new SchemaParseException($"Default symbol: {defaultSymbol} not found in symbols"); + throw new AvroException($"Default symbol: {defaultSymbol} not found in symbols"); Default = defaultSymbol; } + /// + /// Creates symbols map from specified list of symbols. + /// Symbol map contains the names of the symbols and their index. + /// + /// List of symbols + /// Symbol map + /// Is thrown if the symbols list contains invalid symbol name or duplicate symbols + private static IDictionary CreateSymbolsMap(IEnumerable symbols) + { + IDictionary symbolMap = new Dictionary(); + int i = 0; + foreach (var symbol in symbols) + { + ValidateSymbolName(symbol); + + if (symbolMap.ContainsKey(symbol)) + { + throw new AvroException($"Duplicate symbol: {symbol}"); + } + + symbolMap[symbol] = i++; + } + + return symbolMap; + } + + private static void ValidateSymbolName(string symbol) + { + if(string.IsNullOrEmpty(symbol) || !Regex.IsMatch(symbol, "^([A-Za-z_][A-Za-z0-9_]*)$")) + { + throw new AvroException($"Invalid symbol name: {symbol}"); + } + } + /// /// Writes enum schema in JSON format /// @@ -127,7 +190,7 @@ protected internal override void WriteJsonFields(Newtonsoft.Json.JsonTextWriter foreach (string s in this.Symbols) writer.WriteValue(s); writer.WriteEndArray(); - if (null != Default) + if (null != Default) { writer.WritePropertyName("default"); writer.WriteValue(Default); @@ -139,16 +202,24 @@ protected 
internal override void WriteJsonFields(Newtonsoft.Json.JsonTextWriter /// Throws AvroException if the symbol is not found in this enum. /// /// name of the symbol to find - /// position of the given symbol in this enum schema + /// + /// position of the given symbol in this enum schema + /// + /// No such symbol: {symbol} public int Ordinal(string symbol) { int result; if (symbolMap.TryGetValue(symbol, out result)) + { return result; - if (null != Default) - return symbolMap[Default]; + } + + if (Default != null && symbolMap.TryGetValue(Default, out result)) + { + return result; + } - throw new AvroException("No such symbol: " + symbol); + throw new AvroException($"No such symbol: {symbol}"); } /// @@ -218,9 +289,11 @@ public override bool Equals(object obj) public override int GetHashCode() { int result = SchemaName.GetHashCode() + getHashCode(Props); -#pragma warning disable CA1307 // Specify StringComparison - foreach (string s in Symbols) result += 23 * s.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison + foreach (string s in Symbols) + { + result += 23 * s.GetHashCode(); + } + return result; } diff --git a/lang/csharp/src/apache/main/Schema/Field.cs b/lang/csharp/src/apache/main/Schema/Field.cs index bdfe9282cb3..799f265b320 100644 --- a/lang/csharp/src/apache/main/Schema/Field.cs +++ b/lang/csharp/src/apache/main/Schema/Field.cs @@ -54,18 +54,10 @@ public enum SortOrder /// public readonly string Name; - /// - /// List of aliases for the field name - /// - [Obsolete("Use Aliases instead. This will be removed from the public API in a future version.")] - public readonly IList aliases; - -#pragma warning disable CS0618 // Type or member is obsolete /// /// List of aliases for the field name. /// - public IList Aliases => aliases; -#pragma warning restore CS0618 // Type or member is obsolete + public IList Aliases { get; private set; } /// /// Position of the field within its record. 
@@ -103,15 +95,42 @@ public enum SortOrder /// /// Static comparer object for JSON objects such as the fields default value /// - internal static JTokenEqualityComparer JtokenEqual = new JTokenEqualityComparer(); + internal readonly static JTokenEqualityComparer JtokenEqual = new JTokenEqualityComparer(); /// - /// A flag to indicate if reader schema has a field that is missing from writer schema and has a default value - /// This is set in CanRead() which is always be called before deserializing data + /// Initializes a new instance of the class. /// + /// schema for the field type. + /// name of the field. + /// list of aliases for the name of the field. + /// position of the field. + /// documentation for the field. + /// field's default value if it exists. + /// sort order of the field. + /// dictionary that provides access to custom properties. + public Field(Schema schema, + string name, + int pos, + IList aliases = null, + string doc = null, + JToken defaultValue = null, + SortOrder sortorder = SortOrder.ignore, + PropertyMap customProperties = null) + : this(schema, name, aliases, pos, doc, defaultValue, sortorder, customProperties) + { + } /// - /// Constructor for the field class + /// Creates a new field based on the specified field, with a different position. + /// + /// A clone of this field with new position. + internal Field ChangePosition(int newPosition) + { + return new Field(Schema, Name, newPosition, Aliases, Documentation, DefaultValue, Ordering ?? SortOrder.ignore, Props); + } + + /// + /// Initializes a new instance of the class. /// /// schema for the field type /// name of the field @@ -121,21 +140,27 @@ public enum SortOrder /// field's default value if it exists /// sort order of the field /// dictionary that provides access to custom properties + /// + /// name - name cannot be null. + /// or + /// type - type cannot be null. 
+ /// internal Field(Schema schema, string name, IList aliases, int pos, string doc, JToken defaultValue, SortOrder sortorder, PropertyMap props) { - if (string.IsNullOrEmpty(name)) throw new ArgumentNullException(nameof(name), "name cannot be null."); - if (null == schema) throw new ArgumentNullException("type", "type cannot be null."); - this.Schema = schema; - this.Name = name; -#pragma warning disable CS0618 // Type or member is obsolete - this.aliases = aliases; -#pragma warning restore CS0618 // Type or member is obsolete - this.Pos = pos; - this.Documentation = doc; - this.DefaultValue = defaultValue; - this.Ordering = sortorder; - this.Props = props; + if (string.IsNullOrEmpty(name)) + { + throw new ArgumentNullException(nameof(name), "name cannot be null."); + } + + Schema = schema ?? throw new ArgumentNullException("type", "type cannot be null."); + Name = name; + Aliases = aliases; + Pos = pos; + Documentation = doc; + DefaultValue = defaultValue; + Ordering = sortorder; + Props = props; } /// diff --git a/lang/csharp/src/apache/main/Schema/FixedSchema.cs b/lang/csharp/src/apache/main/Schema/FixedSchema.cs index b16c1ff1dcb..2b24e6b8689 100644 --- a/lang/csharp/src/apache/main/Schema/FixedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/FixedSchema.cs @@ -32,6 +32,20 @@ public class FixedSchema : NamedSchema /// public int Size { get; set; } + /// + /// Initializes a new instance of the class. 
+ /// + /// Name of the fixed schema + /// List of aliases for the name + /// Fixed size + /// Namespace of fixed + /// Custom properties on this schema + /// Documentation for this named schema + public static FixedSchema Create(string name, int size, string space = null, IEnumerable aliases = null, PropertyMap customProperties = null, string doc = null) + { + return new FixedSchema(new SchemaName(name, space, null, doc), Aliases.GetSchemaNames(aliases, name, space), size, customProperties, new SchemaNames(), doc); + } + /// /// Static function to return new instance of the fixed schema class /// diff --git a/lang/csharp/src/apache/main/Schema/JsonHelper.cs b/lang/csharp/src/apache/main/Schema/JsonHelper.cs index 1ca51cb4848..ccdf8f7345a 100644 --- a/lang/csharp/src/apache/main/Schema/JsonHelper.cs +++ b/lang/csharp/src/apache/main/Schema/JsonHelper.cs @@ -82,7 +82,7 @@ public static int GetRequiredInteger(JToken jtok, string field) /// /// JSON object to read /// property name - /// null if property doesn't exist, otherise returns property boolean value + /// null if property doesn't exist, otherwise returns property boolean value public static bool? GetOptionalBoolean(JToken jtok, string field) { if (null == jtok) throw new ArgumentNullException(nameof(jtok), "jtok cannot be null."); diff --git a/lang/csharp/src/apache/main/Schema/LogicalSchema.cs b/lang/csharp/src/apache/main/Schema/LogicalSchema.cs index 3c1928ee47f..181260f2ca2 100644 --- a/lang/csharp/src/apache/main/Schema/LogicalSchema.cs +++ b/lang/csharp/src/apache/main/Schema/LogicalSchema.cs @@ -52,8 +52,7 @@ internal static LogicalSchema NewInstance(JToken jtok, PropertyMap props, Schema private LogicalSchema(Schema baseSchema, string logicalTypeName, PropertyMap props) : base(Type.Logical, props) { - if (null == baseSchema) throw new ArgumentNullException(nameof(baseSchema)); - BaseSchema = baseSchema; + BaseSchema = baseSchema ?? 
throw new ArgumentNullException(nameof(baseSchema)); LogicalTypeName = logicalTypeName; LogicalType = LogicalTypeFactory.Instance.GetFromLogicalSchema(this); } @@ -76,6 +75,18 @@ protected internal override void WriteJson(Newtonsoft.Json.JsonTextWriter writer writer.WriteEndObject(); } + /// + public override string Name + { + get { return BaseSchema.Name; } + } + + /// + public override string Fullname + { + get { return BaseSchema.Fullname; } + } + /// /// Checks if this schema can read data written by the given schema. Used for decoding data. /// diff --git a/lang/csharp/src/apache/main/Schema/MapSchema.cs b/lang/csharp/src/apache/main/Schema/MapSchema.cs index 54bc05a8d31..a1a6a4222b9 100644 --- a/lang/csharp/src/apache/main/Schema/MapSchema.cs +++ b/lang/csharp/src/apache/main/Schema/MapSchema.cs @@ -36,10 +36,11 @@ public class MapSchema : UnnamedSchema /// Creates a new from the given schema. /// /// Schema to create the map schema from. + /// Dictionary that provides access to custom properties /// A new . - public static MapSchema CreateMap(Schema type) + public static MapSchema CreateMap(Schema type, PropertyMap customProperties = null) { - return new MapSchema(type,null); + return new MapSchema(type, customProperties); } /// @@ -67,12 +68,12 @@ internal static MapSchema NewInstance(JToken jtok, PropertyMap props, SchemaName /// /// Constructor for map schema class /// - /// schema for map values type - /// dictionary that provides access to custom properties - private MapSchema(Schema valueSchema, PropertyMap props) : base(Type.Map, props) + /// Schema for map values type + /// Dictionary that provides access to custom properties + private MapSchema(Schema valueSchema, PropertyMap cutsomProperties) + : base(Type.Map, cutsomProperties) { - if (null == valueSchema) throw new ArgumentNullException(nameof(valueSchema), "valueSchema cannot be null."); - this.ValueSchema = valueSchema; + ValueSchema = valueSchema ?? 
throw new ArgumentNullException(nameof(valueSchema), "valueSchema cannot be null."); } /// diff --git a/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs b/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs index 1a55c2ff863..db5db2cb03e 100644 --- a/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs +++ b/lang/csharp/src/apache/main/Schema/PrimitiveSchema.cs @@ -16,8 +16,7 @@ * limitations under the License. */ using System; -using System.Collections.Generic; -using System.Text; +using System.Linq; using Newtonsoft.Json; namespace Avro @@ -31,11 +30,23 @@ public sealed class PrimitiveSchema : UnnamedSchema /// Constructor for primitive schema /// /// - /// dictionary that provides access to custom properties - private PrimitiveSchema(Type type, PropertyMap props) : base(type, props) + /// dictionary that provides access to custom properties + private PrimitiveSchema(Type type, PropertyMap customProperties) + : base(type, customProperties) { } + /// + /// Creates a new instance of + /// + /// The primitive type to create + /// Dictionary that provides access to custom properties + /// + public static PrimitiveSchema Create(Type type, PropertyMap customProperties = null) + { + return new PrimitiveSchema(type, customProperties); + } + /// /// Static function to return new instance of primitive schema /// @@ -82,7 +93,22 @@ public static PrimitiveSchema NewInstance(string type, PropertyMap props = null) /// protected internal override void WriteJson(JsonTextWriter w, SchemaNames names, string encspace) { - w.WriteValue(Name); + if(this.Props?.Any() == true) + { + w.WriteStartObject(); + w.WritePropertyName("type"); + w.WriteValue(Name); + foreach(var prop in Props) + { + w.WritePropertyName(prop.Key); + w.WriteRawValue(prop.Value); + } + w.WriteEndObject(); + } + else + { + w.WriteValue(Name); + } } /// diff --git a/lang/csharp/src/apache/main/Schema/Property.cs b/lang/csharp/src/apache/main/Schema/Property.cs index f4240721f1a..1774bebff87 100644 --- 
a/lang/csharp/src/apache/main/Schema/Property.cs +++ b/lang/csharp/src/apache/main/Schema/Property.cs @@ -36,7 +36,7 @@ public class PropertyMap : Dictionary /// Parses the custom properties from the given JSON object and stores them /// into the schema's list of custom properties /// - /// JSON object to prase + /// JSON object to parse public void Parse(JToken jtok) { JObject jo = jtok as JObject; diff --git a/lang/csharp/src/apache/main/Schema/RecordSchema.cs b/lang/csharp/src/apache/main/Schema/RecordSchema.cs index 6f01d0ca898..910bc466fe9 100644 --- a/lang/csharp/src/apache/main/Schema/RecordSchema.cs +++ b/lang/csharp/src/apache/main/Schema/RecordSchema.cs @@ -17,6 +17,7 @@ */ using System; using System.Collections.Generic; +using System.Linq; using Newtonsoft.Json.Linq; namespace Avro @@ -28,10 +29,26 @@ namespace Avro /// public class RecordSchema : NamedSchema { + private List _fields; + /// /// List of fields in the record /// - public List Fields { get; private set; } + public List Fields + { + get + { + return _fields; + } + + set + { + _fields = SetFieldsPositions(value); + + fieldLookup = CreateFieldMap(_fields); + fieldAliasLookup = CreateFieldMap(_fields, true); + } + } /// /// Number of fields in the record @@ -41,10 +58,109 @@ public class RecordSchema : NamedSchema /// /// Map of field name and Field object for faster field lookups /// - private readonly IDictionary fieldLookup; + private IDictionary fieldLookup; - private readonly IDictionary fieldAliasLookup; - private bool request; + private IDictionary fieldAliasLookup; + private readonly bool request; + + /// + /// Creates a new instance of + /// + /// name of the record schema + /// list of fields for the record + /// type of record schema, either record or error + /// list of aliases for the record name + /// custom properties on this schema + /// documentation for this named schema + public static RecordSchema Create(string name, + List fields, + string space = null, + IEnumerable 
aliases = null, + PropertyMap customProperties = null, + string doc = null) + { + return new RecordSchema(Type.Record, + new SchemaName(name, space, null, doc), + Aliases.GetSchemaNames(aliases, name, space), + customProperties, + fields, + false, + CreateFieldMap(fields), + CreateFieldMap(fields, true), + new SchemaNames(), + doc); + } + + private static IEnumerable EnumerateSchemasRecursive(Schema schema) + { + yield return schema; + switch (schema.Tag) + { + case Type.Null: + break; + case Type.Boolean: + break; + case Type.Int: + break; + case Type.Long: + break; + case Type.Float: + break; + case Type.Double: + break; + case Type.Bytes: + break; + case Type.String: + break; + case Type.Record: + var recordSchema = (RecordSchema)schema; + recordSchema.Fields.SelectMany(f => EnumerateSchemasRecursive(f.Schema)); + break; + case Type.Enumeration: + break; + case Type.Array: + var arraySchema = (ArraySchema)schema; + EnumerateSchemasRecursive(arraySchema.ItemSchema); + break; + case Type.Map: + var mapSchema = (MapSchema)schema; + EnumerateSchemasRecursive(mapSchema.ValueSchema); + break; + case Type.Union: + var unionSchema = (UnionSchema)schema; + foreach (var innerSchema in unionSchema.Schemas) + { + EnumerateSchemasRecursive(innerSchema); + } + break; + case Type.Fixed: + break; + case Type.Error: + break; + case Type.Logical: + break; + } + } + + private static IDictionary CreateFieldMap(List fields, bool includeAliases = false) + { + var map = new Dictionary(); + if (fields != null) + { + foreach (Field field in fields) + { + addToFieldMap(map, field.Name, field); + + if (includeAliases && field.Aliases != null) + { + foreach (var alias in field.Aliases) + addToFieldMap(map, alias, field); + } + } + } + + return map; + } /// /// Static function to return new instance of the record schema @@ -99,8 +215,10 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro if (null != field.Aliases) // add aliases to field lookup map so reader 
function will find it when writer field name appears only as an alias on the reader field foreach (string alias in field.Aliases) addToFieldMap(fieldAliasMap, alias, field); + + result._fields = fields; } - catch (SchemaParseException e) + catch (AvroException e) { throw new SchemaParseException($"{e.Message} at '{jfield.Path}'", e); } @@ -121,7 +239,7 @@ internal static RecordSchema NewInstance(Type type, JToken jtok, PropertyMap pro /// map of field aliases and field objects /// list of named schema already read /// documentation for this named schema - private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, + private RecordSchema(Type type, SchemaName name, IList aliases, PropertyMap props, List fields, bool request, IDictionary fieldMap, IDictionary fieldAliasMap, SchemaNames names, string doc) : base(type, name, aliases, props, names, doc) @@ -149,7 +267,7 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri var jorder = JsonHelper.GetOptionalString(jfield, "order"); Field.SortOrder sortorder = Field.SortOrder.ignore; if (null != jorder) - sortorder = (Field.SortOrder) Enum.Parse(typeof(Field.SortOrder), jorder); + sortorder = (Field.SortOrder)Enum.Parse(typeof(Field.SortOrder), jorder); var aliases = Field.GetAliases(jfield); var props = Schema.GetProperties(jfield); @@ -165,10 +283,20 @@ private static Field createField(JToken jfield, int pos, SchemaNames names, stri private static void addToFieldMap(Dictionary map, string name, Field field) { if (map.ContainsKey(name)) - throw new SchemaParseException("field or alias " + name + " is a duplicate name"); + throw new AvroException("field or alias " + name + " is a duplicate name"); map.Add(name, field); } + /// + /// Clones the fields with updated positions. Updates the positions according to the order of the fields in the list. 
+ /// + /// List of fields + /// New list of cloned fields with updated positions + private List SetFieldsPositions(List fields) + { + return fields.Select((field, i) => field.ChangePosition(i)).ToList(); + } + /// /// Returns the field with the given name. /// @@ -354,9 +482,9 @@ public RecordSchemaPair(RecordSchema first, RecordSchema second) * we can detect it. * * The infinite loop happens in ToString(), Equals() and GetHashCode() methods. - * Though it does not happen for CanRead() because of the current implemenation of UnionSchema's can read, - * it could potenitally happen. - * We do a linear seach for the marker as we don't expect the list to be very long. + * Though it does not happen for CanRead() because of the current implementation of UnionSchema's can read, + * it could potentially happen. + * We do a linear search for the marker as we don't expect the list to be very long. */ private T protect(Function bypass, Function main, RecordSchema that) { diff --git a/lang/csharp/src/apache/main/Schema/Schema.cs b/lang/csharp/src/apache/main/Schema/Schema.cs index d14b016dcab..3e54653f015 100644 --- a/lang/csharp/src/apache/main/Schema/Schema.cs +++ b/lang/csharp/src/apache/main/Schema/Schema.cs @@ -196,14 +196,26 @@ internal static Schema ParseJson(JToken jtok, SchemaNames names, string encspace return LogicalSchema.NewInstance(jtok, props, names, encspace); Schema schema = PrimitiveSchema.NewInstance((string)type, props); - if (null != schema) return schema; + if (null != schema) + return schema; return NamedSchema.NewInstance(jo, props, names, encspace); } else if (jtype.Type == JTokenType.Array) return UnionSchema.NewInstance(jtype as JArray, props, names, encspace); - else if (jtype.Type == JTokenType.Object && null != jo["logicalType"]) // logical type based on a complex type - return LogicalSchema.NewInstance(jtok, props, names, encspace); + else if (jtype.Type == JTokenType.Object) + { + if (null != jo["logicalType"]) // logical type based on a 
complex type + { + return LogicalSchema.NewInstance(jtok, props, names, encspace); + } + + var schema = ParseJson(jtype, names, encspace); // primitive schemas are allowed to have additional metadata properties + if (schema is PrimitiveSchema) + { + return schema; + } + } } throw new AvroTypeException($"Invalid JSON for schema: {jtok} at '{jtok.Path}'"); } @@ -369,5 +381,91 @@ protected static int getHashCode(object obj) { return obj == null ? 0 : obj.GetHashCode(); } + + /// + /// Parses the Schema.Type from a string. + /// + /// The type to convert. + /// if set to true [remove quotes]. + /// A Schema.Type unless it could not parse then null + /// + /// usage ParseType("string") returns Schema.Type.String + /// + public static Schema.Type? ParseType(string type, bool removeQuotes = false) + { + string newValue = removeQuotes ? RemoveQuotes(type) : type; + + switch (newValue) + { + case "null": + return Schema.Type.Null; + + case "boolean": + return Schema.Type.Boolean; + + case "int": + return Schema.Type.Int; + + case "long": + return Schema.Type.Long; + + case "float": + return Schema.Type.Float; + + case "double": + return Schema.Type.Double; + + case "bytes": + return Schema.Type.Bytes; + + case "string": + return Schema.Type.String; + + case "record": + return Schema.Type.Record; + + case "enumeration": + return Schema.Type.Enumeration; + + case "array": + return Schema.Type.Array; + + case "map": + return Schema.Type.Map; + + case "union": + return Schema.Type.Union; + + case "fixed": + return Schema.Type.Fixed; + + case "error": + return Schema.Type.Error; + + case "logical": + return Schema.Type.Logical; + + default: + return null; + } + } + + /// + /// Removes the quotes from the first position and last position of the string. + /// + /// The value. 
+ /// + /// If string has a quote at the beginning and the end it removes them, + /// otherwise it returns the original string + /// + private static string RemoveQuotes(string value) + { + if(value.StartsWith("\"") && value.EndsWith("\"")) + { + return value.Substring(1, value.Length - 2); + } + + return value; + } } } diff --git a/lang/csharp/src/apache/main/Schema/SchemaName.cs b/lang/csharp/src/apache/main/Schema/SchemaName.cs index 20cc1b43d89..7716d7a55ff 100644 --- a/lang/csharp/src/apache/main/Schema/SchemaName.cs +++ b/lang/csharp/src/apache/main/Schema/SchemaName.cs @@ -64,31 +64,30 @@ public class SchemaName /// name of the schema /// namespace of the schema /// enclosing namespace of the schema - /// documentation o fthe schema - public SchemaName(String name, String space, String encspace, String documentation) + /// documentation of the schema + public SchemaName(string name, string space, string encspace, string documentation) { if (name == null) { // anonymous - this.Name = this.Space = null; - this.EncSpace = encspace; // need to save enclosing namespace for anonymous types, so named types within the anonymous type can be resolved + Name = Space = null; + EncSpace = encspace; // need to save enclosing namespace for anonymous types, so named types within the anonymous type can be resolved } -#pragma warning disable CA1307 // Specify StringComparison else if (!name.Contains(".")) -#pragma warning restore CA1307 // Specify StringComparison { // unqualified name - this.Space = space; // use default space - this.Name = name; - this.EncSpace = encspace; + Space = space; // use default space + Name = name; + EncSpace = encspace; } else { string[] parts = name.Split('.'); - this.Space = string.Join(".", parts, 0, parts.Length - 1); - this.Name = parts[parts.Length - 1]; - this.EncSpace = encspace; + Space = string.Join(".", parts, 0, parts.Length - 1); + Name = parts[parts.Length - 1]; + EncSpace = encspace; } - this.Documentation = documentation; - 
fullName = string.IsNullOrEmpty(Namespace) ? this.Name : Namespace + "." + this.Name; + + Documentation = documentation; + fullName = string.IsNullOrEmpty(Namespace) ? Name : Namespace + "." + Name; } /// @@ -149,9 +148,7 @@ private static bool areEqual(object obj1, object obj2) /// public override int GetHashCode() { -#pragma warning disable CA1307 // Specify StringComparison return string.IsNullOrEmpty(Fullname) ? 0 : 29 * Fullname.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison } } diff --git a/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs b/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs index 3b12d04ec52..d6c5a45cf12 100644 --- a/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs +++ b/lang/csharp/src/apache/main/Schema/SchemaNormalization.cs @@ -24,13 +24,13 @@ namespace Avro { /// - /// Collection of static methods for generating the cannonical form of schemas. + /// Collection of static methods for generating the canonical form of schemas. /// public static class SchemaNormalization { /// /// Obsolete: This will be removed from the public API in a future version. - /// This should be a private const field, similar to the Java implementation. It appears + /// This should be a private constant field, similar to the Java implementation. It appears /// that this was originally exposed for unit tests. Unit tests should hard-code this value /// rather than access it here. /// @@ -71,7 +71,7 @@ public static string ToParsingForm(Schema s) /// not recognized and an /// ArgumentException is thrown /// - /// Recommended Avro practice dictiates that + /// Recommended Avro practice dictates that /// "CRC-64-AVRO" is used for 64-bit fingerprints, /// "MD5" is used for 128-bit fingerprints, and /// "SHA-256" is used for 256-bit fingerprints. 
diff --git a/lang/csharp/src/apache/main/Schema/UnionSchema.cs b/lang/csharp/src/apache/main/Schema/UnionSchema.cs index 0ffb5e091f7..af9ba758363 100644 --- a/lang/csharp/src/apache/main/Schema/UnionSchema.cs +++ b/lang/csharp/src/apache/main/Schema/UnionSchema.cs @@ -17,9 +17,8 @@ */ using System; using System.Collections.Generic; -using System.Text; +using System.Linq; using Newtonsoft.Json.Linq; -using Newtonsoft.Json; namespace Avro { @@ -67,15 +66,28 @@ internal static UnionSchema NewInstance(JArray jarr, PropertyMap props, SchemaNa return new UnionSchema(schemas, props); } + /// + /// Creates a new + /// + /// The union schemas + /// Dictionary that provides access to custom properties + /// New + public static UnionSchema Create(List schemas, PropertyMap customProperties = null) + { + return new UnionSchema(schemas, customProperties); + } + /// /// Contructor for union schema /// /// - /// dictionary that provides access to custom properties - private UnionSchema(List schemas, PropertyMap props) : base(Type.Union, props) + /// dictionary that provides access to custom properties + private UnionSchema(List schemas, PropertyMap customProperties) + : base(Type.Union, customProperties) { if (schemas == null) throw new ArgumentNullException(nameof(schemas)); + VerifyChildSchemas(schemas); this.Schemas = schemas; } @@ -115,8 +127,21 @@ public int MatchingBranch(Schema s) { if (s is UnionSchema) throw new AvroException("Cannot find a match against union schema"); // Try exact match. - //for (int i = 0; i < Count; i++) if (Schemas[i].Equals(s)) return i; // removed this for performance's sake - for (int i = 0; i < Count; i++) if (Schemas[i].CanRead(s)) return i; + // CanRead might find a compatible schema which can read. e.g. 
double and long + for (int i = 0; i < Count; i++) + { + if (Schemas[i].Equals(s)) + { + return i; + } + } + for (int i = 0; i < Count; i++) + { + if (Schemas[i].CanRead(s)) + { + return i; + } + } return -1; } @@ -161,5 +186,20 @@ public override int GetHashCode() result += getHashCode(Props); return result; } + + private void VerifyChildSchemas(List schemas) + { + if (schemas.Any(schema => schema.Tag == Type.Union)) + { + throw new ArgumentException("Unions may not immediately contain other unions", nameof(schemas)); + } + + IGrouping duplicateType = schemas.GroupBy(schema => schema.Fullname).FirstOrDefault(x => x.Count() > 1); + + if (duplicateType != null) + { + throw new ArgumentException($"Duplicate type in union: {duplicateType.Key}"); + } + } } } diff --git a/lang/csharp/src/apache/main/Specific/ObjectCreator.cs b/lang/csharp/src/apache/main/Specific/ObjectCreator.cs index e69a490283d..073b107958a 100644 --- a/lang/csharp/src/apache/main/Specific/ObjectCreator.cs +++ b/lang/csharp/src/apache/main/Specific/ObjectCreator.cs @@ -58,13 +58,6 @@ public sealed class ObjectCreator private readonly Assembly entryAssembly; private readonly bool diffAssembly; - /// - /// Obsolete: This will be removed from the public API in a future version. - /// - /// Obsolete - [Obsolete("This will be removed from the public API in a future version.")] - public delegate object CtorDelegate(); - /// /// Initializes a new instance of the class. /// @@ -78,57 +71,6 @@ public ObjectCreator() diffAssembly = entryAssembly != null && execAssembly != entryAssembly; } -#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member -#pragma warning disable CA1034 // Nested types should not be visible -#pragma warning disable SA1600 // Elements should be documented - /// - /// Obsolete: This will be removed from the public API in a future version. 
- /// - [Obsolete("This will be removed from the public API in a future version.")] - public struct NameCtorKey : IEquatable - { - public string name { get; private set; } - public Schema.Type type { get; private set; } - public NameCtorKey(string value1, Schema.Type value2) - : this() - { - name = value1; - type = value2; - } - public bool Equals(NameCtorKey other) - { - return Equals(other.name, name) && other.type == type; - } - public override bool Equals(object obj) - { - if (ReferenceEquals(null, obj)) - return false; - if (obj.GetType() != typeof(NameCtorKey)) - return false; - return Equals((NameCtorKey)obj); - } - public override int GetHashCode() - { - unchecked - { -#pragma warning disable CA1307 // Specify StringComparison - return ((name != null ? name.GetHashCode() : 0) * 397) ^ type.GetHashCode(); -#pragma warning restore CA1307 // Specify StringComparison - } - } - public static bool operator ==(NameCtorKey left, NameCtorKey right) - { - return left.Equals(right); - } - public static bool operator !=(NameCtorKey left, NameCtorKey right) - { - return !left.Equals(right); - } - } -#pragma warning restore SA1600 // Elements should be documented -#pragma warning restore CA1034 // Nested types should not be visible -#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member - /// /// Find the type with the given name /// diff --git a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs index bfc88847176..c823253692d 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs @@ -176,6 +176,7 @@ public void WriteArrayValues(object array, WriteItem valueWriter, Encoder encode var list = (IList) array; for (int i = 0; i < list.Count; i++ ) { + encoder.StartItem(); valueWriter(list[i], encoder); } } diff --git a/lang/csharp/src/apache/main/Specific/SpecificReader.cs 
b/lang/csharp/src/apache/main/Specific/SpecificReader.cs index a8e8e5970bf..1019fa36ced 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificReader.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificReader.cs @@ -72,7 +72,7 @@ public SpecificReader(SpecificDefaultReader reader) /// Generic read function /// /// object to store data read - /// decorder to use for reading data + /// decoder to use for reading data /// public T Read(T reuse, Decoder dec) { @@ -130,20 +130,22 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs) + { + if (writerSchema.Contains(rf.Name)) continue; - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading - obj = rec.Get(rf.Pos); - rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + obj = rec.Get(rf.Pos); + rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; @@ -155,10 +157,10 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc /// /// If appropriate, uses this object instead of creating a new one. /// The FixedSchema the writer used during serialization. - /// The schema that the readr uses. 
Must be a FixedSchema with the same + /// The schema that the reader uses. Must be a FixedSchema with the same /// size as the writerSchema. /// The decoder for deserialization. - /// The deserilized object. + /// The deserialized object. protected override object ReadFixed(object reuse, FixedSchema writerSchema, Schema readerSchema, Decoder d) { FixedSchema rs = readerSchema as FixedSchema; @@ -220,7 +222,7 @@ protected override object ReadArray(object reuse, ArraySchema writerSchema, Sche } /// - /// Deserialized an avro map. The default implemenation creats a new map using CreateMap() and then + /// Deserialized an avro map. The default implementation creates a new map using CreateMap() and then /// adds elements to the map using AddMapEntry(). /// /// If appropriate, use this instead of creating a new map object. diff --git a/lang/csharp/src/apache/main/Specific/SpecificWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificWriter.cs index b595241f39a..53d6407e947 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificWriter.cs @@ -149,7 +149,7 @@ protected override void WriteMap(MapSchema schema, object value, Encoder encoder if (map == null) throw new AvroTypeException("Map does not implement non-generic IDictionary"); - encoder.WriteArrayStart(); + encoder.WriteMapStart(); encoder.SetItemCount(map.Count); foreach (System.Collections.DictionaryEntry de in map) { diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs new file mode 100644 index 00000000000..36014c97aef --- /dev/null +++ b/lang/csharp/src/apache/main/Util/LocalTimestampMicrosecond.cs @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Avro.Util +{ + /// + /// The 'local-timestamp-micros' logical type. + /// + public class LocalTimestampMicrosecond : LogicalUnixEpochType + { + /// + /// The logical type name for LocalTimestampMicrosecond. + /// + public static readonly string LogicalTypeName = "local-timestamp-micros"; + + /// + /// Initializes a new LocalTimestampMicrosecond logical type. + /// + public LocalTimestampMicrosecond() + : base(LogicalTypeName) + { + } + + /// + public override void ValidateSchema(LogicalSchema schema) + { + if (Schema.Type.Long != schema.BaseSchema.Tag) + { + throw new AvroTypeException("'local-timestamp-micros' can only be used with an underlying long type"); + } + } + + /// + public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; + } + + /// + public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) + { + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond).ToLocalTime(); + } + } +} diff --git a/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs new file mode 100644 index 00000000000..4ae86fd087b --- /dev/null +++ 
b/lang/csharp/src/apache/main/Util/LocalTimestampMillisecond.cs @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; + +namespace Avro.Util +{ + /// + /// The 'local-timestamp-millis' logical type. + /// + public class LocalTimestampMillisecond : LogicalUnixEpochType + { + /// + /// The logical type name for LocalTimestampMillisecond. + /// + public static readonly string LogicalTypeName = "local-timestamp-millis"; + + /// + /// Initializes a new LocalTimestampMillisecond logical type. 
+ /// + public LocalTimestampMillisecond() + : base(LogicalTypeName) + { + } + + /// + public override void ValidateSchema(LogicalSchema schema) + { + if (Schema.Type.Long != schema.BaseSchema.Tag) + { + throw new AvroTypeException("'local-timestamp-millis' can only be used with an underlying long type"); + } + } + + /// + public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) + { + DateTime date = ((DateTime)logicalValue).ToUniversalTime(); + return (long)(date - UnixEpochDateTime).TotalMilliseconds; + } + + /// + public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) + { + return UnixEpochDateTime.AddMilliseconds((long)baseValue).ToLocalTime(); + } + } +} diff --git a/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs b/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs index e7fa3155641..f4086ab5a27 100644 --- a/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs +++ b/lang/csharp/src/apache/main/Util/LogicalTypeFactory.cs @@ -39,6 +39,8 @@ private LogicalTypeFactory() { { Decimal.LogicalTypeName, new Decimal() }, { Date.LogicalTypeName, new Date() }, + { LocalTimestampMillisecond.LogicalTypeName, new LocalTimestampMillisecond() }, + { LocalTimestampMicrosecond.LogicalTypeName, new LocalTimestampMicrosecond() }, { TimeMillisecond.LogicalTypeName, new TimeMillisecond() }, { TimeMicrosecond.LogicalTypeName, new TimeMicrosecond() }, { TimestampMillisecond.LogicalTypeName, new TimestampMillisecond() }, diff --git a/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs b/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs index f4187d070ca..f88b733fd35 100644 --- a/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs +++ b/lang/csharp/src/apache/main/Util/LogicalUnixEpochType.cs @@ -31,6 +31,11 @@ public abstract class LogicalUnixEpochType : LogicalType /// protected static readonly DateTime UnixEpochDateTime = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); + /// + /// Number of 
ticks per microsecond. + /// + protected const long TicksPerMicrosecond = TimeSpan.TicksPerMillisecond / 1000; + /// /// Initializes the base logical type. /// diff --git a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs index f561d6ff8d6..c3226f625ef 100644 --- a/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMicrosecond.cs @@ -25,8 +25,8 @@ namespace Avro.Util /// public class TimeMicrosecond : LogicalUnixEpochType { - private static readonly TimeSpan _maxTime = new TimeSpan(23, 59, 59); - + private static readonly TimeSpan _exclusiveUpperBound = TimeSpan.FromDays(1); + /// /// The logical type name for TimeMicrosecond. /// @@ -50,17 +50,29 @@ public override object ConvertToBaseValue(object logicalValue, LogicalSchema sch { var time = (TimeSpan)logicalValue; - if (time > _maxTime) - throw new ArgumentOutOfRangeException(nameof(logicalValue), "A 'time-micros' value can only have the range '00:00:00' to '23:59:59'."); + ThrowIfOutOfRange(time, nameof(logicalValue)); - return (long)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds * 1000; + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time.Ticks / TicksPerMicrosecond'; + return (time - UnixEpochDateTime.TimeOfDay).Ticks / TicksPerMicrosecond; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (long)baseValue / 1000; - return UnixEpochDateTime.TimeOfDay.Add(TimeSpan.FromMilliseconds(noMs)); + var time = TimeSpan.FromTicks((long)baseValue * TicksPerMicrosecond); + + ThrowIfOutOfRange(time, nameof(baseValue)); + + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00', so the Add is meaningless. 
This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + + private static void ThrowIfOutOfRange(TimeSpan time, string paramName) + { + if (time.Ticks < 0 || time >= _exclusiveUpperBound) + { + throw new ArgumentOutOfRangeException(paramName, $"A '{LogicalTypeName}' value must be at least '{TimeSpan.Zero}' and less than '{_exclusiveUpperBound}'."); + } } } } diff --git a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs index 9008fa38abf..d3132560063 100644 --- a/lang/csharp/src/apache/main/Util/TimeMillisecond.cs +++ b/lang/csharp/src/apache/main/Util/TimeMillisecond.cs @@ -25,7 +25,7 @@ namespace Avro.Util /// public class TimeMillisecond : LogicalUnixEpochType { - private static readonly TimeSpan _maxTime = new TimeSpan(23, 59, 59); + private static readonly TimeSpan _exclusiveUpperBound = TimeSpan.FromDays(1); /// /// The logical type name for TimeMillisecond. @@ -50,17 +50,29 @@ public override object ConvertToBaseValue(object logicalValue, LogicalSchema sch { var time = (TimeSpan)logicalValue; - if (time > _maxTime) - throw new ArgumentOutOfRangeException(nameof(logicalValue), "A 'time-millis' value can only have the range '00:00:00' to '23:59:59'."); + ThrowIfOutOfRange(time, nameof(logicalValue)); + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. This could be 'return time.TotalMilliseconds; return (int)(time - UnixEpochDateTime.TimeOfDay).TotalMilliseconds; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (int)baseValue; - return UnixEpochDateTime.TimeOfDay.Add(TimeSpan.FromMilliseconds(noMs)); + var time = TimeSpan.FromMilliseconds((int)baseValue); + + ThrowIfOutOfRange(time, nameof(baseValue)); + + // Note: UnixEpochDateTime.TimeOfDay is '00:00:00'. 
This could be 'return time;' + return UnixEpochDateTime.TimeOfDay.Add(time); + } + + private static void ThrowIfOutOfRange(TimeSpan time, string paramName) + { + if (time.Ticks < 0 || time >= _exclusiveUpperBound) + { + throw new ArgumentOutOfRangeException(paramName, $"A '{LogicalTypeName}' value must be at least '{TimeSpan.Zero}' and less than '{_exclusiveUpperBound}'."); + } } } } diff --git a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs index 54a421a5d71..4d8b1cc6cad 100644 --- a/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs +++ b/lang/csharp/src/apache/main/Util/TimestampMicrosecond.cs @@ -47,14 +47,13 @@ public override void ValidateSchema(LogicalSchema schema) public override object ConvertToBaseValue(object logicalValue, LogicalSchema schema) { var date = ((DateTime)logicalValue).ToUniversalTime(); - return (long)((date - UnixEpochDateTime).TotalMilliseconds * 1000); + return (date - UnixEpochDateTime).Ticks / TicksPerMicrosecond; } /// public override object ConvertToLogicalValue(object baseValue, LogicalSchema schema) { - var noMs = (long)baseValue / 1000; - return UnixEpochDateTime.AddMilliseconds(noMs); + return UnixEpochDateTime.AddTicks((long)baseValue * TicksPerMicrosecond); } } } diff --git a/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj b/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj index 7ba943cacd7..7f06f64dc65 100644 --- a/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj +++ b/lang/csharp/src/apache/msbuild/Avro.msbuild.csproj @@ -16,11 +16,10 @@ --> - + - netstandard2.0 - net461;netstandard2.0 + $(DefaultLibraryTargetFrameworks) Avro.msbuild Avro.msbuild false @@ -33,17 +32,11 @@ $(NoWarn);NU5104 - + - - - - - - diff --git a/lang/csharp/src/apache/perf/Avro.perf.csproj b/lang/csharp/src/apache/perf/Avro.perf.csproj index 00b94aa298e..cae41aaa6e7 100644 --- a/lang/csharp/src/apache/perf/Avro.perf.csproj +++ 
b/lang/csharp/src/apache/perf/Avro.perf.csproj @@ -16,10 +16,11 @@ --> + + Exe - net5.0 - net461;net5.0 + $(DefaultExeTargetFrameworks) Avro.perf Avro.perf false diff --git a/lang/csharp/src/apache/test/Avro.test.csproj b/lang/csharp/src/apache/test/Avro.test.csproj index ff2cfa09fef..3ba3a0ffa89 100644 --- a/lang/csharp/src/apache/test/Avro.test.csproj +++ b/lang/csharp/src/apache/test/Avro.test.csproj @@ -16,15 +16,16 @@ --> - + - netcoreapp2.1;netcoreapp3.1;net5.0 - net461;netcoreapp2.1;netcoreapp3.1;net5.0 + $(DefaultUnitTestTargetFrameworks) Avro.test Avro.test false false + True + ..\..\..\Avro.snk @@ -33,18 +34,32 @@ + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + - - + + - + + + + diff --git a/lang/csharp/src/apache/test/AvroDecimalTest.cs b/lang/csharp/src/apache/test/AvroDecimalTest.cs index e10210bf31c..4a8654aa8a3 100644 --- a/lang/csharp/src/apache/test/AvroDecimalTest.cs +++ b/lang/csharp/src/apache/test/AvroDecimalTest.cs @@ -15,6 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + using NUnit.Framework; namespace Avro.test diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs new file mode 100644 index 00000000000..0ce66020717 --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenHelper.cs @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Text; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Emit; +using NUnit.Framework; +using Avro.Specific; + +namespace Avro.Test.AvroGen +{ + class AvroGenToolResult + { + public int ExitCode { get; set; } + public string[] StdOut { get; set; } + public string[] StdErr { get; set; } + } + + class AvroGenHelper + { + public static AvroGenToolResult RunAvroGenTool(params string[] args) + { + // Save stdout and stderr + TextWriter conOut = Console.Out; + TextWriter conErr = Console.Error; + + try + { + AvroGenToolResult result = new AvroGenToolResult(); + StringBuilder strBuilderOut = new StringBuilder(); + StringBuilder strBuilderErr = new StringBuilder(); + + using (StringWriter writerOut = new StringWriter(strBuilderOut)) + using (StringWriter writerErr = new StringWriter(strBuilderErr)) + { + writerOut.NewLine = "\n"; + writerErr.NewLine = "\n"; + + // Overwrite stdout and stderr to be able to capture console output + Console.SetOut(writerOut); + Console.SetError(writerErr); + + result.ExitCode = AvroGenTool.Main(args.ToArray()); + + writerOut.Flush(); + writerErr.Flush(); + + result.StdOut = strBuilderOut.Length == 0 ? Array.Empty() : strBuilderOut.ToString().Split(writerOut.NewLine); + result.StdErr = strBuilderErr.Length == 0 ? 
Array.Empty() : strBuilderErr.ToString().Split(writerErr.NewLine); + } + + return result; + } + finally + { + // Restore console + Console.SetOut(conOut); + Console.SetError(conErr); + } + } + + public static Assembly CompileCSharpFilesIntoLibrary(IEnumerable sourceFiles, string assemblyName = null, bool loadAssembly = true) + { + // Create random assembly name if not specified + if (assemblyName == null) + assemblyName = Path.GetRandomFileName(); + + // Base path to assemblies .NET assemblies + var assemblyPath = Path.GetDirectoryName(typeof(object).Assembly.Location); + + using (var compilerStream = new MemoryStream()) + { + List assemblies = new List() + { + typeof(object).Assembly.Location, + typeof(Schema).Assembly.Location, + typeof(System.CodeDom.Compiler.GeneratedCodeAttribute).Assembly.Location, + Path.Combine(assemblyPath, "System.Runtime.dll"), + Path.Combine(assemblyPath, "netstandard.dll") + }; + + // Create compiler + CSharpCompilation compilation = CSharpCompilation + .Create(assemblyName) + .WithOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary)) + .AddReferences(assemblies.Select(path => MetadataReference.CreateFromFile(path))) + .AddSyntaxTrees(sourceFiles.Select(sourceFile => + { + string sourceText = System.IO.File.ReadAllText(sourceFile); + return CSharpSyntaxTree.ParseText(sourceText); + })); + + // Compile + EmitResult compilationResult = compilation.Emit(compilerStream); + +#if DEBUG + if (!compilationResult.Success) + { + foreach (Diagnostic diagnostic in compilationResult.Diagnostics) + { + if (diagnostic.IsWarningAsError || diagnostic.Severity == DiagnosticSeverity.Error) + { + TestContext.WriteLine($"{diagnostic.Id} - {diagnostic.GetMessage()} - {diagnostic.Location}"); + } + } + } +#endif + + Assert.That(compilationResult.Success, Is.True); + + if (!loadAssembly) + { + return null; + } + + // Load assembly from stream + compilerStream.Seek(0, SeekOrigin.Begin); + return Assembly.Load(compilerStream.ToArray()); + } 
+ } + + public static string CreateEmptyTemporaryFolder(out string uniqueId, string path = null) + { + // Create unique id + uniqueId = Guid.NewGuid().ToString(); + + // Temporary folder name in working folder or the specified path + string tempFolder = Path.Combine(path ?? TestContext.CurrentContext.WorkDirectory, uniqueId); + + // Create folder + Directory.CreateDirectory(tempFolder); + + // Make sure it is empty + Assert.That(new DirectoryInfo(tempFolder), Is.Empty); + + return tempFolder; + } + + public static Assembly CompileCSharpFilesAndCheckTypes( + string outputDir, + string assemblyName, + IEnumerable typeNamesToCheck = null, + IEnumerable generatedFilesToCheck = null) + { + // Check if all generated files exist + if (generatedFilesToCheck != null) + { + foreach (string generatedFile in generatedFilesToCheck) + { + Assert.That(new FileInfo(Path.Combine(outputDir, generatedFile)), Does.Exist); + } + } + + // Compile into netstandard library and load assembly + Assembly assembly = CompileCSharpFilesIntoLibrary( + new DirectoryInfo(outputDir) + .EnumerateFiles("*.cs", SearchOption.AllDirectories) + .Select(fi => fi.FullName), + assemblyName); + + if (typeNamesToCheck != null) + { + // Check if the compiled code has the same number of types defined as the check list + // Note: Ignore types which are injected by the compiler (System.* and Microsoft.*), e.g. 
Microsoft.CodeAnalysis.EmbeddedAttribute + Assert.That( + typeNamesToCheck.Count(), + Is.EqualTo( + assembly + .DefinedTypes + .Where(t => + { + return !t.Namespace.StartsWith("Microsoft.", StringComparison.OrdinalIgnoreCase) && + !t.Namespace.StartsWith("System.", StringComparison.OrdinalIgnoreCase); + }) + .Count())); + + // Check if types available in compiled assembly + foreach (string typeName in typeNamesToCheck) + { + Type type = assembly.GetType(typeName); + Assert.That(type, Is.Not.Null); + + // Protocols are abstract and cannot be instantiated + if (typeof(ISpecificProtocol).IsAssignableFrom(type)) + { + Assert.That(type.IsAbstract, Is.True); + + // If directly inherited from ISpecificProtocol, use reflection to read static private field + // holding the protocol. Callback objects are not directly inherited from ISpecificProtocol, + // so private fields in the base class cannot be accessed + if (type.BaseType.Equals(typeof(ISpecificProtocol))) + { + // Use reflection to read static field, holding the protocol + FieldInfo protocolField = type.GetField("protocol", BindingFlags.NonPublic | BindingFlags.Static); + Protocol protocol = protocolField.GetValue(null) as Protocol; + + Assert.That(protocol, Is.Not.Null); + } + } + else + { + Assert.That(type.IsClass || type.IsEnum, Is.True); + + // Instantiate object + object obj = Activator.CreateInstance(type); + Assert.That(obj, Is.Not.Null); + + // If ISpecificRecord, call its member for sanity check + if (obj is ISpecificRecord record) + { + // Read record's schema object + Assert.That(record.Schema, Is.Not.Null); + // Force exception by reading/writing invalid field + Assert.Throws(() => record.Get(-1)); + Assert.Throws(() => record.Put(-1, null)); + } + } + } + } + + return assembly; + } + + public static Assembly TestSchema( + string schema, + IEnumerable typeNamesToCheck = null, + IEnumerable> namespaceMapping = null, + IEnumerable generatedFilesToCheck = null, + bool skipDirectories = false) + { + // 
Create temp folder + string outputDir = CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save schema + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc"); + System.IO.File.WriteAllText(schemaFileName, schema); + + // Generate from schema file + Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, namespaceMapping ?? new Dictionary(), skipDirectories), Is.EqualTo(0)); + + return CompileCSharpFilesAndCheckTypes(outputDir, uniqueId, typeNamesToCheck, generatedFilesToCheck); + } + finally + { + Directory.Delete(outputDir, true); + } + } + + public static Assembly TestProtocol( + string protocol, + IEnumerable typeNamesToCheck = null, + IEnumerable> namespaceMapping = null, + IEnumerable generatedFilesToCheck = null) + { + // Create temp folder + string outputDir = CreateEmptyTemporaryFolder(out string uniqueId); + + try + { + // Save protocol + string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avpr"); + System.IO.File.WriteAllText(schemaFileName, protocol); + + // Generate from protocol file + Assert.That(AvroGenTool.GenProtocol(schemaFileName, outputDir, namespaceMapping ?? new Dictionary()), Is.EqualTo(0)); + + return CompileCSharpFilesAndCheckTypes(outputDir, uniqueId, typeNamesToCheck, generatedFilesToCheck); + } + finally + { + Directory.Delete(outputDir, true); + } + } + } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs new file mode 100644 index 00000000000..b408650369f --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenProtocolTests.cs @@ -0,0 +1,517 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System.Collections.Generic; +using NUnit.Framework; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenProtocolTests + { + private const string _baseball = @" +{ + ""protocol"" : ""Baseball"", + ""namespace"" : ""avro.examples.baseball"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""enum"", + ""name"" : ""Position"", + ""symbols"" : [ ""P"", ""C"", ""B1"", ""B2"", ""B3"", ""SS"", ""LF"", ""CF"", ""RF"", ""DH"" ] + }, { + ""type"" : ""record"", + ""name"" : ""Player"", + ""fields"" : [ { + ""name"" : ""number"", + ""type"" : ""int"" + }, { + ""name"" : ""first_name"", + ""type"" : ""string"" + }, { + ""name"" : ""last_name"", + ""type"" : ""string"" + }, { + ""name"" : ""position"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""Position"" + } + } ] + } ], + ""messages"" : { + } +} +"; + private const string _comments = @" +{ + ""protocol"" : ""Comments"", + ""namespace"" : ""testing"", + ""types"" : [ { + ""type"" : ""enum"", + ""name"" : ""DocumentedEnum"", + ""doc"" : ""Documented Enum"", + ""symbols"" : [ ""A"", ""B"", ""C"" ], + ""default"" : ""A"" + }, { + ""type"" : ""enum"", + ""name"" : ""UndocumentedEnum"", + ""symbols"" : [ ""D"", ""E"" ] + }, { + ""type"" : ""fixed"", + ""name"" : ""DocumentedFixed"", + ""doc"" : ""Documented Fixed Type"", + ""size"" : 16 + }, { + ""type"" : ""fixed"", + ""name"" : ""UndocumentedFixed"", + ""size"" : 16 + }, { + ""type"" : ""error"", + ""name"" : ""DocumentedError"", + ""doc"" : ""Documented Error"", + ""fields"" : [ { + ""name"" : ""reason"", + ""type"" : ""string"", + ""doc"" : ""Documented Reason Field"" + }, { + ""name"" : ""explanation"", + ""type"" : ""string"", + ""doc"" : ""Default Doc Explanation Field"" + } ] + }, { + ""type"" : ""record"", + ""name"" : ""UndocumentedRecord"", + ""fields"" : [ { + ""name"" : ""description"", + ""type"" : ""string"" 
+ } ] + } ], + ""messages"" : { + ""documentedMethod"" : { + ""doc"" : ""Documented Method"", + ""request"" : [ { + ""name"" : ""message"", + ""type"" : ""string"", + ""doc"" : ""Documented Parameter"" + }, { + ""name"" : ""defMsg"", + ""type"" : ""string"", + ""doc"" : ""Default Documented Parameter"" + } ], + ""response"" : ""null"", + ""errors"" : [ ""DocumentedError"" ] + }, + ""undocumentedMethod"" : { + ""request"" : [ { + ""name"" : ""message"", + ""type"" : ""string"" + } ], + ""response"" : ""null"" + } + } +} +"; + + private const string _interop = @" +{ + ""protocol"" : ""InteropProtocol"", + ""namespace"" : ""org.apache.avro.interop"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""record"", + ""name"" : ""Label"", + ""fields"" : [ { + ""name"" : ""label"", + ""type"" : ""string"" + } ] + }, { + ""type"" : ""enum"", + ""name"" : ""Kind"", + ""symbols"" : [ ""A"", ""B"", ""C"" ] + }, { + ""type"" : ""fixed"", + ""name"" : ""MD5"", + ""size"" : 16 + }, { + ""type"" : ""record"", + ""name"" : ""Node"", + ""fields"" : [ { + ""name"" : ""label"", + ""type"" : ""string"" + }, { + ""name"" : ""children"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""Node"" + }, + ""default"" : [ ] + } ] + }, { + ""type"" : ""record"", + ""name"" : ""Interop"", + ""fields"" : [ { + ""name"" : ""intField"", + ""type"" : ""int"", + ""default"" : 1 + }, { + ""name"" : ""longField"", + ""type"" : ""long"", + ""default"" : -1 + }, { + ""name"" : ""stringField"", + ""type"" : ""string"" + }, { + ""name"" : ""boolField"", + ""type"" : ""boolean"", + ""default"" : false + }, { + ""name"" : ""floatField"", + ""type"" : ""float"", + ""default"" : 0.0 + }, { + ""name"" : ""doubleField"", + ""type"" : ""double"", + ""default"" : -1.0E12 + }, { + ""name"" : ""nullField"", + ""type"" : ""null"" + }, { + ""name"" : ""arrayField"", + ""type"" : { + ""type"" : ""array"", + ""items"" : ""double"" + }, + ""default"" : [ ] + }, { + ""name"" : ""mapField"", + ""type"" : { + ""type"" : ""map"", + ""values"" : ""Label"" + } + }, { + ""name"" : ""unionField"", + ""type"" : [ ""boolean"", ""double"", { + ""type"" : ""array"", + ""items"" : ""bytes"" + } ] + }, { + ""name"" : ""enumField"", + ""type"" : ""Kind"" + }, { + ""name"" : ""fixedField"", + 
""type"" : ""MD5"" + }, { + ""name"" : ""recordField"", + ""type"" : ""Node"" + } ] + } ], + ""messages"" : { } +} +"; + private const string _namespaces = @" +{ + ""protocol"" : ""TestNamespace"", + ""namespace"" : ""avro.test.protocol"", + ""doc"" : ""Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\""License\""); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \""AS IS\"" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License."", + ""types"" : [ { + ""type"" : ""fixed"", + ""name"" : ""FixedInOtherNamespace"", + ""namespace"" : ""avro.test.fixed"", + ""size"" : 16 + }, { + ""type"" : ""fixed"", + ""name"" : ""FixedInThisNamespace"", + ""size"" : 16 + }, { + ""type"" : ""record"", + ""name"" : ""RecordInOtherNamespace"", + ""namespace"" : ""avro.test.record"", + ""fields"" : [ ] + }, { + ""type"" : ""error"", + ""name"" : ""ErrorInOtherNamespace"", + ""namespace"" : ""avro.test.error"", + ""fields"" : [ ] + }, { + ""type"" : ""enum"", + ""name"" : ""EnumInOtherNamespace"", + ""namespace"" : ""avro.test.enum"", + ""symbols"" : [ ""FOO"" ] + }, { + ""type"" : ""record"", + ""name"" : ""RefersToOthers"", + ""fields"" : [ { + ""name"" : ""someFixed"", + ""type"" : ""avro.test.fixed.FixedInOtherNamespace"" + }, { + ""name"" : ""someRecord"", + ""type"" : ""avro.test.record.RecordInOtherNamespace"" + }, { + ""name"" : ""someError"", + ""type"" : ""avro.test.error.ErrorInOtherNamespace"" 
+ }, { + ""name"" : ""someEnum"", + ""type"" : ""avro.test.enum.EnumInOtherNamespace"" + }, { + ""name"" : ""thisFixed"", + ""type"" : ""FixedInThisNamespace"" + } ] + } ], + ""messages"" : { + } +} +"; + private const string _forwardRef = @" +{ + ""protocol"": ""Import"", + ""namespace"": ""org.foo"", + ""types"": [ + { + ""type"": ""record"", + ""name"": ""ANameValue"", + ""fields"": [ + { ""name"":""name"", ""type"": ""string"", ""doc"":""the name"" }, + { ""name"": ""value"", ""type"": ""string"", ""doc"": ""the value"" }, + { ""name"": ""type"", ""type"": { ""type"": ""enum"", ""name"":""ValueType"", ""symbols"": [""JSON"",""BASE64BIN"",""PLAIN""] }, ""default"": ""PLAIN"" } + ] + } + ], + ""messages"": { } +} +"; + private const string _unicode = @" +{ + ""protocol"" : ""ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹"", + ""namespace"" : ""org.avro.test"", + ""doc"" : ""This is a test that UTF8 functions correctly.\nこぎテ゚トでは、UTF - 8ã§æ­Ŗã—ãæŠŸčƒŊしãĻいる。\nčŋ™æ˜¯ä¸€ä¸Ēæĩ‹č¯•īŧŒUTF - 8įš„æ­Ŗå¸¸čŋčĄŒã€‚"", + ""types"" : [ { + ""type"" : ""record"", + ""name"" : ""ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°"", + ""fields"" : [ { + ""name"" : ""ĐĄŅ‚Ņ€ĐžĐēĐžĐ˛Ņ‹Đš"", + ""type"" : ""string"" + }, { + ""name"" : ""文字列"", + ""type"" : ""string"" + } ] + } ], + ""messages"" : { + } +} +"; + + private const string _myProtocol = @" +{ + ""protocol"" : ""MyProtocol"", + ""namespace"" : ""com.foo"", + ""types"" : [ + { + ""type"" : ""record"", + ""name"" : ""A"", + ""fields"" : [ { ""name"" : ""f1"", ""type"" : ""long"" } ] + }, + { + ""type"" : ""enum"", + ""name"" : ""MyEnum"", + ""symbols"" : [ ""A"", ""B"", ""C"" ] + }, + { + ""type"": ""fixed"", + ""size"": 16, + ""name"": ""MyFixed"" + }, + { + ""type"" : ""record"", + ""name"" : ""Z"", + ""fields"" : + [ + { ""name"" : ""myUInt"", ""type"" : [ ""int"", ""null"" ] }, + { ""name"" : ""myULong"", ""type"" : [ ""long"", ""null"" ] }, + { ""name"" : ""myUBool"", ""type"" : [ ""boolean"", ""null"" ] }, + { ""name"" : ""myUDouble"", ""type"" : [ ""double"", ""null"" ] }, 
+ { ""name"" : ""myUFloat"", ""type"" : [ ""float"", ""null"" ] }, + { ""name"" : ""myUBytes"", ""type"" : [ ""bytes"", ""null"" ] }, + { ""name"" : ""myUString"", ""type"" : [ ""string"", ""null"" ] }, + + { ""name"" : ""myInt"", ""type"" : ""int"" }, + { ""name"" : ""myLong"", ""type"" : ""long"" }, + { ""name"" : ""myBool"", ""type"" : ""boolean"" }, + { ""name"" : ""myDouble"", ""type"" : ""double"" }, + { ""name"" : ""myFloat"", ""type"" : ""float"" }, + { ""name"" : ""myBytes"", ""type"" : ""bytes"" }, + { ""name"" : ""myString"", ""type"" : ""string"" }, + { ""name"" : ""myNull"", ""type"" : ""null"" }, + + { ""name"" : ""myFixed"", ""type"" : ""MyFixed"" }, + { ""name"" : ""myA"", ""type"" : ""A"" }, + { ""name"" : ""myE"", ""type"" : ""MyEnum"" }, + { ""name"" : ""myArray"", ""type"" : { ""type"" : ""array"", ""items"" : ""bytes"" } }, + { ""name"" : ""myArray2"", ""type"" : { ""type"" : ""array"", ""items"" : { ""type"" : ""record"", ""name"" : ""newRec"", ""fields"" : [ { ""name"" : ""f1"", ""type"" : ""long""} ] } } }, + { ""name"" : ""myMap"", ""type"" : { ""type"" : ""map"", ""values"" : ""string"" } }, + { ""name"" : ""myMap2"", ""type"" : { ""type"" : ""map"", ""values"" : ""newRec"" } }, + { ""name"" : ""myObject"", ""type"" : [ ""MyEnum"", ""A"", ""null"" ] }, + { ""name"" : ""myArray3"", ""type"" : { ""type"" : ""array"", ""items"" : { ""type"" : ""array"", ""items"" : [ ""double"", ""string"", ""null"" ] } } } + ] + } + ] +}"; + + [TestCase( + _baseball, + new string[] + { + "avro.examples.baseball.Baseball", + "avro.examples.baseball.BaseballCallback", + "avro.examples.baseball.Player", + "avro.examples.baseball.Position" + }, + new string[] + { + "avro/examples/baseball/Baseball.cs", + "avro/examples/baseball/BaseballCallback.cs", + "avro/examples/baseball/Player.cs", + "avro/examples/baseball/Position.cs" + })] + [TestCase( + _comments, + new string[] + { + "testing.Comments", + "testing.CommentsCallback", + "testing.DocumentedEnum", + 
"testing.DocumentedError", + "testing.DocumentedFixed", + "testing.UndocumentedEnum", + "testing.UndocumentedFixed", + "testing.UndocumentedRecord" + }, + new string[] + { + "testing/Comments.cs", + "testing/CommentsCallback.cs", + "testing/DocumentedEnum.cs", + "testing/DocumentedError.cs", + "testing/DocumentedFixed.cs", + "testing/UndocumentedEnum.cs", + "testing/UndocumentedFixed.cs", + "testing/UndocumentedRecord.cs" + })] + [TestCase( + _interop, + new string[] + { + "org.apache.avro.interop.Label", + "org.apache.avro.interop.Interop", + "org.apache.avro.interop.InteropProtocol", + "org.apache.avro.interop.InteropProtocolCallback", + "org.apache.avro.interop.Kind", + "org.apache.avro.interop.MD5", + "org.apache.avro.interop.Node", + }, + new string[] + { + "org/apache/avro/interop/Label.cs", + "org/apache/avro/interop/Interop.cs", + "org/apache/avro/interop/InteropProtocol.cs", + "org/apache/avro/interop/InteropProtocolCallback.cs", + "org/apache/avro/interop/Kind.cs", + "org/apache/avro/interop/MD5.cs", + "org/apache/avro/interop/Node.cs", + })] + [TestCase( + _namespaces, + new string[] + { + "avro.test.enum.EnumInOtherNamespace", + "avro.test.error.ErrorInOtherNamespace", + "avro.test.fixed.FixedInOtherNamespace", + "avro.test.protocol.FixedInThisNamespace", + "avro.test.protocol.RefersToOthers", + "avro.test.protocol.TestNamespace", + "avro.test.protocol.TestNamespaceCallback", + "avro.test.record.RecordInOtherNamespace" + }, + new string[] + { + "avro/test/enum/EnumInOtherNamespace.cs", + "avro/test/error/ErrorInOtherNamespace.cs", + "avro/test/fixed/FixedInOtherNamespace.cs", + "avro/test/protocol/FixedInThisNamespace.cs", + "avro/test/protocol/RefersToOthers.cs", + "avro/test/protocol/TestNamespace.cs", + "avro/test/protocol/TestNamespaceCallback.cs", + "avro/test/record/RecordInOtherNamespace.cs" + })] + [TestCase( + _forwardRef, + new string[] + { + "org.foo.ANameValue", + "org.foo.Import", + "org.foo.ImportCallback", + "org.foo.ValueType" + }, + new 
string[] + { + "org/foo/ANameValue.cs", + "org/foo/Import.cs", + "org/foo/ImportCallback.cs", + "org/foo/ValueType.cs" + })] + [TestCase( + _unicode, + new string[] + { + "org.avro.test.ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹", + "org.avro.test.ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹Callback", + "org.avro.test.ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°" + }, + new string[] + { + "org/avro/test/ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹.cs", + "org/avro/test/ĐŸŅ€ĐžŅ‚ĐžĐēĐžĐģŅ‹Callback.cs", + "org/avro/test/ĐĄŅ‚Ņ€ŅƒĐēŅ‚ŅƒŅ€Đ°.cs" + })] + [TestCase( + _myProtocol, + new string[] + { + "com.foo.A", + "com.foo.MyEnum", + "com.foo.MyFixed", + "com.foo.MyProtocol", + "com.foo.MyProtocolCallback", + "com.foo.newRec", + "com.foo.Z" + }, + new string[] + { + "com/foo/A.cs", + "com/foo/MyEnum.cs", + "com/foo/MyFixed.cs", + "com/foo/MyProtocol.cs", + "com/foo/MyProtocolCallback.cs", + "com/foo/newRec.cs", + "com/foo/Z.cs" + })]
+        // Parameterized test: forwards the protocol JSON, the expected type names and the
+        // expected generated file paths of each TestCase to AvroGenHelper.TestProtocol.
+        // Both sequences carry strings (every TestCase above passes string[]), so the
+        // parameters are declared as IEnumerable<string>.
+        public void GenerateProtocol(string protocol, IEnumerable<string> typeNamesToCheck, IEnumerable<string> generatedFilesToCheck)
+        {
+            AvroGenHelper.TestProtocol(protocol, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck);
+        }
+ } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs new file mode 100644 index 00000000000..807acbda92a --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenSchemaTests.cs @@ -0,0 +1,817 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Collections.Generic; +using Microsoft.CodeAnalysis; +using NUnit.Framework; +using Avro.Specific; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenSchemaTests + { + private const string _customConversionWithLogicalTypes = @" +{ + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""CustomConversionWithLogicalTypes"", + ""doc"" : ""Test custom conversion and logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""customEnum"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""CustomAvroEnum"", + ""type"": ""enum"", + ""logicalType"": ""custom-enum"", + ""symbols"": [""ONE"", ""TWO"", ""THREE""] + }] + }] +} +"; + + private const string _logicalTypesWithCustomConversion = @" +{ +""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""LogicalTypesWithCustomConversion"", + ""doc"" : ""Test unions with logical types in generated Java classes"", + ""fields"": [ + {""name"": ""nullableCustomField"", ""type"": [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 9, ""scale"": 2}], ""default"": null}, + { ""name"": ""nonNullCustomField"", ""type"": { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 9, ""scale"": 2} }, + { ""name"": ""nullableFixedSizeString"", ""type"": [""null"", { ""type"": ""bytes"", ""logicalType"": ""fixed-size-string"", ""minLength"": 
1, ""maxLength"": 50}], ""default"": null}, + { ""name"": ""nonNullFixedSizeString"", ""type"": { ""type"": ""bytes"", ""logicalType"": ""fixed-size-string"", ""minLength"": 1, ""maxLength"": 50} } + ] +} +"; + + private const string _logicalTypesWithDefaults = @" +{ +""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""LogicalTypesWithDefaults"", + ""doc"" : ""Test logical types and default values in generated Java classes"", + ""fields"": [ + {""name"": ""nullableDate"", ""type"": [{""type"": ""int"", ""logicalType"": ""date""}, ""null""], ""default"": 1234}, + { ""name"": ""nonNullDate"", ""type"": { ""type"": ""int"", ""logicalType"": ""date""}, ""default"": 1234} + ] +}"; + + private const string _nestedLogicalTypesArray = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesArray"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""arrayOfRecords"", + ""type"": { + ""type"": ""array"", + ""items"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInArray"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + } + }] +} +"; + + private const string _nestedLogicalTypesMap = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesMap"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""mapOfRecords"", + ""type"": { + ""type"": ""map"", + ""values"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInMap"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + } + }] +}"; + + private const 
string _nestedLogicalTypesRecord = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesRecord"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""nestedRecord"", + ""type"": { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedRecord"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + } + }] +}"; + + private const string _nestedLogicalTypesUnionFixedDecimal = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesUnionFixedDecimal"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""unionOfFixedDecimal"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""FixedInUnion"", + ""type"": { + ""type"": ""fixed"", + ""size"": 12, + ""name"": ""FixedName"", + }, + ""logicalType"": ""decimal"", + ""precision"": 28, + ""scale"": 15 + }] + }] +}"; + + private const string _nestedLogicalTypesUnion = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NestedLogicalTypesUnion"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""unionOfRecords"", + ""type"": [""null"", { + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""name"": ""RecordInUnion"", + ""type"": ""record"", + ""fields"": [ + { + ""name"": ""nullableDateField"", + ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + ] + }] + }] +}"; + + private const string _nestedSomeNamespaceRecord = @" +{""namespace"": ""org.apache.avro.codegentest.some"", + ""type"": ""record"", + ""name"": ""NestedSomeNamespaceRecord"", + ""doc"" : ""Test nested 
types with different namespace than the outer type"", + ""fields"": [ + { + ""name"": ""nestedRecord"", + ""type"": { + ""namespace"": ""org.apache.avro.codegentest.other"", + ""type"": ""record"", + ""name"": ""NestedOtherNamespaceRecord"", + ""fields"": [ + { + ""name"": ""someField"", + ""type"": ""int"" + } + ] + } + }] +}"; + + private const string _nullableLogicalTypesArray = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NullableLogicalTypesArray"", + ""doc"" : ""Test nested types with logical types in generated Java classes"", + ""fields"": [ + { + ""name"": ""arrayOfLogicalType"", + ""type"": { + ""type"": ""array"", + ""items"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}] + } + }] +}"; + + private const string _nullableLogicalTypes = @" +{""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""NullableLogicalTypes"", + ""doc"" : ""Test unions with logical types in generated Java classes"", + ""fields"": [ + {""name"": ""nullableDate"", ""type"": [""null"", {""type"": ""int"", ""logicalType"": ""date""}], ""default"": null} + ] +}"; + + private const string _stringLogicalType = @" +{ + ""namespace"": ""org.apache.avro.codegentest.testdata"", + ""type"": ""record"", + ""name"": ""StringLogicalType"", + ""doc"": ""Test logical type applied to field of type string"", + ""fields"": [ + { + ""name"": ""someIdentifier"", + ""type"": { + ""type"": ""string"", + ""logicalType"": ""uuid"" + } +}, + { + ""name"": ""someJavaString"", + ""type"": ""string"", + ""doc"": ""Just to ensure no one removed String because this is the basis of this test"" + } + ] +}"; + + // https://issues.apache.org/jira/browse/AVRO-2883 + private const string _schema_avro_2883 = @" +{ + ""type"" : ""record"", + ""name"" : ""TestModel"", + ""namespace"" : ""my.avro.ns"", + ""fields"" : [ { + ""name"" : ""eventType"", + ""type"" : { + ""type"" : ""enum"", + ""name"" : ""EventType"", + 
""symbols"" : [ ""CREATE"", ""UPDATE"", ""DELETE"" ] + } +} ] +}"; + + // https://issues.apache.org/jira/browse/AVRO-3046 + private const string _schema_avro_3046 = @" +{ + ""type"": ""record"", + ""name"": ""ExampleRecord"", + ""namespace"": ""com.example"", + ""fields"": [ + { + ""name"": ""Id"", + ""type"": ""string"", + ""logicalType"": ""UUID"" + }, + { + ""name"": ""InnerRecord"", + ""type"": { + ""type"": ""record"", + ""name"": ""InnerRecord"", + ""fields"": [ + { + ""name"": ""Id"", + ""type"": ""string"", + ""logicalType"": ""UUID"" + } + ] + } + } + ] +}"; +
+        // Generates C# code from an Avro schema, compiles it and returns the resulting assembly.
+        //
+        // Steps: writes the schema to "<uniqueId>.avsc" in a fresh temporary folder, runs
+        // AvroGenTool.GenSchema (asserting exit code 0), asserts that every path in
+        // generatedFilesToCheck exists below the output folder, compiles all generated *.cs
+        // files with AvroGenHelper.CompileCSharpFilesIntoLibrary and, when typeNamesToCheck is
+        // given, asserts that the assembly defines exactly that many types and that each named
+        // type can be resolved and instantiated. The temporary folder is always deleted.
+        //
+        // schema:                Avro schema JSON text.
+        // typeNamesToCheck:      full type names expected in the compiled assembly (null = skip).
+        // namespaceMapping:      Avro-namespace to C#-namespace pairs handed to GenSchema
+        //                        (null = empty mapping).
+        // generatedFilesToCheck: file paths, relative to the output folder, that must exist
+        //                        (null = skip).
+        //
+        // NOTE(review): the tests visible in this part of the file call AvroGenHelper.TestSchema
+        // rather than this private method - confirm whether it is still needed.
+        private Assembly TestSchema(
+            string schema,
+            IEnumerable<string> typeNamesToCheck = null,
+            IEnumerable<KeyValuePair<string, string>> namespaceMapping = null,
+            IEnumerable<string> generatedFilesToCheck = null)
+        {
+            // Create temp folder
+            string outputDir = AvroGenHelper.CreateEmptyTemporaryFolder(out string uniqueId);
+
+            try
+            {
+                // Save schema
+                string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc");
+                System.IO.File.WriteAllText(schemaFileName, schema);
+
+                // Generate from schema file
+                Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, namespaceMapping ?? new Dictionary<string, string>(), false), Is.EqualTo(0));
+
+                // Check if all generated files exist
+                if (generatedFilesToCheck != null)
+                {
+                    foreach (string generatedFile in generatedFilesToCheck)
+                    {
+                        Assert.That(new FileInfo(Path.Combine(outputDir, generatedFile)), Does.Exist);
+                    }
+                }
+
+                // Compile into netstandard library and load assembly
+                Assembly assembly = AvroGenHelper.CompileCSharpFilesIntoLibrary(
+                    new DirectoryInfo(outputDir)
+                        .EnumerateFiles("*.cs", SearchOption.AllDirectories)
+                        .Select(fi => fi.FullName),
+                    uniqueId);
+
+                if (typeNamesToCheck != null)
+                {
+                    // Check if the compiled code has the same number of types defined as the check list
+                    Assert.That(typeNamesToCheck.Count(), Is.EqualTo(assembly.DefinedTypes.Count()));
+
+                    // Check if types available in compiled assembly
+                    foreach (string typeName in typeNamesToCheck)
+                    {
+                        Type type = assembly.GetType(typeName);
+                        Assert.That(type, Is.Not.Null);
+
+                        // Instantiate
+                        object obj = Activator.CreateInstance(type);
+                        Assert.That(obj, Is.Not.Null);
+                    }
+                }
+
+                // Verify GeneratedCodeAttribute
+                // NOTE(review): Where(...) never returns null, so this assertion cannot fail;
+                // presumably Is.Not.Empty was intended - confirm before tightening it.
+                foreach(System.Reflection.TypeInfo definedType in assembly.DefinedTypes)
+                {
+                    var generatedAttributes = definedType.CustomAttributes.Where(x => x.AttributeType.FullName == "System.CodeDom.Compiler.GeneratedCodeAttribute");
+                    Assert.That(generatedAttributes, Is.Not.Null);
+                }
+
+                return assembly;
+            }
+            finally
+            {
+                Directory.Delete(outputDir, true);
+            }
+        }
+ + [TestCase( + _logicalTypesWithDefaults, + new string[] + { + "org.apache.avro.codegentest.testdata.LogicalTypesWithDefaults" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/LogicalTypesWithDefaults.cs" + })] + [TestCase( + _nestedLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesArray", + "org.apache.avro.codegentest.testdata.RecordInArray" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesArray.cs", + "org/apache/avro/codegentest/testdata/RecordInArray.cs" + })] + 
[TestCase( + _nestedLogicalTypesMap, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesMap", + "org.apache.avro.codegentest.testdata.RecordInMap" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesMap.cs", + "org/apache/avro/codegentest/testdata/RecordInMap.cs" + })] + [TestCase( + _nestedLogicalTypesRecord, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesRecord", + "org.apache.avro.codegentest.testdata.NestedRecord" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesRecord.cs", + "org/apache/avro/codegentest/testdata/NestedRecord.cs" + })] + [TestCase( + _nestedLogicalTypesUnion, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesUnion", + "org.apache.avro.codegentest.testdata.RecordInUnion" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesUnion.cs", + "org/apache/avro/codegentest/testdata/RecordInUnion.cs" + })] + [TestCase( + _nestedSomeNamespaceRecord, + new string[] + { + "org.apache.avro.codegentest.some.NestedSomeNamespaceRecord", + "org.apache.avro.codegentest.other.NestedOtherNamespaceRecord" + }, + new string[] + { + "org/apache/avro/codegentest/some/NestedSomeNamespaceRecord.cs", + "org/apache/avro/codegentest/other/NestedOtherNamespaceRecord.cs" + })] + [TestCase( + _nestedLogicalTypesUnionFixedDecimal, + new string[] + { + "org.apache.avro.codegentest.testdata.NestedLogicalTypesUnionFixedDecimal" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NestedLogicalTypesUnionFixedDecimal.cs" + })] + [TestCase( + _nullableLogicalTypes, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypes" + }, + new string[] + { + "org/apache/avro/codegentest/testdata/NullableLogicalTypes.cs" + })] + [TestCase( + _nullableLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + 
"org/apache/avro/codegentest/testdata/NullableLogicalTypesArray.cs" + })] + [TestCase( + _schema_avro_2883, + new string[] + { + "my.avro.ns.TestModel", + "my.avro.ns.EventType", + }, + new string[] + { + "my/avro/ns/TestModel.cs", + "my/avro/ns/EventType.cs" + })]
+        // Parameterized test: forwards the schema JSON, the expected type names and the
+        // expected generated file paths of each TestCase to AvroGenHelper.TestSchema.
+        // Both sequences carry strings (every TestCase passes string[]), so the parameters
+        // are declared as IEnumerable<string>.
+        public void GenerateSchema(string schema, IEnumerable<string> typeNamesToCheck, IEnumerable<string> generatedFilesToCheck)
+        {
+            AvroGenHelper.TestSchema(schema, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck);
+        }
+ + [TestCase( + _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.csharp.codegentest.testdata", + new string[] + { + "org.apache.csharp.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/csharp/codegentest/testdata/NullableLogicalTypesArray.cs" + })] + [TestCase( + _nestedLogicalTypesUnion, + "org.apache.avro.codegentest.testdata", "org.apache.csharp.codegentest.testdata", + new string[] + { + "org.apache.csharp.codegentest.testdata.NestedLogicalTypesUnion", + "org.apache.csharp.codegentest.testdata.RecordInUnion" + }, + new string[] + { + "org/apache/csharp/codegentest/testdata/NestedLogicalTypesUnion.cs", + "org/apache/csharp/codegentest/testdata/RecordInUnion.cs" + })] + [TestCase( + _schema_avro_2883, + "my.avro.ns", "my.csharp.ns", + new string[] + { + "my.csharp.ns.TestModel", + "my.csharp.ns.EventType", + }, + new string[] + { + "my/csharp/ns/TestModel.cs", + "my/csharp/ns/EventType.cs" + })] + [TestCase( + _schema_avro_3046, + "com.example", "Example", + new string[] + { + "Example.ExampleRecord", + "Example.InnerRecord", + }, + new string[] + { + "Example/ExampleRecord.cs", + "Example/InnerRecord.cs" + })] + [TestCase( + _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.@return.@int", // Reserved keywords in namespace + new string[] + { + "org.apache.return.int.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/return/int/NullableLogicalTypesArray.cs" + })] + [TestCase( 
+ _nullableLogicalTypesArray, + "org.apache.avro.codegentest.testdata", "org.apache.value.partial", // Contextual keywords in namespace + new string[] + { + "org.apache.value.partial.NullableLogicalTypesArray" + }, + new string[] + { + "org/apache/value/partial/NullableLogicalTypesArray.cs" + })] + [TestCase(@" +{ + ""type"": ""fixed"", + ""namespace"": ""com.base"", + ""name"": ""MD5"", + ""size"": 16 +}", + "com.base", "SchemaTest", + new string[] + { + "SchemaTest.MD5" + }, + new string[] + { + "SchemaTest/MD5.cs" + })] + [TestCase(@" +{ + ""type"": ""fixed"", + ""namespace"": ""com.base"", + ""name"": ""MD5"", + ""size"": 16 +}", + "miss", "SchemaTest", + new string[] + { + "com.base.MD5" + }, + new string[] + { + "com/base/MD5.cs" + })]
+        // Parameterized test: like GenerateSchema, but additionally passes a single-entry
+        // namespace mapping (namespaceMappingFrom -> namespaceMappingTo) to
+        // AvroGenHelper.TestSchema. The expected type names and file paths of each TestCase
+        // are written with the mapping already applied.
+        public void GenerateSchemaWithNamespaceMapping(
+            string schema,
+            string namespaceMappingFrom,
+            string namespaceMappingTo,
+            IEnumerable<string> typeNamesToCheck,
+            IEnumerable<string> generatedFilesToCheck)
+        {
+            AvroGenHelper.TestSchema(schema, typeNamesToCheck, new Dictionary<string, string> { { namespaceMappingFrom, namespaceMappingTo } }, generatedFilesToCheck);
+        }
+
+        // Parameterized test: writes the schema to "<uniqueId>.avsc" in a fresh temporary folder
+        // and asserts that AvroGenTool.GenSchema returns 1 for it. The temporary folder is
+        // always deleted.
+        // NOTE(review): expectedException is supplied by each TestCase but never read here;
+        // only the return code is checked - confirm whether the exception type should be verified.
+        [TestCase(_logicalTypesWithCustomConversion, typeof(AvroTypeException))]
+        [TestCase(_customConversionWithLogicalTypes, typeof(SchemaParseException))]
+        public void NotSupportedSchema(string schema, Type expectedException)
+        {
+            // Create temp folder
+            string outputDir = AvroGenHelper.CreateEmptyTemporaryFolder(out string uniqueId);
+
+            try
+            {
+                // Save schema
+                string schemaFileName = Path.Combine(outputDir, $"{uniqueId}.avsc");
+                System.IO.File.WriteAllText(schemaFileName, schema);
+
+                Assert.That(AvroGenTool.GenSchema(schemaFileName, outputDir, new Dictionary<string, string>(), false), Is.EqualTo(1));
+            }
+            finally
+            {
+                Directory.Delete(outputDir, true);
+            }
+        }
+ + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""ClassKeywords"", + ""namespace"" : ""com.base"", + ""fields"" : + [ + { ""name"" : ""int"", ""type"" : ""int"" }, + { ""name"" : ""base"", ""type"" : ""long"" }, + { ""name"" : 
""event"", ""type"" : ""boolean"" }, + { ""name"" : ""foreach"", ""type"" : ""double"" }, + { ""name"" : ""bool"", ""type"" : ""float"" }, + { ""name"" : ""internal"", ""type"" : ""bytes"" }, + { ""name"" : ""while"", ""type"" : ""string"" }, + { ""name"" : ""return"", ""type"" : ""null"" }, + { ""name"" : ""enum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""class"", ""symbols"" : [ ""Unknown"", ""A"", ""B"" ], ""default"" : ""Unknown"" } }, + { ""name"" : ""string"", ""type"" : { ""type"": ""fixed"", ""size"": 16, ""name"": ""static"" } } + ] +}", + new object[] { "com.base.ClassKeywords", typeof(int), typeof(long), typeof(bool), typeof(double), typeof(float), typeof(byte[]), typeof(string), typeof(object), "com.base.class", "com.base.static" })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""AvroNamespaceType"", + ""namespace"" : ""My.Avro"", + ""fields"" : + [ + { ""name"" : ""justenum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""justenumEnum"", ""symbols"" : [ ""One"", ""Two"" ] } }, + ] +}", + new object[] { "My.Avro.AvroNamespaceType", "My.Avro.justenumEnum" })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""SchemaObject"", + ""namespace"" : ""schematest"", + ""fields"" : + [ + { ""name"" : ""myobject"", ""type"" : + [ + ""null"", + { ""type"" : ""array"", ""items"" : + [ + ""null"", + { ""type"" : ""enum"", ""name"" : ""MyEnum"", ""symbols"" : [ ""A"", ""B"" ] }, + { ""type"": ""fixed"", ""size"": 16, ""name"": ""MyFixed"" } + ] + } + ] + } + ] +}", + new object[] { "schematest.SchemaObject", typeof(IList) })] + [TestCase(@" +{ + ""type"" : ""record"", + ""name"" : ""LogicalTypes"", + ""namespace"" : ""schematest"", + ""fields"" : + [ + { ""name"" : ""nullibleguid"", ""type"" : [""null"", {""type"": ""string"", ""logicalType"": ""uuid"" } ]}, + { ""name"" : ""guid"", ""type"" : {""type"": ""string"", ""logicalType"": ""uuid"" } }, + { ""name"" : ""nullibletimestampmillis"", ""type"" : [""null"", {""type"": ""long"", 
""logicalType"": ""timestamp-millis""}] }, + { ""name"" : ""timestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, + { ""name"" : ""nullibiletimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, + { ""name"" : ""timestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, + { ""name"" : ""nulliblelocaltimestampmillis"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""local-timestamp-millis""}] }, + { ""name"" : ""localtimestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""local-timestamp-millis""} }, + { ""name"" : ""nullibilelocaltimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""local-timestamp-micros""}] }, + { ""name"" : ""locallocaltimestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""local-timestamp-micros""} }, + { ""name"" : ""nullibiletimemicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""time-micros""}] }, + { ""name"" : ""timemicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""time-micros""} }, + { ""name"" : ""nullibiletimemillis"", ""type"" : [""null"", {""type"": ""int"", ""logicalType"": ""time-millis""}] }, + { ""name"" : ""timemillis"", ""type"" : {""type"": ""int"", ""logicalType"": ""time-millis""} }, + { ""name"" : ""nullibledecimal"", ""type"" : [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, + { ""name"" : ""decimal"", ""type"" : {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2} }, + { ""name"" : ""nullibledecimalfixed"", ""type"" : [""null"", {""type"": {""type"" : ""fixed"", ""size"": 16, ""name"": ""ndf""}, ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, + { ""name"" : ""decimalfixed"", ""type"" : {""type"": {""type"" : ""fixed"", ""size"": 16, ""name"": ""df""}, ""logicalType"": ""decimal"", ""precision"": 4, 
""scale"": 2} } + ] +}", + new object[] { "schematest.LogicalTypes", typeof(Guid?), typeof(Guid), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(TimeSpan?), typeof(TimeSpan), typeof(TimeSpan?), typeof(TimeSpan), typeof(AvroDecimal?), typeof(AvroDecimal), typeof(AvroDecimal?), typeof(AvroDecimal) })] + [TestCase(@" +{ + ""namespace"": ""enum.base"", + ""type"": ""record"", + ""name"": ""EnumInDifferentNamespace"", + ""doc"": ""Test enum with a default value in a different namespace"", + ""fields"": [ + { + ""name"": ""anEnum"", + ""type"": { + ""namespace"": ""enum.base.other"", + ""type"": ""enum"", + ""name"": ""AnEnum"", + ""symbols"": [ + ""A"", + ""B"" + ], + ""default"": ""A"" + } + } + ] +}", + new object[] { "enum.base.EnumInDifferentNamespace", "enum.base.other.AnEnum" })] + public void GenerateSchemaCheckFields(string schema, object[] result) + { + Assembly assembly = AvroGenHelper.TestSchema(schema); + + // Instantiate object + Type type = assembly.GetType((string)result[0]); + Assert.That(type, Is.Not.Null); + + ISpecificRecord record = Activator.CreateInstance(type) as ISpecificRecord; + Assert.IsNotNull(record); + + // test type of each fields + for (int i = 1; i < result.Length; ++i) + { + object field = record.Get(i - 1); + Type stype; + if (result[i].GetType() == typeof(string)) + { + Type t = assembly.GetType((string)result[i]); + Assert.That(record, Is.Not.Null); + + object obj = Activator.CreateInstance(t); + Assert.That(obj, Is.Not.Null); + stype = obj.GetType(); + } + else + { + stype = (Type)result[i]; + } + if (!stype.IsValueType) + { + Assert.That(field, Is.Null); // can't test reference type, it will be null + } + else if (stype.IsValueType && field == null) + { + Assert.That(field, Is.Null); // nullable value type, so we can't get the type using GetType + } + else + { + Assert.That(field, Is.Not.Null); + Assert.That(field.GetType(), 
Is.EqualTo(stype)); + } + } + } + + [TestCase( + _nullableLogicalTypesArray, + new string[] + { + "org.apache.avro.codegentest.testdata.NullableLogicalTypesArray" + }, + new string[] + { + "NullableLogicalTypesArray.cs" + })] + [TestCase( + _nestedSomeNamespaceRecord, + new string[] + { + "org.apache.avro.codegentest.some.NestedSomeNamespaceRecord", + "org.apache.avro.codegentest.other.NestedOtherNamespaceRecord" + }, + new string[] + { + "NestedSomeNamespaceRecord.cs", + "NestedOtherNamespaceRecord.cs" + })] + [TestCase(_schema_avro_2883, + new string[] + { + "my.avro.ns.TestModel", + "my.avro.ns.EventType", + }, + new string[] + { + "TestModel.cs", + "EventType.cs" + })] + public void GenerateSchemaWithSkipDirectoriesOption(string schema, IEnumerable typeNamesToCheck, IEnumerable generatedFilesToCheck) + { + AvroGenHelper.TestSchema(schema, typeNamesToCheck, generatedFilesToCheck: generatedFilesToCheck, skipDirectories: true); + } + } +} diff --git a/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs b/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs new file mode 100644 index 00000000000..698ff468c2d --- /dev/null +++ b/lang/csharp/src/apache/test/AvroGen/AvroGenToolTests.cs @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; +using System.Linq; +using System.Reflection; +using NUnit.Framework; + +namespace Avro.Test.AvroGen +{ + [TestFixture] + + class AvroGenToolTests + { + [Test] + public void CommandLineNoArgs() + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(Array.Empty()); + + Assert.That(result.ExitCode, Is.EqualTo(1)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + } + + [TestCase("-h")] + [TestCase("--help")] + [TestCase("--help", "-h")] + [TestCase("--help", "-s", "whatever.avsc", ".")] + [TestCase("-p", "whatever.avpr", ".", "-h")] + public void CommandLineHelp(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + } + + [TestCase("--version")] + [TestCase("-V")] + public void CommandLineVersion(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Empty); + + // Check if returned version is SemVer 2.0 compliant + Assert.That(result.StdOut[0], Does.Match(Utils.VersionTests.SemVerRegex)); + + // Returned version must be the same as the avrogen tool assembly's version + Assert.That(result.StdOut[0], Is.EqualTo(typeof(AvroGenTool).Assembly.GetCustomAttribute().InformationalVersion)); + + // Returned version must be the same as the avro library assembly's version + Assert.That(result.StdOut[0], Is.EqualTo(typeof(Schema).Assembly.GetCustomAttribute().InformationalVersion)); + } + + [TestCase("-p")] + [TestCase("-s")] + [TestCase("-p", "whatever.avpr")] + [TestCase("-p", "whatever.avpr")] + [TestCase("-s", "whatever.avsc")] + [TestCase("whatever.avsc")] + 
[TestCase("whatever.avsc", ".")] + [TestCase(".")] + [TestCase("-s", "whatever.avsc", "--namespace")] + [TestCase("-s", "whatever.avsc", "--namespace", "org.apache")] + [TestCase("-s", "whatever.avsc", "--namespace", "org.apache:")] + [TestCase("-s", "whatever.avsc", ".", "whatever")] + public void CommandLineInvalidArgs(params string[] args) + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool(args); + + Assert.That(result.ExitCode, Is.EqualTo(1)); + Assert.That(result.StdOut, Is.Not.Empty); + Assert.That(result.StdErr, Is.Not.Empty); + } + + [Theory] + public void CommandLineHelpContainsSkipDirectoriesParameter() + { + AvroGenToolResult result = AvroGenHelper.RunAvroGenTool("-h"); + + Assert.That(result.ExitCode, Is.EqualTo(0)); + Assert.IsTrue(result.StdOut.Any(s => s.Contains("--skip-directories"))); + } + } +} diff --git a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs index c2889897d64..33c7f0cf6ee 100644 --- a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs +++ b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs @@ -17,174 +17,125 @@ */ using System; using System.Collections.Generic; -using System.IO; -using System.CodeDom.Compiler; -using Microsoft.CSharp; +using System.Linq; +using System.Text.RegularExpressions; +using Microsoft.CodeAnalysis.CSharp; using NUnit.Framework; -using Avro.Specific; -namespace Avro.Test +namespace Avro.Test.CodeGen { [TestFixture] - - class CodeGenTest + class CodeGenTests { -#if !NETCOREAPP // System.CodeDom compilation not supported in .NET Core: https://github.com/dotnet/corefx/issues/12180 - [TestCase(@"{ -""type"" : ""record"", -""name"" : ""ClassKeywords"", -""namespace"" : ""com.base"", -""fields"" : - [ - { ""name"" : ""int"", ""type"" : ""int"" }, - { ""name"" : ""base"", ""type"" : ""long"" }, - { ""name"" : ""event"", ""type"" : ""boolean"" }, - { ""name"" : ""foreach"", ""type"" : ""double"" }, - { ""name"" : ""bool"", ""type"" : ""float"" }, - { 
""name"" : ""internal"", ""type"" : ""bytes"" }, - { ""name"" : ""while"", ""type"" : ""string"" }, - { ""name"" : ""return"", ""type"" : ""null"" }, - { ""name"" : ""enum"", ""type"" : { ""type"" : ""enum"", ""name"" : ""class"", ""symbols"" : [ ""Unknown"", ""A"", ""B"" ], ""default"" : ""Unknown"" } }, - { ""name"" : ""string"", ""type"" : { ""type"": ""fixed"", ""size"": 16, ""name"": ""static"" } } - ] -} -", new object[] {"com.base.ClassKeywords", typeof(int), typeof(long), typeof(bool), typeof(double), typeof(float), typeof(byte[]), typeof(string),typeof(object),"com.base.class", "com.base.static"}, TestName = "TestCodeGen0")] - [TestCase(@"{ -""type"" : ""record"", -""name"" : ""SchemaObject"", -""namespace"" : ""schematest"", -""fields"" : - [ - { ""name"" : ""myobject"", ""type"" : - [ - ""null"", - {""type"" : ""array"", ""items"" : [ ""null"", - { ""type"" : ""enum"", ""name"" : ""MyEnum"", ""symbols"" : [ ""A"", ""B"" ] }, - { ""type"": ""fixed"", ""size"": 16, ""name"": ""MyFixed"" } - ] - } - ] - } - ] -} -", new object[] { "schematest.SchemaObject", typeof(IList) }, TestName = "TestCodeGen1")] - [TestCase(@"{ - ""type"" : ""record"", - ""name"" : ""LogicalTypes"", - ""namespace"" : ""schematest"", - ""fields"" : - [ - { ""name"" : ""nullibleguid"", ""type"" : [""null"", {""type"": ""string"", ""logicalType"": ""uuid"" } ]}, - { ""name"" : ""guid"", ""type"" : {""type"": ""string"", ""logicalType"": ""uuid"" } }, - { ""name"" : ""nullibletimestampmillis"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-millis""}] }, - { ""name"" : ""timestampmillis"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, - { ""name"" : ""nullibiletimestampmicros"", ""type"" : [""null"", {""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, - { ""name"" : ""timestampmicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, - { ""name"" : ""nullibiletimemicros"", ""type"" : [""null"", 
{""type"": ""long"", ""logicalType"": ""time-micros""}] }, - { ""name"" : ""timemicros"", ""type"" : {""type"": ""long"", ""logicalType"": ""time-micros""} }, - { ""name"" : ""nullibiletimemillis"", ""type"" : [""null"", {""type"": ""int"", ""logicalType"": ""time-millis""}] }, - { ""name"" : ""timemillis"", ""type"" : {""type"": ""int"", ""logicalType"": ""time-millis""} }, - { ""name"" : ""nullibledecimal"", ""type"" : [""null"", {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2}] }, - { ""name"" : ""decimal"", ""type"" : {""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 2} } - ] -} -", new object[] { "schematest.LogicalTypes", typeof(Guid?), typeof(Guid), typeof(DateTime?), typeof(DateTime), typeof(DateTime?), typeof(DateTime), typeof(TimeSpan?), typeof(TimeSpan), typeof(TimeSpan?), typeof(TimeSpan), typeof(AvroDecimal?), typeof(AvroDecimal) }, TestName = "TestCodeGen2 - Logical Types")] - public static void TestCodeGen(string str, object[] result) - { - Schema schema = Schema.Parse(str); - CompilerResults compres = GenerateSchema(schema); - - // instantiate object - ISpecificRecord rec = compres.CompiledAssembly.CreateInstance((string)result[0]) as ISpecificRecord; - Assert.IsNotNull(rec); - - // test type of each fields - for (int i = 1; i < result.Length; ++i) - { - object field = rec.Get(i - 1); - Type stype; - if (result[i].GetType() == typeof(string)) - { - object obj = compres.CompiledAssembly.CreateInstance((string)result[i]); - Assert.IsNotNull(obj); - stype = obj.GetType(); - } - else - stype = (Type)result[i]; - if (!stype.IsValueType) - Assert.IsNull(field); // can't test reference type, it will be null - else if (stype.IsValueType && field == null) - Assert.IsNull(field); // nullable value type, so we can't get the type using GetType - else - Assert.AreEqual(stype, field.GetType()); - } + [Test] + public void TestGetNullableTypeException() + { + Assert.Throws(() => 
Avro.CodeGen.GetNullableType(null)); } - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", null, null, "com.base")] - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", "com.base", "SchemaTest", "SchemaTest")] - [TestCase(@"{ -""type"": ""fixed"", -""namespace"": ""com.base"", -""name"": ""MD5"", -""size"": 16 -}", "miss", "SchemaTest", "com.base")] - public void TestCodeGenNamespaceMapping(string str, string avroNamespace, string csharpNamespace, - string expectedNamespace) + [Test] + public void TestReservedKeywords() { - Schema schema = Schema.Parse(str); + // https://github.com/dotnet/roslyn/blob/main/src/Compilers/CSharp/Portable/Syntax/SyntaxKindFacts.cs - var codegen = new CodeGen(); - codegen.AddSchema(schema); - - if (avroNamespace != null && csharpNamespace != null) + // Check if all items in CodeGenUtil.Instance.ReservedKeywords are keywords + foreach (string keyword in CodeGenUtil.Instance.ReservedKeywords) { - codegen.NamespaceMapping[avroNamespace] = csharpNamespace; + Assert.That(SyntaxFacts.GetKeywordKind(keyword) != SyntaxKind.None, Is.True); } - var results = GenerateAssembly(codegen); - foreach(var type in results.CompiledAssembly.GetTypes()) + // Check if all Roslyn defined keywords are in CodeGenUtil.Instance.ReservedKeywords + foreach (SyntaxKind keywordKind in SyntaxFacts.GetReservedKeywordKinds()) { - Assert.AreEqual(expectedNamespace, type.Namespace); + Assert.That(CodeGenUtil.Instance.ReservedKeywords, Does.Contain(SyntaxFacts.GetText(keywordKind))); } + + // If this test fails, CodeGenUtil.ReservedKeywords list must be updated. + // This might happen if newer version of C# language defines new reserved keywords. 
} - private static CompilerResults GenerateSchema(Schema schema) + [TestCase("a", "a")] + [TestCase("a.b", "a.b")] + [TestCase("a.b.c", "a.b.c")] + [TestCase("int", "@int")] + [TestCase("a.long.b", "a.@long.b")] + [TestCase("int.b.c", "@int.b.c")] + [TestCase("a.b.int", "a.b.@int")] + [TestCase("int.long.while", "@int.@long.@while")] // Reserved keywords + [TestCase("a.value.partial", "a.value.partial")] // Contextual keywords + [TestCase("a.value.b.int.c.while.longpartial", "a.value.b.@int.c.@while.longpartial")] // Reserved and contextual keywords + public void TestMangleUnMangle(string input, string mangled) { - var codegen = new CodeGen(); - codegen.AddSchema(schema); - return GenerateAssembly(codegen); + // Mangle + Assert.That(CodeGenUtil.Instance.Mangle(input), Is.EqualTo(mangled)); + // Unmangle + Assert.That(CodeGenUtil.Instance.UnMangle(mangled), Is.EqualTo(input)); } - private static CompilerResults GenerateAssembly(CodeGen schema) + [TestFixture] + public class CodeGenTestClass : Avro.CodeGen { - var compileUnit = schema.GenerateCode(); + [Test] + public void TestGenerateNamesException() + { + Protocol protocol = null; + Assert.Throws(() => this.GenerateNames(protocol)); + } + + + [Test] + public void GetTypesShouldReturnTypes() + { + AddSchema(@" +{ + ""name"": ""PlanetEnum"", + ""namespace"": ""Space.Models"", + ""type"": ""enum"", + ""symbols"": [ + ""Earth"", + ""Mars"", + ""Jupiter"", + ""Saturn"", + ""Uranus"", + ""Neptune"" + ] +} +"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasPlanetEnumCode = types.TryGetValue("PlanetEnum", out string planetEnumCode); + Assert.That(hasPlanetEnumCode); + Assert.That(Regex.Matches(planetEnumCode, "public enum PlanetEnum").Count, Is.EqualTo(1)); + } - var comparam = new CompilerParameters(new string[] { "netstandard.dll" }); - comparam.ReferencedAssemblies.Add("System.dll"); - 
comparam.ReferencedAssemblies.Add(Path.Combine(TestContext.CurrentContext.TestDirectory, "Avro.dll")); - comparam.GenerateInMemory = true; - var ccp = new CSharpCodeProvider(); - var units = new[] { compileUnit }; - var compres = ccp.CompileAssemblyFromDom(comparam, units); - if (compres.Errors.Count > 0) + [Test] + public void EnumWithKeywordSymbolsShouldHavePrefixedSymbols() { - for (int i = 0; i < compres.Errors.Count; i++) - Console.WriteLine(compres.Errors[i]); + AddSchema(@"{ + ""type"": ""enum"", + ""symbols"": [ + ""string"", + ""integer"", + ""float"", + ""boolean"", + ""list"", + ""dict"", + ""regex"" + ], + ""name"": ""type"", + ""namespace"": ""com.example"" +}"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasTypeCode = types.TryGetValue("type", out string typeCode); + Assert.That(hasTypeCode); + Assert.That(Regex.Matches(typeCode, "public enum type").Count, Is.EqualTo(1)); + Assert.That(Regex.Matches(typeCode, "@string,").Count, Is.EqualTo(1)); + Assert.That(Regex.Matches(typeCode, "@float,").Count, Is.EqualTo(1)); } - Assert.AreEqual(0, compres.Errors.Count); - return compres; } -#endif } } diff --git a/lang/csharp/src/apache/test/File/FileTests.cs b/lang/csharp/src/apache/test/File/FileTests.cs index 9229bf46974..0ef81c9766f 100644 --- a/lang/csharp/src/apache/test/File/FileTests.cs +++ b/lang/csharp/src/apache/test/File/FileTests.cs @@ -18,6 +18,7 @@ using System; using System.Collections; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.IO.Compression; using System.Linq; @@ -34,32 +35,80 @@ public class FileTests const string specificSchema = "{\"type\":\"record\",\"name\":\"Foo\",\"namespace\":\"Avro.Test.File\",\"fields\":" + "[{\"name\":\"name\",\"type\":[\"null\",\"string\"]},{\"name\":\"age\",\"type\":\"int\"}]}"; + private static IEnumerable TestSpecificDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield 
return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 } + }, codecType).SetName("{m}(Case0,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 } + }, codecType).SetName("{m}(Case1,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] {"Jenny", 3}, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, codecType).SetName("{m}(Case2,{2})"); + + yield return new TestCaseData(specificSchema, new object[] + { + new object[] { "John", 23}, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] { "Jamie", 53 }, + new object[] { "Fanessa", 101 }, + new object[] { "Kan", 18 }, + new object[] { "Janey", 33 }, + new object[] { "Deva", 102 }, + new object[] { "Gavin", 28 }, + new object[] { "Lochy", 113 }, + new object[] { "Nickie", 10 }, + new object[] { "Liddia", 38 }, + new object[] { "Fred", 3 }, + new object[] { "April", 17 }, + new object[] { "Novac", 48 }, + new object[] { "Idan", 33 }, + new object[] { "Jolyon", 76 }, + new object[] { "Ant", 68 }, + new object[] { "Ernie", 43 }, + new object[] { "Joel", 99 }, + new object[] { "Dan", 78 }, + new object[] { "Dave", 103 }, + new object[] { "Hillary", 79 }, + new object[] { "Grant", 88 }, + new object[] { "JJ", 14 }, + new object[] { "Bill", 90 }, + new object[] { "Larry", 4 }, + new object[] { "Jenny", 3 }, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, codecType).SetName("{m}(Case3,{2})"); + } + } + /// /// Reading & writing of specific (custom) record objects /// - /// /// /// - [TestCase(specificSchema, new object[] { 
new object[] { "John", 23 } }, Codec.Type.Deflate, TestName = "TestSpecificData0")] - [TestCase(specificSchema, new object[] { new object[] { "Jane", 23 } }, Codec.Type.Deflate, TestName = "TestSpecificData1")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 }, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 } }, Codec.Type.Deflate, TestName = "TestSpecificData2")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, Codec.Type.Deflate, TestName = "TestSpecificData3")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, Codec.Type.Null, TestName = "TestSpecificData4")] - [TestCase(specificSchema, new object[] { new object[] { "Jane", 23 } }, Codec.Type.Null, TestName = "TestSpecificData5")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 }, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 } }, Codec.Type.Null, TestName = "TestSpecificData6")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jamie", 53}, new object[] { "Fanessa", 101 }, new object[] { "Kan", 18 }, - new object[] {"Janey", 33}, new object[] { "Deva", 102 }, new object[] { "Gavin", 28 }, - new object[] {"Lochy", 113}, new object[] { "Nickie", 10 }, new object[] { "Liddia", 38 }, - new object[] {"Fred", 3}, new object[] { "April", 17 }, new object[] { "Novac", 48 }, - new object[] {"Idan", 33}, new object[] { "Jolyon", 76 }, new object[] { "Ant", 68 }, - new object[] {"Ernie", 43}, new object[] { "Joel", 99 }, new object[] { "Dan", 78 }, - new object[] {"Dave", 103}, new object[] { "Hillary", 79 }, 
new object[] { "Grant", 88 }, - new object[] {"JJ", 14}, new object[] { "Bill", 90 }, new object[] { "Larry", 4 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, Codec.Type.Null, TestName = "TestSpecificData7")] + [TestCaseSource(nameof(TestSpecificDataSource))] public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecType) { // create and write out @@ -95,6 +144,41 @@ public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecTy } } + private static IEnumerable TestAppendSpecificDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, + new object[] + { + new object[] { "John", 23 } + }, + new object[] + { + new object[] { "Jane", 21 } + }, codecType).SetName("{m}(Case0,{3})"); + + yield return new TestCaseData(specificSchema, + new object[] + { + new object[] { "John", 23 }, + new object[] { "Jane", 99 }, + new object[] { "Jeff", 88 }, + new object[] { "James", 13 }, + new object[] { "June", 109 }, + new object[] { "Lloyd", 18 }, + new object[] { "Jenny", 3 }, + new object[] { "Bob", 9 }, + new object[] { null, 48 } + }, + new object[] + { + new object[] { "Hillary", 79 }, + new object[] { "Grant", 88 } + }, codecType).SetName("{m}(Case1,{3})"); + } + } + /// /// Test appending of specific (custom) record objects /// @@ -102,18 +186,7 @@ public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecTy /// initial records /// append records /// initial compression codec type - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, new object[] { new object[] { "Jane", 21 } }, Codec.Type.Deflate, TestName = "TestAppendSpecificData0")] - [TestCase(specificSchema, new object[] { new object[] { "John", 23 } }, new object[] { new object[] { "Jane", 21 } }, Codec.Type.Null, TestName = "TestAppendSpecificData1")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, 
new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, - new object[] { new object[] { "Hillary", 79 }, - new object[] { "Grant", 88 } }, Codec.Type.Deflate, TestName = "TestAppendSpecificData2")] - [TestCase(specificSchema, new object[] { new object[] {"John", 23}, new object[] { "Jane", 99 }, new object[] { "Jeff", 88 }, - new object[] {"James", 13}, new object[] { "June", 109 }, new object[] { "Lloyd", 18 }, - new object[] {"Jenny", 3}, new object[] { "Bob", 9 }, new object[] { null, 48 }}, - new object[] { new object[] { "Hillary", 79 }, - new object[] { "Grant", 88 } }, Codec.Type.Null, TestName = "TestAppendSpecificData3")] + [TestCaseSource(nameof(TestAppendSpecificDataSource))] public void TestAppendSpecificData(string schemaStr, object[] recs, object[] appendRecs, Codec.Type codecType) { IList records = MakeRecords(recs); @@ -161,84 +234,50 @@ public void TestAppendSpecificData(string schemaStr, object[] recs, object[] app } } + private static IEnumerable TestGenericDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", new object[] { "f1", null }, codecType) + .SetName("{m}(null,{2})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", true }, codecType) + .SetName("{m}(true,{2})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", false }, codecType) + .SetName("{m}(false,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", new object[] { 
"f1", 101 }, codecType) + .SetName("{m}(int,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", new object[] { "f1", 101L }, codecType) + .SetName("{m}(long,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", new object[] { "f1", 101.78f }, codecType) + .SetName("{m}(float,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", new object[] { "f1", 101.78 }, codecType) + .SetName("{m}(double,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", new object[] { "f1", "A" }, codecType) + .SetName("{m}(string,{2})"); ; + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", new object[] { "f1", new byte[] { 0, 1 } }, codecType) + .SetName("{m}(bytes,{2})"); ; + } + } + /// /// Reading & writing of generic record objects /// /// /// /// - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", - new object[] { "f1", null }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", false }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 101 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", - new object[] { "f1", 101L }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", 
\"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", - new object[] { "f1", 101.78f }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", - new object[] { "f1", 101.78 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", - new object[] { "f1", "A" }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", - new object[] { "f1", new byte[] { 0, 1 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"enum\", \"name\": \"e\", \"symbols\":[\"s1\", \"s2\"]}}]}", - new object[] { "f1", "s2" }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new object[] { 0, 1, 101 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new int[] { 0, 1, 101 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100 }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"fixed\", \"name\": \"f\", \"size\": 2}}]}", - new object[] { "f1", new byte[] { 1, 2 } }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"null\"}]}", - new object[] { "f1", null }, Codec.Type.Null)] - 
[TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", false }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 101 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"long\"}]}", - new object[] { "f1", 101L }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"float\"}]}", - new object[] { "f1", 101.78f }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"double\"}]}", - new object[] { "f1", 101.78 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", - new object[] { "f1", "A" }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"bytes\"}]}", - new object[] { "f1", new byte[] { 0, 1 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"enum\", \"name\": \"e\", \"symbols\":[\"s1\", \"s2\"]}}]}", - new object[] { "f1", "s2" }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new object[] { 0, 1, 101 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":{\"type\": \"array\", \"items\": \"int\"}}]}", - new object[] { "f1", new int[] { 0, 1, 101 } }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", 
\"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100 }, Codec.Type.Null)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Null)] + [TestCaseSource(nameof(TestGenericDataSource))] public void TestGenericData(string schemaStr, object[] value, Codec.Type codecType) { - foreach(var rwFactory in GenericOptions()) + foreach (var rwFactory in GenericOptions()) { // Create and write out MemoryStream dataFileOutputStream = new MemoryStream(); @@ -260,7 +299,23 @@ public void TestGenericData(string schemaStr, object[] value, Codec.Type codecTy } Assert.IsTrue((readFoos != null && readFoos.Count > 0), - string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos)); + string.Format(@"Generic object: {0} did not serialize/deserialize correctly", readFoos)); + } + } + + private static IEnumerable TestAppendGenericDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", new object[] { "f1", true }, new object[] { "f1", false }, codecType) + .SetName("{m}(bool,{3})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", new object[] { "f1", 1 }, new object[] { "f1", 2 }, codecType) + .SetName("{m}(int,{3})"); + yield return new TestCaseData( + "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}]}", new object[] { "f1", "A" }, new object[] { "f1", "B" }, codecType) + .SetName("{m}(string,{3})"); } } @@ -270,11 +325,8 @@ public void TestGenericData(string schemaStr, object[] value, Codec.Type codecTy /// schema /// initial records /// append records - /// innitial compression codec type - [TestCase("{\"type\":\"record\", \"name\":\"n\", 
\"fields\":[{\"name\":\"f1\", \"type\":\"boolean\"}]}", - new object[] { "f1", true }, new object[] { "f1", false }, Codec.Type.Deflate)] - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"int\"}]}", - new object[] { "f1", 1 }, new object[] { "f1", 2 }, Codec.Type.Null)] + /// initial compression codec type + [TestCaseSource(nameof(TestAppendGenericDataSource))] public void TestAppendGenericData(string schemaStr, object[] recs, object[] appendRecs, Codec.Type codecType) { foreach (var rwFactory in GenericOptions()) @@ -311,7 +363,7 @@ public void TestAppendGenericData(string schemaStr, object[] recs, object[] appe Assert.NotNull(readFoos); Assert.AreEqual((recs.Length + appendRecs.Length) / 2, readFoos.Count, - $"Generic object: {readFoos} did not serialise/deserialise correctly"); + $"Generic object: {readFoos} did not serialize/deserialize correctly"); } } @@ -346,9 +398,7 @@ public void OpenAppendWriter_IncorrectOutStream_Throws() /// DeflateStream as it is a standard non-seekable Stream that has the same behavior as the /// NetworkStream, which we should handle. 
/// - [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":" + - "[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", - new object[] { "f1", 100L }, Codec.Type.Null)] + [TestCase("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":[\"int\", \"long\"]}]}", new object[] { "f1", 100L }, Codec.Type.Null)] public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type codecType) { foreach (var rwFactory in GenericOptions()) @@ -383,7 +433,35 @@ public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type c } Assert.IsTrue((readFoos != null && readFoos.Count > 0), - string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos)); + string.Format(@"Generic object: {0} did not serialize/deserialize correctly", readFoos)); + } + } + + private static IEnumerable TestPrimitiveDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData("[\"boolean\", \"null\"]", null, codecType); + yield return new TestCaseData("[\"boolean\", \"null\"]", true, codecType); + yield return new TestCaseData("[\"int\", \"long\"]", 100, codecType); + yield return new TestCaseData("[\"int\", \"long\"]", 100L, codecType); + yield return new TestCaseData("[\"float\", \"double\"]", 100.75, codecType); + yield return new TestCaseData("[\"float\", \"double\"]", 23.67f, codecType); + yield return new TestCaseData("{\"type\": \"boolean\"}", true, codecType); + yield return new TestCaseData("{\"type\": \"boolean\"}", false, codecType); + yield return new TestCaseData("{\"type\": \"string\"}", "John", codecType); + yield return new TestCaseData("{\"type\": [\"null\",\"string\"]}", null, codecType); + yield return new TestCaseData("{\"type\": \"int\"}", 1, codecType); + yield return new TestCaseData("{\"type\": \"long\"}", 12312313123L, codecType); + yield return new TestCaseData("{\"type\": \"float\"}", 0.0f, codecType); + yield return new 
TestCaseData("{\"type\": \"double\"}", 0.0, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a", codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, codecType); + yield return new TestCaseData("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, codecType); + yield return new TestCaseData("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", codecType); } } @@ -393,51 +471,7 @@ public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type c /// /// /// - [TestCase("{\"type\": \"boolean\"}", true, Codec.Type.Deflate)] - [TestCase("{\"type\": \"boolean\"}", false, Codec.Type.Deflate)] - [TestCase("{\"type\": \"boolean\"}", true, Codec.Type.Null)] - [TestCase("{\"type\": \"boolean\"}", false, Codec.Type.Null)] - [TestCase("[\"boolean\", \"null\"]", null, Codec.Type.Deflate)] - [TestCase("[\"boolean\", \"null\"]", true, Codec.Type.Deflate)] - [TestCase("[\"int\", \"long\"]", 100, Codec.Type.Deflate)] - [TestCase("[\"int\", \"long\"]", 100L, Codec.Type.Deflate)] - [TestCase("[\"float\", \"double\"]", 100.75, Codec.Type.Deflate)] - [TestCase("[\"float\", \"double\"]", 23.67f, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, Codec.Type.Deflate)] - [TestCase("[{\"type\": 
\"array\", \"items\": \"string\"}, \"string\"]", "a", Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, Codec.Type.Deflate)] - [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", Codec.Type.Deflate)] - [TestCase("{\"type\":\"string\"}", "John", Codec.Type.Deflate)] - [TestCase("{\"type\":[\"null\",\"string\"]}", null, Codec.Type.Deflate)] - [TestCase("{\"type\":\"int\"}", 1, Codec.Type.Deflate)] - [TestCase("{\"type\":\"boolean\"}", false, Codec.Type.Deflate)] - [TestCase("{\"type\":\"long\"}", 12312313123L, Codec.Type.Deflate)] - [TestCase("{\"type\":\"float\"}", 0.0f, Codec.Type.Deflate)] - [TestCase("{\"type\":\"double\"}", 0.0, Codec.Type.Deflate)] - [TestCase("[\"boolean\", \"null\"]", null, Codec.Type.Null)] - [TestCase("[\"boolean\", \"null\"]", true, Codec.Type.Null)] - [TestCase("[\"int\", \"long\"]", 100, Codec.Type.Null)] - [TestCase("[\"int\", \"long\"]", 100L, Codec.Type.Null)] - [TestCase("[\"float\", \"double\"]", 100.75, Codec.Type.Null)] - [TestCase("[\"float\", \"double\"]", 23.67f, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f }, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a", Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", new string[] { "a", "b" }, Codec.Type.Null)] - [TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new byte[] { 1, 2, 3 }, Codec.Type.Null)] - 
[TestCase("[{\"type\": \"array\", \"items\": \"bytes\"}, \"bytes\"]", new object[] { new byte[] { 1, 2 }, new byte[] { 3, 4 } }, Codec.Type.Null)] - [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", "h1", Codec.Type.Null)] - [TestCase("{\"type\":\"string\"}", "John", Codec.Type.Null)] - [TestCase("{\"type\":[\"null\",\"string\"]}", null, Codec.Type.Null)] - [TestCase("{\"type\":\"int\"}", 1, Codec.Type.Null)] - [TestCase("{\"type\":\"boolean\"}", false, Codec.Type.Null)] - [TestCase("{\"type\":\"long\"}", 12312313123L, Codec.Type.Null)] - [TestCase("{\"type\":\"float\"}", 0.0f, Codec.Type.Null)] - [TestCase("{\"type\":\"double\"}", 0.0, Codec.Type.Null)] - [TestCase("{\"type\":\"string\"}", "test", Codec.Type.Null)] + [TestCaseSource(nameof(TestPrimitiveDataSource))] public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecType) { foreach(var rwFactory in GenericOptions()) @@ -455,32 +489,28 @@ public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecTy } } + private static IEnumerable TestMetaDataSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + foreach (bool useTypeGetter in new bool[] { true, false }) + { + yield return new TestCaseData("bytesTest", new byte[] { 1, 2, 3 }, codecType, useTypeGetter); + yield return new TestCaseData("stringTest", "testVal", codecType, useTypeGetter); + yield return new TestCaseData("longTest", 12312313123L, codecType, useTypeGetter); + yield return new TestCaseData("bytesTest", new byte[] { 1 }, codecType, useTypeGetter); + yield return new TestCaseData("longTest", -1211212L, codecType, useTypeGetter); + } + } + } + /// /// Reading & writing of header meta data /// - /// /// /// - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Null, true)] - [TestCase("stringTest", "testVal", Codec.Type.Null, true)] - [TestCase("longTest", 12312313123L, Codec.Type.Null, true)] - [TestCase("bytesTest", new 
byte[] { 1 }, Codec.Type.Null, true)] - [TestCase("longTest", -1211212L, Codec.Type.Null, true)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Deflate, true)] - [TestCase("stringTest", "testVal", Codec.Type.Deflate, true)] - [TestCase("longTest", 12312313123L, Codec.Type.Deflate, true)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Deflate, true)] - [TestCase("longTest", -21211212L, Codec.Type.Deflate, true)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Null, false)] - [TestCase("stringTest", "testVal", Codec.Type.Null, false)] - [TestCase("longTest", 12312313123L, Codec.Type.Null, false)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Null, false)] - [TestCase("longTest", -1211212L, Codec.Type.Null, false)] - [TestCase("bytesTest", new byte[] { 1, 2, 3 }, Codec.Type.Deflate, false)] - [TestCase("stringTest", "testVal", Codec.Type.Deflate, false)] - [TestCase("longTest", 12312313123L, Codec.Type.Deflate, false)] - [TestCase("bytesTest", new byte[] { 1 }, Codec.Type.Deflate, false)] - [TestCase("longTest", -21211212L, Codec.Type.Deflate, false)] + /// + [TestCaseSource(nameof(TestMetaDataSource))] public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter) { // create and write out @@ -507,19 +537,27 @@ public void TestMetaData(string key, object value, Codec.Type codecType, bool us } } + private static IEnumerable TestPartialReadSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, codecType, 0, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 1, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 135, 330).SetName("{m}({1},{2},{3})"); + yield return new TestCaseData(specificSchema, codecType, 194, 264).SetName("{m}({1},{2},{3})"); + } + + // This is only for Null codec + yield return new TestCaseData(specificSchema, 
Codec.Type.Null, 888, 165).SetName("{m}({1},{2},{3})"); + } + /// /// Partial reading of file / stream from /// position in stream /// /// - /// /// - [TestCase(specificSchema, Codec.Type.Null, 1, 330)] // 330 - [TestCase(specificSchema, Codec.Type.Null, 135, 330)] // 330 - [TestCase(specificSchema, Codec.Type.Null, 194, 264)] // 264 - [TestCase(specificSchema, Codec.Type.Null, 235, 264)] // 264 - [TestCase(specificSchema, Codec.Type.Null, 888, 165)] // 165 - [TestCase(specificSchema, Codec.Type.Null, 0, 330)] // 330 + [TestCaseSource(nameof(TestPartialReadSource))] public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords) { // create and write out @@ -569,11 +607,9 @@ public void TestPartialRead(string schemaStr, Codec.Type codecType, int position /// Tests reading from sync boundaries. /// /// - /// /// - [TestCase(specificSchema, Codec.Type.Null)] - [TestCase(specificSchema, Codec.Type.Deflate)] - public void TestPartialReadAll(string schemaStr, Codec.Type codecType) + [Test] + public void TestPartialReadAll([Values(specificSchema)] string schemaStr, [Values] Codec.Type codecType) { // create and write out IList records = MakeRecords(GetTestFooObject()); @@ -636,14 +672,11 @@ public void TestPartialReadAll(string schemaStr, Codec.Type codecType) /// Test leaveOpen flag /// /// - /// /// - /// - [TestCase(specificSchema, Codec.Type.Null, true, false)] - [TestCase(specificSchema, Codec.Type.Null, true, true)] - [TestCase(specificSchema, Codec.Type.Null, false, false)] - [TestCase(specificSchema, Codec.Type.Null, false, true)] - public void TestLeaveOpen(string schemaStr, Codec.Type codecType, bool leaveWriteOpen, bool leaveReadOpen) + /// + /// + [Test] + public void TestLeaveOpen([Values(specificSchema)] string schemaStr, [Values] Codec.Type codecType, [Values] bool leaveWriteOpen, [Values] bool leaveReadOpen) { // create and write out IList records = MakeRecords(GetTestFooObject()); @@ -723,19 +756,23 @@ private 
static void AssertNumRecordsFromPosition( IFileReader reader, long Assert.AreEqual( expectedRecords, readRecords, "didn't read expected records from position " + position ); } + private static IEnumerable TestSyncAndSeekPositionsSource() + { + foreach (Codec.Type codecType in Enum.GetValues(typeof(Codec.Type))) + { + yield return new TestCaseData(specificSchema, codecType, 2, 0, 1).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 10, 1, 4).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 200, 111, 15).SetName("{m}({1},{2},{3},{4})"); + yield return new TestCaseData(specificSchema, codecType, 1000, 588, 998).SetName("{m}({1},{2},{3},{4})"); + } + } + /// /// Reading all sync positions and /// verifying them with subsequent seek /// positions /// - [TestCase(specificSchema, Codec.Type.Null, 2, 0, 1)] - [TestCase(specificSchema, Codec.Type.Null, 10, 1, 4)] - [TestCase(specificSchema, Codec.Type.Null, 200, 111, 15)] - [TestCase(specificSchema, Codec.Type.Null, 1000, 588, 998)] - [TestCase(specificSchema, Codec.Type.Deflate, 2, 0, 1)] - [TestCase(specificSchema, Codec.Type.Deflate, 10, 1, 4)] - [TestCase(specificSchema, Codec.Type.Deflate, 200, 111, 15)] - [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)] + [TestCaseSource(nameof(TestSyncAndSeekPositionsSource))] public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition) { // create and write out @@ -775,7 +812,7 @@ public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int } } - // verify syncs wth seeks + // verify syncs with seeks reader.Sync(0); // first sync Assert.AreEqual(reader.PreviousSync(), syncs[0], string.Format("Error syncing reader to position: {0}", syncs[0])); @@ -819,6 +856,130 @@ public void TestDifferentReaderSchema() } } + /// + /// Reading & writing many specific record objects + /// + /// + /// + 
[Test] + public void TestLargeSpecificData([Values] Codec.Type codecType, [Values(0, 1000, 100000)] int numOfRecords) + { + foreach (var rwFactory in SpecificOptions()) + { + MemoryStream dataFileOutputStream = new MemoryStream(); + Schema schema = Schema.Parse(specificSchema); + using (IFileWriter dataFileWriter = rwFactory.CreateWriter(dataFileOutputStream, schema, Codec.CreateCodec(codecType))) + { + for (int index = 0; index < numOfRecords; index++) + { + dataFileWriter.Append(new Foo() { name = $"Name-{index}", age = index }); + } + } + + MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray()); + + // Read back and verify + using (IFileReader reader = rwFactory.CreateReader(dataFileInputStream, null)) + { + int index = 0; + foreach (Foo record in reader.NextEntries) + { + Assert.AreEqual($"Name-{index}", record.name); + Assert.AreEqual(index, record.age); + index++; + } + + Assert.AreEqual(numOfRecords, index); + } + } + } + + /// + /// Reading and writing using optional codecs + /// + /// + /// + [TestCase("zstd", true)] + [TestCase("deflate", false)] + [TestCase("null", false)] + [TestCase("snappy", false)] + [TestCase("bzip2", false)] + [TestCase("xz", false)] + [TestCase("zstandard", false)] + public void TestOptionalCodecs(string codecToUse, bool expectResolverProvidedCodec) + { + var resolverProvidedCodec = false; + + var fakeCodec = new FakeZstdCodec(); + Codec codecResolver(string codecString) + { + if (codecString == "zstd") + { + resolverProvidedCodec = true; + return fakeCodec; + } + + return null; + } + + Codec.RegisterResolver(codecResolver); + + RecordSchema schema = Schema.Parse( "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," + + "{\"name\":\"f2\", \"type\":\"string\"}]}" ) as RecordSchema; + + foreach(var rwFactory in GenericOptions()) + { + using (MemoryStream dataFileOutputStream = new MemoryStream()) + { + using (var writer = rwFactory.CreateWriter(dataFileOutputStream, 
schema, fakeCodec)) + { + writer.Append(mkRecord(new [] { "f1", "f1val", "f2", "f2val" }, schema)); + } + + using (var dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray())) + using (IFileReader reader = rwFactory.CreateReader(dataFileInputStream, schema)) + { + GenericRecord result = reader.Next(); + Assert.AreEqual("f1val", result["f1"]); + Assert.AreEqual("f2val", result["f2"]); + } + } + } + + Assert.AreEqual(expectResolverProvidedCodec, resolverProvidedCodec); + } + + [TestCase("")] + [TestCase("blahblahblah")] + public void UnknownCodecFromStringException(string codec) + { + Assert.Throws(typeof(AvroRuntimeException), () => Codec.CreateCodecFromString(codec)); + } + + [TestCase((Codec.Type)(-1))] // "Invalid" Codec.Type + public void UnknownCodecFromType(Codec.Type codec) + { + Assert.Throws(typeof(AvroRuntimeException), () => Codec.CreateCodec(codec)); + } + + [TestCase("deflate")] + [TestCase("null")] + [TestCase(null)] // If codec is absent, it is assumed to be "null" + [TestCase("snappy")] + [TestCase("bzip2")] + [TestCase("xz")] + [TestCase("zstandard")] + public void KnownCodecFromString(string codec) + { + Assert.NotNull(Codec.CreateCodecFromString(codec)); + } + + [Test] + public void KnownCodecFromType([Values] Codec.Type codec) + { + Assert.NotNull(Codec.CreateCodec(codec)); + } + private bool CheckPrimitive(Stream input, T value, ReaderWriterSet.ReaderFactory createReader) { IFileReader reader = createReader(input, null); @@ -1048,4 +1209,40 @@ public override string ToString() return string.Format("Name: {0}, Age: {1}", name, age); } } + + class FakeZstdCodec : Codec + { + private DeflateCodec _codec = new DeflateCodec(); + public override byte[] Compress(byte[] uncompressedData) + { + return _codec.Compress(uncompressedData); + } + + public override void Compress(MemoryStream inputStream, MemoryStream outputStream) + { + _codec.Compress(inputStream, outputStream); + } + + public override byte[] Decompress(byte[] compressedData, 
int length) + { + return _codec.Decompress(compressedData, length); + } + + public override bool Equals(object other) + { + if (other == null) return false; + + return this == other; + } + + public override int GetHashCode() + { + return GetName().GetHashCode(); + } + + public override string GetName() + { + return "zstd"; + } + } } diff --git a/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs b/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs new file mode 100644 index 00000000000..aba0038ea1a --- /dev/null +++ b/lang/csharp/src/apache/test/Generic/GenericEnumTests.cs @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using Avro.Generic; +using NUnit.Framework; + +namespace Avro.test.Generic +{ + [TestFixture] + public class GenericEnumTests + { + private const string baseSchema = "{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": " + + "[\"Unknown\", \"A\", \"B\"], \"default\": \"Unknown\" }"; + + [Test] + public void TestEquals() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + GenericEnum genericEnum2 = GetBaseGenericEnum(); + + Assert.IsTrue(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsNotEqual() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + GenericEnum genericEnum2 = new GenericEnum(Schema.Parse(baseSchema) as EnumSchema, "B"); + + Assert.IsFalse(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsObject() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + object genericEnum2 = genericEnum; + + Assert.IsTrue(genericEnum.Equals(genericEnum2)); + } + + [Test] + public void TestEqualsObjectNullObject() + { + GenericEnum genericEnum = GetBaseGenericEnum(); + + Assert.IsFalse(genericEnum.Equals(null)); + } + + private GenericEnum GetBaseGenericEnum() + { + GenericEnum genericEnum = new GenericEnum(Schema.Parse(baseSchema) as EnumSchema, "A"); + + return genericEnum; + } + } +} diff --git a/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs b/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs new file mode 100644 index 00000000000..9ae0e6f7e05 --- /dev/null +++ b/lang/csharp/src/apache/test/Generic/GenericRecordTests.cs @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Avro.Generic; +using NUnit.Framework; + +namespace Avro.test.Generic +{ + [TestFixture] + public class GenericRecordTests + { + private const string baseSchema = "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"f2\",\"type\":\"int\"},{\"name\":\"f1\",\"type\":\"boolean\"}]}"; + + [Test] + public void TestAddByFieldNameThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Field does not exist + Assert.Throws(() => { genericRecord.Add("badField", "test"); }); + } + + [Test] + public void TestAddByPosition() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + genericRecord.Add(0, 2); + + object value = genericRecord.GetValue(0); + + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(2, (int)value); + } + + [Test] + public void TestAddByPositionThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + Assert.Throws(() => { genericRecord.Add(2, 2); }); + } + + [Test] + public void TestEquals() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + + Assert.IsTrue(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsNotEqual() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + genericRecord2.Add(0, 2); + + Assert.IsFalse(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsObject() + { + GenericRecord genericRecord = 
GetBaseGenericRecord(); + object genericRecord2 = genericRecord; + + Assert.IsTrue(genericRecord.Equals(genericRecord2)); + } + + [Test] + public void TestEqualsObjectNotEqual() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + GenericRecord genericRecord2 = GetBaseGenericRecord(); + genericRecord2.Add(0, 2); + + Assert.IsFalse(genericRecord.Equals((object)genericRecord2)); + } + + [Test] + public void TestEqualsObjectNullObject() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + Assert.IsFalse(genericRecord.Equals((object)null)); + } + + [Test] + public void TestGetHashCode() + { + int hashCode = GetBaseGenericRecord().GetHashCode(); + Assert.IsTrue(hashCode > 0); + } + + [Test] + public void TestGetValue() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + object value = genericRecord.GetValue(0); + + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(1, (int)value); + } + + [Test] + public void TestKeyValueLookup() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Key Exists + object existingKey = genericRecord["f2"]; + Assert.IsNotNull(existingKey); + Assert.IsTrue(existingKey is int); + } + + [Test] + public void TestKeyValueLookupThrows() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Key does not exist + Assert.Throws(() => { _ = genericRecord["badField"]; }); + } + + [Test] + public void TestToString() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + string str = genericRecord.ToString(); + string expectedValue = "Schema: {\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"f2\",\"type\":\"int\"},{\"name\":\"f1\",\"type\":" + + "\"boolean\"}]}, contents: { f2: 1, f1: True, }"; + + Assert.AreEqual(expectedValue, str); + } + + + [Test] + public void TestFieldNames() + { + string schemaWithNames = "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"æ­ŗäģĨ上\",\"type\":\"int\"}]}"; + + RecordSchema testSchema = 
Schema.Parse(schemaWithNames) as RecordSchema; + GenericRecord genericRecord = new GenericRecord(testSchema); + genericRecord.Add("æ­ŗäģĨ上", 1); + + string str = genericRecord.ToString(); + string expectedValue = "Schema: {\"type\":\"record\",\"name\":\"r\",\"fields\":" + + "[{\"name\":\"æ­ŗäģĨ上\",\"type\":\"int\"}]}, contents: { æ­ŗäģĨ上: 1, }"; + + Assert.AreEqual(expectedValue, str); + } + + [Test] + public void TestTryGetValue() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Value exists + bool returnResult = genericRecord.TryGetValue("f2", out object result); + + Assert.IsTrue(returnResult); + Assert.IsNotNull(result); + Assert.IsTrue(result is int); + Assert.AreEqual(1, (int)result); + } + + [Test] + public void TestTryGetValueByPosition() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + bool returnResult = genericRecord.TryGetValue(0, out object value); + + Assert.IsTrue(returnResult); + Assert.IsNotNull(value); + Assert.IsTrue(value is int); + Assert.AreEqual(1, (int)value); + } + + [Test] + public void TestTryGetValueByPositionNotFound() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + bool returnResult = genericRecord.TryGetValue(3, out object value); + + Assert.IsFalse(returnResult); + Assert.IsNull(value); + } + + [Test] + public void TestTryGetValueNotFound() + { + GenericRecord genericRecord = GetBaseGenericRecord(); + + // Value exists + bool returnResult = genericRecord.TryGetValue("badField", out object result); + + Assert.IsFalse(returnResult); + Assert.IsNull(result); + } + + private GenericRecord GetBaseGenericRecord() + { + RecordSchema testSchema = Schema.Parse(baseSchema) as RecordSchema; + GenericRecord genericRecord = new GenericRecord(testSchema); + genericRecord.Add("f2", 1); + genericRecord.Add("f1", true); + + return genericRecord; + } + } +} diff --git a/lang/csharp/src/apache/test/Generic/GenericTests.cs b/lang/csharp/src/apache/test/Generic/GenericTests.cs index 
05aa5bc4944..b87ce69f890 100644 --- a/lang/csharp/src/apache/test/Generic/GenericTests.cs +++ b/lang/csharp/src/apache/test/Generic/GenericTests.cs @@ -17,16 +17,97 @@ */ using System; using System.IO; -using System.Linq; using Avro.IO; using System.Collections.Generic; +using System.Text; using Avro.Generic; using NUnit.Framework; +using Decoder = Avro.IO.Decoder; +using Encoder = Avro.IO.Encoder; namespace Avro.Test.Generic { class GenericTests { + private static string intToUtf8(int value) + { + var decimalLogicalType = new Avro.Util.Decimal(); + var logicalSchema = (LogicalSchema) + Schema.Parse(@"{ ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4 }"); + + byte[] byteArray = (byte[])decimalLogicalType.ConvertToBaseValue(new AvroDecimal(value), logicalSchema); + + return Encoding.GetEncoding("iso-8859-1").GetString(byteArray); + } + + [Test] + public void ConvertsDecimalZeroToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4}", + @$"""{intToUtf8(0)}""", new AvroDecimal(0)); + + [Test] + public void ConvertsDecimalIntegerToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4}", + @$"""{intToUtf8(1234)}""", new AvroDecimal(1234)); + + [Test] + public void ConvertsDecimalScaledToLogicalType() => ConvertsDefaultToLogicalType( + @"{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": 3}", + @$"""{intToUtf8(1234)}""", new AvroDecimal(1.234)); + + private static IEnumerable ConvertsDefaultToLogicalTypeSource = new List() + { + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""00000000-0000-0000-0000-000000000000""", new Guid()), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""00000000000000000000000000000000""", new Guid()), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", 
@"""12345678-1234-5678-1234-123456789012""", new Guid("12345678-1234-5678-1234-123456789012")), + new TestCaseData(@"{""type"": ""string"", ""logicalType"": ""uuid""}", @"""12345678123456781234123456789012""", new Guid("12345678-1234-5678-1234-123456789012")), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""date""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""date""}", "123456", DateTime.UnixEpoch.AddDays(123456)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""time-micros""}", "0", new TimeSpan()), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""time-micros""}", "123456", new TimeSpan(123456*TimeSpan.TicksPerMillisecond/1000)), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""time-millis""}", "0", new TimeSpan()), + new TestCaseData(@"{""type"": ""int"", ""logicalType"": ""time-millis""}", "123456", new TimeSpan(0, 0, 0, 0, 123456)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-micros""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-micros""}", "123456", DateTime.UnixEpoch.AddTicks(123456*TimeSpan.TicksPerMillisecond/1000)), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-millis""}", "0", DateTime.UnixEpoch), + new TestCaseData(@"{""type"": ""long"", ""logicalType"": ""timestamp-millis""}", "123456", DateTime.UnixEpoch.AddMilliseconds(123456)) + }; + + [TestCaseSource(nameof(ConvertsDefaultToLogicalTypeSource))] + public void ConvertsDefaultToLogicalType(string typeDefinition, string defaultDefinition, object expected) + { + var writerSchemaString = @"{ + ""type"": ""record"", + ""name"": ""Foo"", + ""fields"": [ + ] +}"; + + var readerSchemaString = $@"{{ + ""type"": ""record"", + ""name"": ""Foo"", + ""fields"": [ + {{ + ""name"": ""x"", + ""type"": {typeDefinition}, + ""default"": {defaultDefinition} + }} + ] +}}"; + var writerSchema = Schema.Parse(writerSchemaString); 
+ + Stream stream; + + serialize(writerSchemaString, + MkRecord(new object[] { }, (RecordSchema)writerSchema), + out stream, + out _); + + var output = deserialize(stream, writerSchema, Schema.Parse(readerSchemaString)).GetValue(0); + + Assert.AreEqual(expected, output); + } + private static void test(string s, T value) { Stream ms; @@ -47,6 +128,18 @@ private static void test(string s, T value) [TestCase("[\"int\", \"long\"]", 100L)] [TestCase("[\"float\", \"double\"]", 100.75)] [TestCase("[\"float\", \"double\"]", 23.67f)] + [TestCase("[\"float\", \"int\"]", 0)] + [TestCase("[\"float\", \"int\"]", 0.0f)] + [TestCase("[\"float\", \"int\"]", 100)] + [TestCase("[\"float\", \"int\"]", 100.0f)] + [TestCase("[\"float\", \"int\"]", -100)] + [TestCase("[\"float\", \"int\"]", -100.0f)] + [TestCase("[\"double\", \"long\"]", 0L)] + [TestCase("[\"double\", \"long\"]", 0.0)] + [TestCase("[\"double\", \"long\"]", 100L)] + [TestCase("[\"double\", \"long\"]", 100.0)] + [TestCase("[\"double\", \"long\"]", -100L)] + [TestCase("[\"double\", \"long\"]", -100.0)] [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", new float[] { 23.67f, 22.78f })] [TestCase("[{\"type\": \"array\", \"items\": \"float\"}, \"double\"]", 100.89)] [TestCase("[{\"type\": \"array\", \"items\": \"string\"}, \"string\"]", "a")] @@ -98,7 +191,7 @@ public void TestPrimitive(string schema, object value) new object[] { "f1", new byte[] { 1, 2 } })] public void TestRecord(string schema, object[] kv) { - test(schema, mkRecord(kv, Schema.Parse(schema) as RecordSchema)); + test(schema, MkRecord(kv, Schema.Parse(schema) as RecordSchema)); } [TestCase("{\"type\": \"map\", \"values\": \"string\"}", @@ -166,7 +259,7 @@ public void TestLogical_Decimal_Fixed() new object[] { "f1", "v1" })] public void TestUnion_record(string unionSchema, string recordSchema, object[] value) { - test(unionSchema, mkRecord(value, Schema.Parse(recordSchema) as RecordSchema)); + test(unionSchema, MkRecord(value, 
Schema.Parse(recordSchema) as RecordSchema)); } [TestCase("[{\"type\": \"enum\", \"symbols\": [\"s1\", \"s2\"], \"name\": \"e\"}, \"string\"]", @@ -344,8 +437,8 @@ public void TestResolution_enum() new object[] { "f1", true, "f2", "d" }, Description = "Default field")] public void TestResolution_record(string ws, object[] actual, string rs, object[] expected) { - TestResolution(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs, - mkRecord(expected, Schema.Parse(rs) as RecordSchema)); + TestResolution(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs, + MkRecord(expected, Schema.Parse(rs) as RecordSchema)); } [TestCase("{\"type\":\"map\",\"values\":\"int\"}", new object[] { "a", 100, "b", -202 }, @@ -419,11 +512,11 @@ public void TestResolutionMismatch_record(string ws, object[] actual, string rs, { if (expectedExceptionType != null) { - Assert.Throws(expectedExceptionType, () => { testResolutionMismatch(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); }); + Assert.Throws(expectedExceptionType, () => { testResolutionMismatch(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); }); } else { - testResolutionMismatch(ws, mkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); + testResolutionMismatch(ws, MkRecord(actual, Schema.Parse(ws) as RecordSchema), rs); } } @@ -491,7 +584,7 @@ public void TestRecordEquality_arrayFieldnotEqual() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}"); - Func makeRec = arr => mkRecord(new object[] { "a", arr }, schema); + Func makeRec = arr => MkRecord(new object[] { "a", arr }, schema); var rec1 = makeRec(new[] { 69, 23 }); var rec2 = makeRec(new[] { 42, 11 }); @@ -506,7 +599,7 @@ public void TestRecordEquality_arrayFieldequal() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}"); - Func makeRec = arr => mkRecord(new object[] { "a", arr }, schema); + Func 
makeRec = arr => MkRecord(new object[] { "a", arr }, schema); // Intentionally duplicated so reference equality doesn't apply var rec1 = makeRec(new[] { 89, 12, 66 }); @@ -522,7 +615,7 @@ public void TestRecordEquality_mapFieldequal() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"map\",\"values\":\"int\"}}]}"); - Func makeRec = value => mkRecord( + Func makeRec = value => MkRecord( new object[] { "a", new Dictionary { { "key", value } } }, schema); var rec1 = makeRec(52); @@ -538,7 +631,7 @@ public void TestRecordEquality_mapFieldnotEqual() "{\"type\":\"record\",\"name\":\"r\",\"fields\":" + "[{\"name\":\"a\",\"type\":{\"type\":\"map\",\"values\":\"int\"}}]}"); - Func makeRec = value => mkRecord( + Func makeRec = value => MkRecord( new object[] { "a", new Dictionary { { "key", value } } }, schema); var rec1 = makeRec(69); @@ -547,7 +640,7 @@ public void TestRecordEquality_mapFieldnotEqual() Assert.AreNotEqual(rec1, rec2); } - private static GenericRecord mkRecord(object[] kv, RecordSchema s) + public static GenericRecord MkRecord(object[] kv, RecordSchema s) { GenericRecord input = new GenericRecord(s); for (int i = 0; i < kv.Length; i += 2) diff --git a/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs b/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs index a6a1731e2d8..a638b73fea2 100644 --- a/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/BinaryCodecTests.cs @@ -20,6 +20,7 @@ using NUnit.Framework; using System.IO; using System.Linq; +using System.Text; using Avro.IO; namespace Avro.Test @@ -31,7 +32,7 @@ namespace Avro.Test delegate void Encode(Encoder e, T t); /// - /// Tests the BinaryEncoder and BinaryDecoder. This is pertty general set of test cases and hence + /// Tests the BinaryEncoder and BinaryDecoder. This is pretty general set of test cases and hence /// can be used for any encoder and its corresponding decoder. 
/// [TestFixture] @@ -214,23 +215,105 @@ public void TestString(string n, int overhead) TestSkip(n, (Decoder d) => d.SkipString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); } -#if NETCOREAPP3_1 +#if NETCOREAPP3_1_OR_GREATER [Test] - public void TestLargeString() + public void TestStringReadIntoArrayPool() { + const int maxFastReadLength = 4096; + // Create a 16KB buffer in the Array Pool var largeBufferToSeedPool = ArrayPool.Shared.Rent(2 << 14); ArrayPool.Shared.Return(largeBufferToSeedPool); - // Create a slightly less than 16KB buffer, which will use the 16KB buffer in the pool - var n = string.Concat(Enumerable.Repeat("1234567890", 1600)); - var overhead = 3; + var n = string.Concat(Enumerable.Repeat("A", maxFastReadLength)); + var overhead = 2; TestRead(n, (Decoder d) => d.ReadString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); - TestSkip(n, (Decoder d) => d.SkipString(), (Encoder e, string t) => e.WriteString(t), overhead + n.Length); } + + [Test] + public void TestStringReadByBinaryReader() + { + const int overhead = 2; + const int maxFastReadLength = 4096; + const int expectedStringLength = maxFastReadLength + 1; + var n = string.Concat(Enumerable.Repeat("A", expectedStringLength)); + + TestRead(n, (Decoder d) => d.ReadString(), (Encoder e, string t) => e.WriteString(t), expectedStringLength + overhead); + } +#endif + + [Test] + public void TestInvalidInputWithNegativeStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + Encoder e = new BinaryEncoder(iostr); + + e.WriteLong(-1); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("Can not deserialize a string with negative length!", exception.Message); + iostr.Close(); + } + } + + [Test] + public void TestInvalidInputWithMaxIntAsStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + 
Encoder e = new BinaryEncoder(iostr); + + e.WriteLong(int.MaxValue); + e.WriteBytes(Encoding.UTF8.GetBytes("SomeSmallString")); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("String length is not supported!", exception.Message); + iostr.Close(); + } + } + + [Test] + public void TestInvalidInputWithMaxArrayLengthAsStringLength() + { + using (MemoryStream iostr = new MemoryStream()) + { + Encoder e = new BinaryEncoder(iostr); + +#if NETCOREAPP3_1_OR_GREATER + const int maximumArrayLength = 0x7FFFFFC7; +#else + const int maximumArrayLength = 0x7FFFFFFF / 2; #endif + e.WriteLong(maximumArrayLength); + e.WriteBytes(Encoding.UTF8.GetBytes("SomeSmallString")); + + iostr.Flush(); + iostr.Position = 0; + Decoder d = new BinaryDecoder(iostr); + + var exception = Assert.Throws(() => d.ReadString()); + + Assert.NotNull(exception); + Assert.AreEqual("Could not read as many bytes from stream as expected!", exception.Message); + iostr.Close(); + } + } + [TestCase(0, 1)] [TestCase(1, 1)] [TestCase(64, 2)] diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs new file mode 100644 index 00000000000..1c909275594 --- /dev/null +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -0,0 +1,472 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using NUnit.Framework; +using System.IO; +using System.Linq; +using System.Text; +using Avro.Generic; +using Avro.IO; +using Avro.Specific; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; + +namespace Avro.Test +{ + using Decoder = Avro.IO.Decoder; + using Encoder = Avro.IO.Encoder; + + /// + /// Tests the JsonEncoder and JsonDecoder. + /// + [TestFixture] + public class JsonCodecTests + { + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f2\": 10.4, \"f1\": 10 } ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s1\" ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s2\" ")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 5 }", "\"hello\"")] + [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]")] + [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k2\": 20, \"k3\": 30 }")] + [TestCase("[ \"int\", \"long\" ]", "{ \"int\": 10 }")] + [TestCase("\"string\"", "\"hello\"")] + [TestCase("\"bytes\"", "\"hello\"")] + [TestCase("\"int\"", "10")] + [TestCase("\"long\"", "10")] + [TestCase("\"float\"", "10.0")] + [TestCase("\"double\"", "10.0")] + [TestCase("\"boolean\"", "true")] + [TestCase("\"boolean\"", "false")] + [TestCase("\"null\"", "null")] + public void TestJsonAllTypesValidValues(String schemaStr, String 
value) + { + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JToken.Parse(value), + JToken.Parse(fromAvroToJson(avroBytes, schema, true)))); + } + + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f4\": 10.4, \"f3\": 10 } ")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", " \"s3\" ")] + [TestCase("{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"")] + [TestCase("{ \"type\": \"array\", \"items\": \"int\" }", "[ \"10\", \"20\", \"30\" ]")] + [TestCase("{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": \"10\", \"k2\": \"20\"}")] + [TestCase("[ \"int\", \"long\" ]", "10")] + [TestCase("\"string\"", "10")] + [TestCase("\"bytes\"", "10")] + [TestCase("\"int\"", "\"hi\"")] + [TestCase("\"long\"", "\"hi\"")] + [TestCase("\"float\"", "\"hi\"")] + [TestCase("\"double\"", "\"hi\"")] + [TestCase("\"boolean\"", "\"hi\"")] + [TestCase("\"boolean\"", "\"hi\"")] + [TestCase("\"null\"", "\"hi\"")] + public void TestJsonAllTypesInvalidValues(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema)); + } + + [TestCase("{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + + " { \"name\" : \"f1\", \"type\": \"int\" }, " + + " { \"name\" : \"f2\", \"type\": \"float\" } " + + "] }", + "{ \"f2\": 10.4, \"f1")] + [TestCase("{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": [ \"s1\", \"s2\"] }", "s1")] + [TestCase("\"string\"", "\"hi")] + public void TestJsonMalformed(String schemaStr, String value) + { + Schema schema = Schema.Parse(schemaStr); + Assert.Throws(() => fromJsonToAvro(value, schema)); + } + + [Test] + public void TestJsonEncoderWhenIncludeNamespaceOptionIsFalse() + { + string value = "{\"b\": {\"string\":\"myVal\"}, 
\"a\": 1}"; + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":\"myVal\",\"a\":1}"), + JObject.Parse(fromAvroToJson(avroBytes, schema, false)))); + } + + [Test] + public void TestJsonEncoderWhenIncludeNamespaceOptionIsTrue() + { + string value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; + string schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + + "]}"; + Schema schema = Schema.Parse(schemaStr); + byte[] avroBytes = fromJsonToAvro(value, schema); + + Assert.IsTrue(JToken.DeepEquals(JObject.Parse("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), + JObject.Parse(fromAvroToJson(avroBytes, schema, true)))); + } + + [Test] + public void TestJsonRecordOrdering() + { + string value = "{\"b\": 2, \"a\": 1}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":1,\"b\":2}", fromDatumToJson(o, schema, false)); + } + + [Test] + public void TestJsonRecordOrdering2() + { + string value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema schema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": 
{\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true},\"b\":{\"b1\":\"h\",\"b2\":3.14,\"b3\":1.4}}", + fromDatumToJson(o, schema, false)); + } + + [Test] + public void TestJsonRecordOrderingWithProjection() + { + String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + Object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false)); + } + + + [Test] + public void TestJsonRecordOrderingWithProjection2() + { + String value = + "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4}, \"a\": {\"a2\":true, \"a1\": null}}"; + Schema writerSchema = Schema.Parse("{\"type\": \"record\", \"name\": 
\"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":{\"type\":\"array\", \"items\":\"float\"}}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + + Schema readerSchema = Schema.Parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}}\n" + + "]}"); + + GenericDatumReader reader = new GenericDatumReader(writerSchema, readerSchema); + Decoder decoder = new JsonDecoder(writerSchema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual("{\"a\":{\"a1\":null,\"a2\":true}}", + fromDatumToJson(o, readerSchema, false)); + } + + [TestCase("{\"int\":123}")] + [TestCase("{\"string\":\"12345678-1234-5678-1234-123456789012\"}")] + [TestCase("null")] + public void TestJsonUnionWithLogicalTypes(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " { \"type\": \"string\", \"logicalType\": \"uuid\" }\n" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + } + + [TestCase("{\"int\":123}")] + [TestCase("{\"com.myrecord\":{\"f1\":123}}")] + [TestCase("null")] + public void TestJsonUnionWithRecord(String value) + { + Schema schema = Schema.Parse( + "[\"null\",\n" + + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + + " {\"type\":\"record\",\"name\":\"myrecord\", \"namespace\":\"com\"," + + " 
\"fields\":[{\"name\":\"f1\",\"type\": \"int\"}]}" + + "]"); + GenericDatumReader reader = new GenericDatumReader(schema, schema); + Decoder decoder = new JsonDecoder(schema, value); + object o = reader.Read(null, decoder); + + Assert.AreEqual(value, fromDatumToJson(o, schema, true)); + } + + [TestCase("int", 1)] + [TestCase("long", 1L)] + [TestCase("float", 1.0F)] + [TestCase("double", 1.0)] + public void TestJsonDecoderNumeric(string type, object value) + { + string def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" + "[{\"type\":\"" + type + + "\",\"name\":\"n\"}]}"; + Schema schema = Schema.Parse(def); + DatumReader reader = new GenericDatumReader(schema, schema); + + string[] records = { "{\"n\":1}", "{\"n\":1.0}" }; + + foreach (GenericRecord g in records.Select(r => reader.Read(null, new JsonDecoder(schema, r)))) + { + Assert.AreEqual(value, g["n"]); + } + } + + // Ensure that even if the order of fields in JSON is different from the order in schema, it works. + [Test] + public void TestJsonDecoderReorderFields() + { + String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + + "{\"type\":{\"type\":\"array\",\"items\":\"int\"},\"name\":\"a\"}" + + "]}"; + Schema ws = Schema.Parse(w); + String data = "{\"a\":[1,2],\"l\":100}"; + JsonDecoder decoder = new JsonDecoder(ws, data); + Assert.AreEqual(100, decoder.ReadLong()); + decoder.SkipArray(); + data = "{\"l\": 200, \"a\":[1,2]}"; + decoder = new JsonDecoder(ws, data); + Assert.AreEqual(200, decoder.ReadLong()); + decoder.SkipArray(); + } + + [Test] + public void TestJsonDecoderSpecificDatumWriterWithArrayAndMap() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + DatumWriter writer = new SpecificDatumWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new 
JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + } + + [Test] + public void TestJsonDecoderSpecificDefaultWriterWithArrayAndMap() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + data.mymap = new Dictionary { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 } }; + + SpecificDefaultWriter writer = new SpecificDefaultWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}],\"mymap\":{\"map\":{\"1\":1,\"2\":2,\"3\":3,\"4\":4}}}", output); + } + } + + private byte[] fromJsonToAvro(string json, Schema schema) + { + DatumReader reader = new GenericDatumReader(schema, schema); + GenericDatumWriter writer = new GenericDatumWriter(schema); + MemoryStream output = new MemoryStream(); + + Decoder decoder = new JsonDecoder(schema, json); + Encoder encoder = new BinaryEncoder(output); + + object datum = reader.Read(null, decoder); + + writer.Write(datum, encoder); + encoder.Flush(); + output.Flush(); + + return output.ToArray(); + } + + private string fromAvroToJson(byte[] avroBytes, Schema schema, bool includeNamespace) + { + GenericDatumReader reader = new GenericDatumReader(schema, schema); + + Decoder decoder = new BinaryDecoder(new MemoryStream(avroBytes)); + object datum = reader.Read(null, decoder); + return fromDatumToJson(datum, schema, includeNamespace); + } + + private string 
fromDatumToJson(object datum, Schema schema, bool includeNamespace) + { + DatumWriter writer = new GenericDatumWriter(schema); + MemoryStream output = new MemoryStream(); + + JsonEncoder encoder = new JsonEncoder(schema, output); + encoder.IncludeNamespace = includeNamespace; + writer.Write(datum, encoder); + encoder.Flush(); + output.Flush(); + + return Encoding.UTF8.GetString(output.ToArray()); + } + } + + public partial class Root : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Root\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"myarray" + + "\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avr" + + "o.Test\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}}},{\"name\":\"mymap\",\"default\":null," + + "\"type\":[\"null\",{\"type\":\"map\",\"values\":\"int\"}]}]}"); + private IList _myarray; + private IDictionary _mymap; + + public virtual global::Avro.Schema Schema + { + get { return Root._SCHEMA; } + } + + public IList myarray + { + get { return this._myarray; } + set { this._myarray = value; } + } + + public IDictionary mymap + { + get { return this._mymap; } + set { this._mymap = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.myarray; + case 1: return this.mymap; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.myarray = (IList)fieldValue; + break; + case 1: + this.mymap = (IDictionary)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } + + public partial class Item : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + 
"{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"id\",\"ty" + + "pe\":\"long\"}]}"); + + private long _id; + + public virtual global::Avro.Schema Schema + { + get { return Item._SCHEMA; } + } + + public long id + { + get { return this._id; } + set { this._id = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.id; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.id = (System.Int64)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } +} diff --git a/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs b/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs index 94bfb408d18..170e28eb5fa 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataConstants.cs @@ -25,7 +25,11 @@ public class InteropDataConstants public static readonly HashSet SupportedCodecNames = new HashSet { DataFileConstants.NullCodec, - DataFileConstants.DeflateCodec + DataFileConstants.DeflateCodec, + DataFileConstants.SnappyCodec, + DataFileConstants.BZip2Codec, + DataFileConstants.XZCodec, + DataFileConstants.ZstandardCodec }; } } \ No newline at end of file diff --git a/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs b/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs index 10c06f79480..7aa10c0e65a 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataGenerator.cs @@ -86,6 +86,7 @@ static void GenerateInteropData(string schemaPath, string outputDir) var codec = Codec.CreateCodecFromString(codecName); using (var dataFileWriter = DataFileWriter.OpenWriter(datumWriter, outputPath, codec)) { + 
dataFileWriter.SetMeta("user_metadata", "someByteArray"); dataFileWriter.Append(record); } } diff --git a/lang/csharp/src/apache/test/Interop/InteropDataTests.cs b/lang/csharp/src/apache/test/Interop/InteropDataTests.cs index 7215df4321c..4f66a7e4459 100644 --- a/lang/csharp/src/apache/test/Interop/InteropDataTests.cs +++ b/lang/csharp/src/apache/test/Interop/InteropDataTests.cs @@ -48,6 +48,11 @@ public void TestInterop(string inputDir) using(var reader = DataFileReader.OpenReader(avroFile)) { int i = 0; + string userMetadata = reader.GetMetaString("user_metadata"); + if (userMetadata != null) + { + Assert.AreEqual("someByteArray", userMetadata); + } foreach (var record in reader.NextEntries) { i++; diff --git a/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs b/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs new file mode 100644 index 00000000000..c18da4068ea --- /dev/null +++ b/lang/csharp/src/apache/test/Reflect/TestLogicalSchema.cs @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.IO; +using Avro.IO; +using Avro.Reflect; +using NUnit.Framework; + +namespace Avro.test.Reflect +{ + public class TestLogicalSchema + { + [TestCase] + public void WriteAndReadObjectsWithLogicalSchemaFields_WithNullValues() + { + //Arrange + var obj = new TestObject + { + AvroDecimalNullableProperty = null, + AvroDecimalProperty = 13.42m, + GuidNullableProperty = null, + GuidProperty = Guid.NewGuid(), + DateNullableProperty = null, + DateProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateTimeMicrosecondNullableProperty = null, + DateTimeMicrosecondProperty = DateTime.UtcNow, + DateTimeMillisecondNullableProperty = null, + DateTimeMillisecondProperty = DateTime.UtcNow, + TimeSpanMicrosecondNullableProperty = null, + TimeSpanMicrosecondProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondNullableProperty = null, + TimeSpanMillisecondProperty = new TimeSpan(23, 59, 59), + }; + + var schema = Schema.Parse(SchemaJson); + var writer = new ReflectWriter(schema); + var reader = new ReflectReader(schema, schema); + var writeStream = new MemoryStream(); + var writeBinaryEncoder = new BinaryEncoder(writeStream); + + //Act + writer.Write(obj, writeBinaryEncoder); + var data = writeStream.ToArray(); + + var readStream = new MemoryStream(data); + var result = reader.Read(null, new BinaryDecoder(readStream)); + + //Assert + Assert.NotNull(result); + + Assert.IsNull(result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalProperty, result.AvroDecimalProperty); + + Assert.IsNull(result.GuidNullableProperty); + Assert.AreEqual(obj.GuidProperty, result.GuidProperty); + + Assert.IsNull(obj.DateNullableProperty); + Assert.AreEqual(obj.DateProperty.Date, result.DateProperty); + + Assert.IsNull(result.DateTimeMicrosecondNullableProperty); + Assert.AreEqual((obj.DateTimeMicrosecondProperty.Ticks / 10 ) * 10, result.DateTimeMicrosecondProperty.Ticks); + + Assert.IsNull(result.DateTimeMillisecondNullableProperty); + 
Assert.AreEqual((obj.DateTimeMillisecondProperty.Ticks / 10000) * 10000, result.DateTimeMillisecondProperty.Ticks); + + Assert.IsNull(result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondProperty, result.TimeSpanMicrosecondProperty); + + Assert.IsNull(result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondProperty, result.TimeSpanMillisecondProperty); + } + + [TestCase] + public void WriteAndReadObjectsWithLogicalSchemaFields_WithoutNullValues() + { + //Arrange + var obj = new TestObject + { + AvroDecimalNullableProperty = 136.42m, + AvroDecimalProperty = 13.42m, + GuidNullableProperty = Guid.NewGuid(), + GuidProperty = Guid.NewGuid(), + DateNullableProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateProperty = new DateTime(2022, 05, 26, 14, 57, 24, 123), + DateTimeMicrosecondNullableProperty = DateTime.UtcNow, + DateTimeMicrosecondProperty = DateTime.UtcNow, + DateTimeMillisecondNullableProperty = DateTime.UtcNow, + DateTimeMillisecondProperty = DateTime.UtcNow, + TimeSpanMicrosecondNullableProperty = new TimeSpan(23, 59, 59), + TimeSpanMicrosecondProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondNullableProperty = new TimeSpan(23, 59, 59), + TimeSpanMillisecondProperty = new TimeSpan(23, 59, 59), + }; + + var schema = Schema.Parse(SchemaJson); + var writer = new ReflectWriter(schema); + var reader = new ReflectReader(schema, schema); + var writeStream = new MemoryStream(); + var writeBinaryEncoder = new BinaryEncoder(writeStream); + + //Act + writer.Write(obj, writeBinaryEncoder); + var data = writeStream.ToArray(); + + var readStream = new MemoryStream(data); + var result = reader.Read(null, new BinaryDecoder(readStream)); + + //Assert + Assert.NotNull(result); + + Assert.NotNull(result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalNullableProperty, result.AvroDecimalNullableProperty); + Assert.AreEqual(obj.AvroDecimalProperty, result.AvroDecimalProperty); + + 
Assert.NotNull(result.GuidNullableProperty); + Assert.AreEqual(obj.GuidNullableProperty, result.GuidNullableProperty); + Assert.AreEqual(obj.GuidProperty, result.GuidProperty); + + Assert.NotNull(result.DateProperty); + Assert.AreEqual(obj.DateNullableProperty?.Date, result.DateProperty); + Assert.AreEqual(obj.DateProperty.Date, result.DateProperty); + + Assert.NotNull(result.DateTimeMicrosecondNullableProperty); + Assert.AreEqual((obj.DateTimeMicrosecondNullableProperty?.Ticks / 10) * 10, result.DateTimeMicrosecondNullableProperty?.Ticks); + Assert.AreEqual((obj.DateTimeMicrosecondProperty.Ticks / 10) * 10, result.DateTimeMicrosecondProperty.Ticks); + + Assert.NotNull(result.DateTimeMillisecondNullableProperty); + Assert.AreEqual((obj.DateTimeMillisecondNullableProperty?.Ticks / 10000) * 10000, result.DateTimeMillisecondNullableProperty?.Ticks); + Assert.AreEqual((obj.DateTimeMillisecondProperty.Ticks / 10000) * 10000, result.DateTimeMillisecondProperty.Ticks); + + Assert.NotNull(result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondNullableProperty, result.TimeSpanMicrosecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMicrosecondProperty, result.TimeSpanMicrosecondProperty); + + Assert.NotNull(result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondNullableProperty, result.TimeSpanMillisecondNullableProperty); + Assert.AreEqual(obj.TimeSpanMillisecondProperty, result.TimeSpanMillisecondProperty); + } + + private const string SchemaJson = @" +{ + ""type"" : ""record"", + ""namespace"" : ""Avro.test.Reflect.Converters"", + ""name"" : ""TestObject"", + ""fields"" : [ + { ""name"" : ""AvroDecimalNullableProperty"" , ""type"" : [""null"", { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 6, ""scale"": 2 }] }, + { ""name"" : ""AvroDecimalProperty"" , ""type"" : { ""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 6, ""scale"": 2 } }, + { ""name"" : 
""GuidNullableProperty"" , ""type"" : [""null"", { ""type"": ""string"", ""logicalType"": ""uuid""}] }, + { ""name"" : ""GuidProperty"" , ""type"" : { ""type"": ""string"", ""logicalType"": ""uuid""} }, + { ""name"" : ""DateNullableProperty"" , ""type"" : [""null"", { ""type"": ""int"", ""logicalType"": ""date""}] }, + { ""name"" : ""DateProperty"" , ""type"" : { ""type"": ""int"", ""logicalType"": ""date""} }, + { ""name"" : ""DateTimeMicrosecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""timestamp-micros""}] }, + { ""name"" : ""DateTimeMicrosecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""timestamp-micros""} }, + { ""name"" : ""DateTimeMillisecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""timestamp-millis""}] }, + { ""name"" : ""DateTimeMillisecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""timestamp-millis""} }, + { ""name"" : ""TimeSpanMicrosecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""long"", ""logicalType"": ""time-micros""}] }, + { ""name"" : ""TimeSpanMicrosecondProperty"" , ""type"" : { ""type"": ""long"", ""logicalType"": ""time-micros""} }, + { ""name"" : ""TimeSpanMillisecondNullableProperty"" , ""type"" : [""null"", { ""type"": ""int"", ""logicalType"": ""time-millis""}] }, + { ""name"" : ""TimeSpanMillisecondProperty"" , ""type"" : { ""type"": ""int"", ""logicalType"": ""time-millis""} } + ] +} +"; + + public class TestObject + { + public AvroDecimal? AvroDecimalNullableProperty { get; set; } + public AvroDecimal AvroDecimalProperty { get; set; } + public Guid? GuidNullableProperty { get; set; } + public Guid GuidProperty { get; set; } + public DateTime? DateNullableProperty { get; set; } + public DateTime DateProperty { get; set; } + public DateTime? DateTimeMicrosecondNullableProperty { get; set; } + public DateTime DateTimeMicrosecondProperty { get; set; } + public DateTime? 
DateTimeMillisecondNullableProperty { get; set; } + public DateTime DateTimeMillisecondProperty { get; set; } + public TimeSpan? TimeSpanMicrosecondNullableProperty { get; set; } + public TimeSpan TimeSpanMicrosecondProperty { get; set; } + public TimeSpan? TimeSpanMillisecondNullableProperty { get; set; } + public TimeSpan TimeSpanMillisecondProperty { get; set; } + } + } +} diff --git a/lang/csharp/src/apache/test/Reflect/TestReflect.cs b/lang/csharp/src/apache/test/Reflect/TestReflect.cs index bea5ef23f9a..5cf57253978 100644 --- a/lang/csharp/src/apache/test/Reflect/TestReflect.cs +++ b/lang/csharp/src/apache/test/Reflect/TestReflect.cs @@ -40,17 +40,22 @@ class EnumResolutionRecord public EnumResolutionEnum enumType { get; set; } } + class NullableEnumResolutionRecord + { + public EnumResolutionEnum? enumType { get; set; } + } + [TestCase] public void TestEnumResolution() { Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + - "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}"); + "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}"); var testRecord = new EnumResolutionRecord(); Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + - "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\":" + - " \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");; + "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\":" + + " \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");; testRecord.enumType = EnumResolutionEnum.SECOND; // serialize @@ -61,6 +66,28 @@ public void TestEnumResolution() Assert.AreEqual( EnumResolutionEnum.SECOND, rec2.enumType ); } + [TestCase] + public void TestNullableEnumResolution() + { + 
Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + + "\"fields\":[{\"name\":\"enumType\",\"type\":[\"null\", { \"type\": \"enum\", \"name\": " + + "\"EnumType\",\"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]}] }]}"); + + var testRecord = new NullableEnumResolutionRecord(); + + Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," + + "\"fields\":[{\"name\":\"enumType\",\"type\":[\"null\", { \"type\": \"enum\", \"name\": " + + "\"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]}] }]}"); + testRecord.enumType = EnumResolutionEnum.SECOND; + + // serialize + var stream = serialize(writerSchema, testRecord); + + // deserialize + var rec2 = deserialize(stream, writerSchema, readerSchema); + Assert.AreEqual( EnumResolutionEnum.SECOND, rec2.enumType ); + } + private static S deserialize(Stream ms, Schema ws, Schema rs) where S : class { long initialPos = ms.Position; diff --git a/lang/csharp/src/apache/test/Schema/AliasTest.cs b/lang/csharp/src/apache/test/Schema/AliasTest.cs index 422e07fc60b..b9a31bb8170 100644 --- a/lang/csharp/src/apache/test/Schema/AliasTest.cs +++ b/lang/csharp/src/apache/test/Schema/AliasTest.cs @@ -73,9 +73,8 @@ public void TestAliases(string s, bool valid) // also tests properties, defaul Assert.IsTrue(json == json2); } - catch (Exception ex) + catch (Exception) { - Console.WriteLine(ex.Message); Assert.IsFalse(valid); } } diff --git a/lang/csharp/src/apache/test/Schema/AliasesTests.cs b/lang/csharp/src/apache/test/Schema/AliasesTests.cs new file mode 100644 index 00000000000..27ad4b23efd --- /dev/null +++ b/lang/csharp/src/apache/test/Schema/AliasesTests.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using NUnit.Framework; + +namespace Avro.Test +{ + [TestFixture] + public class AliasesTests + { + [TestCase] + public void TestNoNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", null)); + } + + [TestCase] + public void TestTypeWithNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "name", "space")); + } + + [TestCase] + public void TestTypeWithNamespaceInName() + { + CollectionAssert.AreEqual(new[] { new SchemaName("space.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "alias" }, "space.name", null)); + } + + [TestCase] + public void TestAliasWithNamespace() + { + CollectionAssert.AreEqual(new[] { new SchemaName("name.alias", null, null, null) }, Aliases.GetSchemaNames(new[] { "name.alias" }, "space.name", null)); + } + } +} diff --git a/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs b/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs new file mode 100644 index 00000000000..7b8b7d3139c --- /dev/null +++ b/lang/csharp/src/apache/test/Schema/ArraySchemaTests.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using NUnit.Framework; + +namespace Avro.test +{ + [TestFixture] + public class ArraySchemaTests + { + [Test] + public void EqualsNullCheck() + { + string schemaString = "{\"type\": \"array\", \"items\": \"long\"}"; + ArraySchema nullSchema = null; + + Schema schema = Schema.Parse(schemaString); + + if (schema is ArraySchema arraySchema) + { + Assert.False(arraySchema.Equals(nullSchema)); + } + else + { + Assert.Fail("Must be an array schema"); + } + } + + [Test] + public void EqualsNotArraySchema() + { + string schemaString = "[\"string\", \"null\", \"long\"]"; + string arraySchemaString = "{\"type\": \"array\", \"items\": \"long\"}"; + ArraySchema arraySchema = Schema.Parse(arraySchemaString) as ArraySchema; + Schema schema = Schema.Parse(schemaString); + + Assert.False(arraySchema.Equals(schema)); + } + } +} diff --git a/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs b/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs index 1e670677a48..c6296395153 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaNormalizationTests.cs @@ -32,6 +32,14 @@ public class SchemaNormalizationTests private static readonly long One = -9223372036854775808; private static readonly byte[] 
Postfix = { 0, 0, 0, 0, 0, 0, 0, 0 }; + [Test] + public void TestLogicalType() + { + var schema = @"[""int"", {""type"": ""string"", ""logicalType"": ""uuid""}]"; + string pcf = SchemaNormalization.ToParsingForm(Schema.Parse(schema)); + Assert.AreEqual(@"[""int"",""string""]", pcf); + } + [Test, TestCaseSource("ProvideCanonicalTestCases")] public void CanonicalTest(string input, string expectedOutput) { diff --git a/lang/csharp/src/apache/test/Schema/SchemaTests.cs b/lang/csharp/src/apache/test/Schema/SchemaTests.cs index 93a86a9734a..319e9a95be3 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaTests.cs @@ -17,9 +17,8 @@ */ using System; using System.Collections.Generic; -using System.Text; using NUnit.Framework; -using Avro; +using System.Linq; namespace Avro.Test { @@ -89,6 +88,7 @@ public class SchemaTests // Array [TestCase("{\"type\": \"array\", \"items\": \"long\"}")] [TestCase("{\"type\": \"array\",\"items\": {\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}}")] + [TestCase("{\"type\": \"array\"}", typeof(AvroTypeException), Description = "No Items")] // Map [TestCase("{\"type\": \"map\", \"values\": \"long\"}")] @@ -140,8 +140,9 @@ public void TestBasic(string s, Type expectedExceptionType = null) public void TestPrimitive(string s, Schema.Type type) { Schema sc = Schema.Parse(s); - Assert.IsTrue(sc is PrimitiveSchema); - Assert.AreEqual(type, sc.Tag); + Schema schema = PrimitiveSchema.Create(type, null); + + Assert.AreEqual(sc, schema); testEquality(s, sc); testToString(sc); @@ -167,6 +168,67 @@ private static void testToString(Schema sc) } } + private static void testToString(Schema sc, string schema) + { + try + { + //remove any excess spaces in the JSON to normalize the match with toString + schema = schema.Replace("{ ", "{") + .Replace("} ", "}") + .Replace("\" ", "\"") + .Replace(", ", ",") + .Replace(": ", ":"); + Assert.AreEqual(sc.ToString(), schema); + } + catch 
(Exception e) + { + throw new AvroException($"{e} : {sc}", e); + } + } + + [TestCase("{ \"type\": \"null\", \"metafield\": \"abc\" }", Schema.Type.Null)] + [TestCase("{ \"type\": \"boolean\", \"metafield\": \"abc\" }", Schema.Type.Boolean)] + [TestCase("{ \"type\": \"int\", \"metafield\": \"abc\" }", Schema.Type.Int)] + [TestCase("{ \"type\": \"long\", \"metafield\": \"abc\" }", Schema.Type.Long)] + [TestCase("{ \"type\": \"float\", \"metafield\": \"abc\" }", Schema.Type.Float)] + [TestCase("{ \"type\": \"double\", \"metafield\": \"abc\" }", Schema.Type.Double)] + [TestCase("{ \"type\": \"bytes\", \"metafield\": \"abc\" }", Schema.Type.Bytes)] + [TestCase("{ \"type\": \"string\", \"metafield\": \"abc\" }", Schema.Type.String)] + public void TestPrimitiveWithMetadata(string rawSchema, Schema.Type type) + { + Schema definedSchema = Schema.Parse(rawSchema); + Assert.IsTrue(definedSchema is PrimitiveSchema); + Assert.AreEqual(type.ToString().ToLower(), definedSchema.Name); + Assert.AreEqual(type, definedSchema.Tag); + + testEquality(rawSchema, definedSchema); + testToString(definedSchema); + + Assert.True(definedSchema.ToString().Contains("metafield")); + + var rawRecordSchema = "{\"type\":\"record\",\"name\":\"Foo\"," + + "\"fields\":[{\"name\":\"f1\",\"type\":" + rawSchema + + "}]}"; + Schema baseRecordSchema = Schema.Parse(rawRecordSchema); + Assert.AreEqual(Schema.Type.Record, baseRecordSchema.Tag); + RecordSchema recordSchema = baseRecordSchema as RecordSchema; + Assert.IsNotNull(recordSchema); + Assert.AreEqual(1, recordSchema.Count); + + Assert.IsTrue(recordSchema["f1"].Schema is PrimitiveSchema); + Assert.AreEqual(type.ToString().ToLower(), recordSchema["f1"].Schema.Name); + Assert.AreEqual(type, recordSchema["f1"].Schema.Tag); + + testEquality(rawRecordSchema, baseRecordSchema); + testToString(recordSchema["f1"].Schema); + + Assert.True(baseRecordSchema.ToString().Contains("metafield")); + 
Assert.True(recordSchema["f1"].Schema.ToString().Contains("metafield")); + + Assert.True(definedSchema.Equals(recordSchema["f1"].Schema)); + Assert.AreEqual(definedSchema.GetHashCode(), recordSchema["f1"].Schema.GetHashCode()); + } + [TestCase("{\"type\":\"record\",\"name\":\"LongList\"," + "\"fields\":[{\"name\":\"f1\",\"type\":\"long\"}," + "{\"name\":\"f2\",\"type\": \"int\"}]}", @@ -229,23 +291,164 @@ public void TestRecordDoc(string s, string expectedDoc) Assert.AreEqual(expectedDoc, roundTrip.Documentation); } - [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}", + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", null, null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"fields\":[{\"name\":\"value\",\"doc\":\"Field With Documentation\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", null, null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { "Field With Documentation" })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"namespace\":\"space\",\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", "space", null, null, + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + [TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"doc\":\"Record with alias\",\"namespace\":\"space\",\"aliases\":[\"space.RecordAlias\"],\"fields\":[{\"name\":\"value\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]}]}", + "Longs", "space", "RecordAlias", "Record with alias", + new[] { "value" }, new[] { Schema.Type.Long }, new[] { "100" }, new[] { "oldName" }, new string[] { null })] + 
[TestCase("{\"type\":\"record\",\"name\":\"Longs\",\"doc\":\"Record with two fields\",\"namespace\":\"space\",\"aliases\":[\"space.RecordAlias\"],\"fields\":[{\"name\":\"value\",\"doc\":\"first field\",\"default\":\"100\",\"type\":\"long\",\"aliases\":[\"oldName\"]},{\"name\":\"field2\",\"default\":\"true\",\"type\":\"boolean\"}]}", + "Longs", "space", "RecordAlias", "Record with two fields", + new[] { "value", "field2" }, new[] { Schema.Type.Long, Schema.Type.Boolean }, new[] { "100", "true" }, new[] { "oldName", null }, new string[] { "first field", null })] + public void TestRecordCreation(string expectedSchema, string name, string space, string alias, string documentation, string[] fieldsNames, Schema.Type[] fieldsTypes, object[] fieldsDefaultValues, string[] fieldsAliases, string[] fieldsDocs) + { + IEnumerable recordFields = fieldsNames.Select((fieldName, i) => new Field(PrimitiveSchema.Create(fieldsTypes[i]), + fieldName, + fieldsAliases[i] == null? null: new List { fieldsAliases[i] }, + i, + fieldsDocs[i], + fieldsDefaultValues[i].ToString(), + Field.SortOrder.ignore, + null)); + + string[] aliases = alias == null ? null : new[] { alias }; + + RecordSchema recordSchema = RecordSchema.Create(name, recordFields.ToList(), space, aliases, null, documentation); + + for(int i = 0; i < fieldsNames.Length; i++) + { + var fieldByName = recordSchema[fieldsNames[i]]; + if (fieldsAliases[i] != null) + { + recordSchema.TryGetFieldAlias(fieldsAliases[i], out Field fieldByAlias); + + Assert.AreSame(fieldByAlias, fieldByName); + } + Assert.AreEqual(expectedSchema, recordSchema.ToString()); + Assert.AreEqual(fieldsNames[i], fieldByName.Name); + Assert.AreEqual(i, fieldByName.Pos); + Assert.AreEqual(fieldsTypes[i], fieldByName.Schema.Tag); + Assert.AreEqual(fieldsDocs[i], fieldByName.Documentation); + Assert.AreEqual(fieldsDefaultValues[i], fieldByName.DefaultValue.ToString()); + CollectionAssert.AreEqual(fieldsAliases[i] == null? 
null: new[] {fieldsAliases[i]}, fieldByName.Aliases); + } + } + + [TestCase] + public void TestRecordCreationWithDuplicateFields() + { + var recordField = new Field(PrimitiveSchema.Create(Schema.Type.Long), + "value", + new List { "oldName" }, + 0, + null, + "100", + Field.SortOrder.ignore, + null); + + Assert.Throws(() => RecordSchema.Create("Longs", + new List + { + recordField, + recordField + })); + } + + [TestCase] + public void TestRecordFieldNames() { + var fields = new List + { + new Field(PrimitiveSchema.Create(Schema.Type.Long), + "æ­ŗäģĨ上", + null, + 0, + null, + null, + Field.SortOrder.ignore, + null) + }; + var recordSchema = RecordSchema.Create("LongList", fields, null, new[] { "LinkedLongs" }); + + Field f = recordSchema.Fields[0]; + Assert.AreEqual("æ­ŗäģĨ上", f.Name); + } + + [TestCase] + public void TestRecordCreationWithRecursiveRecord() + { + string schema = "{\"type\":\"record\",\"name\":\"LongList\",\"aliases\":[\"LinkedLongs\"],\"fields\":[{\"name\":\"value\",\"type\":\"long\"},{\"name\":\"next\",\"type\":[\"null\",\"LongList\"]}]}"; + + var recordSchema = RecordSchema.Create("LongList", new List(), null, new[] { "LinkedLongs" }); + + recordSchema.Fields = new List + { + new Field(PrimitiveSchema.Create(Schema.Type.Long), + "value", + null, + 0, + null, + null, + Field.SortOrder.ignore, + null), + new Field(UnionSchema.Create( + new List + { + PrimitiveSchema.Create(Schema.Type.Null), recordSchema + }), + "next", + 1) + }; + + Assert.AreEqual(schema, recordSchema.ToString()); + } + + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}", + new string[] { "A", "B" })] + + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}", new string[] { "A", "B" })] - public void TestEnum(string s, string[] symbols) + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some 
explanation\",\"namespace\":\"mynamespace\",\"aliases\":[\"mynamespace.Alias\"],\"symbols\":[\"UNKNOWN\",\"A\",\"B\"],\"default\":\"UNKNOWN\",\"propertyKey\":\"propertyValue\"}", + new string[] { "UNKNOWN", "A", "B" }, "mynamespace", new string[] { "Alias" }, "Some explanation", true, "UNKNOWN")] + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some explanation\",\"namespace\":\"space\",\"aliases\":[\"internalNamespace.Alias\"],\"symbols\":[\"UNKNOWN\",\"A\",\"B\"]}", + new string[] { "UNKNOWN", "A", "B" }, "space", new string[] { "internalNamespace.Alias" }, "Some explanation")] + [TestCase("{\"type\":\"enum\",\"name\":\"Test\",\"doc\":\"Some explanation\",\"namespace\":\"space\",\"aliases\":[\"internalNamespace.Alias\"],\"symbols\":[]}", + new string[] { }, "space", new string[] { "internalNamespace.Alias" }, "Some explanation")] + + public void TestEnum(string s, string[] symbols, string space = null, IEnumerable aliases = null, string doc = null, bool? usePropertyMap = null, string defaultSymbol = null) { Schema sc = Schema.Parse(s); + + PropertyMap propertyMap = new PropertyMap(); + propertyMap.Add("propertyKey", "\"propertyValue\""); + Schema schema = EnumSchema.Create("Test", + symbols, + space, + aliases, + usePropertyMap == true ? 
propertyMap : null, + doc, + defaultSymbol); + + Assert.AreEqual(sc, schema); + Assert.AreEqual(s, schema.ToString()); + Assert.AreEqual(Schema.Type.Enumeration, sc.Tag); EnumSchema es = sc as EnumSchema; Assert.AreEqual(symbols.Length, es.Count); int i = 0; - foreach (String str in es) + foreach (string str in es) { Assert.AreEqual(symbols[i++], str); } testEquality(s, sc); - testToString(sc); + testToString(sc, s); } [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}", null)] @@ -272,18 +475,66 @@ public void TestEnumDefaultSymbolDoesntExist(string s) Assert.Throws(() => Schema.Parse(s)); } + [TestCase("name", new string[] { "A", "B" }, "s", new[] { "L1", "L2" }, "regular enum", null, "name", "s")] + [TestCase("s.name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "internal namespace", null, "name", "s")] + [TestCase("name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "no namespace", null, "name", null)] + [TestCase("name", new string[] { "A", "B" }, null, new[] { "L1", "L2" }, "with default value", "A", "name", null)] + [TestCase("name", new string[] { "A1B2", "B4324" }, null, new[] { "L1", "L2" }, "with longer symbols", "B4324", "name", null)] + [TestCase("name", new string[] { "_A1B2_", "B4324" }, null, new[] { "L1", "L2" }, "underscore in symbols", "_A1B2_", "name", null)] + public void TestEnumCreation(string name, string[] symbols, string space, string[] aliases, string doc, string defaultSymbol, string expectedName, string expectedNamespace) + { + EnumSchema enumSchema = EnumSchema.Create(name, symbols, space, aliases, null, doc, defaultSymbol); + + Assert.AreEqual(expectedName, enumSchema.Name); + CollectionAssert.AreEqual(symbols, enumSchema.Symbols); + Assert.AreEqual(expectedNamespace, enumSchema.Namespace); + Assert.AreEqual(Schema.Type.Enumeration, enumSchema.Tag); + Assert.AreEqual(doc, enumSchema.Documentation); + Assert.AreEqual(defaultSymbol, enumSchema.Default); + } + + [TestCase(new[] {"A", 
"B"}, "C")] + [TestCase(new[] {null, "B"}, null)] + [TestCase(new[] {"", "B" }, null)] + [TestCase(new[] {"8", "B" }, null)] + [TestCase(new[] {"8", "B" }, null)] + [TestCase(new[] {"A", "A" }, null)] + [TestCase(new[] {" ", "A" }, null)] + [TestCase(new[] {"9A23", "A" }, null)] + public void TestEnumInvalidSymbols(string[] symbols, string defaultSymbol) + { + Assert.Throws(() => EnumSchema.Create("name", symbols, defaultSymbol: defaultSymbol)); + } + [TestCase("{\"type\": \"array\", \"items\": \"long\"}", "long")] public void TestArray(string s, string item) { Schema sc = Schema.Parse(s); Assert.AreEqual(Schema.Type.Array, sc.Tag); - ArraySchema ars = sc as ArraySchema; + ArraySchema ars = (ArraySchema)sc; Assert.AreEqual(item, ars.ItemSchema.Name); testEquality(s, sc); testToString(sc); } + [TestCase] + public void TestArrayCreation() + { + PrimitiveSchema itemsSchema = PrimitiveSchema.Create(Schema.Type.String); + ArraySchema arraySchema = ArraySchema.Create(itemsSchema); + + Assert.AreEqual("array", arraySchema.Name); + Assert.AreEqual(Schema.Type.Array, arraySchema.Tag); + Assert.AreEqual(itemsSchema, arraySchema.ItemSchema); + } + + [TestCase] + public void TestInvalidArrayCreation() + { + Assert.Throws(() => ArraySchema.Create(null)); + } + [TestCase("{\"type\": \"int\", \"logicalType\": \"date\"}", "int", "date")] public void TestLogicalPrimitive(string s, string baseType, string logicalType) { @@ -310,33 +561,82 @@ public void TestMap(string s, string value) { Schema sc = Schema.Parse(s); Assert.AreEqual(Schema.Type.Map, sc.Tag); - MapSchema ms = sc as MapSchema; + MapSchema ms = (MapSchema)sc; Assert.AreEqual(value, ms.ValueSchema.Name); testEquality(s, sc); testToString(sc); } - [TestCase("[\"string\", \"null\", \"long\"]", new string[] { "string", "null", "long" })] - public void TestUnion(string s, string[] types) + [TestCase] + public void TestMapCreation() + { + PrimitiveSchema mapType = PrimitiveSchema.Create(Schema.Type.Float); + MapSchema 
mapSchema = MapSchema.CreateMap(mapType); + + Assert.AreEqual("map", mapSchema.Fullname); + Assert.AreEqual("map", mapSchema.Name); + Assert.AreEqual(Schema.Type.Map, mapSchema.Tag); + Assert.AreEqual(mapType, mapSchema.ValueSchema); + } + + [TestCase] + public void TestInvalidMapCreation() + { + Assert.Throws(() => MapSchema.CreateMap(null)); + } + + [TestCase("[\"string\", \"null\", \"long\"]", + new Schema.Type[] { Schema.Type.String, Schema.Type.Null, Schema.Type.Long })] + public void TestUnion(string s, Schema.Type[] types) { Schema sc = Schema.Parse(s); + + UnionSchema schema = UnionSchema.Create(types.Select(t => (Schema)PrimitiveSchema.Create(t)).ToList()); + Assert.AreEqual(sc, schema); + Assert.AreEqual(Schema.Type.Union, sc.Tag); - UnionSchema us = sc as UnionSchema; + UnionSchema us = (UnionSchema)sc; Assert.AreEqual(types.Length, us.Count); for (int i = 0; i < us.Count; i++) { - Assert.AreEqual(types[i], us[i].Name); + Assert.AreEqual(types[i].ToString().ToLower(), us[i].Name); } testEquality(s, sc); testToString(sc); } - [TestCase("{ \"type\": \"fixed\", \"name\": \"Test\", \"size\": 1}", 1)] + [TestCase] + public void TestUnionCreation() + { + UnionSchema unionSchema = UnionSchema.Create(new List { PrimitiveSchema.Create(Schema.Type.Null), PrimitiveSchema.Create(Schema.Type.String) }); + + CollectionAssert.AreEqual(new List { PrimitiveSchema.Create(Schema.Type.Null), PrimitiveSchema.Create(Schema.Type.String) }, + unionSchema.Schemas); + } + + [TestCase] + public void TestUnionCreationWithDuplicateSchemas() + { + Assert.Throws(() => UnionSchema.Create(new List { PrimitiveSchema.Create(Schema.Type.String), PrimitiveSchema.Create(Schema.Type.String) })); + } + + [TestCase] + public void TestUnionNestedUnionCreation() + { + Assert.Throws(() => UnionSchema.Create(new List { UnionSchema.Create(new List()), PrimitiveSchema.Create(Schema.Type.String) })); + } + + [TestCase("{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}", 1)] public void 
TestFixed(string s, int size) { Schema sc = Schema.Parse(s); + FixedSchema schema = FixedSchema.Create("Test", 1); + + Assert.AreEqual(sc, schema); + Assert.AreEqual(s, schema.ToString()); + Assert.AreEqual(Schema.Type.Fixed, sc.Tag); FixedSchema fs = sc as FixedSchema; Assert.AreEqual(size, fs.Size); @@ -354,6 +654,19 @@ public void TestFixedDoc(string s, string expectedDoc) Assert.AreEqual(expectedDoc, fs.Documentation); } + [TestCase] + public void TestFixedCreation() + { + string s = @"{""type"":""fixed"",""name"":""fixedName"",""namespace"":""space"",""aliases"":[""space.fixedOldName""],""size"":10}"; + + FixedSchema fixedSchema = FixedSchema.Create("fixedName", 10, "space", new[] { "fixedOldName" }, null); + + Assert.AreEqual("fixedName", fixedSchema.Name); + Assert.AreEqual("space.fixedName", fixedSchema.Fullname); + Assert.AreEqual(10, fixedSchema.Size); + Assert.AreEqual(s, fixedSchema.ToString()); + } + [TestCase("a", "o.a.h", ExpectedResult = "o.a.h.a")] public string testFullname(string s1, string s2) { @@ -376,5 +689,43 @@ public void TestUnionSchemaWithoutTypeProperty(string schemaJson, string expecte var schema = Schema.Parse(schemaJson); Assert.AreEqual(schema.ToString(), expectedSchemaJson); } + + [TestFixture] + public class SchemaTypeExtensionsTests + { + [TestCase("null", Schema.Type.Null)] + [TestCase("boolean", Schema.Type.Boolean)] + [TestCase("int", Schema.Type.Int)] + [TestCase("long", Schema.Type.Long)] + [TestCase("float", Schema.Type.Float)] + [TestCase("double", Schema.Type.Double)] + [TestCase("bytes", Schema.Type.Bytes)] + [TestCase("string", Schema.Type.String)] + [TestCase("record", Schema.Type.Record)] + [TestCase("enumeration", Schema.Type.Enumeration)] + [TestCase("array", Schema.Type.Array)] + [TestCase("map", Schema.Type.Map)] + [TestCase("union", Schema.Type.Union)] + [TestCase("fixed", Schema.Type.Fixed)] + [TestCase("error", Schema.Type.Error)] + [TestCase("logical", Schema.Type.Logical)] + [TestCase("Logical", null)] + 
[TestCase("InvalidValue", null)] + [TestCase("\"null\"", null)] + [TestCase("", null)] + [TestCase(null, null)] + public void ParseTypeTest(string value, object expectedResult) + { + Assert.AreEqual(Schema.ParseType(value), expectedResult); + } + + [TestCase("\"null\"", Schema.Type.Null)] + [TestCase("\"nu\"ll\"", null)] + [TestCase("\"\"", null)] + public void ParseTypeRemoveQuotesTest(string value, object expectedResult) + { + Assert.AreEqual(Schema.ParseType(value, true), expectedResult); + } + } } } diff --git a/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs b/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs new file mode 100644 index 00000000000..97b94be7eed --- /dev/null +++ b/lang/csharp/src/apache/test/Specific/DoubleLongUnionRecord.cs @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace Avro.Test.Specific +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class DoubleLongUnionRecord : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"DoubleLongUnionRecord\",\"namespace\":\"Avro.Test.Specific\",\"fields\":[{\"name" + + "\":\"Property\",\"type\":[\"double\",\"long\"]}]}"); + private object _Property; + public virtual Schema Schema + { + get + { + return DoubleLongUnionRecord._SCHEMA; + } + } + public object Property + { + get + { + return this._Property; + } + set + { + this._Property = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.Property; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.Property = (System.Object)fieldValue; break; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs b/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs new file mode 100644 index 00000000000..585032e9bce --- /dev/null +++ b/lang/csharp/src/apache/test/Specific/RecordWithOptionalLogicalType.cs @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// ------------------------------------------------------------------------------ +// +// Generated by avrogen, version 1.11.0.0 +// Changes to this file may cause incorrect behavior and will be lost if code +// is regenerated +// +// ------------------------------------------------------------------------------ +namespace Avro.Test.Specific.@return +{ + using System; + using System.Collections.Generic; + using System.Text; + using Avro; + using Avro.Specific; + + public partial class RecordWithOptionalLogicalType : ISpecificRecord + { + public static Schema _SCHEMA = Avro.Schema.Parse("{\"type\":\"record\",\"name\":\"RecordWithOptionalLogicalType\",\"namespace\":\"Avro.Test.Sp" + + "ecific.return\",\"fields\":[{\"name\":\"x\",\"default\":10,\"type\":{\"type\":\"int\",\"logicalT" + + "ype\":\"date\"}}]}"); + private System.DateTime _x; + public virtual Schema Schema + { + get + { + return RecordWithOptionalLogicalType._SCHEMA; + } + } + public System.DateTime x + { + get + { + return this._x; + } + set + { + this._x = value; + } + } + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.x; + default: throw new AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + }; + } + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: this.x = (System.DateTime)fieldValue; break; + default: throw new 
AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + }; + } + } +} diff --git a/lang/csharp/src/apache/test/Specific/SpecificTests.cs b/lang/csharp/src/apache/test/Specific/SpecificTests.cs index 168c4d0bba5..1aa3c3a03ae 100644 --- a/lang/csharp/src/apache/test/Specific/SpecificTests.cs +++ b/lang/csharp/src/apache/test/Specific/SpecificTests.cs @@ -16,6 +16,7 @@ * limitations under the License. */ +using System; using System.Collections; using System.IO; using NUnit.Framework; @@ -23,8 +24,16 @@ using Avro.Specific; using Avro.Test.Specific; using System.Collections.Generic; +using Avro.Generic; +using Avro.Test.Generic; using Avro.Test.Specific.@return; +#if !NETCOREAPP +using System.CodeDom; +using System.CodeDom.Compiler; +using System.Reflection; +#endif + namespace Avro.Test { [TestFixture] @@ -263,6 +272,38 @@ public void TestEnumDefault() Assert.AreEqual(EnumType.DEFAULT, rec2.enumType); } + [TestCase(0L)] + [TestCase(100L)] + [TestCase(-100L)] + [TestCase(0.0)] + [TestCase(100.0)] + [TestCase(-100.0)] + public void TestDoubleLongUnion(object value) + { + var testRecord = new DoubleLongUnionRecord(); + testRecord.Property = value; + + // serialize + var stream = serialize(DoubleLongUnionRecord._SCHEMA, testRecord); + + // deserialize + var rec2 = deserialize(stream, DoubleLongUnionRecord._SCHEMA, DoubleLongUnionRecord._SCHEMA); + Assert.AreEqual(value, rec2.Property); + Assert.AreEqual(value.GetType(), rec2.Property.GetType()); + } + + [TestCase(0)] + [TestCase(100)] + [TestCase(-100)] + [TestCase(0.0f)] + [TestCase(100.0f)] + [TestCase(-100.0f)] + [TestCase("0")] + [TestCase("100")] + public void TestDoubleLongUnionNoMatchException(object value) + { + Assert.Throws(() => serialize(DoubleLongUnionRecord._SCHEMA, new DoubleLongUnionRecord() { Property = value })); + } [Test] public void TestArrayWithReservedWords() @@ -433,6 +474,43 @@ public void TestEmbeddedGenerics() Assert.AreEqual(0, dstRecord.UserMatrix[2].Count); } + private static void 
serializeGeneric(string writerSchema, T actual, out Stream stream, out Schema ws) + { + var ms = new MemoryStream(); + Encoder e = new BinaryEncoder(ms); + ws = Schema.Parse(writerSchema); + GenericWriter w = new GenericWriter(ws); + w.Write(actual, e); + ms.Flush(); + ms.Position = 0; + stream = ms; + } + + [Test] + public void DeserializeToLogicalTypeWithDefault() + { + var writerSchemaString = @"{ + ""type"": ""record"", + ""name"": ""RecordWithOptionalLogicalType"", + ""namespace"": ""Avro.Test.Specific.return"", + ""fields"": [ + ]}"; + + var writerSchema = Schema.Parse(writerSchemaString); + + Stream stream; + + serializeGeneric(writerSchemaString, + GenericTests.MkRecord(new object[] { }, (RecordSchema)writerSchema), + out stream, + out _); + + RecordWithOptionalLogicalType output = deserialize(stream, writerSchema, RecordWithOptionalLogicalType._SCHEMA); + + Assert.AreEqual(output.x, new DateTime(1970, 1, 11)); + + } + private static S deserialize(Stream ms, Schema ws, Schema rs) where S : class, ISpecificRecord { long initialPos = ms.Position; @@ -526,7 +604,7 @@ private static void AssertSpecificRecordEqual(ISpecificRecord rec1, ISpecificRec } /// - /// Asserts that two lists are equal, delegating the work of comapring + /// Asserts that two lists are equal, delegating the work of comparing /// entries to /// . 
/// diff --git a/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs b/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs index e4d6b052af9..0129b2a5b45 100644 --- a/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs +++ b/lang/csharp/src/apache/test/Util/LogicalTypeTests.cs @@ -18,6 +18,7 @@ using System; using System.Globalization; +using System.Numerics; using Avro.Util; using NUnit.Framework; @@ -26,28 +27,95 @@ namespace Avro.Test [TestFixture] class LogicalTypeTests { - [TestCase("1234.56")] - [TestCase("-1234.56")] - [TestCase("123456789123456789.56")] - [TestCase("-123456789123456789.56")] - [TestCase("000000000000000001.01")] - [TestCase("-000000000000000001.01")] - public void TestDecimal(string s) + [TestCase("0", 0, new byte[] { 0 })] + [TestCase("1.01", 2, new byte[] { 101 })] + [TestCase("123456789123456789.56", 2, new byte[] { 0, 171, 84, 169, 143, 129, 101, 36, 108 })] + [TestCase("1234", 0, new byte[] { 4, 210 })] + [TestCase("1234.5", 1, new byte[] { 48, 57 })] + [TestCase("1234.56", 2, new byte[] { 1, 226, 64 })] + [TestCase("-0", 0, new byte[] { 0 })] + [TestCase("-1.01", 2, new byte[] { 155 })] + [TestCase("-123456789123456789.56", 2, new byte[] { 255, 84, 171, 86, 112, 126, 154, 219, 148 })] + [TestCase("-1234", 0, new byte[] { 251, 46 })] + [TestCase("-1234.5", 1, new byte[] { 207, 199 })] + [TestCase("-1234.56", 2, new byte[] { 254, 29, 192 })] + // This test ensures that changes to Decimal.ConvertToBaseValue and ConvertToLogicalValue can be validated (bytes) + public void TestDecimalConvert(string s, int scale, byte[] converted) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }"); + var schema = (LogicalSchema)Schema.Parse(@$"{{""type"": ""bytes"", ""logicalType"": ""decimal"", ""precision"": 4, ""scale"": {scale}}}"); var avroDecimal = new Avro.Util.Decimal(); - var decimalVal = (AvroDecimal)decimal.Parse(s); + // CultureInfo.InvariantCulture ensures 
that "." is always accepted as the decimal point + var decimalVal = (AvroDecimal)decimal.Parse(s, CultureInfo.InvariantCulture); + + // TestDecimal tests ConvertToLogicalValue(ConvertToBaseValue(...)) which might hide symmetrical breaking changes in both functions + // The following 2 tests are checking the conversions separately + + // Validate Decimal.ConvertToBaseValue + Assert.AreEqual(converted, avroDecimal.ConvertToBaseValue(decimalVal, schema)); + + // Validate Decimal.ConvertToLogicalValue + Assert.AreEqual(decimalVal, (AvroDecimal)avroDecimal.ConvertToLogicalValue(converted, schema)); + } + + [Test] + public void TestDecimal( + [Values( + "1234.56", + "-1234.56", + "123456789123456789.56", + "-123456789123456789.56", + "000000000000000001.01", + "-000000000000000001.01" + )] string s, + [Values( + "\"bytes\"", + "{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}" + )] string baseType) + { + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }}"); + + var avroDecimal = new Avro.Util.Decimal(); + // CultureInfo.InvariantCulture ensures that "." 
is always accepted as the decimal point + var decimalVal = (AvroDecimal)decimal.Parse(s, CultureInfo.InvariantCulture); var convertedDecimalVal = (AvroDecimal)avroDecimal.ConvertToLogicalValue(avroDecimal.ConvertToBaseValue(decimalVal, schema), schema); Assert.AreEqual(decimalVal, convertedDecimalVal); } - [TestCase] - public void TestDecimalMinMax() + [Test] + public void TestDecimalScale( + [Values( + "0", + "1", + "-1", + "1234567891234567890123456789", + "-1234567891234567890123456789", + "0000000000000000000000000001", + "-0000000000000000000000000001" + )] string s, + [Values(1, 2, 3, 4, 5, 6, 7, 8)] int scale, + [Values( + "\"bytes\"", + "{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}" + )] string baseType) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 0 }"); + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 8, \"scale\": {scale} }}"); + + var avroDecimal = new Avro.Util.Decimal(); + var decimalVal = new AvroDecimal(BigInteger.Parse(s), scale); + + var convertedDecimalVal = (AvroDecimal)avroDecimal.ConvertToLogicalValue(avroDecimal.ConvertToBaseValue(decimalVal, schema), schema); + + Assert.AreEqual(decimalVal, convertedDecimalVal); + } + + [TestCase("\"bytes\"")] + [TestCase("{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}")] + public void TestDecimalMinMax(string baseType) + { + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 0 }}"); var avroDecimal = new Avro.Util.Decimal(); @@ -59,10 +127,11 @@ public void TestDecimalMinMax() } } - [TestCase] - public void TestDecimalOutOfRangeException() + [TestCase("\"bytes\"")] + [TestCase("{\"type\": \"fixed\", \"size\": 16, \"name\": \"n\"}")] + public void TestDecimalOutOfRangeException(string baseType) { - var schema = (LogicalSchema)Schema.Parse("{\"type\": \"bytes\", 
\"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }"); + var schema = (LogicalSchema)Schema.Parse($"{{\"type\": {baseType}, \"logicalType\": \"decimal\", \"precision\": 4, \"scale\": 2 }}"); var avroDecimal = new Avro.Util.Decimal(); var decimalVal = (AvroDecimal)1234.567M; // scale of 3 should throw ArgumentOutOfRangeException @@ -75,6 +144,14 @@ public void TestDecimalOutOfRangeException() [TestCase("05/05/2019 00:00:00Z")] [TestCase("05/05/2019 01:00:00Z")] [TestCase("05/05/2019 01:00:00+01:00")] + [TestCase("05/05/2019 01:00:00.1Z")] + [TestCase("05/05/2019 01:00:00.01Z")] + [TestCase("05/05/2019 01:00:00.001Z")] + [TestCase("05/05/2019 01:00:00.0001Z")] + [TestCase("05/05/2019 01:00:00.00001Z")] + [TestCase("05/05/2019 01:00:00.000001Z")] + [TestCase("05/05/2019 01:00:00.0000001Z")] + [TestCase("05/05/2019 01:00:00.00000001Z")] public void TestDate(string s) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"int\", \"logicalType\": \"date\"}"); @@ -100,6 +177,12 @@ public void TestDate(string s) [TestCase("05/05/2019 14:20:00+01:00", "05/05/2019 13:20:00Z")] [TestCase("05/05/2019 00:00:00Z", "05/05/2019 00:00:00Z")] [TestCase("05/05/2019 00:00:00+01:00", "05/04/2019 23:00:00Z")] // adjusted to UTC + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00Z")] + [TestCase("01/01/2019 14:20:00.0009Z", "01/01/2019 14:20:00Z")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0019Z", "01/01/2019 14:20:00.001Z")] // there is no rounding up public void TestTimestampMillisecond(string s, string e) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}"); @@ -116,6 +199,7 @@ public void TestTimestampMillisecond(string s, string e) var avroTimestampMilli = new TimestampMillisecond(); var 
convertedDate = (DateTime)avroTimestampMilli.ConvertToLogicalValue(avroTimestampMilli.ConvertToBaseValue(date, schema), schema); Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Utc, convertedDate.Kind); } [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] @@ -124,6 +208,15 @@ public void TestTimestampMillisecond(string s, string e) [TestCase("05/05/2019 14:20:00+01:00", "05/05/2019 13:20:00Z")] [TestCase("05/05/2019 00:00:00Z", "05/05/2019 00:00:00Z")] [TestCase("05/05/2019 00:00:00+01:00", "05/04/2019 23:00:00Z")] // adjusted to UTC + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00.0001Z")] + [TestCase("01/01/2019 14:20:00.00001Z", "01/01/2019 14:20:00.00001Z")] + [TestCase("01/01/2019 14:20:00.000001Z", "01/01/2019 14:20:00.000001Z")] + [TestCase("01/01/2019 14:20:00.0000001Z", "01/01/2019 14:20:00Z")] + [TestCase("01/01/2019 14:20:00.0000009Z", "01/01/2019 14:20:00Z")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0000019Z", "01/01/2019 14:20:00.000001Z")] // there is no rounding up public void TestTimestampMicrosecond(string s, string e) { var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"timestamp-micros\"}"); @@ -140,20 +233,117 @@ public void TestTimestampMicrosecond(string s, string e) var avroTimestampMicro = new TimestampMicrosecond(); var convertedDate = (DateTime)avroTimestampMicro.ConvertToLogicalValue(avroTimestampMicro.ConvertToBaseValue(date, schema), schema); Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Utc, convertedDate.Kind); + } + + [TestCase("01/01/2019 14:20:00", "01/01/2019 14:20:00")] + [TestCase("05/05/2019 14:20:00", "05/05/2019 14:20:00")] + [TestCase("05/05/2019 00:00:00", "05/05/2019 00:00:00")] + 
[TestCase("01/01/2019 14:20:00.1", "01/01/2019 14:20:00.1")] + [TestCase("01/01/2019 14:20:00.01", "01/01/2019 14:20:00.01")] + [TestCase("01/01/2019 14:20:00.001", "01/01/2019 14:20:00.001")] + [TestCase("01/01/2019 14:20:00.0001", "01/01/2019 14:20:00")] + [TestCase("01/01/2019 14:20:00.0009", "01/01/2019 14:20:00")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0019", "01/01/2019 14:20:00.001")] // there is no rounding up + [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] // UTC timestamps, but the check is done in the local TZ + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + public void TestLocalTimestampMillisecond(string s, string e) + { + var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"local-timestamp-millis\"}"); + + var date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (date.Kind != DateTimeKind.Utc) + { + date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + var expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (expectedDate.Kind != DateTimeKind.Utc) + { + expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + expectedDate = expectedDate.ToLocalTime(); + + var avroLocalTimestampMilli = new LocalTimestampMillisecond(); + var convertedDate = (DateTime)avroLocalTimestampMilli.ConvertToLogicalValue(avroLocalTimestampMilli.ConvertToBaseValue(date, schema), schema); + Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Local, convertedDate.Kind); + } + + [TestCase("01/01/2019 14:20:00", "01/01/2019 14:20:00")] + [TestCase("05/05/2019 14:20:00", "05/05/2019 14:20:00")] + 
[TestCase("05/05/2019 00:00:00", "05/05/2019 00:00:00")] + [TestCase("01/01/2019 14:20:00.1", "01/01/2019 14:20:00.1")] + [TestCase("01/01/2019 14:20:00.01", "01/01/2019 14:20:00.01")] + [TestCase("01/01/2019 14:20:00.001", "01/01/2019 14:20:00.001")] + [TestCase("01/01/2019 14:20:00.0001", "01/01/2019 14:20:00.0001")] + [TestCase("01/01/2019 14:20:00.00001", "01/01/2019 14:20:00.00001")] + [TestCase("01/01/2019 14:20:00.000001", "01/01/2019 14:20:00.000001")] + [TestCase("01/01/2019 14:20:00.0000001", "01/01/2019 14:20:00")] + [TestCase("01/01/2019 14:20:00.0000009", "01/01/2019 14:20:00")] // there is no rounding up + [TestCase("01/01/2019 14:20:00.0000019", "01/01/2019 14:20:00.000001")] // there is no rounding up + [TestCase("01/01/2019 14:20:00Z", "01/01/2019 14:20:00Z")] // UTC timestamps, but the check is done in the local TZ + [TestCase("01/01/2019 14:20:00.1Z", "01/01/2019 14:20:00.1Z")] + [TestCase("01/01/2019 14:20:00.01Z", "01/01/2019 14:20:00.01Z")] + [TestCase("01/01/2019 14:20:00.001Z", "01/01/2019 14:20:00.001Z")] + [TestCase("01/01/2019 14:20:00.0001Z", "01/01/2019 14:20:00.0001Z")] + [TestCase("01/01/2019 14:20:00.00001Z", "01/01/2019 14:20:00.00001Z")] + [TestCase("01/01/2019 14:20:00.000001Z", "01/01/2019 14:20:00.000001Z")] + public void TestLocalTimestampMicrosecond(string s, string e) + { + var schema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"local-timestamp-micros\"}"); + + var date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (date.Kind != DateTimeKind.Utc) + { + date = DateTime.Parse(s, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.AssumeLocal); + } + + var expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, DateTimeStyles.RoundtripKind); + + if (expectedDate.Kind != DateTimeKind.Utc) + { + expectedDate = DateTime.Parse(e, CultureInfo.GetCultureInfo("en-US").DateTimeFormat, 
DateTimeStyles.AssumeLocal); + } + + expectedDate = expectedDate.ToLocalTime(); + + var avroLocalTimestampMicro = new LocalTimestampMicrosecond(); + var convertedDate = (DateTime)avroLocalTimestampMicro.ConvertToLogicalValue(avroLocalTimestampMicro.ConvertToBaseValue(date, schema), schema); + Assert.AreEqual(expectedDate, convertedDate); + Assert.AreEqual(DateTimeKind.Local, convertedDate.Kind); } [TestCase("01:20:10", "01:20:10", false)] [TestCase("23:00:00", "23:00:00", false)] + [TestCase("23:59:00", "23:59:00", false)] + [TestCase("23:59:59", "23:59:59", false)] + [TestCase("01:20:10.1", "01:20:10.1", false)] + [TestCase("01:20:10.01", "01:20:10.01", false)] + [TestCase("01:20:10.001", "01:20:10.001", false)] + [TestCase("01:20:10.0001", "01:20:10", false)] + [TestCase("01:20:10.0009", "01:20:10", false)] // there is no rounding up + [TestCase("01:20:10.0019", "01:20:10.001", false)] // there is no rounding up + [TestCase("23:59:59.999", "23:59:59.999", false)] [TestCase("01:00:00:00", null, true)] - public void TestTime(string s, string e, bool expectRangeError) + [TestCase("-00:00:00.001", null, true)] + [TestCase("-00:00:00.000001", null, true)] + [TestCase("-00:00:00.0000001", null, true)] + [TestCase("-00:01", null, true)] + [TestCase("-999999.00:00:00", null, true)] + public void TestTimeMillisecond(string s, string e, bool expectRangeError) { var timeMilliSchema = (LogicalSchema)Schema.Parse("{\"type\": \"int\", \"logicalType\": \"time-millis\"}"); - var timeMicroSchema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"time-micros\"}"); var time = TimeSpan.Parse(s); - + var avroTimeMilli = new TimeMillisecond(); - var avroTimeMicro = new TimeMicrosecond(); if (expectRangeError) { @@ -161,10 +351,6 @@ public void TestTime(string s, string e, bool expectRangeError) { avroTimeMilli.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMilliSchema), timeMilliSchema); }); - Assert.Throws(() => - { - 
avroTimeMicro.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); - }); } else { @@ -172,8 +358,48 @@ public void TestTime(string s, string e, bool expectRangeError) var convertedTime = (TimeSpan)avroTimeMilli.ConvertToLogicalValue(avroTimeMilli.ConvertToBaseValue(time, timeMilliSchema), timeMilliSchema); Assert.AreEqual(expectedTime, convertedTime); + } + } + + [TestCase("01:20:10", "01:20:10", false)] + [TestCase("23:00:00", "23:00:00", false)] + [TestCase("23:59:00", "23:59:00", false)] + [TestCase("23:59:59", "23:59:59", false)] + [TestCase("01:20:10.1", "01:20:10.1", false)] + [TestCase("01:20:10.01", "01:20:10.01", false)] + [TestCase("01:20:10.001", "01:20:10.001", false)] + [TestCase("01:20:10.0001", "01:20:10.0001", false)] + [TestCase("01:20:10.00001", "01:20:10.00001", false)] + [TestCase("01:20:10.000001", "01:20:10.000001", false)] + [TestCase("01:20:10.0000001", "01:20:10", false)] + [TestCase("01:20:10.0000009", "01:20:10", false)] + [TestCase("23:59:59.999999", "23:59:59.999999", false)] + [TestCase("01:00:00:00", null, true)] + [TestCase("-00:00:00.001", null, true)] + [TestCase("-00:00:00.000001", null, true)] + [TestCase("-00:00:00.0000001", null, true)] + [TestCase("-00:01", null, true)] + [TestCase("-999999.00:00:00", null, true)] + public void TestTimeMicrosecond(string s, string e, bool expectRangeError) + { + var timeMicroSchema = (LogicalSchema)Schema.Parse("{\"type\": \"long\", \"logicalType\": \"time-micros\"}"); + + var time = TimeSpan.Parse(s); + + var avroTimeMicro = new TimeMicrosecond(); + + if (expectRangeError) + { + Assert.Throws(() => + { + avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); + }); + } + else + { + var expectedTime = TimeSpan.Parse(e); - convertedTime = (TimeSpan)avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); + var convertedTime = 
(TimeSpan)avroTimeMicro.ConvertToLogicalValue(avroTimeMicro.ConvertToBaseValue(time, timeMicroSchema), timeMicroSchema); Assert.AreEqual(expectedTime, convertedTime); } diff --git a/lang/csharp/src/apache/test/Utils/VersionTests.cs b/lang/csharp/src/apache/test/Utils/VersionTests.cs new file mode 100644 index 00000000000..20d7ed2f421 --- /dev/null +++ b/lang/csharp/src/apache/test/Utils/VersionTests.cs @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Reflection; +using NUnit.Framework; + +namespace Avro.Test.Utils +{ + public class VersionTests + { + // SemVer2.0 Regular Expression + public static string SemVerRegex = @"^((([0-9]+)\.([0-9]+)\.([0-9]+)(?:-([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)$"; + + [Test] + public void VersionTest() + { + // Avro library's assembly + Assembly assembly = typeof(Schema).Assembly; + + // Note: InformationalVersion contains prerelease tag if available (e.g. 
1.x.y-beta.z) + string libraryVersion = assembly.GetCustomAttribute().InformationalVersion; + + // Check version is SemVer 2.0 compliant + Assert.That(libraryVersion, Does.Match(SemVerRegex)); + } + + [Test] + public void MandatoryAttributesTest() + { + // Avro library's assembly + Assembly assembly = typeof(Schema).Assembly; + + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + Assert.That(assembly.GetCustomAttribute(), Is.Not.Null); + } + } +} diff --git a/lang/csharp/stylecop.json b/lang/csharp/stylecop.json deleted file mode 100644 index 892559168ac..00000000000 --- a/lang/csharp/stylecop.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json", - "settings": { - "documentationRules": { - "companyName": "Apache Software Foundation (ASF)", - "fileNamingConvention": "stylecop", - "documentInterfaces": false, - "documentExposedElements": true, - "documentInternalElements": false, - "documentPrivateElements": false, - "documentPrivateFields": false - }, - "indentation": { - "indentationSize": 4, - "tabSize": 4, - "useTabs": false - }, - "layoutRules": { - "newlineAtEndOfFile": "require", - "allowConsecutiveUsings": true - }, - "orderingRules": { - "blankLinesBetweenUsingGroups": "allow", - "systemUsingDirectivesFirst": true, - "usingDirectivesPlacement": "outsideNamespace" - }, - "readabilityRules": { - "allowBuiltInTypeAliases": false - } - } -} diff --git a/lang/csharp/versions.props b/lang/csharp/versions.props index b6b9747fe2a..15dae9ce5a4 100644 --- a/lang/csharp/versions.props +++ b/lang/csharp/versions.props @@ -23,42 +23,54 @@ - 12.0.3 - 5.0.0 + 13.0.1 + 8.0.0 4.3.0 4.7.0 4.7.0 - 4.5.1 + + + 1.4.2 + 1.3.1 + + 4.1.0 + 1.1.7 - 10.0.3 - 
4.4.0 - 4.3.0 - 4.3.0 - 4.3.0 + + 13.0.1 - - 16.8.0 - 16.8.0 - 5.0.3 - 16.11.0 - 3.13.2 - 3.12.0 - 3.17.0 + + 0.13.10 + 6.0.0 + 6.0.0 + 17.8.3 + 17.8.3 + 4.7.0 + 4.7.0 + 4.7.0 + 8.0.0 + 17.8.0 + 3.14.0 + 3.16.3 + 4.5.0 1.1.118 diff --git a/lang/java/android/pom.xml b/lang/java/android/pom.xml index e43bd5ab6e5..b1e5710a214 100644 --- a/lang/java/android/pom.xml +++ b/lang/java/android/pom.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.0-SNAPSHOT + 1.12.0 ../pom.xml diff --git a/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java b/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java index 09f6e4b7a1c..5c889b70b53 100644 --- a/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java +++ b/lang/java/android/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java @@ -17,14 +17,14 @@ */ package org.apache.avro.util.internal; -import org.junit.Test; +import org.junit.jupiter.api.Test; import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.MatcherAssert.assertThat; public class TestClassValueCache { @Test - public void testBasic() { + void basic() { ClassValueCache cache = new ClassValueCache<>(Class::toString); String fromCache = cache.apply(String.class); diff --git a/lang/java/archetypes/avro-service-archetype/pom.xml b/lang/java/archetypes/avro-service-archetype/pom.xml index 1b50363554b..4e180b12a53 100644 --- a/lang/java/archetypes/avro-service-archetype/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/pom.xml @@ -23,7 +23,7 @@ avro-archetypes-parent org.apache.avro - 1.11.0-SNAPSHOT + 1.12.0 ../pom.xml diff --git a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml index 362f5b69ce9..be2717556a5 100644 --- a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml 
@@ -33,13 +33,28 @@ Simple Avro Ordering Service + ${maven.compiler.source} + ${maven.compiler.release} + ${project.build.sourceEncoding} ${project.version} - ${jackson.version} - ${junit.version} + ${jackson-bom.version} + ${junit5.version} 1.2.3 ${slf4j.version} + + + + com.fasterxml.jackson + jackson-bom + \${jackson-bom.version} + pom + import + + + + org.apache.avro @@ -56,16 +71,6 @@ avro-ipc-netty \${avro.version} - - com.fasterxml.jackson.core - jackson-core - \${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - \${jackson.version} - org.slf4j slf4j-api @@ -77,9 +82,9 @@ \${logback.version} - junit - junit - \${junit.version} + org.junit.jupiter + junit-jupiter + \${junit5.version} test @@ -135,6 +140,10 @@ org.apache.maven.plugins maven-compiler-plugin + + \${maven.compiler.source} + \${maven.compiler.release} + diff --git a/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java b/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java index e5d6ad7e509..0796826e0a9 100644 --- a/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java +++ b/lang/java/archetypes/avro-service-archetype/src/main/resources/archetype-resources/src/test/java/integration/SimpleOrderServiceIntegrationTest.java @@ -21,8 +21,8 @@ package ${package}.integration; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.net.InetSocketAddress; import java.util.ArrayList; @@ -30,9 +30,9 @@ import ${package}.transport.SimpleOrderServiceEndpoint; import ${package}.transport.SimpleOrderServiceClient; -import org.junit.AfterClass; 
-import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import ${package}.service.Confirmation; import ${package}.service.Item; @@ -42,13 +42,13 @@ * SimpleOrderServiceIntegrationTest runs as part of the Integration phase of the build and is * meant for end to end service testing. */ -public class SimpleOrderServiceIntegrationTest { +class SimpleOrderServiceIntegrationTest { private static SimpleOrderServiceEndpoint service; private static SimpleOrderServiceClient client; @Test - public void simpleRoundTripTest() throws Exception { + void simpleRoundTripTest() throws Exception { Order simpleOrder = createOrder(); Confirmation c = client.submitOrder(simpleOrder); @@ -57,7 +57,7 @@ public void simpleRoundTripTest() throws Exception { assertTrue(c.getEstimatedCompletion() > 0); } - @BeforeClass + @BeforeAll public static void setupTransport() throws Exception { InetSocketAddress endpointAddress = new InetSocketAddress("0.0.0.0", 12345); service = new SimpleOrderServiceEndpoint(endpointAddress); @@ -67,7 +67,7 @@ public static void setupTransport() throws Exception { client.start(); } - @AfterClass + @AfterAll public static void shutdownTransport() throws Exception { client.stop(); service.stop(); diff --git a/lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/archetype.properties b/lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/archetype.properties similarity index 100% rename from lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/archetype.properties rename to lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/archetype.properties diff --git a/lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/goal.txt b/lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/goal.txt similarity index 
100% rename from lang/java/archetypes/avro-service-archetype/src/test/integration/projects/basic/goal.txt rename to lang/java/archetypes/avro-service-archetype/src/test/resources/projects/basic/goal.txt diff --git a/lang/java/archetypes/pom.xml b/lang/java/archetypes/pom.xml index 27837400503..fb8294fedb1 100644 --- a/lang/java/archetypes/pom.xml +++ b/lang/java/archetypes/pom.xml @@ -22,7 +22,7 @@ org.apache.avro avro-parent - 1.11.0-SNAPSHOT + 1.12.0 ../pom.xml diff --git a/lang/java/avro/pom.xml b/lang/java/avro/pom.xml index bfa8154e11f..f855426f549 100644 --- a/lang/java/avro/pom.xml +++ b/lang/java/avro/pom.xml @@ -23,8 +23,8 @@ avro-parent org.apache.avro - 1.11.0-SNAPSHOT - ../ + 1.12.0 + ../pom.xml avro @@ -54,7 +54,24 @@ org/apache/avro/data/Json.avsc + + src/main/resources + + + + src/test/resources + + + ../../../share/ + + schemas/** + test/** + + share/ + + + org.apache.maven.plugins @@ -67,6 +84,7 @@ + org.apache.maven.plugins maven-surefire-plugin @@ -100,122 +118,94 @@ + + + + org.apache.maven.plugins + maven-invoker-plugin + + true + + true + ./src/it + ${project.build.directory}/it + + pom.xml + + verify + ${project.build.directory}/local-repo + ./src/it/settings.xml + + + true + + + clean + test + + + + + + Populate the local repo for integration tests + + install + + + + + Run all tests under Java 11 + + run + + + + 11 + + + 11 + + ${project.build.directory}/it-jdk-11 + + + + + Run all tests under Java 17 + + run + + + + 17 + + + 17 + + ${project.build.directory}/it-jdk-17 + + + + + Run all tests under Java 21 + + run + + + + 21 + + + 21 + + ${project.build.directory}/it-jdk-21 + + + + + - - - interop-data-generate - - false - - - - - org.codehaus.mojo - exec-maven-plugin - ${exec-plugin.version} - - - - interop-generate-null-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java.avro - 100 - - - java - - - - - interop-generate-deflate-codec - 
generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_deflate.avro - 100 - deflate - - - java - - - - - interop-generate-snappy-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_snappy.avro - 100 - snappy - - - java - - - - - interop-generate-bzip2-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_bzip2.avro - 100 - bzip2 - - - java - - - - - interop-generate-xz-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_xz.avro - 100 - xz - - - java - - - - - interop-generate-zstandard-codec - generate-resources - - org.apache.avro.util.RandomData - - ../../../share/test/schemas/interop.avsc - ../../../build/interop/data/java_zstandard.avro - 100 - zstandard - - - java - - - - - - - @@ -250,5 +240,10 @@ hamcrest-library test + + org.mockito + mockito-core + test + diff --git a/lang/java/avro/src/it/pom.xml b/lang/java/avro/src/it/pom.xml new file mode 100644 index 00000000000..5a83cff3ed4 --- /dev/null +++ b/lang/java/avro/src/it/pom.xml @@ -0,0 +1,165 @@ + + + + 4.0.0 + + nl.example.avro + integration-test + 1.0.0 + + jar + IT : Java @integrationTestingJDK@ + + + UTF-8 + UTF-8 + 8 + + + + + + ../../src/test/resources + + + ../../../../../share/ + + schemas/** + test/** + + share/ + + + + + + org.apache.maven.plugins + maven-toolchains-plugin + @maven-toolchains-plugin.version@ + + + + toolchain + + + + + + + @integrationTestingJDK@ + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + @maven-compiler-plugin.version@ + + + default-compile + + true + + + + default-testCompile + + ../../src/test/java + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + @maven-surefire-plugin.version@ + + false + true 
+ + + + + + + + + @project.groupId@ + @project.artifactId@ + @project.version@ + + + + + org.xerial.snappy + snappy-java + @snappy.version@ + true + + + + org.tukaani + xz + @tukaani.version@ + true + + + + com.github.luben + zstd-jni + @zstd-jni.version@ + true + + + + + org.junit.vintage + junit-vintage-engine + @junit5.version@ + test + + + + org.junit.jupiter + junit-jupiter + @junit5.version@ + test + + + + org.hamcrest + hamcrest-library + @hamcrest.version@ + test + + + + org.mockito + mockito-core + @mockito.version@ + test + + + + + diff --git a/lang/java/avro/src/it/settings.xml b/lang/java/avro/src/it/settings.xml new file mode 100644 index 00000000000..2e3d2e38201 --- /dev/null +++ b/lang/java/avro/src/it/settings.xml @@ -0,0 +1,51 @@ + + + + + + it-repo + + true + + + + local.central + @localRepositoryUrl@ + + true + + + true + + + + + + local.central + @localRepositoryUrl@ + + true + + + true + + + + + + diff --git a/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java new file mode 100644 index 00000000000..8ddec8155a4 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/CanonicalSchemaFormatterFactory.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +/** + * Schema formatter factory that supports the "Parsing Canonical Form". + * + * @see Specification: + * Parsing Canonical Form for Schemas + */ +public class CanonicalSchemaFormatterFactory implements SchemaFormatterFactory, SchemaFormatter { + @Override + public SchemaFormatter getDefaultFormatter() { + return this; + } + + @Override + public String format(Schema schema) { + return SchemaNormalization.toParsingForm(schema); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java index 4ae75f4a5cb..934672e7d30 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java @@ -21,6 +21,9 @@ import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; +import java.util.ServiceLoader; + +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; @@ -28,23 +31,33 @@ /** * Conversion between generic and logical type instances. *

- * Instances of this class are added to GenericData to convert a logical type to - * a particular representation. + * Instances of this class can be added to GenericData to convert a logical type + * to a particular representation. This can be done manually, using + * {@link GenericData#addLogicalTypeConversion(Conversion)}, or automatically. + * This last option uses the Java {@link ServiceLoader}, and requires the + * implementation to be a public class with a public no-arg constructor, be + * named in a file called {@code /META-INF/services/org.apache.avro.Conversion}, + * and both must be available on the classpath. *

- * Implementations must provide: * {@link #getConvertedType()}: get the Java - * class used for the logical type * {@link #getLogicalTypeName()}: get the - * logical type this implements + * Implementations must provide: + *

    + *
  • {@link #getConvertedType()}: get the Java class used for the logical + * type
  • + *
  • {@link #getLogicalTypeName()}: get the logical type this implements
  • + *
*

- * Subclasses must also override all of the conversion methods for Avro's base - * types that are valid for the logical type, or else risk causing + * Subclasses must also override the conversion methods for Avro's base types + * that are valid for the logical type, or else risk causing * {@code UnsupportedOperationException} at runtime. *

* Optionally, use {@link #getRecommendedSchema()} to provide a Schema that will - * be used when a Schema is generated for the class returned by - * {@code getConvertedType}. + * be used when generating a Schema for the class. This is useful when using + * {@code ReflectData} or {@code ProtobufData}, for example. * - * @param a Java type that generic data is converted to + * @param a Java type that can represent the named logical type + * @see ServiceLoader */ +@SuppressWarnings("unused") public abstract class Conversion { /** @@ -65,9 +78,9 @@ public abstract class Conversion { * Certain logical types may require adjusting the code within the "setter" * methods to make sure the data that is set is properly formatted. This method * allows the Conversion to generate custom setter code if required. - * - * @param varName - * @param valParamName + * + * @param varName the name of the variable holding the converted value + * @param valParamName the name of the parameter with the new converted value * @return a String for the body of the setter method */ public String adjustAndSetValue(String varName, String valParamName) { @@ -102,7 +115,7 @@ public T fromCharSequence(CharSequence value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromCharSequence is not supported for " + type.getName()); } - public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { + public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromEnumSymbol is not supported for " + type.getName()); } @@ -150,7 +163,7 @@ public CharSequence toCharSequence(T value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("toCharSequence is not supported for " + type.getName()); } - public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { + public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { throw new 
UnsupportedOperationException("toEnumSymbol is not supported for " + type.getName()); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java index 1c28c9adb81..99ad500647e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java @@ -18,15 +18,24 @@ package org.apache.avro; -import java.math.RoundingMode; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.TimePeriod; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.math.RoundingMode; import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.IntBuffer; import java.util.Arrays; import java.util.Collection; import java.util.Map; @@ -59,6 +68,22 @@ public UUID fromCharSequence(CharSequence value, Schema schema, LogicalType type public CharSequence toCharSequence(UUID value, Schema schema, LogicalType type) { return value.toString(); } + + @Override + public UUID fromFixed(final GenericFixed value, final Schema schema, final LogicalType type) { + ByteBuffer buffer = ByteBuffer.wrap(value.bytes()); + long mostSigBits = buffer.getLong(); + long leastSigBits = buffer.getLong(); + return new UUID(mostSigBits, leastSigBits); + } + + @Override + public GenericFixed toFixed(final UUID value, final Schema schema, final LogicalType type) { + ByteBuffer buffer = ByteBuffer.allocate(2 * Long.BYTES); + buffer.putLong(value.getMostSignificantBits()); + buffer.putLong(value.getLeastSignificantBits()); + return new 
GenericData.Fixed(schema, buffer.array()); + } } public static class DecimalConversion extends Conversion { @@ -106,11 +131,12 @@ public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) { byte fillByte = (byte) (value.signum() < 0 ? 0xFF : 0x00); byte[] unscaled = value.unscaledValue().toByteArray(); byte[] bytes = new byte[schema.getFixedSize()]; - int offset = bytes.length - unscaled.length; + int unscaledLength = unscaled.length; + int offset = bytes.length - unscaledLength; - // Fill the front of the array and copy remaining with unscaled values + // Fill the front with the filler and copy the unscaled value into the remainder Arrays.fill(bytes, 0, offset, fillByte); - System.arraycopy(unscaled, 0, bytes, offset, bytes.length - offset); + System.arraycopy(unscaled, 0, bytes, offset, unscaledLength); return new GenericData.Fixed(schema, bytes); } @@ -146,8 +172,104 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima } } + public static class BigDecimalConversion extends Conversion { + + @Override + public Class getConvertedType() { + return BigDecimal.class; + } + + @Override + public String getLogicalTypeName() { + return "big-decimal"; + } + + @Override + public BigDecimal fromBytes(final ByteBuffer value, final Schema schema, final LogicalType type) { + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(value.array(), null); + + try { + BigInteger bg = null; + ByteBuffer buffer = decoder.readBytes(null); + byte[] array = buffer.array(); + if (array != null && array.length > 0) { + bg = new BigInteger(array); + } + + int scale = decoder.readInt(); + return new BigDecimal(bg, scale); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public ByteBuffer toBytes(final BigDecimal value, final Schema schema, final LogicalType type) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); 
+ + BigInteger unscaledValue = value.unscaledValue(); + if (unscaledValue != null) { + encoder.writeBytes(unscaledValue.toByteArray()); + } else { + encoder.writeBytes(new byte[] {}); + } + encoder.writeInt(value.scale()); + encoder.flush(); + return ByteBuffer.wrap(out.toByteArray()); + + } catch (IOException e) { + throw new RuntimeException(e); + } + + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.bigDecimal().addToSchema(Schema.create(Schema.Type.BYTES)); + } + } + + public static class DurationConversion extends Conversion { + @Override + public Class getConvertedType() { + return TimePeriod.class; + } + + @Override + public String getLogicalTypeName() { + return "duration"; + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.duration().addToSchema(Schema.createFixed("time.Duration", + "A 12-byte byte array encoding a duration in months, days and milliseconds.", null, 12)); + } + + @Override + public TimePeriod fromFixed(GenericFixed value, Schema schema, LogicalType type) { + IntBuffer buffer = ByteBuffer.wrap(value.bytes()).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); + long months = Integer.toUnsignedLong(buffer.get()); + long days = Integer.toUnsignedLong(buffer.get()); + long millis = Integer.toUnsignedLong(buffer.get()); + return TimePeriod.of(months, days, millis); + } + + @Override + public GenericFixed toFixed(TimePeriod value, Schema schema, LogicalType type) { + ByteBuffer buffer = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN); + IntBuffer intBuffer = buffer.asIntBuffer(); + intBuffer.put((int) value.getMonths()); + intBuffer.put((int) value.getDays()); + intBuffer.put((int) value.getMillis()); + return new GenericData.Fixed(schema, buffer.array()); + } + } + /** - * Convert a underlying representation of a logical type (such as a ByteBuffer) + * Convert an underlying representation of a logical type (such as a ByteBuffer) * to a higher level object (such as a BigDecimal). 
* * @param datum The object to be converted. @@ -157,9 +279,9 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima * @param conversion The tool used to finish the conversion. Cannot be null if * datum is not null. * @return The result object, which is a high level object of the logical type. - * If a null datum is passed in, a null value will be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * The null datum always converts to a null value. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. */ public static Object convertToLogicalType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -176,9 +298,9 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy case RECORD: return conversion.fromRecord((IndexedRecord) datum, schema, type); case ENUM: - return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); + return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); case ARRAY: - return conversion.fromArray((Collection) datum, schema, type); + return conversion.fromArray((Collection) datum, schema, type); case MAP: return conversion.fromMap((Map) datum, schema, type); case FIXED: @@ -201,13 +323,13 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected generic type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected generic type", e); } } /** * Convert a high level representation of a logical type (such as a BigDecimal) - * to the its underlying representation object (such as a ByteBuffer) + * to its underlying representation object (such as a ByteBuffer) * * @param datum The object to 
be converted. * @param schema The schema of datum. Cannot be null if datum is not null. @@ -218,8 +340,8 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy * @return The result object, which is an underlying representation object of * the logical type. If the input param datum is null, a null value will * be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. */ public static Object convertToRawType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -262,7 +384,7 @@ public static Object convertToRawType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected logical type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected logical type", e); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java new file mode 100644 index 00000000000..c37eca15dc6 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.IOException; +import java.net.URI; + +/** + * Schema parser for a specific schema format. + * + *

+ * The {@link SchemaParser} class uses this interface, supporting text based + * schema sources. + *

+ * + *

+ * Implementations are located using a {@link java.util.ServiceLoader} and must + * therefore be threadsafe. See the {@code ServiceLoader} class for details on + * loading your implementation. + *

+ * + * @see java.util.ServiceLoader + */ +public interface FormattedSchemaParser { + /** + *

+ * Parse schema definitions from a text based source. + *

+ * + *

Notes for implementers:

+ * + *
    + *
  • Schema definitions may not be in the format the parser + * expects. So when the input clearly doesn't make sense (e.g., reading "/**" + * when expecting JSON), it is a good idea not to do anything (especially + * calling methods on the {@code ParseContext}).
  • + *
  • The parameter {@code parseContext} is not thread-safe.
  • + *
  • All named schema definitions that are parsed should be added to the + * provided {@link ParseContext}.
  • + *
  • Optionally, you may return a "main" schema. Some schema definitions have + * one, for example the schema defined by the root of the JSON document in a + * standard schema + * definition. If unsure, return {@code null}.
  • + *
  • If parsing fails, throw a {@link SchemaParseException}. This will let the + * parsing process recover and continue.
  • + *
  • Throwing anything other than a {@code SchemaParseException} will abort + * the parsing process, so reserve that for rethrowing exceptions.
  • + *
+ * + * @param parseContext the current parse context: all named schemata that are + * parsed should be added here, otherwise resolving + * schemata can fail; contains all previously known types + * @param baseUri the base location of the schema, or {@code null} if + * not known + * @param formattedSchema the text of the schema definition(s) to parse + * @return the main schema, if any + * @throws IOException when the schema cannot be read + * @throws SchemaParseException when the schema cannot be parsed + */ + Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) + throws IOException, SchemaParseException; +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java index b53bc6cb2ba..0c100baa98f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java @@ -24,12 +24,14 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Queue; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ConcurrentMap; import java.io.IOException; +import java.util.function.BiConsumer; import org.apache.avro.util.internal.Accessor; import org.apache.avro.util.internal.Accessor.JsonPropertiesAccessor; @@ -241,6 +243,11 @@ public Object getObjectProp(String name) { return JacksonUtils.toObject(props.get(name)); } + public Object getObjectProp(String name, Object defaultValue) { + final JsonNode json = props.get(name); + return json != null ? JacksonUtils.toObject(json) : defaultValue; + } + /** * Adds a property with the given name name and value value. * Neither name nor value can be null. 
It is illegal @@ -307,6 +314,17 @@ public Map getObjectProps() { return Collections.unmodifiableMap(result); } + public boolean propsContainsKey(String key) { + return this.props.containsKey(key); + } + + public void forEachProperty(BiConsumer consumer) { + for (Map.Entry entry : this.props.entrySet()) { + final Object value = JacksonUtils.toObject(entry.getValue()); + consumer.accept(entry.getKey(), value); + } + } + void writeProps(JsonGenerator gen) throws IOException { for (Map.Entry e : props.entrySet()) gen.writeObjectField(e.getKey(), e.getValue()); @@ -317,7 +335,7 @@ int propsHashCode() { } boolean propsEqual(JsonProperties np) { - return props.equals(np.props); + return Objects.equals(props, np.props); } public boolean hasProps() { diff --git a/lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java similarity index 59% rename from lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs rename to lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java index 6175167803a..5d372658611 100644 --- a/lang/csharp/src/apache/codegen/Properties/AssemblyInfo.cs +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatter.java @@ -1,4 +1,4 @@ -īģŋ/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,19 +15,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +package org.apache.avro; -using System.Reflection; -using System.Runtime.InteropServices; +public class JsonSchemaFormatter implements SchemaFormatter { + private final boolean prettyPrinted; -[assembly: AssemblyTitle("Avro.codegen")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Apache")] -[assembly: AssemblyProduct("Avro.codegen")] -[assembly: AssemblyCopyright("Copyright Š Apache 2013")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] -[assembly: ComVisible(false)] -[assembly: Guid("3C23DD33-DD4F-42B1-B71F-8F9C86929E58")] -[assembly: AssemblyVersion("0.9.0.0")] -[assembly: AssemblyFileVersion("0.9.0.0")] \ No newline at end of file + public JsonSchemaFormatter(boolean prettyPrinted) { + this.prettyPrinted = prettyPrinted; + } + + @Override + public String format(Schema schema) { + // TODO: Move the toString implementation here and have Schema#toString() + // use SchemaFormatter with the formats "json/pretty" and "json/inline" + return schema.toString(prettyPrinted); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java new file mode 100644 index 00000000000..915a671ebd7 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaFormatterFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public class JsonSchemaFormatterFactory implements SchemaFormatterFactory { + + public static final String VARIANT_NAME_PRETTY = "pretty"; + public static final String VARIANT_NAME_INLINE = "inline"; + + @Override + public SchemaFormatter getDefaultFormatter() { + return getFormatterForVariant(VARIANT_NAME_PRETTY); + } + + @Override + public SchemaFormatter getFormatterForVariant(String variantName) { + if (VARIANT_NAME_PRETTY.equals(variantName)) { + return new JsonSchemaFormatter(true); + } else if (VARIANT_NAME_INLINE.equals(variantName)) { + return new JsonSchemaFormatter(false); + } else { + throw new AvroRuntimeException("Unknown JSON variant: " + variantName); + } + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java new file mode 100644 index 00000000000..5dd532444a3 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.io.IOException; +import java.net.URI; + +/** + * Schema parser for JSON formatted schemata. This initial implementation simply + * delegates to the {@link Schema.Parser} class, though it should be refactored + * out of there. + * + *

+ * Note: this class is intentionally not available via the Java + * {@link java.util.ServiceLoader}, as its use is hardcoded as fallback when no + * service exists. This enables users to reliably override the standard JSON + * parser as well. + *

+ */ +public class JsonSchemaParser implements FormattedSchemaParser { + /** + *

+ * Parse a schema written in the internal (JSON) format without any validations. + *

+ * + *

+ * Using this method is only safe if used to parse a writer schema (i.e., the + * schema that existing Avro data was written with, as needed to read that data). Other usages, for example by generated Avro + * code, can cause interoperability problems. + *

+ * + *

+ * Use with care and sufficient testing! + *

+ * + * @param fragments one or more strings making up the schema (some schemata + * exceed the compiler limits) + * @return the parsed schema + */ + public static Schema parseInternal(String... fragments) { + StringBuilder buffer = new StringBuilder(); + for (String fragment : fragments) { + buffer.append(fragment); + } + + boolean saved = Schema.getValidateDefaults(); + try { + Schema.setValidateDefaults(false); + ParseContext context = new ParseContext(NameValidator.NO_VALIDATION); + Schema schema = new JsonSchemaParser().parse(context, buffer, true); + context.commit(); + context.resolveAllSchemas(); + return context.resolve(schema); + } finally { + Schema.setValidateDefaults(saved); + } + } + + @Override + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) + throws IOException, SchemaParseException { + return parse(parseContext, formattedSchema, false); + } + + private Schema parse(ParseContext parseContext, CharSequence formattedSchema, boolean allowInvalidDefaults) + throws SchemaParseException { + Schema.Parser parser = new Schema.Parser(parseContext); + if (allowInvalidDefaults) { + parser.setValidateDefaults(false); + } + return parser.parseInternal(formattedSchema.toString()); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java index 5b03e1524fc..6a894f05104 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java +++ b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java @@ -18,18 +18,35 @@ package org.apache.avro; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.Collections; import java.util.Map; import java.util.Objects; +import java.util.ServiceLoader; import java.util.concurrent.ConcurrentHashMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class LogicalTypes { private static final Logger LOG = 
LoggerFactory.getLogger(LogicalTypes.class); + /** + * Factory interface and SPI for logical types. A {@code LogicalTypeFactory} can + * be registered in two ways: + * + *
<ol> + *
<li>Manually, via {@link #register(LogicalTypeFactory)} or + * {@link #register(String, LogicalTypeFactory)}</li> + * + *
<li>Automatically, when the {@code LogicalTypeFactory} implementation is a + * public class with a public no-arg constructor, is named in a file called + * {@code /META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory}, + * and both are available in the classpath</li> + *
</ol>
+ * + * @see ServiceLoader + */ public interface LogicalTypeFactory { LogicalType fromSchema(Schema schema); @@ -40,6 +57,12 @@ default String getTypeName() { private static final Map REGISTERED_TYPES = new ConcurrentHashMap<>(); + static { + for (LogicalTypeFactory logicalTypeFactory : ServiceLoader.load(LogicalTypeFactory.class)) { + register(logicalTypeFactory); + } + } + /** * Register a logical type. * @@ -114,6 +137,9 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case DECIMAL: logicalType = new Decimal(schema); break; + case BIG_DECIMAL: + logicalType = BIG_DECIMAL_TYPE; + break; case UUID: logicalType = UUID_TYPE; break; @@ -123,6 +149,9 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case TIMESTAMP_MICROS: logicalType = TIMESTAMP_MICROS_TYPE; break; + case TIMESTAMP_NANOS: + logicalType = TIMESTAMP_NANOS_TYPE; + break; case TIME_MILLIS: logicalType = TIME_MILLIS_TYPE; break; @@ -135,6 +164,9 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { case LOCAL_TIMESTAMP_MILLIS: logicalType = LOCAL_TIMESTAMP_MILLIS_TYPE; break; + case LOCAL_TIMESTAMP_NANOS: + logicalType = LOCAL_TIMESTAMP_NANOS_TYPE; + break; default: final LogicalTypeFactory typeFactory = REGISTERED_TYPES.get(typeName); logicalType = (typeFactory == null) ? 
null : typeFactory.fromSchema(schema); @@ -159,14 +191,18 @@ private static LogicalType fromSchemaImpl(Schema schema, boolean throwErrors) { } private static final String DECIMAL = "decimal"; + private static final String BIG_DECIMAL = "big-decimal"; + private static final String DURATION = "duration"; private static final String UUID = "uuid"; private static final String DATE = "date"; private static final String TIME_MILLIS = "time-millis"; private static final String TIME_MICROS = "time-micros"; private static final String TIMESTAMP_MILLIS = "timestamp-millis"; private static final String TIMESTAMP_MICROS = "timestamp-micros"; + private static final String TIMESTAMP_NANOS = "timestamp-nanos"; private static final String LOCAL_TIMESTAMP_MILLIS = "local-timestamp-millis"; private static final String LOCAL_TIMESTAMP_MICROS = "local-timestamp-micros"; + private static final String LOCAL_TIMESTAMP_NANOS = "local-timestamp-nanos"; /** Create a Decimal LogicalType with the given precision and scale 0 */ public static Decimal decimal(int precision) { @@ -178,12 +214,25 @@ public static Decimal decimal(int precision, int scale) { return new Decimal(precision, scale); } - private static final LogicalType UUID_TYPE = new LogicalType("uuid"); + private static final BigDecimal BIG_DECIMAL_TYPE = new BigDecimal(); + + /** Create a Big Decimal LogicalType that can accept any precision and scale */ + public static BigDecimal bigDecimal() { + return BIG_DECIMAL_TYPE; + } + + private static final LogicalType UUID_TYPE = new Uuid(); public static LogicalType uuid() { return UUID_TYPE; } + private static final LogicalType DURATION_TYPE = new Duration(); + + public static LogicalType duration() { + return DURATION_TYPE; + } + private static final Date DATE_TYPE = new Date(); public static Date date() { @@ -214,6 +263,12 @@ public static TimestampMicros timestampMicros() { return TIMESTAMP_MICROS_TYPE; } + private static final TimestampNanos TIMESTAMP_NANOS_TYPE = new 
TimestampNanos(); + + public static TimestampNanos timestampNanos() { + return TIMESTAMP_NANOS_TYPE; + } + private static final LocalTimestampMillis LOCAL_TIMESTAMP_MILLIS_TYPE = new LocalTimestampMillis(); public static LocalTimestampMillis localTimestampMillis() { @@ -226,6 +281,50 @@ public static LocalTimestampMicros localTimestampMicros() { return LOCAL_TIMESTAMP_MICROS_TYPE; } + private static final LocalTimestampNanos LOCAL_TIMESTAMP_NANOS_TYPE = new LocalTimestampNanos(); + + public static LocalTimestampNanos localTimestampNanos() { + return LOCAL_TIMESTAMP_NANOS_TYPE; + } + + /** Uuid represents a universally unique identifier, stored as a string or a 16-byte fixed */ + public static class Uuid extends LogicalType { + + private static final int UUID_BYTES = 2 * Long.BYTES; + + private Uuid() { + super(UUID); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.STRING && schema.getType() != Schema.Type.FIXED) { + throw new IllegalArgumentException("Uuid can only be used with an underlying string or fixed type"); + } + if (schema.getType() == Schema.Type.FIXED && schema.getFixedSize() != UUID_BYTES) { + throw new IllegalArgumentException("Uuid with fixed type must have a size of " + UUID_BYTES + " bytes"); + } + } + } + + /** + * Duration represents a duration, consisting of months, days and milliseconds + */ + public static class Duration extends LogicalType { + private Duration() { + super(DURATION); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.FIXED || schema.getFixedSize() != 12) { + throw new IllegalArgumentException("Duration can only be used with an underlying fixed type of size 12."); + } + } + } + /** Decimal represents arbitrary-precision fixed-scale decimal numbers */ public static class Decimal extends LogicalType { private static final String PRECISION_PROP = "precision"; @@ -306,7 +405,7 @@ private long maxPrecision(Schema schema) { } private
boolean hasProperty(Schema schema, String name) { - return (schema.getObjectProp(name) != null); + return schema.propsContainsKey(name); } private int getInt(Schema schema, String name) { @@ -340,6 +439,20 @@ public int hashCode() { } } + public static class BigDecimal extends LogicalType { + private BigDecimal() { + super(BIG_DECIMAL); + } + + @Override + public void validate(final Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.BYTES) { + throw new IllegalArgumentException("BigDecimal can only be used with an underlying bytes type"); + } + } + } + /** Date represents a date without a time */ public static class Date extends LogicalType { private Date() { @@ -415,6 +528,21 @@ public void validate(Schema schema) { } } + /** TimestampNanos represents a date and time in nanoseconds */ + public static class TimestampNanos extends LogicalType { + private TimestampNanos() { + super(TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException("Timestamp (nanos) can only be used with an underlying long type"); + } + } + } + public static class LocalTimestampMillis extends LogicalType { private LocalTimestampMillis() { super(LOCAL_TIMESTAMP_MILLIS); @@ -443,4 +571,18 @@ public void validate(Schema schema) { } } + public static class LocalTimestampNanos extends LogicalType { + private LocalTimestampNanos() { + super(LOCAL_TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException("Local timestamp (nanos) can only be used with an underlying long type"); + } + } + } + } diff --git a/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java new file mode 100644 index 00000000000..f1262d922cf --- /dev/null +++
b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public interface NameValidator { + + class Result { + private final String errors; + + public Result(final String errors) { + this.errors = errors; + } + + public boolean isOK() { + return this == NameValidator.OK; + } + + public String getErrors() { + return errors; + } + } + + Result OK = new Result(null); + + default Result validate(String name) { + return OK; + } + + NameValidator NO_VALIDATION = new NameValidator() { + }; + + NameValidator UTF_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length == 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(Character.isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(Character.isLetterOrDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + }; + + NameValidator 
STRICT_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length == 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(isLetter(c) || isDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + + private boolean isLetter(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + }; + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java new file mode 100644 index 00000000000..b7bc42b9787 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import org.apache.avro.util.SchemaResolver; +import org.apache.avro.util.Schemas; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static java.util.Objects.requireNonNull; + +/** + * Class to define a name context, useful to reference schemata with. This + * allows for the following: + * + *
<ul> + *
<li>Collect new named schemata.</li> + *
<li>Find schemata by name, including primitives.</li> + *
<li>Find schemas that do not exist yet.</li> + *
<li>Resolve references to schemas that didn't exist yet when first used.</li> + *
</ul>
+ * + *

+ * This class is NOT thread-safe. + *

+ * + *

+ * Note: this class has no use for most Avro users, but is a key component when + * implementing a schema parser. + *

+ * + * @see JSON based + * schema definition + **/ +public class ParseContext { + private static final Map PRIMITIVES = new HashMap<>(); + + static { + PRIMITIVES.put("string", Schema.Type.STRING); + PRIMITIVES.put("bytes", Schema.Type.BYTES); + PRIMITIVES.put("int", Schema.Type.INT); + PRIMITIVES.put("long", Schema.Type.LONG); + PRIMITIVES.put("float", Schema.Type.FLOAT); + PRIMITIVES.put("double", Schema.Type.DOUBLE); + PRIMITIVES.put("boolean", Schema.Type.BOOLEAN); + PRIMITIVES.put("null", Schema.Type.NULL); + } + + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, + Schema.Type.FIXED); + /** + * Collection of old schemata. Can contain unresolved references if !isResolved. + */ + private final Map oldSchemas; + /** + * Collection of new schemata. Can contain unresolved references. + */ + private final Map newSchemas; + /** + * The name validator to use. + */ + // Visible for use in JsonSchemaParser + final NameValidator nameValidator; + /** + * Visitor that was used to resolve schemata with. If not available, some + * schemata in {@code oldSchemas} may not be fully resolved. If available, all + * schemata in {@code oldSchemas} are resolved, and {@code newSchemas} is empty. + * After visiting a schema, it can return the corresponding resolved schema for + * a schema that possibly contains unresolved references. + */ + private SchemaResolver.ResolvingVisitor resolvingVisitor; + + /** + * Create a {@code ParseContext} for the default/{@code null} namespace, using + * default name validation for new schemata. + */ + public ParseContext() { + this(NameValidator.UTF_VALIDATOR); + } + + /** + * Create a {@code ParseContext} using the specified name validation for new + * schemata. 
+ */ + public ParseContext(NameValidator nameValidator) { + this(requireNonNull(nameValidator), new LinkedHashMap<>(), new LinkedHashMap<>()); + } + + private ParseContext(NameValidator nameValidator, Map oldSchemas, Map newSchemas) { + this.nameValidator = nameValidator; + this.oldSchemas = oldSchemas; + this.newSchemas = newSchemas; + resolvingVisitor = null; + } + + /** + * Tell whether this context contains a schema with the given name. + * + * @param name a schema name + * @return {@code true} if the context contains a schema with this name, + * {@code false} otherwise + */ + public boolean contains(String name) { + return PRIMITIVES.containsKey(name) || oldSchemas.containsKey(name) || newSchemas.containsKey(name); + } + + /** + *

+ * Find a schema by name and namespace. + *

+ * + *

+ * That is: + *

+ * + *
<ol> + *
<li>If {@code name} is a primitive name, return a (new) schema for it</li> + *
<li>Otherwise, determine the full schema name (using the given + * {@code namespace} if necessary), and find it</li> + *
<li>If no schema was found and {@code name} is a simple name, find the schema + * in the default (null) namespace</li> + *
<li>If still no schema was found, return an unresolved reference for the full + * schema name (see step 2)</li> + *
</ol>
+ * + *

+ * Note: as an unresolved reference might be returned, the schema is not + * directly usable. Please {@link #put(Schema)} the schema using it in the + * context. The {@link SchemaParser} and protocol parsers will ensure you'll + * only get a resolved schema that is usable. + *

+ * + * @param name the schema name to find + * @param namespace the namespace to find the schema against + * @return the schema, or an unresolved reference + */ + public Schema find(String name, String namespace) { + Schema.Type type = PRIMITIVES.get(name); + if (type != null) { + return Schema.create(type); + } + + String fullName = fullName(name, namespace); + Schema schema = getNamedSchema(fullName); + if (schema == null) { + schema = getNamedSchema(name); + } + + return schema != null ? schema : SchemaResolver.unresolvedSchema(fullName); + } + + private String fullName(String name, String namespace) { + if (namespace != null && name.lastIndexOf('.') < 0) { + return namespace + "." + name; + } + return name; + } + + /** + * Get a schema by name. Note that the schema might not (yet) be resolved/usable + * until {@link #resolveAllSchemas()} has been called. + * + * @param fullName a full schema name + * @return the schema, if known + */ + public Schema getNamedSchema(String fullName) { + Schema schema = oldSchemas.get(fullName); + if (schema == null) { + schema = newSchemas.get(fullName); + } + return schema; + } + + /** + * Put the schema into this context. This is an idempotent operation: it only + * fails if this context already has a different schema with the same name. + * + *

+ * Note that this method only accepts named schemas (records, enums and fixed + * types); any other type is rejected. Primitive types are always known by name. + * This means you cannot redefine a 'long' with a logical timestamp type. + *

+ * + * @param schema the schema to put into the context + */ + public void put(Schema schema) { + if (!(NAMED_SCHEMA_TYPES.contains(schema.getType()))) { + throw new AvroTypeException("You can only put a named schema into the context"); + } + + String fullName = requireValidFullName(schema.getFullName()); + + Schema alreadyKnownSchema = oldSchemas.get(fullName); + if (alreadyKnownSchema != null) { + if (!schema.equals(alreadyKnownSchema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } else { + resolvingVisitor = null; + Schema previouslyAddedSchema = newSchemas.putIfAbsent(fullName, schema); + if (previouslyAddedSchema != null && !previouslyAddedSchema.equals(schema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } + } + + private String requireValidFullName(String fullName) { + String[] names = fullName.split("\\."); + for (int i = 0; i < names.length - 1; i++) { + validateName(names[i], "Namespace part"); + } + validateName(names[names.length - 1], "Name"); + return fullName; + } + + private void validateName(String name, String typeOfName) { + NameValidator.Result result = nameValidator.validate(name); + if (!result.isOK()) { + throw new SchemaParseException(typeOfName + " \"" + name + "\" is invalid: " + result.getErrors()); + } + } + + public boolean hasNewSchemas() { + return !newSchemas.isEmpty(); + } + + public void commit() { + oldSchemas.putAll(newSchemas); + newSchemas.clear(); + } + + public SchemaParser.ParseResult commit(Schema mainSchema) { + Collection parsedNamedSchemas = newSchemas.values(); + SchemaParser.ParseResult parseResult = new SchemaParser.ParseResult() { + @Override + public Schema mainSchema() { + return mainSchema == null ? 
null : resolve(mainSchema); + } + + @Override + public List parsedNamedSchemas() { + return parsedNamedSchemas.stream().map(ParseContext.this::resolve).collect(Collectors.toList()); + } + }; + commit(); + return parseResult; + } + + public void rollback() { + newSchemas.clear(); + } + + /** + * Resolve all (named) schemas that were parsed. This resolves all forward + * references, even if parsed from different files. Note: the context must be + * committed for this method to work. + * + * @return all parsed schemas, in the order they were parsed + * @throws AvroTypeException if a schema reference cannot be resolved + */ + public List resolveAllSchemas() { + ensureSchemasAreResolved(); + + return new ArrayList<>(oldSchemas.values()); + } + + private void ensureSchemasAreResolved() { + if (hasNewSchemas()) { + throw new IllegalStateException("Schemas cannot be resolved unless the ParseContext is committed."); + } + if (resolvingVisitor == null) { + NameValidator saved = Schema.getNameValidator(); + try { + // Ensure we use the same validation when copying schemas as when they were + // defined. + Schema.setNameValidator(nameValidator); + SchemaResolver.ResolvingVisitor visitor = new SchemaResolver.ResolvingVisitor(oldSchemas::get); + oldSchemas.values().forEach(schema -> Schemas.visit(schema, visitor)); + // Before this point is where we can get exceptions due to resolving failures. + for (Map.Entry entry : oldSchemas.entrySet()) { + entry.setValue(visitor.getResolved(entry.getValue())); + } + resolvingVisitor = visitor; + } finally { + Schema.setNameValidator(saved); + } + } + } + + /** + * Resolve unresolved references in a schema that was parsed for this + * context using the types known to this context. Note: this method will + * ensure all known schemas are resolved, or throw, and thus requires the + * context to be committed. 
+ * + * @param schema the schema to resolve + * @return the fully resolved schema + * @throws AvroTypeException if a schema reference cannot be resolved + */ + public Schema resolve(Schema schema) { + ensureSchemasAreResolved(); + + // As all (named) schemas are resolved now, we know: + // — All named types are either in oldSchemas or unknown. + // — All unnamed types can be visited&resolved without validation. + + if (NAMED_SCHEMA_TYPES.contains(schema.getType()) && schema.getFullName() != null) { + return requireNonNull(oldSchemas.get(schema.getFullName()), () -> "Unknown schema: " + schema.getFullName()); + } else { + // Unnamed or anonymous schema + // (protocol message request parameters are anonymous records) + Schemas.visit(schema, resolvingVisitor); // This field is set, as ensureSchemasAreResolved(); was called. + return resolvingVisitor.getResolved(schema); + } + } + + /** + * Return all known types by their fullname. Warning: this returns all types, + * even uncommitted ones, including unresolved references!
+ * + * @return a map of all types by their name + */ + public Map typesByName() { + LinkedHashMap result = new LinkedHashMap<>(); + result.putAll(oldSchemas); + result.putAll(newSchemas); + return result; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java index 6987d4c8f54..905f2778c6b 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java @@ -19,28 +19,28 @@ import java.io.ByteArrayInputStream; import java.io.File; +import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.nio.charset.StandardCharsets; -import java.io.IOException; import java.security.MessageDigest; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Collection; -import java.util.Collections; +import java.util.Objects; import java.util.Set; -import java.util.HashSet; - -import org.apache.avro.Schema.Field; -import org.apache.avro.Schema.Field.Order; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; +import org.apache.avro.Schema.Field; +import org.apache.avro.Schema.Field.Order; /** * A set of messages forming an application protocol. @@ -79,9 +79,9 @@ public class Protocol extends JsonProperties { /** A protocol message. */ public class Message extends JsonProperties { - private String name; - private String doc; - private Schema request; + private final String name; + private final String doc; + private final Schema request; /** Construct a message. 
*/ private Message(String name, String doc, JsonProperties propMap, Schema request) { @@ -132,7 +132,7 @@ public String toString() { try { StringWriter writer = new StringWriter(); JsonGenerator gen = Schema.FACTORY.createGenerator(writer); - toJson(gen); + toJson(new HashSet<>(), gen); gen.flush(); return writer.toString(); } catch (IOException e) { @@ -140,19 +140,19 @@ public String toString() { } } - void toJson(JsonGenerator gen) throws IOException { + void toJson(Set knownNames, JsonGenerator gen) throws IOException { gen.writeStartObject(); if (doc != null) gen.writeStringField("doc", doc); writeProps(gen); // write out properties gen.writeFieldName("request"); - request.fieldsToJson(types, gen); + request.fieldsToJson(knownNames, namespace, gen); - toJson1(gen); + toJson1(knownNames, gen); gen.writeEndObject(); } - void toJson1(JsonGenerator gen) throws IOException { + void toJson1(Set knownNames, JsonGenerator gen) throws IOException { gen.writeStringField("response", "null"); gen.writeBooleanField("one-way", true); } @@ -175,12 +175,11 @@ public int hashCode() { public String getDoc() { return doc; } - } - private class TwoWayMessage extends Message { - private Schema response; - private Schema errors; + private final class TwoWayMessage extends Message { + private final Schema response; + private final Schema errors; /** Construct a message. 
*/ private TwoWayMessage(String name, String doc, Map propMap, Schema request, Schema response, @@ -228,15 +227,15 @@ public int hashCode() { } @Override - void toJson1(JsonGenerator gen) throws IOException { + void toJson1(Set knownNames, JsonGenerator gen) throws IOException { gen.writeFieldName("response"); - response.toJson(types, gen); + response.toJson(knownNames, namespace, gen); List errs = errors.getTypes(); // elide system error if (errs.size() > 1) { Schema union = Schema.createUnion(errs.subList(1, errs.size())); gen.writeFieldName("errors"); - union.toJson(types, gen); + union.toJson(knownNames, namespace, gen); } } @@ -246,7 +245,7 @@ void toJson1(JsonGenerator gen) throws IOException { private String namespace; private String doc; - private Schema.Names types = new Schema.Names(); + private ParseContext context = new ParseContext(); private final Map messages = new LinkedHashMap<>(); private byte[] md5; @@ -268,6 +267,7 @@ private Protocol() { * {@code doc}, and {@code namespace} as {code p} has. It also copies all the * {@code props}. */ + @SuppressWarnings("CopyConstructorMissesField") public Protocol(Protocol p) { this(p.getName(), p.getDoc(), p.getNamespace()); putAll(p); @@ -275,15 +275,28 @@ public Protocol(Protocol p) { public Protocol(String name, String doc, String namespace) { super(PROTOCOL_RESERVED); - this.name = name; + setName(name, namespace); this.doc = doc; - this.namespace = namespace; } public Protocol(String name, String namespace) { this(name, null, namespace); } + private void setName(String name, String namespace) { + int lastDot = name.lastIndexOf('.'); + if (lastDot < 0) { + this.name = name; + this.namespace = namespace; + } else { + this.name = name.substring(lastDot + 1); + this.namespace = name.substring(0, lastDot); + } + if (this.namespace != null && this.namespace.isEmpty()) { + this.namespace = null; + } + } + /** The name of this protocol. 
*/ public String getName() { return name; @@ -301,19 +314,30 @@ public String getDoc() { /** The types of this protocol. */ public Collection getTypes() { - return types.values(); + return context.resolveAllSchemas(); + } + + /** @deprecated can return invalid schemata: do NOT use! */ + @Deprecated + public Collection getUnresolvedTypes() { + return context.typesByName().values(); } /** Returns the named type. */ public Schema getType(String name) { - return types.get(name); + Schema namedSchema = null; + if (!name.contains(".")) { + namedSchema = context.getNamedSchema(namespace + "." + name); + } + return namedSchema != null ? namedSchema : context.getNamedSchema(name); } /** Set the types of this protocol. */ public void setTypes(Collection newTypes) { - types = new Schema.Names(); + context = new ParseContext(); for (Schema s : newTypes) - types.add(s); + context.put(s); + context.commit(); } /** The messages of this protocol. */ @@ -332,16 +356,16 @@ public Message createMessage(String name, String doc, Schema request) { * {@code props} of {@code m}. */ public Message createMessage(Message m, Schema request) { - return new Message(name, doc, m, request); + return new Message(m.name, m.doc, m, request); } /** Create a one-way message. */ - public Message createMessage(String name, String doc, JsonProperties propMap, Schema request) { + public Message createMessage(String name, String doc, JsonProperties propMap, Schema request) { return new Message(name, doc, propMap, request); } /** Create a one-way message. */ - public Message createMessage(String name, String doc, Map propMap, Schema request) { + public Message createMessage(String name, String doc, Map propMap, Schema request) { return new Message(name, doc, propMap, request); } @@ -360,13 +384,13 @@ public Message createMessage(Message m, Schema request, Schema response, Schema } /** Create a two-way message. 
*/ - public Message createMessage(String name, String doc, JsonProperties propMap, Schema request, Schema response, + public Message createMessage(String name, String doc, JsonProperties propMap, Schema request, Schema response, Schema errors) { return new TwoWayMessage(name, doc, propMap, request, response, errors); } /** Create a two-way message. */ - public Message createMessage(String name, String doc, Map propMap, Schema request, Schema response, + public Message createMessage(String name, String doc, Map propMap, Schema request, Schema response, Schema errors) { return new TwoWayMessage(name, doc, propMap, request, response, errors); } @@ -378,13 +402,14 @@ public boolean equals(Object o) { if (!(o instanceof Protocol)) return false; Protocol that = (Protocol) o; - return this.name.equals(that.name) && this.namespace.equals(that.namespace) && this.types.equals(that.types) - && this.messages.equals(that.messages) && this.propsEqual(that); + return Objects.equals(this.name, that.name) && Objects.equals(this.namespace, that.namespace) + && Objects.equals(this.context.resolveAllSchemas(), that.context.resolveAllSchemas()) + && Objects.equals(this.messages, that.messages) && this.propsEqual(that); } @Override public int hashCode() { - return name.hashCode() + namespace.hashCode() + types.hashCode() + messages.hashCode() + propsHashCode(); + return 31 * Objects.hash(name, namespace, context, messages) + propsHashCode(); } /** Render this as JSON. 
*/ @@ -413,26 +438,26 @@ public String toString(boolean pretty) { } void toJson(JsonGenerator gen) throws IOException { - types.space(namespace); - gen.writeStartObject(); gen.writeStringField("protocol", name); - gen.writeStringField("namespace", namespace); + if (namespace != null) { + gen.writeStringField("namespace", namespace); + } if (doc != null) gen.writeStringField("doc", doc); writeProps(gen); gen.writeArrayFieldStart("types"); - Schema.Names resolved = new Schema.Names(namespace); - for (Schema type : types.values()) - if (!resolved.contains(type)) - type.toJson(resolved, gen); + Set knownNames = new HashSet<>(); + for (Schema type : context.resolveAllSchemas()) + if (!knownNames.contains(type.getFullName())) + type.toJson(knownNames, namespace, gen); gen.writeEndArray(); gen.writeObjectFieldStart("messages"); for (Map.Entry e : messages.entrySet()) { gen.writeFieldName(e.getKey()); - e.getValue().toJson(gen); + e.getValue().toJson(knownNames, gen); } gen.writeEndObject(); gen.writeEndObject(); @@ -451,7 +476,9 @@ public byte[] getMD5() { /** Read a protocol from a Json file. */ public static Protocol parse(File file) throws IOException { - return parse(Schema.FACTORY.createParser(file)); + try (JsonParser jsonParser = Schema.FACTORY.createParser(file)) { + return parse(jsonParser); + } } /** Read a protocol from a Json stream. 
*/ @@ -487,20 +514,43 @@ private static Protocol parse(JsonParser parser) { } private void parse(JsonNode json) { - parseNamespace(json); - parseName(json); + parseNameAndNamespace(json); parseTypes(json); parseMessages(json); parseDoc(json); parseProps(json); + + context.commit(); + context.resolveAllSchemas(); + resolveMessageSchemata(); + } + + private void resolveMessageSchemata() { + for (Map.Entry entry : messages.entrySet()) { + Message oldValue = entry.getValue(); + Message newValue; + if (oldValue.isOneWay()) { + newValue = createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, + context.resolve(oldValue.getRequest())); + } else { + Schema request = context.resolve(oldValue.getRequest()); + Schema response = context.resolve(oldValue.getResponse()); + Schema errors = context.resolve(oldValue.getErrors()); + newValue = createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, request, response, errors); + } + entry.setValue(newValue); + } } - private void parseNamespace(JsonNode json) { - JsonNode nameNode = json.get("namespace"); - if (nameNode == null) - return; // no namespace defined - this.namespace = nameNode.textValue(); - types.space(this.namespace); + private void parseNameAndNamespace(JsonNode json) { + JsonNode nameNode = json.get("protocol"); + if (nameNode == null) { + throw new SchemaParseException("No protocol name specified: " + json); + } + JsonNode namespaceNode = json.get("namespace"); + String namespace = namespaceNode == null ? 
null : namespaceNode.textValue(); + + setName(nameNode.textValue(), namespace); } private void parseDoc(JsonNode json) { @@ -514,23 +564,17 @@ private String parseDocNode(JsonNode json) { return nameNode.textValue(); } - private void parseName(JsonNode json) { - JsonNode nameNode = json.get("protocol"); - if (nameNode == null) - throw new SchemaParseException("No protocol name specified: " + json); - this.name = nameNode.textValue(); - } - private void parseTypes(JsonNode json) { JsonNode defs = json.get("types"); if (defs == null) return; // no types defined if (!defs.isArray()) throw new SchemaParseException("Types not an array: " + defs); + for (JsonNode type : defs) { if (!type.isObject()) throw new SchemaParseException("Type not an object: " + type); - Schema.parse(type, types); + Schema.parse(type, context, namespace); } } @@ -578,8 +622,8 @@ private Message parseMessage(String messageName, JsonNode json) { JsonNode fieldDocNode = field.get("doc"); if (fieldDocNode != null) fieldDoc = fieldDocNode.textValue(); - Field newField = new Field(name, Schema.parse(fieldTypeNode, types), fieldDoc, field.get("default"), true, - Order.ASCENDING); + Field newField = new Field(name, Schema.parse(fieldTypeNode, context, namespace), fieldDoc, field.get("default"), + true, Order.ASCENDING); Set aliases = Schema.parseAliases(field); if (aliases != null) { // add aliases for (String alias : aliases) @@ -594,7 +638,7 @@ private Message parseMessage(String messageName, JsonNode json) { } fields.add(newField); } - Schema request = Schema.createRecord(fields); + Schema request = Schema.createRecord(null, null, null, false, fields); boolean oneWay = false; JsonNode oneWayNode = json.get("one-way"); @@ -613,12 +657,12 @@ private Message parseMessage(String messageName, JsonNode json) { if (oneWay) { if (decls != null) throw new SchemaParseException("one-way can't have errors: " + json); - if (responseNode != null && Schema.parse(responseNode, types).getType() != Schema.Type.NULL) + 
if (responseNode != null && Schema.parse(responseNode, context, namespace).getType() != Schema.Type.NULL) throw new SchemaParseException("One way response must be null: " + json); return new Message(messageName, doc, mProps, request); } - Schema response = Schema.parse(responseNode, types); + Schema response = Schema.parse(responseNode, context, namespace); List errs = new ArrayList<>(); errs.add(SYSTEM_ERROR); // every method can throw @@ -627,7 +671,7 @@ private Message parseMessage(String messageName, JsonNode json) { throw new SchemaParseException("Errors not an array: " + json); for (JsonNode decl : decls) { String name = decl.textValue(); - Schema schema = this.types.get(name); + Schema schema = this.context.find(name, namespace); if (schema == null) throw new SchemaParseException("Undefined error: " + name); if (!schema.isError()) @@ -642,5 +686,4 @@ private Message parseMessage(String messageName, JsonNode json) { public static void main(String[] args) throws Exception { System.out.println(Protocol.parse(new File(args[0]))); } - } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Resolver.java b/lang/java/avro/src/main/java/org/apache/avro/Resolver.java index 182a91bdff6..117c9e3911f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Resolver.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Resolver.java @@ -90,7 +90,7 @@ private static Action resolve(Schema w, Schema r, GenericData d, Map wsymbols = w.getEnumSymbols(); diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 76ee2ee3be5..2f0711ed401 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -25,12 +25,21 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.DoubleNode; import com.fasterxml.jackson.databind.node.NullNode; +import 
org.apache.avro.path.TracingAvroTypeException; +import org.apache.avro.util.internal.Accessor; +import org.apache.avro.util.internal.Accessor.FieldAccessor; +import org.apache.avro.util.internal.JacksonUtils; +import org.apache.avro.util.internal.ThreadLocalWithInitial; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -38,7 +47,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -46,12 +54,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import org.apache.avro.util.internal.Accessor; -import org.apache.avro.util.internal.Accessor.FieldAccessor; -import org.apache.avro.util.internal.JacksonUtils; -import org.apache.avro.util.internal.ThreadLocalWithInitial; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.stream.Collectors; import static org.apache.avro.LogicalType.LOGICAL_TYPE_PROP; @@ -76,9 +79,9 @@ *
  • null. * * - * A schema can be constructed using one of its static createXXX - * methods, or more conveniently using {@link SchemaBuilder}. The schema objects - * are logically immutable. There are only two mutating methods - + * Construct a schema using one of its static createXXX methods, or + * more conveniently using {@link SchemaBuilder}. The schema objects are + * logically immutable. There are only two mutating methods - * {@link #setFields(List)} and {@link #addProp(String, String)}. The following * restrictions apply on these two methods. *
      @@ -89,6 +92,7 @@ * property. *
    */ +@SuppressWarnings("unused") public abstract class Schema extends JsonProperties implements Serializable { private static final long serialVersionUID = 1L; @@ -121,20 +125,20 @@ private Object readResolve() { FACTORY.setCodec(MAPPER); } - /** The type of a schema. */ + /** The type of schema. */ public enum Type { RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL; private final String name; - private Type() { + Type() { this.name = this.name().toLowerCase(Locale.ENGLISH); } public String getName() { return name; } - }; + } private final Type type; private LogicalType logicalType = null; @@ -202,9 +206,9 @@ void setLogicalType(LogicalType logicalType) { * Create an anonymous record schema. * * @deprecated This method allows to create Schema objects that cannot be parsed - * by {@link Schema.Parser#parse(String)}. It will be removed in a - * future version of Avro. Better use - * i{@link #createRecord(String, String, String, boolean, List)} to + * by {@link SchemaParser#parse(CharSequence)}. It will be removed + * in a future version of Avro. Better use + * {@link #createRecord(String, String, String, boolean, List)} to * produce a fully qualified Schema. */ @Deprecated @@ -269,7 +273,7 @@ public Type getType() { * fieldName. If there is no field by that name, a null is * returned. */ - public Field getField(String fieldname) { + public Field getField(String fieldName) { throw new AvroRuntimeException("Not a record: " + this); } @@ -281,6 +285,13 @@ public List getFields() { throw new AvroRuntimeException("Not a record: " + this); } + /** + * If this is a record, returns whether the fields have been set. + */ + public boolean hasFields() { + throw new AvroRuntimeException("Not a record: " + this); + } + /** * If this is a record, set its fields. The fields can be set only once in a * schema. 
@@ -383,7 +394,16 @@ public int getFixedSize() { throw new AvroRuntimeException("Not fixed: " + this); } - /** Render this as JSON. */ + /** + *

    + * Render this as JSON. + *

    + * + *

    + * This method is equivalent to: + * {@code SchemaFormatter.getInstance("json").format(this)} + *

    + */ @Override public String toString() { return toString(false); @@ -393,9 +413,12 @@ public String toString() { * Render this as JSON. * * @param pretty if true, pretty-print JSON. + * @deprecated Use {@link SchemaFormatter#format(Schema)} instead, using the + * format {@code json/pretty} or {@code json/inline} */ + @Deprecated public String toString(boolean pretty) { - return toString(new Names(), pretty); + return toString(new HashSet(), pretty); } /** @@ -408,22 +431,23 @@ public String toString(boolean pretty) { // Use at your own risk. This method should be removed with AVRO-2832. @Deprecated public String toString(Collection referencedSchemas, boolean pretty) { - Schema.Names names = new Schema.Names(); + Set knownNames = new HashSet<>(); if (referencedSchemas != null) { for (Schema s : referencedSchemas) { - names.add(s); + knownNames.add(s.getFullName()); } } - return toString(names, pretty); + return toString(knownNames, pretty); } - String toString(Names names, boolean pretty) { + @Deprecated + String toString(Set knownNames, boolean pretty) { try { StringWriter writer = new StringWriter(); JsonGenerator gen = FACTORY.createGenerator(writer); if (pretty) gen.useDefaultPrettyPrinter(); - toJson(names, gen); + toJson(knownNames, null, gen); gen.flush(); return writer.toString(); } catch (IOException e) { @@ -431,7 +455,8 @@ String toString(Names names, boolean pretty) { } } - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { if (!hasProps()) { // no props defined gen.writeString(getName()); // just write name } else { @@ -442,7 +467,8 @@ void toJson(Names names, JsonGenerator gen) throws IOException { } } - void fieldsToJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void fieldsToJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { throw new AvroRuntimeException("Not a record: " + 
this); } @@ -476,12 +502,12 @@ final boolean equalCachedHash(Schema other) { private static final Set FIELD_RESERVED = Collections .unmodifiableSet(new HashSet<>(Arrays.asList("default", "doc", "name", "order", "type", "aliases"))); - /** Returns true if this record is an union type. */ + /** Returns true if this record is a union type. */ public boolean isUnion() { return this instanceof UnionSchema; } - /** Returns true if this record is an union type containing null. */ + /** Returns true if this record is a union type containing null. */ public boolean isNullable() { if (!isUnion()) { return getType().equals(Schema.Type.NULL); @@ -525,10 +551,10 @@ public enum Order { private final String name; - private Order() { + Order() { this.name = this.name().toLowerCase(Locale.ENGLISH); } - }; + } /** * For Schema unions with a "null" type as the first entry, this can be used to @@ -547,7 +573,7 @@ private Order() { Field(String name, Schema schema, String doc, JsonNode defaultValue, boolean validateDefault, Order order) { super(FIELD_RESERVED); this.name = validateName(name); - this.schema = schema; + this.schema = Objects.requireNonNull(schema, "schema is required and cannot be null"); this.doc = doc; this.defaultValue = validateDefault ? 
validateDefault(name, schema, defaultValue) : defaultValue; this.order = Objects.requireNonNull(order, "Order cannot be null"); @@ -570,14 +596,14 @@ public Field(Field field, Schema schema) { * */ public Field(String name, Schema schema) { - this(name, schema, (String) null, (JsonNode) null, true, Order.ASCENDING); + this(name, schema, null, null, true, Order.ASCENDING); } /** * */ public Field(String name, Schema schema, String doc) { - this(name, schema, doc, (JsonNode) null, true, Order.ASCENDING); + this(name, schema, doc, null, true, Order.ASCENDING); } /** @@ -602,7 +628,7 @@ public Field(String name, Schema schema, String doc, Object defaultValue, Order public String name() { return name; - }; + } /** The position of this field within the record. */ public int pos() { @@ -704,7 +730,7 @@ public Name(String name, String space) { this.name = validateName(name); } else { // qualified name space = name.substring(0, lastDot); // get space from name - this.name = validateName(name.substring(lastDot + 1, name.length())); + this.name = validateName(name.substring(lastDot + 1)); } if ("".equals(space)) space = null; @@ -732,20 +758,44 @@ public String toString() { return full; } - public void writeName(Names names, JsonGenerator gen) throws IOException { + public void writeName(String currentNamespace, JsonGenerator gen) throws IOException { if (name != null) gen.writeStringField("name", name); if (space != null) { - if (!space.equals(names.space())) + if (!space.equals(currentNamespace)) gen.writeStringField("namespace", space); - } else if (names.space() != null) { // null within non-null + } else if (currentNamespace != null) { // null within non-null gen.writeStringField("namespace", ""); } } public String getQualified(String defaultSpace) { - return (space == null || space.equals(defaultSpace)) ? name : full; + return this.shouldWriteFull(defaultSpace) ? full : name; + } + + /** + * Determine if full name must be written. 
There are 2 cases for true : + * {@code defaultSpace} != from {@code this.space} or name is already a + * {@code Schema.Type} (int, array, ...) + * + * @param defaultSpace : default name space. + * @return true if full name must be written. + */ + private boolean shouldWriteFull(String defaultSpace) { + if (space != null && space.equals(defaultSpace)) { + for (Type schemaType : Type.values()) { + if (schemaType.name.equals(name)) { + // name is a 'Type', so namespace must be written + return true; + } + } + // this.space == defaultSpace + return false; + } + // this.space != defaultSpace, so namespace must be written. + return true; } + } private static abstract class NamedSchema extends Schema { @@ -801,22 +851,25 @@ public Set getAliases() { Set result = new LinkedHashSet<>(); if (aliases != null) for (Name alias : aliases) - result.add(alias.full); + if (alias.space == null && name.space != null) + result.add("." + alias.name); + else + result.add(alias.full); return result; } - public boolean writeNameRef(Names names, JsonGenerator gen) throws IOException { - if (this.equals(names.get(name))) { - gen.writeString(name.getQualified(names.space())); - return true; - } else if (name.name != null) { - names.put(name, this); + public boolean writeNameRef(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (name.name != null) { + if (!knownNames.add(name.full)) { + gen.writeString(name.getQualified(currentNamespace)); + return true; + } } return false; } - public void writeName(Names names, JsonGenerator gen) throws IOException { - name.writeName(names, gen); + public void writeName(String currentNamespace, JsonGenerator gen) throws IOException { + name.writeName(currentNamespace, gen); } public boolean equalNames(NamedSchema that) { @@ -829,7 +882,7 @@ int computeHash() { } public void aliasesToJson(JsonGenerator gen) throws IOException { - if (aliases == null || aliases.size() == 0) + if (aliases == null || aliases.isEmpty()) 
return; gen.writeFieldName("aliases"); gen.writeStartArray(); @@ -845,8 +898,8 @@ public void aliasesToJson(JsonGenerator gen) throws IOException { * and need to watch for recursion. */ public static class SeenPair { - private Object s1; - private Object s2; + private final Object s1; + private final Object s2; public SeenPair(Object s1, Object s2) { this.s1 = s1; @@ -865,10 +918,9 @@ public int hashCode() { } } - private static final ThreadLocal SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); - private static final ThreadLocal SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); + private static final ThreadLocal> SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); + private static final ThreadLocal> SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); - @SuppressWarnings(value = "unchecked") private static class RecordSchema extends NamedSchema { private List fields; private Map fieldMap; @@ -891,10 +943,10 @@ public boolean isError() { } @Override - public Field getField(String fieldname) { + public Field getField(String fieldName) { if (fieldMap == null) throw new AvroRuntimeException("Schema fields not set yet"); - return fieldMap.get(fieldname); + return fieldMap.get(fieldName); } @Override @@ -904,6 +956,11 @@ public List getFields() { return fields; } + @Override + public boolean hasFields() { + return fields != null; + } + @Override public void setFields(List fields) { if (this.fields != null) { @@ -941,7 +998,7 @@ public boolean equals(Object o) { return false; if (!propsEqual(that)) return false; - Set seen = SEEN_EQUALS.get(); + Set seen = SEEN_EQUALS.get(); SeenPair here = new SeenPair(this, o); if (seen.contains(here)) return true; // prevent stack overflow @@ -957,7 +1014,7 @@ public boolean equals(Object o) { @Override int computeHash() { - Map seen = SEEN_HASHCODE.get(); + Map seen = SEEN_HASHCODE.get(); if (seen.containsKey(this)) return 0; // prevent stack overflow boolean first = seen.isEmpty(); @@ -971,36 
+1028,36 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; - String savedSpace = names.space; // save namespace gen.writeStartObject(); gen.writeStringField("type", isError ? "error" : "record"); - writeName(names, gen); - names.space = name.space; // set default namespace - if (getDoc() != null) - gen.writeStringField("doc", getDoc()); + writeName(currentNamespace, gen); + if (this.getDoc() != null) { + gen.writeStringField("doc", this.getDoc()); + } if (fields != null) { gen.writeFieldName("fields"); - fieldsToJson(names, gen); + fieldsToJson(knownNames, name.space, gen); } writeProps(gen); aliasesToJson(gen); gen.writeEndObject(); - names.space = savedSpace; // restore namespace } @Override - void fieldsToJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void fieldsToJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { gen.writeStartArray(); for (Field f : fields) { gen.writeStartObject(); gen.writeStringField("name", f.name()); gen.writeFieldName("type"); - f.schema().toJson(names, gen); + f.schema().toJson(knownNames, namespace, gen); if (f.doc() != null) gen.writeStringField("doc", f.doc()); if (f.hasDefaultValue()) { @@ -1009,7 +1066,7 @@ void fieldsToJson(Names names, JsonGenerator gen) throws IOException { } if (f.order() != Field.Order.ASCENDING) gen.writeStringField("order", f.order().name); - if (f.aliases != null && f.aliases.size() != 0) { + if (f.aliases != null && !f.aliases.isEmpty()) { gen.writeFieldName("aliases"); gen.writeStartArray(); for (String alias : f.aliases) @@ -1057,7 +1114,12 @@ public boolean hasEnumSymbol(String symbol) { @Override public int getEnumOrdinal(String symbol) { - return ordinals.get(symbol); + Integer ordinal = 
ordinals.get(symbol); + if (ordinal == null) { + throw new TracingAvroTypeException( + new AvroTypeException("enum value '" + symbol + "' is not in the enum symbol set: " + symbols)); + } + return ordinal; } @Override @@ -1081,12 +1143,13 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; gen.writeStartObject(); gen.writeStringField("type", "enum"); - writeName(names, gen); + writeName(currentNamespace, gen); if (getDoc() != null) gen.writeStringField("doc", getDoc()); gen.writeArrayFieldStart("symbols"); @@ -1130,11 +1193,12 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String namespace, JsonGenerator gen) throws IOException { gen.writeStartObject(); gen.writeStringField("type", "array"); gen.writeFieldName("items"); - elementType.toJson(names, gen); + elementType.toJson(knownNames, namespace, gen); writeProps(gen); gen.writeEndObject(); } @@ -1169,11 +1233,12 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { gen.writeStartObject(); gen.writeStringField("type", "map"); gen.writeFieldName("values"); - valueType.toJson(names, gen); + valueType.toJson(knownNames, currentNamespace, gen); writeProps(gen); gen.writeEndObject(); } @@ -1202,6 +1267,16 @@ public UnionSchema(LockableArrayList types) { } } + /** + * Checks if a JSON value matches the schema. 
+ * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return this.types.stream().anyMatch((Schema s) -> s.isValidDefault(jsonValue)); + } + @Override public List getTypes() { return types; @@ -1236,12 +1311,19 @@ public void addProp(String name, String value) { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { gen.writeStartArray(); for (Schema type : types) - type.toJson(names, gen); + type.toJson(knownNames, currentNamespace, gen); gen.writeEndArray(); } + + @Override + public String getName() { + return super.getName() + + this.getTypes().stream().map(Schema::getName).collect(Collectors.joining(", ", "[", "]")); + } } private static class FixedSchema extends NamedSchema { @@ -1249,8 +1331,7 @@ private static class FixedSchema extends NamedSchema { public FixedSchema(Name name, String doc, int size) { super(Type.FIXED, name, doc); - if (size < 0) - throw new IllegalArgumentException("Invalid fixed size: " + size); + SystemLimitException.checkMaxBytesLength(size); this.size = size; } @@ -1275,12 +1356,13 @@ int computeHash() { } @Override - void toJson(Names names, JsonGenerator gen) throws IOException { - if (writeNameRef(names, gen)) + @Deprecated + void toJson(Set knownNames, String currentNamespace, JsonGenerator gen) throws IOException { + if (writeNameRef(knownNames, currentNamespace, gen)) return; gen.writeStartObject(); gen.writeStringField("type", "fixed"); - writeName(names, gen); + writeName(currentNamespace, gen); if (getDoc() != null) gen.writeStringField("doc", getDoc()); gen.writeNumberField("size", size); @@ -1344,37 +1426,48 @@ public NullSchema() { * may refer to it by name. 
*/ public static class Parser { - private Names names = new Names(); - private boolean validate = true; + final ParseContext context; + private final NameValidator validate; private boolean validateDefaults = true; + public Parser() { + this(NameValidator.UTF_VALIDATOR); + } + + public Parser(final NameValidator validate) { + this.validate = validate != null ? validate : NameValidator.NO_VALIDATION; + context = new ParseContext(this.validate); + } + + public Parser(final ParseContext context) { + this.validate = context.nameValidator; + this.context = context; + } + /** * Adds the provided types to the set of defined, named types known to this * parser. + * + * @deprecated use addTypes(Iterable types) */ + @Deprecated public Parser addTypes(Map types) { - for (Schema s : types.values()) - names.add(s); - return this; - } - - /** Returns the set of defined, named types known to this parser. */ - public Map getTypes() { - Map result = new LinkedHashMap<>(); - for (Schema s : names.values()) - result.put(s.getFullName(), s); - return result; + return this.addTypes(types.values()); } - /** Enable or disable name validation. */ - public Parser setValidate(boolean validate) { - this.validate = validate; + /** + * Adds the provided types to the set of defined, named types known to this + * parser. + */ + public Parser addTypes(Iterable types) { + for (Schema s : types) + context.put(s); return this; } - /** True iff names are validated. True by default. */ - public boolean getValidate() { - return this.validate; + /** Returns the set of defined, named types known to this parser. */ + public Map getTypes() { + return context.typesByName(); } /** Enable or disable default value validation. */ @@ -1393,7 +1486,7 @@ public boolean getValidateDefaults() { * names known to this parser. 
*/ public Schema parse(File file) throws IOException { - return parse(FACTORY.createParser(file)); + return parse(FACTORY.createParser(file), false, true); } /** @@ -1401,7 +1494,8 @@ public Schema parse(File file) throws IOException { * names known to this parser. The input stream stays open after the parsing. */ public Schema parse(InputStream in) throws IOException { - return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE)); + JsonParser parser = FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE); + return parse(parser, true, true); } /** Read a schema from one or more json strings */ @@ -1418,24 +1512,55 @@ public Schema parse(String s, String... more) { */ public Schema parse(String s) { try { - return parse(FACTORY.createParser(s)); + return parse(FACTORY.createParser(s), false, true); + } catch (IOException e) { + throw new SchemaParseException(e); + } + } + + public Schema parseInternal(String s) { + try { + return parse(FACTORY.createParser(s), false, false); } catch (IOException e) { throw new SchemaParseException(e); } } - private Schema parse(JsonParser parser) throws IOException { - boolean saved = validateNames.get(); + private Schema parse(JsonParser parser, boolean allowDanglingContent, boolean resolveSchema) throws IOException { + NameValidator saved = VALIDATE_NAMES.get(); boolean savedValidateDefaults = VALIDATE_DEFAULTS.get(); try { - validateNames.set(validate); + // This ensured we're using the same validation as the ParseContext. + // This is most relevant for field names. 
+ VALIDATE_NAMES.set(validate); VALIDATE_DEFAULTS.set(validateDefaults); - return Schema.parse(MAPPER.readTree(parser), names); + JsonNode jsonNode = MAPPER.readTree(parser); + Schema schema = Schema.parse(jsonNode, context, null); + if (resolveSchema) { + context.commit(); + schema = context.resolve(schema); + } + if (!allowDanglingContent) { + String dangling; + StringWriter danglingWriter = new StringWriter(); + int numCharsReleased = parser.releaseBuffered(danglingWriter); + if (numCharsReleased == -1) { + ByteArrayOutputStream danglingOutputStream = new ByteArrayOutputStream(); + parser.releaseBuffered(danglingOutputStream); // if input isn't chars above it must be bytes + dangling = new String(danglingOutputStream.toByteArray(), StandardCharsets.UTF_8).trim(); + } else { + dangling = danglingWriter.toString().trim(); + } + if (!dangling.isEmpty()) { + throw new SchemaParseException("dangling content after end of schema: " + dangling); + } + } + return schema; } catch (JsonParseException e) { throw new SchemaParseException(e); } finally { parser.close(); - validateNames.set(saved); + VALIDATE_NAMES.set(saved); VALIDATE_DEFAULTS.set(savedValidateDefaults); } } @@ -1447,9 +1572,9 @@ private Schema parse(JsonParser parser) throws IOException { * * @param file The file to read the schema from. * @return The freshly built Schema. - * @throws IOException if there was trouble reading the contents or they are + * @throws IOException if there was trouble reading the contents, or they are * invalid - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(File file) throws IOException { @@ -1462,9 +1587,9 @@ public static Schema parse(File file) throws IOException { * * @param in The input stream to read the schema from. * @return The freshly built Schema. 
- * @throws IOException if there was trouble reading the contents or they are + * @throws IOException if there was trouble reading the contents, or they are * invalid - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(InputStream in) throws IOException { @@ -1474,7 +1599,7 @@ public static Schema parse(InputStream in) throws IOException { /** * Construct a schema from JSON text. * - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(String jsonSchema) { @@ -1485,11 +1610,12 @@ public static Schema parse(String jsonSchema) { * Construct a schema from JSON text. * * @param validate true if names should be validated, false if not. - * @deprecated use {@link Schema.Parser} instead. + * @deprecated use {@link SchemaParser} instead. */ @Deprecated public static Schema parse(String jsonSchema, boolean validate) { - return new Parser().setValidate(validate).parse(jsonSchema); + final NameValidator validator = validate ? 
NameValidator.UTF_VALIDATOR : NameValidator.NO_VALIDATION; + return new Parser(validator).parse(jsonSchema); } static final Map PRIMITIVES = new HashMap<>(); @@ -1546,43 +1672,81 @@ public void add(Schema schema) { @Override public Schema put(Name name, Schema schema) { - if (containsKey(name)) - throw new SchemaParseException("Can't redefine: " + name); + if (containsKey(name)) { + final Schema other = super.get(name); + if (!Objects.equals(other, schema)) { + throw new SchemaParseException("Can't redefine: " + name); + } else { + return schema; + } + } return super.put(name, schema); } } - private static ThreadLocal validateNames = ThreadLocalWithInitial.of(() -> true); + private static final ThreadLocal VALIDATE_NAMES = ThreadLocalWithInitial + .of(() -> NameValidator.UTF_VALIDATOR); private static String validateName(String name) { - if (!validateNames.get()) - return name; // not validating names - if (name == null) - throw new SchemaParseException("Null name"); - int length = name.length(); - if (length == 0) - throw new SchemaParseException("Empty name"); - char first = name.charAt(0); - if (!(Character.isLetter(first) || first == '_')) - throw new SchemaParseException("Illegal initial character: " + name); - for (int i = 1; i < length; i++) { - char c = name.charAt(i); - if (!(Character.isLetterOrDigit(c) || c == '_')) - throw new SchemaParseException("Illegal character in: " + name); + NameValidator.Result result = VALIDATE_NAMES.get().validate(name); + if (!result.isOK()) { + throw new SchemaParseException(result.getErrors()); } return name; } + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static void setNameValidator(final NameValidator validator) { + Schema.VALIDATE_NAMES.set(validator); + } + + /* + * @deprecated Scheduled for removal. Do Not Use! 
+ */ + @Deprecated + public static NameValidator getNameValidator() { + return Schema.VALIDATE_NAMES.get(); + } + private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocalWithInitial.of(() -> true); private static JsonNode validateDefault(String fieldName, Schema schema, JsonNode defaultValue) { - if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !isValidDefault(schema, defaultValue)) { // invalid default + if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !schema.isValidDefault(defaultValue)) { // invalid default String message = "Invalid default for field " + fieldName + ": " + defaultValue + " not a " + schema; throw new AvroTypeException(message); // throw exception } return defaultValue; } + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static void setValidateDefaults(boolean validateDefaults) { + Schema.VALIDATE_DEFAULTS.set(validateDefaults); + } + + /* + * @deprecated Scheduled for removal. Do Not Use! + */ + @Deprecated + public static boolean getValidateDefaults() { + return Schema.VALIDATE_DEFAULTS.get(); + } + + /** + * Checks if a JSON value matches the schema. 
+ * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return isValidDefault(this, jsonValue); + } + private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (defaultValue == null) return false; @@ -1617,13 +1781,13 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (!isValidDefault(schema.getValueType(), value)) return false; return true; - case UNION: // union default: first branch - return isValidDefault(schema.getTypes().get(0), defaultValue); + case UNION: // union default: any branch + return schema.getTypes().stream().anyMatch((Schema s) -> isValidValue(s, defaultValue)); case RECORD: if (!defaultValue.isObject()) return false; for (Field field : schema.getFields()) - if (!isValidDefault(field.schema(), + if (!isValidValue(field.schema(), defaultValue.has(field.name()) ? defaultValue.get(field.name()) : field.defaultValue())) return false; return true; @@ -1632,143 +1796,213 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { } } + /** + * Validate a value against the schema. + * + * @param schema : schema for value. + * @param value : value to validate. + * @return true if ok. + */ + private static boolean isValidValue(Schema schema, JsonNode value) { + if (value == null) + return false; + if (schema.isUnion()) { + // For Union, only need that one sub schema is ok. + for (Schema sub : schema.getTypes()) { + if (Schema.isValidDefault(sub, value)) { + return true; + } + } + return false; + } else { + // for other types, same as validate default. 
+ return Schema.isValidDefault(schema, value); + } + } + /** @see #parse(String) */ - static Schema parse(JsonNode schema, Names names) { + static Schema parse(JsonNode schema, ParseContext context, String currentNameSpace) { if (schema == null) { throw new SchemaParseException("Cannot parse schema"); - } - if (schema.isTextual()) { // name - Schema result = names.get(schema.textValue()); - if (result == null) - throw new SchemaParseException("Undefined name: " + schema); - return result; + } else if (schema.isTextual()) { // name + return context.find(schema.textValue(), currentNameSpace); } else if (schema.isObject()) { - Schema result; String type = getRequiredText(schema, "type", "No type"); - Name name = null; - String savedSpace = names.space(); - String doc = null; - if (type.equals("record") || type.equals("error") || type.equals("enum") || type.equals("fixed")) { - String space = getOptionalText(schema, "namespace"); - doc = getOptionalText(schema, "doc"); - if (space == null) - space = names.space(); - name = new Name(getRequiredText(schema, "name", "No name in schema"), space); - names.space(name.space); // set default namespace - } + final boolean isTypeError = "error".equals(type); if (PRIMITIVES.containsKey(type)) { // primitive - result = create(PRIMITIVES.get(type)); - } else if (type.equals("record") || type.equals("error")) { // record - List fields = new ArrayList<>(); - result = new RecordSchema(name, doc, type.equals("error")); - if (name != null) - names.add(result); - JsonNode fieldsNode = schema.get("fields"); - if (fieldsNode == null || !fieldsNode.isArray()) - throw new SchemaParseException("Record has no fields: " + schema); - for (JsonNode field : fieldsNode) { - String fieldName = getRequiredText(field, "name", "No field name"); - String fieldDoc = getOptionalText(field, "doc"); - JsonNode fieldTypeNode = field.get("type"); - if (fieldTypeNode == null) - throw new SchemaParseException("No field type: " + field); - if 
(fieldTypeNode.isTextual() && names.get(fieldTypeNode.textValue()) == null) - throw new SchemaParseException(fieldTypeNode + " is not a defined name." + " The type of the \"" + fieldName - + "\" field must be" + " a defined name or a {\"type\": ...} expression."); - Schema fieldSchema = parse(fieldTypeNode, names); - Field.Order order = Field.Order.ASCENDING; - JsonNode orderNode = field.get("order"); - if (orderNode != null) - order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); - JsonNode defaultValue = field.get("default"); - if (defaultValue != null - && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) - && defaultValue.isTextual()) - defaultValue = new DoubleNode(Double.valueOf(defaultValue.textValue())); - Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); - Iterator i = field.fieldNames(); - while (i.hasNext()) { // add field props - String prop = i.next(); - if (!FIELD_RESERVED.contains(prop)) - f.addProp(prop, field.get(prop)); - } - f.aliases = parseAliases(field); - fields.add(f); - if (fieldSchema.getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) - LOG.warn( - "Ignored the {}.{}.logicalType property (\"{}\"). 
It should probably be nested inside the \"type\" for the field.", - name, fieldName, getOptionalText(field, "logicalType")); - } - result.setFields(fields); - } else if (type.equals("enum")) { // enum - JsonNode symbolsNode = schema.get("symbols"); - if (symbolsNode == null || !symbolsNode.isArray()) - throw new SchemaParseException("Enum has no symbols: " + schema); - LockableArrayList symbols = new LockableArrayList<>(symbolsNode.size()); - for (JsonNode n : symbolsNode) - symbols.add(n.textValue()); - JsonNode enumDefault = schema.get("default"); - String defaultSymbol = null; - if (enumDefault != null) - defaultSymbol = enumDefault.textValue(); - result = new EnumSchema(name, doc, symbols, defaultSymbol); - if (name != null) - names.add(result); + return parsePrimitive(schema, type); + } else if ("record".equals(type) || isTypeError) { // record + return parseRecord(schema, context, currentNameSpace, isTypeError); + } else if ("enum".equals(type)) { // enum + return parseEnum(schema, context, currentNameSpace); } else if (type.equals("array")) { // array - JsonNode itemsNode = schema.get("items"); - if (itemsNode == null) - throw new SchemaParseException("Array has no items type: " + schema); - result = new ArraySchema(parse(itemsNode, names)); + return parseArray(schema, context, currentNameSpace); } else if (type.equals("map")) { // map - JsonNode valuesNode = schema.get("values"); - if (valuesNode == null) - throw new SchemaParseException("Map has no values type: " + schema); - result = new MapSchema(parse(valuesNode, names)); - } else if (type.equals("fixed")) { // fixed - JsonNode sizeNode = schema.get("size"); - if (sizeNode == null || !sizeNode.isInt()) - throw new SchemaParseException("Invalid or no size: " + schema); - result = new FixedSchema(name, doc, sizeNode.intValue()); - if (name != null) - names.add(result); + return parseMap(schema, context, currentNameSpace); + } else if ("fixed".equals(type)) { // fixed + return parseFixed(schema, context, 
currentNameSpace); } else { // For unions with self reference - Name nameFromType = new Name(type, names.space); - if (names.containsKey(nameFromType)) { - return names.get(nameFromType); - } - throw new SchemaParseException("Type not supported: " + type); - } - Iterator i = schema.fieldNames(); - - Set reserved = SCHEMA_RESERVED; - if (type.equals("enum")) { - reserved = ENUM_RESERVED; - } - while (i.hasNext()) { // add properties - String prop = i.next(); - if (!reserved.contains(prop)) // ignore reserved - result.addProp(prop, schema.get(prop)); - } - // parse logical type if present - result.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(result); - names.space(savedSpace); // restore space - if (result instanceof NamedSchema) { - Set aliases = parseAliases(schema); - if (aliases != null) // add aliases - for (String alias : aliases) - result.addAlias(alias); + return context.find(type, currentNameSpace); } - return result; } else if (schema.isArray()) { // union - LockableArrayList types = new LockableArrayList<>(schema.size()); - for (JsonNode typeNode : schema) - types.add(parse(typeNode, names)); - return new UnionSchema(types); + return parseUnion(schema, context, currentNameSpace); } else { throw new SchemaParseException("Schema not yet supported: " + schema); } } + private static Schema parsePrimitive(JsonNode schema, String type) { + Schema result = create(PRIMITIVES.get(type)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseRecord(JsonNode schema, ParseContext context, String currentNameSpace, + boolean isTypeError) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + Schema result = new RecordSchema(name, doc, isTypeError); + context.put(result); + + JsonNode fieldsNode = schema.get("fields"); + if (fieldsNode == null || !fieldsNode.isArray()) + throw new SchemaParseException("Record has no fields: " + schema); + List fields = new 
ArrayList<>(); + for (JsonNode field : fieldsNode) { + Field f = parseField(field, context, name.space); + fields.add(f); + if (f.schema().getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) + LOG.warn( + "Ignored the {}.{}.logicalType property (\"{}\"). It should probably be nested inside the \"type\" for the field.", + name, f.name(), getOptionalText(field, "logicalType")); + } + result.setFields(fields); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + parseAliases(schema, result); + return result; + } + + private static Field parseField(JsonNode field, ParseContext context, String namespace) { + String fieldName = getRequiredText(field, "name", "No field name"); + String fieldDoc = parseDoc(field); + JsonNode fieldTypeNode = field.get("type"); + if (fieldTypeNode == null) + throw new SchemaParseException("No field type: " + field); + Schema fieldSchema = parse(fieldTypeNode, context, namespace); + + Field.Order order = Field.Order.ASCENDING; + JsonNode orderNode = field.get("order"); + if (orderNode != null) + order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); + + JsonNode defaultValue = field.get("default"); + if (defaultValue != null && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) + && defaultValue.isTextual()) + defaultValue = new DoubleNode(Double.parseDouble(defaultValue.textValue())); + + Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); + parseProperties(field, f, FIELD_RESERVED); + f.aliases = parseAliases(field); + return f; + } + + private static Schema parseEnum(JsonNode schema, ParseContext context, String currentNameSpace) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + + JsonNode symbolsNode = schema.get("symbols"); + if (symbolsNode == null || !symbolsNode.isArray()) { + throw new SchemaParseException("Enum has no symbols: " + schema); + } + LockableArrayList 
symbols = new LockableArrayList<>(symbolsNode.size()); + for (JsonNode n : symbolsNode) + symbols.add(n.textValue()); + JsonNode enumDefault = schema.get("default"); + String defaultSymbol = null; + if (enumDefault != null) { + defaultSymbol = enumDefault.textValue(); + } + + Schema result = new EnumSchema(name, doc, symbols, defaultSymbol); + context.put(result); + parsePropertiesAndLogicalType(schema, result, ENUM_RESERVED); + parseAliases(schema, result); + return result; + } + + private static Schema parseArray(JsonNode schema, ParseContext context, String currentNameSpace) { + Schema result; + JsonNode itemsNode = schema.get("items"); + if (itemsNode == null) + throw new SchemaParseException("Array has no items type: " + schema); + result = new ArraySchema(parse(itemsNode, context, currentNameSpace)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseMap(JsonNode schema, ParseContext context, String currentNameSpace) { + Schema result; + JsonNode valuesNode = schema.get("values"); + if (valuesNode == null) + throw new SchemaParseException("Map has no values type: " + schema); + result = new MapSchema(parse(valuesNode, context, currentNameSpace)); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + return result; + } + + private static Schema parseFixed(JsonNode schema, ParseContext context, String currentNameSpace) { + Name name = parseName(schema, currentNameSpace); + String doc = parseDoc(schema); + + JsonNode sizeNode = schema.get("size"); + if (sizeNode == null || !sizeNode.isInt()) + throw new SchemaParseException("Invalid or no size: " + schema); + + Schema result = new FixedSchema(name, doc, sizeNode.intValue()); + context.put(result); + parsePropertiesAndLogicalType(schema, result, SCHEMA_RESERVED); + parseAliases(schema, result); + return result; + } + + private static UnionSchema parseUnion(JsonNode schema, ParseContext context, String currentNameSpace) { + 
LockableArrayList types = new LockableArrayList<>(schema.size()); + for (JsonNode typeNode : schema) + types.add(parse(typeNode, context, currentNameSpace)); + return new UnionSchema(types); + } + + private static void parsePropertiesAndLogicalType(JsonNode jsonNode, Schema result, Set propertiesToSkip) { + parseProperties(jsonNode, result, propertiesToSkip); + // parse logical type if present + result.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(result); + } + + private static void parseProperties(JsonNode schema, JsonProperties result, Set propertiesToSkip) { + schema.fieldNames().forEachRemaining(prop -> { // add properties + if (!propertiesToSkip.contains(prop)) // ignore reserved + result.addProp(prop, schema.get(prop)); + }); + } + + private static Name parseName(JsonNode schema, String currentNameSpace) { + String space = getOptionalText(schema, "namespace"); + if (space == null) + space = currentNameSpace; + return new Name(getRequiredText(schema, "name", "No name in schema"), space); + } + + private static String parseDoc(JsonNode schema) { + return getOptionalText(schema, "doc"); + } + + private static void parseAliases(JsonNode schema, Schema result) { + Set aliases = parseAliases(schema); + if (aliases != null) // add aliases + for (String alias : aliases) + result.addAlias(alias); + } + static Set parseAliases(JsonNode node) { JsonNode aliasesNode = node.get("aliases"); if (aliasesNode == null) @@ -1838,13 +2072,14 @@ public static Schema applyAliases(Schema writer, Schema reader) { Map> fieldAliases = new HashMap<>(1); getAliases(reader, seen, aliases, fieldAliases); - if (aliases.size() == 0 && fieldAliases.size() == 0) + if (aliases.isEmpty() && fieldAliases.isEmpty()) return writer; // no aliases seen.clear(); return applyAliases(writer, seen, aliases, fieldAliases); } + @SuppressWarnings("DataFlowIssue") private static Schema applyAliases(Schema s, Map seen, Map aliases, Map> fieldAliases) { @@ -1900,6 +2135,7 @@ private static Schema 
applyAliases(Schema s, Map seen, Map seen, Map aliases, Map> fieldAliases) { if (schema instanceof NamedSchema) { @@ -1961,10 +2197,11 @@ private static String getFieldAlias(Name record, String field, Maptrue in the lock() method. It's legal to call lock() any number of * times. Any lock() other than the first one is a no-op. * - * This class throws IllegalStateException if a mutating operation is - * performed after being locked. Since modifications through iterator also use + * If a mutating operation is performed after being locked, it throws an + * IllegalStateException. Since modifications through iterator also use * the list's mutating operations, this effectively blocks all modifications. */ + @SuppressWarnings("unused") static class LockableArrayList extends ArrayList { private static final long serialVersionUID = 1L; private boolean locked = false; @@ -1980,6 +2217,7 @@ public LockableArrayList(List types) { super(types); } + @SafeVarargs public LockableArrayList(E... types) { super(types.length); Collections.addAll(this, types); diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java index 3e5628d9b3b..8b6a2839ad6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java @@ -324,8 +324,10 @@ private SchemaCompatibilityResult calculateCompatibility(final Schema reader, fi // Reader compatible with all branches of a writer union is compatible if (writer.getType() == Schema.Type.UNION) { + int index = 0; for (Schema s : writer.getTypes()) { - result = result.mergedWith(getCompatibility(reader, s)); + result = result.mergedWith(getCompatibility(Integer.toString(index), reader, s, location)); + index++; } return result; } diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java 
b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java new file mode 100644 index 00000000000..6303b01fb45 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.util.Locale; +import java.util.ServiceLoader; + +/** + * Interface and factory to format schemas to text. + * + *

    + * Schema formats have a name, and optionally a variant (all lowercase). The + * Avro library supports a few formats out of the box: + *

    + * + *
    + * + *
    {@code json}
    + *
    Classic schema definition (which is a form of JSON). Supports the + * variants {@code pretty} (the default) and {@code inline}. Can be written as + * .avsc files. See the specification (Schema + * Declaration) for more details.
    + * + *
    {@code canonical}
    + *
    Parsing Canonical Form; this uniquely defines how Avro data is written. + * Used to generate schema fingerprints.
    + * See the specification (Parsing + * Canonical Form for Schemas) for more details.
    + * + *
    {@code idl}
    + *
    IDL: a format that looks much like source code, and is arguably easier to + * read than JSON. Available when the module {@code avro-idl} is on the + * classpath. See + * IDL Language + * for more details.
    + * + *
    + * + *

    + * Additional formats can be defined by implementing + * {@link SchemaFormatterFactory}. + *

    + * + * @see Specification: + * Schema Declaration + * @see Specification: + * Parsing Canonical Form for Schemas + * @see IDL + * Language + */ +public interface SchemaFormatter { + /** + * Get the schema formatter for the specified format name with optional variant. + * + * @param name a format with optional variant, for example "json/pretty", + * "canonical" or "idl" + * @return the schema formatter for the specified format + * @throws AvroRuntimeException if the schema format is not supported + */ + static SchemaFormatter getInstance(String name) { + int slashPos = name.indexOf("/"); + // SchemaFormatterFactory.getFormatterForVariant(String) receives the name of + // the variant in lowercase (as stated in its javadoc). We're doing a + // case-insensitive comparison on the format name instead, so we don't have to + // convert the format name provided by the factory to lower case. + // This ensures the least amount of assumptions about implementations. + String formatName = slashPos < 0 ? name : name.substring(0, slashPos); + String variantName = slashPos < 0 ? null : name.substring(slashPos + 1).toLowerCase(Locale.ROOT); + + for (SchemaFormatterFactory formatterFactory : SchemaFormatterCache.LOADER) { + if (formatName.equalsIgnoreCase(formatterFactory.formatName())) { + if (variantName == null) { + return formatterFactory.getDefaultFormatter(); + } else { + return formatterFactory.getFormatterForVariant(variantName); + } + } + } + throw new AvroRuntimeException("Unsupported schema format: " + name + "; see the javadoc for valid examples"); + } + + /** + * Format a schema with the specified format. Shorthand for + * {@code getInstance(name).format(schema)}. 
+ * + * @param name the name of the schema format + * @param schema the schema to format + * @return the formatted schema + * @throws AvroRuntimeException if the schema format is not supported + * @see #getInstance(String) + * @see #format(Schema) + */ + static String format(String name, Schema schema) { + return getInstance(name).format(schema); + } + + /** + * Write the specified schema as a String. + * + * @param schema the schema to write + * @return the formatted schema + */ + String format(Schema schema); +} + +class SchemaFormatterCache { + static final ServiceLoader LOADER = ServiceLoader.load(SchemaFormatterFactory.class); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java new file mode 100644 index 00000000000..be731a86ddf --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaFormatterFactory.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import java.util.Locale; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Service Provider Interface (SPI) for {@link SchemaFormatter}. + * + *

    + * Notes to implementers: + *

    + * + *
      + * + *
    • Implementations are located using a {@link java.util.ServiceLoader}. See + * that class for details.
    • + * + *
    • Implementing classes should either be named + * {@code <format>SchemaFormatterFactory} (where the format is alphanumeric), or + * implement {@link #formatName()}.
    • + * + *
    • Implement at least {@link #getDefaultFormatter()}; use it to call + * {@link #getFormatterForVariant(String)} if the format supports multiple + * variants.
    • + * + *
    • Example implementations are {@link JsonSchemaFormatterFactory} and + * {@link CanonicalSchemaFormatterFactory}
    • + * + *
    + * + * @see java.util.ServiceLoader + */ +public interface SchemaFormatterFactory { + /** + * Return the name of the format this formatter factory supports. + * + *

    + * The default implementation returns the lowercase prefix of the implementing + * class if it is named {@code <format>SchemaFormatterFactory}. That is, if the + * implementing class is named {@code some.package.JsonSchemaFormatterFactory}, + * it returns: {@literal "json"} + *

    + * + * @return the name of the format + */ + default String formatName() { + String simpleName = getClass().getSimpleName(); + Matcher matcher = SchemaFormatterFactoryConstants.SIMPLE_NAME_PATTERN.matcher(simpleName); + if (matcher.matches()) { + return matcher.group(1).toLowerCase(Locale.ROOT); + } else { + throw new AvroRuntimeException( + "Formatter is not named \"SchemaFormatterFactory\"; cannot determine format name."); + } + } + + /** + * Get the default formatter for this schema format. Instances should be + * thread-safe, as they may be cached. + * + *

    + * Implementations should either return the only formatter for this format, or + * call {@link #getFormatterForVariant(String)} with the default variant and + * implement that method as well. + *

    + * + * @return the default formatter for this schema format + */ + SchemaFormatter getDefaultFormatter(); + + /** + * Get a formatter for the specified schema format variant, if multiple variants + * are supported. Instances should be thread-safe, as they may be cached. + * + * @param variantName the name of the format variant (lower case), if specified + * @return if the factory supports the format, a schema writer; {@code null} + * otherwise + */ + default SchemaFormatter getFormatterForVariant(String variantName) { + throw new AvroRuntimeException("The schema format \"" + formatName() + "\" has no variants."); + } +} + +class SchemaFormatterFactoryConstants { + static final Pattern SIMPLE_NAME_PATTERN = Pattern.compile( + "([a-z][0-9a-z]*)" + SchemaFormatterFactory.class.getSimpleName(), + Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java new file mode 100644 index 00000000000..e3eb2d9ab69 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import org.apache.avro.util.UtfTextUtils; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.net.URI; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.ServiceLoader; + +/** + * Avro schema parser for text-based formats like JSON, IDL, etc. + * + *

    + * Parses formatted (i.e., text based) schemata from a given source using the + * available {@link FormattedSchemaParser} implementations, and returns the + * first result. This means it can transparently handle any schema format. The + * Avro project defines a JSON based format and an IDL format (the latter + * available as a separate dependency), but you can also provide your own. + *

    + * + *

    + * The parser can handle various text based sources. If the source contains a + * UTF encoded latin text based format it can even detect which UTF encoding was + * used (UTF-8, UTF-16BE, UTF-16LE, UTF-32BE or UTF-32LE). + *

    + * + * @see FormattedSchemaParser + * @see UtfTextUtils + */ +public class SchemaParser { + private final ParseContext parseContext; + private final Collection formattedSchemaParsers; + + /** + * Create a schema parser. Initially, the list of known (named) schemata is + * empty. + */ + public SchemaParser() { + this.parseContext = new ParseContext(); + this.formattedSchemaParsers = new ArrayList<>(); + for (FormattedSchemaParser formattedSchemaParser : ServiceLoader.load(FormattedSchemaParser.class)) { + formattedSchemaParsers.add(formattedSchemaParser); + } + // Add the default / JSON parser last (not as a service, even though it + // implements the service interface), to allow implementations that parse JSON + // files into schemata differently. + formattedSchemaParsers.add(new JsonSchemaParser()); + } + + /** + * Parse an Avro schema from a file. The file content is assumed to be UTF-8 + * text. + * + * @param file the file to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils + */ + public ParseResult parse(File file) throws IOException, SchemaParseException { + return parse(file, null); + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param file the file to read + * @param charset the character set of the file contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(File file, Charset charset) throws IOException, SchemaParseException { + return parse(file.toPath(), charset); + } + + /** + * Parse an Avro schema from a file. The file content is assumed to be UTF-8 + * text. 
+ * + * @param file the file to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils + */ + public ParseResult parse(Path file) throws IOException, SchemaParseException { + return parse(file, null); + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param file the file to read + * @param charset the character set of the file contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(Path file, Charset charset) throws IOException, SchemaParseException { + URI inputDir = file.toAbsolutePath().getParent().toUri(); // absolute first: a bare relative file name has no parent + try (InputStream stream = Files.newInputStream(file)) { + String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); + return parse(inputDir, formattedSchema); + } + } + + /** + * Parse an Avro schema from a file written with a specific character set. + * + * @param location the location of the schema resource + * @param charset the character set of the schema resource + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(URI location, Charset charset) throws IOException, SchemaParseException { + try (InputStream stream = location.toURL().openStream()) { + String formattedSchema = UtfTextUtils.readAllBytes(stream, charset); + return parse(location, formattedSchema); + } + } + + /** + * Parse an Avro schema from an input stream. The stream content is assumed to + * be UTF-8 text. Note that the stream stays open after reading. 
+ * + * @param in the stream to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + * @see UtfTextUtils + */ + public ParseResult parse(InputStream in) throws IOException, SchemaParseException { + return parse(in, null); + } + + /** + * Parse an Avro schema from an input stream. Note that the stream stays open + * after reading. + * + * @param in the stream to read + * @param charset the character set of the stream contents + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(InputStream in, Charset charset) throws IOException, SchemaParseException { + return parse(UtfTextUtils.readAllBytes(in, charset)); + } + + /** + * Parse an Avro schema from an input reader. + * + * @param in the stream to read + * @return the schema + * @throws IOException when the schema cannot be read + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(Reader in) throws IOException, SchemaParseException { + return parse(UtfTextUtils.readAllChars(in)); + } + + /** + * Parse an Avro schema from a string. + * + * @param text the text to parse + * @return the schema + * @throws SchemaParseException if parsing the schema failed; contains + * suppressed underlying parse exceptions if + * available + */ + public ParseResult parse(CharSequence text) throws SchemaParseException { + try { + return parse(null, text); + } catch (IOException e) { + // This can only happen if parser implementations try to read other (related) + // schemata from somewhere. 
+ throw new AvroRuntimeException("Could not read schema", e); + } + } + + /** + * Parse the given schema (string) within the specified context using all + * available {@link FormattedSchemaParser} implementations, collecting any + * {@link SchemaParseException}s that occur, and return the first successfully + * parsed schema. If all parsers fail, throw a {@code SchemaParseException} with + * all collected parse exceptions added as suppressed exceptions. Uses the base + * location of the schema (e.g., the directory where the schema file lives) if + * available. + * + * @param baseUri the base location of the schema, or {@code null} if + * not known + * @param formattedSchema the schema as text + * @return the parsed schema + * @throws IOException if thrown by one of the parsers + * @throws RuntimeException if thrown by one of the parsers + * @throws SchemaParseException when all parsers fail + */ + private ParseResult parse(URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { + List parseExceptions = new ArrayList<>(); + for (FormattedSchemaParser formattedSchemaParser : formattedSchemaParsers) { + try { + Schema schema = formattedSchemaParser.parse(parseContext, baseUri, formattedSchema); + if (parseContext.hasNewSchemas() || schema != null) { + // Parsing succeeded: return the result. + return parseContext.commit(schema); + } + } catch (SchemaParseException e) { + parseContext.rollback(); + parseExceptions.add(e); + } + } + + // None of the available parsers succeeded + + if (parseExceptions.size() == 1) { + throw parseExceptions.get(0); + } + SchemaParseException parseException = new SchemaParseException( + "Could not parse the schema (the suppressed exceptions tell why)."); + parseExceptions.forEach(parseException::addSuppressed); + throw parseException; + } + + /** + * Get all parsed schemata. 
+ * + * @return all parsed schemas, in the order they were parsed + */ + public List getParsedNamedSchemas() { + return parseContext.resolveAllSchemas(); + } + + // Temporary method to reduce PR size + @Deprecated + public Schema resolve(ParseResult result) { + return result.mainSchema(); + } + + public interface ParseResult { + /** + * The main schema parsed from a file. Can be any schema, or {@code null} if the + * parsed file has no "main" schema. + */ + Schema mainSchema(); + + /** + * The list of named schemata that were parsed. + */ + List parsedNamedSchemas(); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java new file mode 100644 index 00000000000..a96f812d84d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import org.slf4j.LoggerFactory; + +/** + * Thrown to prevent making large allocations when reading potentially + * pathological input data from an untrusted source. + *

    + * The following system properties can be set to limit the size of bytes, + * strings and collection types to be allocated: + *

      + *
    • org.apache.avro.limits.bytes.maxLength
    • limits the maximum + * size of byte types. + *
    • org.apache.avro.limits.collectionItems.maxLength
    • limits the + * maximum number of map and list items that can be read at + * once in a single sequence. + *
    • org.apache.avro.limits.string.maxLength
    • limits the maximum + * size of string types. + *
    + * + * The default is to permit sizes up to {@link #MAX_ARRAY_VM_LIMIT}. + */ +public class SystemLimitException extends AvroRuntimeException { + + /** + * The maximum length of array to allocate (unless necessary). Some VMs reserve + * some header words in an array. Attempts to allocate larger arrays may result + * in {@code OutOfMemoryError: Requested array size exceeds VM limit} + * + * @see JDK-8246725 + */ + // VisibleForTesting + static final int MAX_ARRAY_VM_LIMIT = Integer.MAX_VALUE - 8; + + public static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; + public static final String MAX_COLLECTION_LENGTH_PROPERTY = "org.apache.avro.limits.collectionItems.maxLength"; + public static final String MAX_STRING_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; + + private static int maxBytesLength = MAX_ARRAY_VM_LIMIT; + private static int maxCollectionLength = MAX_ARRAY_VM_LIMIT; + private static int maxStringLength = MAX_ARRAY_VM_LIMIT; + + static { + resetLimits(); + } + + public SystemLimitException(String message) { + super(message); + } + + /** + * Get an integer value stored in a system property, used to configure the + * system behaviour of decoders + * + * @param property The system property to fetch + * @param defaultValue The value to use if the system property is not present or + * parsable as an int + * @return The value from the system property + */ + private static int getLimitFromProperty(String property, int defaultValue) { + String o = System.getProperty(property); + int i = defaultValue; + if (o != null) { + try { + i = Integer.parseUnsignedInt(o); + } catch (NumberFormatException nfe) { + LoggerFactory.getLogger(SystemLimitException.class).warn("Could not parse property " + property + ": " + o, + nfe); + } + } + return i; + } + + /** + * Check to ensure that reading the bytes is within the specified limits. 
+ * + * @param length The proposed size of the bytes to read + * @return The size of the bytes if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the datum would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxBytesLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException( + "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + " bytes in Java library"); + } + if (length > maxBytesLength) { + throw new SystemLimitException("Bytes length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the specified number of items remains within the + * specified limits. + * + * @param existing The number of elements items read in the collection + * @param items The next number of items to read. In normal usage, this is + * always a positive, permitted value. Negative and zero values + * have a special meaning in Avro decoding. + * @return The total number of items in the collection if and only if it is + * within the limit and non-negative. 
+ * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxCollectionLength(long existing, long items) { + long length = existing + items; + if (existing < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + existing); + } + if (items < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + items); + } + if (length > MAX_ARRAY_VM_LIMIT || length < existing) { + throw new UnsupportedOperationException( + "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library"); + } + if (length > maxCollectionLength) { + throw new SystemLimitException("Collection length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the string size is within the specified limits. + * + * @param length The proposed size of the string to read + * @return The size of the string if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxStringLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. 
Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"); + } + if (length > maxStringLength) { + throw new SystemLimitException("String length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** Reread the limits from the system properties. */ + // VisibleForTesting + static void resetLimits() { + maxBytesLength = getLimitFromProperty(MAX_BYTES_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxCollectionLength = getLimitFromProperty(MAX_COLLECTION_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxStringLength = getLimitFromProperty(MAX_STRING_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java b/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java index 785d31a5116..e63ebaae6e0 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java +++ b/lang/java/avro/src/main/java/org/apache/avro/data/TimeConversions.java @@ -204,6 +204,53 @@ public Schema getRecommendedSchema() { } } + public static class TimestampNanosConversion extends Conversion { + @Override + public Class getConvertedType() { + return Instant.class; + } + + @Override + public String getLogicalTypeName() { + return "timestamp-nanos"; + } + + @Override + public String adjustAndSetValue(String varName, String valParamName) { + return varName + " = " + valParamName + ".truncatedTo(java.time.temporal.ChronoUnit.NANOS);"; + } + + @Override + public Instant fromLong(Long microsFromEpoch, Schema schema, LogicalType type) { + long epochSeconds = microsFromEpoch / 1_000_000_000L; + long nanoAdjustment = microsFromEpoch % 1_000_000_000L; + + return Instant.ofEpochSecond(epochSeconds, nanoAdjustment); + } + + @Override + public Long toLong(Instant instant, Schema schema, LogicalType type) { + long seconds = instant.getEpochSecond(); + int nanos = 
instant.getNano(); + + if (seconds < 0 && nanos > 0) { + long micros = Math.multiplyExact(seconds + 1, 1_000_000_000L); + long adjustment = nanos - 1_000_000_000L; // give back the second added above, in nanoseconds (10^9) + + return Math.addExact(micros, adjustment); + } else { + long micros = Math.multiplyExact(seconds, 1_000_000_000L); + + return Math.addExact(micros, nanos); + } + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + } + } + public static class LocalTimestampMillisConversion extends Conversion { private final TimestampMillisConversion timestampMillisConversion = new TimestampMillisConversion(); @@ -265,4 +312,35 @@ public Schema getRecommendedSchema() { return LogicalTypes.localTimestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); } } + + public static class LocalTimestampNanosConversion extends Conversion { + private final TimestampNanosConversion timestampNanosConversion = new TimestampNanosConversion(); + + @Override + public Class getConvertedType() { + return LocalDateTime.class; + } + + @Override + public String getLogicalTypeName() { + return "local-timestamp-nanos"; + } + + @Override + public LocalDateTime fromLong(Long microsFromEpoch, Schema schema, LogicalType type) { + Instant instant = timestampNanosConversion.fromLong(microsFromEpoch, schema, type); + return LocalDateTime.ofInstant(instant, ZoneOffset.UTC); + } + + @Override + public Long toLong(LocalDateTime timestamp, Schema schema, LogicalType type) { + Instant instant = timestamp.toInstant(ZoneOffset.UTC); + return timestampNanosConversion.toLong(instant, schema, type); + } + + @Override + public Schema getRecommendedSchema() { + return LogicalTypes.localTimestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + } + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java index 351c036b861..1cfed238f7e 100644 --- 
a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java @@ -28,12 +28,14 @@ /** * Encapsulates the ability to specify and configure a compression codec. * - * Currently there are three codecs registered by default: + * Currently there are five codecs registered by default: *
      *
    • {@code null}
    • *
    • {@code deflate}
    • *
    • {@code snappy}
    • *
    • {@code bzip2}
    • + *
    • {@code xz}
    • + *
    • {@code zstandard}
    • *
    * * New and custom codecs can be registered using diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java index 7a235352e50..ae33df59fbe 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader.java @@ -17,18 +17,19 @@ */ package org.apache.avro.file; +import org.apache.avro.InvalidAvroMagicException; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.commons.io.IOUtils; + import java.io.EOFException; +import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.File; import java.util.Arrays; -import org.apache.avro.InvalidAvroMagicException; -import org.apache.avro.io.DecoderFactory; -import org.apache.commons.compress.utils.IOUtils; -import org.apache.avro.io.DatumReader; -import static org.apache.avro.file.DataFileConstants.SYNC_SIZE; import static org.apache.avro.file.DataFileConstants.MAGIC; +import static org.apache.avro.file.DataFileConstants.SYNC_SIZE; /** * Random access to files written with {@link DataFileWriter}. @@ -36,7 +37,7 @@ * @see DataFileWriter */ public class DataFileReader extends DataFileStream implements FileReader { - private SeekableInputStream sin; + private final SeekableInputStream sin; private long blockStart; private int[] partialMatchTable; @@ -69,10 +70,9 @@ public static FileReader openReader(SeekableInput in, DatumReader read length -= bytesRead; offset += bytesRead; } - in.seek(0); if (Arrays.equals(MAGIC, magic)) // current format - return new DataFileReader<>(in, reader); + return new DataFileReader<>(in, reader, magic); if (Arrays.equals(DataFileReader12.MAGIC, magic)) // 1.2 format return new DataFileReader12<>(in, reader); @@ -111,7 +111,7 @@ public static DataFileReader openReader(SeekableInput in, DatumReader *
        */
       public DataFileReader(File file, DatumReader reader) throws IOException {
    -    this(new SeekableFileInput(file), reader, true);
    +    this(new SeekableFileInput(file), reader, true, null);
       }
     
       /**
    @@ -128,15 +128,20 @@ public DataFileReader(File file, DatumReader reader) throws IOException {
        * 
        */
       public DataFileReader(SeekableInput sin, DatumReader reader) throws IOException {
    -    this(sin, reader, false);
    +    this(sin, reader, false, null);
    +  }
    +
    +  private DataFileReader(SeekableInput sin, DatumReader reader, byte[] magic) throws IOException {
    +    this(sin, reader, false, magic);
       }
     
       /** Construct a reader for a file. Please close resource files yourself. */
    -  protected DataFileReader(SeekableInput sin, DatumReader reader, boolean closeOnError) throws IOException {
    +  protected DataFileReader(SeekableInput sin, DatumReader reader, boolean closeOnError, byte[] magic)
    +      throws IOException {
         super(reader);
         try {
           this.sin = new SeekableInputStream(sin);
    -      initialize(this.sin);
    +      initialize(this.sin, magic);
           blockFinished();
         } catch (final Throwable e) {
           if (closeOnError) {
    @@ -153,7 +158,7 @@ protected DataFileReader(SeekableInput sin, DatumReader reader, boolean close
       protected DataFileReader(SeekableInput sin, DatumReader reader, Header header) throws IOException {
         super(reader);
         this.sin = new SeekableInputStream(sin);
    -    initialize(this.sin, header);
    +    initialize(header);
       }
     
       /**
    @@ -166,7 +171,7 @@ public void seek(long position) throws IOException {
         vin = DecoderFactory.get().binaryDecoder(this.sin, vin);
         datumIn = null;
         blockRemaining = 0;
    -    blockStart = position;
    +    blockFinished();
       }
     
       /**
    @@ -180,7 +185,7 @@ public void sync(final long position) throws IOException {
         seek(position);
         // work around an issue where 1.5.4 C stored sync in metadata
         if ((position == 0L) && (getMeta("avro.sync") != null)) {
    -      initialize(sin); // re-init to skip header
    +      initialize(sin, null); // re-init to skip header
           return;
         }
     
    @@ -259,9 +264,9 @@ public long tell() throws IOException {
     
       static class SeekableInputStream extends InputStream implements SeekableInput {
         private final byte[] oneByte = new byte[1];
    -    private SeekableInput in;
    +    private final SeekableInput in;
     
    -    SeekableInputStream(SeekableInput in) throws IOException {
    +    SeekableInputStream(SeekableInput in) {
           this.in = in;
         }
     
    @@ -305,15 +310,10 @@ public int read() throws IOException {
         @Override
         public long skip(long skip) throws IOException {
           long position = in.tell();
    +      long skipToPosition = position + skip;
           long length = in.length();
    -      long remaining = length - position;
    -      if (remaining > skip) {
    -        in.seek(skip);
    -        return in.tell() - position;
    -      } else {
    -        in.seek(remaining);
    -        return in.tell() - position;
    -      }
    +      in.seek(Math.min(skipToPosition, length));
    +      return in.tell() - position;
         }
     
         @Override
    @@ -325,7 +325,7 @@ public void close() throws IOException {
         @Override
         public int available() throws IOException {
           long remaining = (in.length() - in.tell());
    -      return (remaining > Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) remaining;
    +      return (int) Math.min(remaining, Integer.MAX_VALUE);
         }
       }
     }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    index f24a978d6f8..c057a86db73 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileReader12.java
    @@ -61,6 +61,7 @@ public DataFileReader12(SeekableInput sin, DatumReader reader) throws IOExcep
         this.in = new DataFileReader.SeekableInputStream(sin);
     
         byte[] magic = new byte[4];
    +    in.seek(0); // seek to 0 to read magic header
         in.read(magic);
         if (!Arrays.equals(MAGIC, magic))
           throw new InvalidAvroMagicException("Not a data file.");
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    index 8d2697104e2..aa458684635 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java
    @@ -17,14 +17,23 @@
      */
     package org.apache.avro.file;
     
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.InvalidAvroMagicException;
    +import org.apache.avro.NameValidator;
    +import org.apache.avro.Schema;
    +import org.apache.avro.io.BinaryDecoder;
    +import org.apache.avro.io.BinaryEncoder;
    +import org.apache.avro.io.DatumReader;
    +import org.apache.avro.io.DecoderFactory;
    +
    +import java.io.Closeable;
     import java.io.EOFException;
     import java.io.IOException;
     import java.io.InputStream;
    -import java.io.Closeable;
     import java.nio.ByteBuffer;
     import java.nio.charset.StandardCharsets;
    -import java.util.Arrays;
     import java.util.ArrayList;
    +import java.util.Arrays;
     import java.util.Collections;
     import java.util.HashMap;
     import java.util.Iterator;
    @@ -32,14 +41,6 @@
     import java.util.Map;
     import java.util.NoSuchElementException;
     
    -import org.apache.avro.AvroRuntimeException;
    -import org.apache.avro.InvalidAvroMagicException;
    -import org.apache.avro.Schema;
    -import org.apache.avro.io.BinaryEncoder;
    -import org.apache.avro.io.DecoderFactory;
    -import org.apache.avro.io.BinaryDecoder;
    -import org.apache.avro.io.DatumReader;
    -
     /**
      * Streaming access to files written by {@link DataFileWriter}. Use
      * {@link DataFileReader} for file-based input.
    @@ -87,7 +88,7 @@ private Header() {
        */
       public DataFileStream(InputStream in, DatumReader reader) throws IOException {
         this.reader = reader;
    -    initialize(in);
    +    initialize(in, null);
       }
     
       /**
    @@ -97,18 +98,30 @@ protected DataFileStream(DatumReader reader) throws IOException {
         this.reader = reader;
       }
     
    -  /** Initialize the stream by reading from its head. */
    -  void initialize(InputStream in) throws IOException {
    -    this.header = new Header();
    -    this.vin = DecoderFactory.get().binaryDecoder(in, vin);
    +  byte[] readMagic() throws IOException {
    +    if (this.vin == null) {
    +      throw new IOException("InputStream is not initialized");
    +    }
         byte[] magic = new byte[DataFileConstants.MAGIC.length];
         try {
           vin.readFixed(magic); // read magic
         } catch (IOException e) {
           throw new IOException("Not an Avro data file.", e);
         }
    +    return magic;
    +  }
    +
    +  void validateMagic(byte[] magic) throws InvalidAvroMagicException {
         if (!Arrays.equals(DataFileConstants.MAGIC, magic))
           throw new InvalidAvroMagicException("Not an Avro data file.");
    +  }
    +
    +  /** Initialize the stream by reading from its head. */
    +  void initialize(InputStream in, byte[] magic) throws IOException {
    +    this.header = new Header();
    +    this.vin = DecoderFactory.get().binaryDecoder(in, vin);
    +    magic = (magic == null) ? readMagic() : magic;
    +    validateMagic(magic);
     
         long l = vin.readMapStart(); // read meta data
         if (l > 0) {
    @@ -127,14 +140,14 @@ void initialize(InputStream in) throws IOException {
     
         // finalize the header
         header.metaKeyList = Collections.unmodifiableList(header.metaKeyList);
    -    header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false)
    +    header.schema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false)
             .parse(getMetaString(DataFileConstants.SCHEMA));
         this.codec = resolveCodec();
         reader.setSchema(header.schema);
       }
     
       /** Initialize the stream without reading from it. */
    -  void initialize(InputStream in, Header header) throws IOException {
    +  void initialize(Header header) throws IOException {
         this.header = header;
         this.codec = resolveCodec();
         reader.setSchema(header.schema);
    @@ -262,6 +275,7 @@ public ByteBuffer nextBlock() throws IOException {
         if (blockRemaining != blockCount)
           throw new IllegalStateException("Not at block start.");
         blockRemaining = 0;
    +    blockFinished();
         datumIn = null;
         return blockBuffer;
       }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    index 05e5006acbf..58235da8848 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java
    @@ -17,32 +17,32 @@
      */
     package org.apache.avro.file;
     
    -import static java.nio.charset.StandardCharsets.UTF_8;
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.Schema;
    +import org.apache.avro.file.DataFileStream.DataBlock;
    +import org.apache.avro.generic.GenericDatumReader;
    +import org.apache.avro.io.BinaryEncoder;
    +import org.apache.avro.io.DatumWriter;
    +import org.apache.avro.io.EncoderFactory;
    +import org.apache.avro.util.NonCopyingByteArrayOutputStream;
    +import org.apache.commons.compress.utils.IOUtils;
     
     import java.io.BufferedOutputStream;
     import java.io.Closeable;
     import java.io.File;
    +import java.io.FileOutputStream;
     import java.io.FilterOutputStream;
     import java.io.Flushable;
     import java.io.IOException;
     import java.io.OutputStream;
     import java.nio.ByteBuffer;
     import java.nio.charset.StandardCharsets;
    -import java.security.MessageDigest;
    -import java.security.NoSuchAlgorithmException;
    +import java.security.SecureRandom;
     import java.util.HashMap;
     import java.util.Map;
    -import java.util.UUID;
    +import java.util.function.Function;
     
    -import org.apache.avro.AvroRuntimeException;
    -import org.apache.avro.Schema;
    -import org.apache.avro.file.DataFileStream.DataBlock;
    -import org.apache.avro.generic.GenericDatumReader;
    -import org.apache.avro.io.BinaryEncoder;
    -import org.apache.avro.io.DatumWriter;
    -import org.apache.avro.io.EncoderFactory;
    -import org.apache.avro.util.NonCopyingByteArrayOutputStream;
    -import org.apache.commons.compress.utils.IOUtils;
    +import static java.nio.charset.StandardCharsets.UTF_8;
     
     /**
      * Stores in a file a sequence of data conforming to a schema. The schema is
    @@ -51,7 +51,7 @@
      * blocks. A synchronization marker is written between blocks, so that
      * files may be split. Blocks may be compressed. Extensible metadata is stored
      * at the end of the file. Files may be appended to.
    - * 
    + *
      * @see DataFileReader
      */
     public class DataFileWriter implements Closeable, Flushable {
    @@ -72,6 +72,8 @@ public class DataFileWriter implements Closeable, Flushable {
     
       private byte[] sync; // 16 random bytes
       private int syncInterval = DataFileConstants.DEFAULT_SYNC_INTERVAL;
    +  private Function initEncoder = out -> new EncoderFactory().directBinaryEncoder(out,
    +      null);
     
       private boolean isOpen;
       private Codec codec;
    @@ -129,6 +131,17 @@ public DataFileWriter setSyncInterval(int syncInterval) {
         return this;
       }
     
    +  /**
    +   * Allows setting a different encoder than the default DirectBinaryEncoder;
    +   * the function is applied to the block buffer when init(OutputStream) runs.
    +   * @param initEncoderFunc Function to create a binary encoder
    +   * @return this DataFileWriter
    +   */
    +  public DataFileWriter<D> setEncoder(Function<OutputStream, BinaryEncoder> initEncoderFunc) {
    +    this.initEncoder = initEncoderFunc;
    +    return this;
    +  }
    +
       /** Open a new file for data matching a schema with a random sync. */
       public DataFileWriter<D> create(Schema schema, File file) throws IOException {
         SyncableFileOutputStream sfos = new SyncableFileOutputStream(file);
    @@ -181,7 +194,7 @@ public DataFileWriter create(Schema schema, OutputStream outs, byte[] sync) t
        * sync marker is written. By default, the writer will flush the buffer each
        * time a sync marker is written (if the block size limit is reached or the
        * {@linkplain #sync()} is called.
    -   * 
    +   *
        * @param flushOnEveryBlock - If set to false, this writer will not flush the
        *                          block to the stream until {@linkplain #flush()} is
        *                          explicitly called.
    @@ -211,7 +224,7 @@ public DataFileWriter appendTo(File file) throws IOException {
       /**
        * Open a writer appending to an existing file. Since 1.9.0 this method
        * does not close in.
    -   * 
    +   *
        * @param in  reading the existing file.
        * @param out positioned at the end of the existing file.
        */
    @@ -241,22 +254,19 @@ private void init(OutputStream outs) throws IOException {
         this.vout = efactory.directBinaryEncoder(out, null);
         dout.setSchema(schema);
         buffer = new NonCopyingByteArrayOutputStream(Math.min((int) (syncInterval * 1.25), Integer.MAX_VALUE / 2 - 1));
    -    this.bufOut = efactory.directBinaryEncoder(buffer, null);
    +    this.bufOut = this.initEncoder.apply(buffer);
         if (this.codec == null) {
           this.codec = CodecFactory.nullCodec().createInstance();
         }
         this.isOpen = true;
       }
     
    +  private static final SecureRandom RNG = new SecureRandom();
    +
       private static byte[] generateSync() {
    -    try {
    -      MessageDigest digester = MessageDigest.getInstance("MD5");
    -      long time = System.currentTimeMillis();
    -      digester.update((UUID.randomUUID() + "@" + time).getBytes(UTF_8));
    -      return digester.digest();
    -    } catch (NoSuchAlgorithmException e) {
    -      throw new RuntimeException(e);
    -    }
    +    byte[] sync = new byte[16];
    +    RNG.nextBytes(sync);
    +    return sync;
       }
     
       private DataFileWriter setMetaInternal(String key, byte[] value) {
    @@ -304,7 +314,7 @@ public AppendWriteException(Exception e) {
     
       /**
        * Append a datum to the file.
    -   * 
    +   *
        * @see AppendWriteException
        */
       public void append(D datum) throws IOException {
    @@ -365,7 +375,7 @@ private void writeIfBlockFull() throws IOException {
        * at compression level 7. If recompress is false, blocks will be copied
        * without changing the compression level. If true, they will be converted to
        * the new compression level.
    -   * 
    +   *
        * @param otherFile
        * @param recompress
        * @throws IOException
    @@ -439,10 +449,10 @@ public void flush() throws IOException {
       }
     
       /**
    -   * If this writer was instantiated using a File or using an
    -   * {@linkplain Syncable} instance, this method flushes all buffers for this
    -   * writer to disk. In other cases, this method behaves exactly like
    -   * {@linkplain #flush()}.
    +   * If this writer was instantiated using a {@linkplain File},
    +   * {@linkplain FileOutputStream} or {@linkplain Syncable} instance, this method
    +   * flushes all buffers for this writer to disk. In other cases, this method
    +   * behaves exactly like {@linkplain #flush()}.
        *
        * @throws IOException
        */
    @@ -450,6 +460,8 @@ public void fSync() throws IOException {
         flush();
         if (underlyingStream instanceof Syncable) {
           ((Syncable) underlyingStream).sync();
    +    } else if (underlyingStream instanceof FileOutputStream) {
    +      ((FileOutputStream) underlyingStream).getFD().sync();
         }
       }
     
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java b/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    index 991fc44b4e8..49994a9bc8e 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/file/SeekableByteArrayInput.java
    @@ -18,6 +18,7 @@
     package org.apache.avro.file;
     
     import java.io.ByteArrayInputStream;
    +import java.io.EOFException;
     import java.io.IOException;
     
     /** A {@link SeekableInput} backed with data in a byte array. */
    @@ -34,8 +35,12 @@ public long length() throws IOException {
     
       @Override
       public void seek(long p) throws IOException {
    -    this.reset();
    -    this.skip(p);
    +    if (p >= this.count) {
    +      throw new EOFException();
    +    }
    +    if (p >= 0) {
    +      this.pos = (int) p;
    +    }
       }
     
       @Override
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    index 8f7391f5934..6db0a40eee6 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
    @@ -26,14 +26,15 @@
     import java.util.AbstractList;
     import java.util.Arrays;
     import java.util.Collection;
    -import java.util.Collections;
     import java.util.HashMap;
     import java.util.IdentityHashMap;
     import java.util.Iterator;
     import java.util.LinkedHashMap;
     import java.util.List;
     import java.util.Map;
    -import java.util.WeakHashMap;
    +import java.util.ServiceLoader;
    +import java.util.UUID;
    +import java.util.concurrent.ConcurrentMap;
     
     import org.apache.avro.AvroMissingFieldException;
     import org.apache.avro.AvroRuntimeException;
    @@ -58,6 +59,9 @@
     import org.apache.avro.util.internal.Accessor;
     
     import com.fasterxml.jackson.databind.JsonNode;
    +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap;
    +
    +import static org.apache.avro.util.springframework.ConcurrentReferenceHashMap.ReferenceType.WEAK;
     
     /**
      * Utilities for generic Java data. See {@link GenericRecordBuilder} for a
    @@ -114,6 +118,7 @@ public GenericData() {
       /** For subclasses. GenericData does not use a ClassLoader. */
       public GenericData(ClassLoader classLoader) {
         this.classLoader = (classLoader != null) ? classLoader : getClass().getClassLoader();
    +    loadConversions();
       }
     
       /** Return the class loader that's used (by subclasses). */
    @@ -121,6 +126,17 @@ public ClassLoader getClassLoader() {
         return classLoader;
       }
     
    +  /**
    +   * Registers every Conversion that ServiceLoader finds via this class loader.
    +   *
    +   * @see #addLogicalTypeConversion(Conversion)
    +   */
    +  private void loadConversions() {
    +    for (Conversion<?> conversion : ServiceLoader.load(Conversion.class, classLoader)) {
    +      addLogicalTypeConversion(conversion);
    +    }
    +  }
    +
       private Map<String, Conversion<?>> conversions = new HashMap<>();
     
       private Map<Class<?>, Map<String, Conversion<?>>> conversionsByClass = new IdentityHashMap<>();
    @@ -131,19 +147,17 @@ public Collection<Conversion<?>> getConversions() {
     
       /**
        * Registers the given conversion to be used when reading and writing with this
    -   * data model.
    +   * data model. Conversions can also be registered automatically, as documented
    +   * on the class {@link Conversion Conversion&lt;T&gt;}.
        *
        * @param conversion a logical type Conversion.
        */
       public void addLogicalTypeConversion(Conversion<?> conversion) {
         conversions.put(conversion.getLogicalTypeName(), conversion);
         Class<?> type = conversion.getConvertedType();
    -    Map<String, Conversion<?>> conversions = conversionsByClass.get(type);
    -    if (conversions == null) {
    -      conversions = new LinkedHashMap<>();
    -      conversionsByClass.put(type, conversions);
    -    }
    -    conversions.put(conversion.getLogicalTypeName(), conversion);
    +    Map<String, Conversion<?>> conversionsForClass = conversionsByClass.computeIfAbsent(type,
    +        k -> new LinkedHashMap<>());
    +    conversionsForClass.put(conversion.getLogicalTypeName(), conversion);
       }
     
       /**
    @@ -184,11 +198,11 @@ public  Conversion getConversionByClass(Class datumClass, LogicalType l
        * @return the conversion for the logical type, or null
        */
       @SuppressWarnings("unchecked")
    -  public Conversion<Object> getConversionFor(LogicalType logicalType) {
    +  public <T> Conversion<T> getConversionFor(LogicalType logicalType) {
         if (logicalType == null) {
           return null;
         }
    -    return (Conversion<Object>) conversions.get(logicalType.getName());
    +    return (Conversion<T>) conversions.get(logicalType.getName());
       }
     
       public static final String FAST_READER_PROP = "org.apache.avro.fastread";
    @@ -303,30 +317,16 @@ public String toString() {
         }
       }
     
    -  /** Default implementation of an array. */
    -  @SuppressWarnings(value = "unchecked")
    -  public static class Array<T> extends AbstractList<T> implements GenericArray<T>, Comparable<GenericArray<T>> {
    -    private static final Object[] EMPTY = new Object[0];
    +  public static abstract class AbstractArray<T> extends AbstractList<T>
    +      implements GenericArray<T>, Comparable<GenericArray<T>> {
         private final Schema schema;
    -    private int size;
    -    private Object[] elements = EMPTY;
     
    -    public Array(int capacity, Schema schema) {
    -      if (schema == null || !Type.ARRAY.equals(schema.getType()))
    -        throw new AvroRuntimeException("Not an array schema: " + schema);
    -      this.schema = schema;
    -      if (capacity != 0)
    -        elements = new Object[capacity];
    -    }
    +    protected int size = 0;
     
    -    public Array(Schema schema, Collection<T> c) {
    +    public AbstractArray(Schema schema) {
           if (schema == null || !Type.ARRAY.equals(schema.getType()))
             throw new AvroRuntimeException("Not an array schema: " + schema);
           this.schema = schema;
    -      if (c != null) {
    -        elements = new Object[c.size()];
    -        addAll(c);
    -      }
         }
     
         @Override
    @@ -340,22 +340,26 @@ public int size() {
         }
     
         @Override
    -    public void clear() {
    -      // Let GC do its work
    -      Arrays.fill(elements, 0, size, null);
    +    public void reset() {
           size = 0;
         }
     
         @Override
    -    public void reset() {
    -      size = 0;
    +    public int compareTo(GenericArray<T> that) {
    +      return GenericData.get().compare(this, that, this.getSchema());
         }
     
         @Override
    -    public void prune() {
    -      if (size < elements.length) {
    -        Arrays.fill(elements, size, elements.length, null);
    +    public boolean equals(final Object o) {
    +      if (!(o instanceof Collection)) {
    +        return false;
           }
    +      return GenericData.get().compare(this, o, this.getSchema()) == 0;
    +    }
    +
    +    @Override
    +    public int hashCode() {
    +      return super.hashCode();
         }
     
         @Override
    @@ -370,7 +374,7 @@ public boolean hasNext() {
     
             @Override
             public T next() {
    -          return (T) elements[position++];
    +          return AbstractArray.this.get(position++);
             }
     
             @Override
    @@ -380,6 +384,57 @@ public void remove() {
           };
         }
     
    +    @Override
    +    public void reverse() {
    +      int left = 0;
    +      int right = size - 1;
    +
    +      while (left < right) {
    +        this.swap(left, right);
    +
    +        left++;
    +        right--;
    +      }
    +    }
    +
    +    protected abstract void swap(int index1, int index2);
    +  }
    +
    +  /** Default implementation of an array. */
    +  @SuppressWarnings(value = "unchecked")
    +  public static class Array<T> extends AbstractArray<T> {
    +    private static final Object[] EMPTY = new Object[0];
    +
    +    private Object[] elements = EMPTY;
    +
    +    public Array(int capacity, Schema schema) {
    +      super(schema);
    +      if (capacity != 0)
    +        elements = new Object[capacity];
    +    }
    +
    +    public Array(Schema schema, Collection<T> c) {
    +      super(schema);
    +      if (c != null) {
    +        elements = new Object[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      // Let GC do its work
    +      Arrays.fill(elements, 0, size, null);
    +      size = 0;
    +    }
    +
    +    @Override
    +    public void prune() {
    +      if (size < elements.length) {
    +        Arrays.fill(elements, size, elements.length, null);
    +      }
    +    }
    +
         @Override
         public T get(int i) {
           if (i >= size)
    @@ -428,23 +483,10 @@ public T peek() {
         }
     
         @Override
    -    public int compareTo(GenericArray<T> that) {
    -      return GenericData.get().compare(this, that, this.getSchema());
    -    }
    -
    -    @Override
    -    public void reverse() {
    -      int left = 0;
    -      int right = elements.length - 1;
    -
    -      while (left < right) {
    -        Object tmp = elements[left];
    -        elements[left] = elements[right];
    -        elements[right] = tmp;
    -
    -        left++;
    -        right--;
    -      }
    +    protected void swap(final int index1, final int index2) {
    +      Object tmp = elements[index1];
    +      elements[index1] = elements[index2];
    +      elements[index2] = tmp;
         }
       }
     
    @@ -704,7 +746,7 @@ protected void toString(Object datum, StringBuilder buffer, IdentityHashMap a = (Collection) o;
    +        Schema elementType = s.getElementType();
    +        for (Object e : a) {
    +          if (this.shouldStop()) {
    +            return currentHashCode;
    +          }
    +          currentHashCode = this.hashCodeAdd(e, elementType);
    +        }
    +        return currentHashCode;
    +      case UNION:
    +        return hashCode(o, s.getTypes().get(GenericData.this.resolveUnion(s, o)));
    +      case ENUM:
    +        return s.getEnumOrdinal(o.toString());
    +      case NULL:
    +        return 0;
    +      case STRING:
    +        return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
    +      default:
    +        return o.hashCode();
           }
    -      return hashCode;
    -    case ARRAY:
    -      Collection a = (Collection) o;
    -      Schema elementType = s.getElementType();
    -      for (Object e : a)
    -        hashCode = hashCodeAdd(hashCode, e, elementType);
    -      return hashCode;
    -    case UNION:
    -      return hashCode(o, s.getTypes().get(resolveUnion(s, o)));
    -    case ENUM:
    -      return s.getEnumOrdinal(o.toString());
    -    case NULL:
    -      return 0;
    -    case STRING:
    -      return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
    -    default:
    -      return o.hashCode();
         }
    -  }
     
    -  /** Add the hash code for an object into an accumulated hash code. */
    -  protected int hashCodeAdd(int hashCode, Object o, Schema s) {
    -    return 31 * hashCode + hashCode(o, s);
    +    /** Add the hash code for an object into an accumulated hash code. */
    +    protected int hashCodeAdd(Object o, Schema s) {
    +      return 31 * this.currentHashCode + hashCode(o, s);
    +    }
    +
    +    private boolean shouldStop() {
    +      return --counter <= 0;
    +    }
       }
     
       /**
    @@ -1136,6 +1201,73 @@ public int compare(Object o1, Object o2, Schema s) {
         return compare(o1, o2, s, false);
       }
     
    +  protected int compareMaps(final Map<?, ?> m1, final Map<?, ?> m2) {
    +    if (m1 == m2) {
    +      return 0;
    +    }
    +
    +    if (m1.isEmpty() && m2.isEmpty()) {
    +      return 0;
    +    }
    +
    +    if (m1.size() != m2.size()) {
    +      return 1;
    +    }
    +
    +    /**
    +     * Peek at keys, assuming they're all the same type within a Map
    +     */
    +    final Object key1 = m1.keySet().iterator().next();
    +    final Object key2 = m2.keySet().iterator().next();
    +    boolean utf8ToString = false;
    +    boolean stringToUtf8 = false;
    +
    +    if (key1 instanceof Utf8 && key2 instanceof String) {
    +      utf8ToString = true;
    +    } else if (key1 instanceof String && key2 instanceof Utf8) {
    +      stringToUtf8 = true;
    +    }
    +
    +    try {
    +      for (Map.Entry e : m1.entrySet()) {
    +        final Object key = e.getKey();
    +        Object lookupKey = key;
    +        if (utf8ToString) {
    +          lookupKey = key.toString();
    +        } else if (stringToUtf8) {
    +          lookupKey = new Utf8((String) lookupKey);
    +        }
    +        final Object value = e.getValue();
    +        if (value == null) {
    +          if (!(m2.get(lookupKey) == null && m2.containsKey(lookupKey))) {
    +            return 1;
    +          }
    +        } else {
    +          final Object value2 = m2.get(lookupKey);
    +          if (value instanceof Utf8 && value2 instanceof String) {
    +            if (!value.toString().equals(value2)) {
    +              return 1;
    +            }
    +          } else if (value instanceof String && value2 instanceof Utf8) {
    +            if (!new Utf8((String) value).equals(value2)) {
    +              return 1;
    +            }
    +          } else {
    +            if (!value.equals(value2)) {
    +              return 1;
    +            }
    +          }
    +        }
    +      }
    +    } catch (ClassCastException unused) {
    +      return 1;
    +    } catch (NullPointerException unused) {
    +      return 1;
    +    }
    +
    +    return 0;
    +  }
    +
       /**
        * Comparison implementation. When equals is true, only checks for equality, not
        * for order.
    @@ -1172,7 +1304,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
           return e1.hasNext() ? 1 : (e2.hasNext() ? -1 : 0);
         case MAP:
           if (equals)
    -        return o1.equals(o2) ? 0 : 1;
    +        return compareMaps((Map) o1, (Map) o2);
           throw new AvroRuntimeException("Can't compare maps!");
         case UNION:
           int i1 = resolveUnion(s, o1);
    @@ -1189,7 +1321,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) {
         }
       }
     
    -  private final Map<Field, Object> defaultValueCache = Collections.synchronizedMap(new WeakHashMap<>());
    +  private final ConcurrentMap<Field, Object> defaultValueCache = new ConcurrentReferenceHashMap<>(128, WEAK);
     
       /**
        * Gets the default value of the given field, if any.
    @@ -1209,28 +1341,20 @@ public Object getDefaultValue(Field field) {
         }
     
         // Check the cache
    -    Object defaultValue = defaultValueCache.get(field);
    -
         // If not cached, get the default Java value by encoding the default JSON
         // value and then decoding it:
    -    if (defaultValue == null)
    +    return defaultValueCache.computeIfAbsent(field, fieldToGetValueFor -> {
           try {
             ByteArrayOutputStream baos = new ByteArrayOutputStream();
             BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    -        Accessor.encode(encoder, field.schema(), json);
    +        Accessor.encode(encoder, fieldToGetValueFor.schema(), json);
             encoder.flush();
             BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(baos.toByteArray(), null);
    -        defaultValue = createDatumReader(field.schema()).read(null, decoder);
    -
    -        // this MAY result in two threads creating the same defaultValue
    -        // and calling put. The last thread will win. However,
    -        // that's not an issue.
    -        defaultValueCache.put(field, defaultValue);
    +        return createDatumReader(fieldToGetValueFor.schema()).read(null, decoder);
           } catch (IOException e) {
             throw new AvroRuntimeException(e);
           }
    -
    -    return defaultValue;
    +    });
       }
     
       private static final Schema STRINGS = Schema.create(Type.STRING);
    @@ -1403,8 +1527,24 @@ public Object newArray(Object old, int size, Schema schema) {
         } else if (old instanceof Collection) {
           ((Collection) old).clear();
           return old;
    -    } else
    +    } else {
    +      if (schema.getElementType().getType() == Type.INT) {
    +        return new PrimitivesArrays.IntArray(size, schema);
    +      }
    +      if (schema.getElementType().getType() == Type.BOOLEAN) {
    +        return new PrimitivesArrays.BooleanArray(size, schema);
    +      }
    +      if (schema.getElementType().getType() == Type.LONG) {
    +        return new PrimitivesArrays.LongArray(size, schema);
    +      }
    +      if (schema.getElementType().getType() == Type.FLOAT) {
    +        return new PrimitivesArrays.FloatArray(size, schema);
    +      }
    +      if (schema.getElementType().getType() == Type.DOUBLE) {
    +        return new PrimitivesArrays.DoubleArray(size, schema);
    +      }
           return new GenericData.Array(size, schema);
    +    }
       }
     
       /**
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    index e32db6e98b7..3c5d1316cb3 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumReader.java
    @@ -18,13 +18,13 @@
     package org.apache.avro.generic;
     
     import java.io.IOException;
    -import java.nio.ByteBuffer;
     import java.lang.reflect.Constructor;
    -import java.lang.reflect.InvocationTargetException;
    +import java.nio.ByteBuffer;
     import java.util.Collection;
     import java.util.HashMap;
    -import java.util.IdentityHashMap;
     import java.util.Map;
    +import java.util.concurrent.ConcurrentHashMap;
    +import java.util.function.Function;
     
     import org.apache.avro.AvroRuntimeException;
     import org.apache.avro.Conversion;
    @@ -211,7 +211,7 @@ protected Object readWithoutConversion(Object old, Schema expected, ResolvingDec
       }
     
       /**
    -   * Convert a underlying representation of a logical type (such as a ByteBuffer)
    +   * Convert an underlying representation of a logical type (such as a ByteBuffer)
        * to a higher level object (such as a BigDecimal).
        *
        * @throws IllegalArgumentException if a null schema or logicalType is passed in
    @@ -452,14 +452,14 @@ protected Object newMap(Object old, int size) {
        * representation. By default, this calls {@link #readString(Object,Decoder)}.
        */
       protected Object readString(Object old, Schema expected, Decoder in) throws IOException {
    -    Class stringClass = getStringClass(expected);
    +    Class stringClass = this.getReaderCache().getStringClass(expected);
         if (stringClass == String.class) {
           return in.readString();
         }
         if (stringClass == CharSequence.class) {
           return readString(old, in);
         }
    -    return newInstanceFromString(stringClass, in.readString());
    +    return this.newInstanceFromString(stringClass, in.readString());
       }
     
       /**
    @@ -498,34 +498,88 @@ protected Class findStringClass(Schema schema) {
         }
       }
     
    -  private Map<Schema, Class> stringClassCache = new IdentityHashMap<>();
    +  /**
    +   * Wraps a Schema so that a ConcurrentHashMap keyed on it compares schemas by
    +   * identity (reference equality and identity hash), as IdentityHashMap does.
    +   */
    +  private static final class IdentitySchemaKey {
    +    private final Schema schema;
    +
    +    private final int hashcode;
    +
    +    public IdentitySchemaKey(Schema schema) {
    +      this.schema = schema;
    +      this.hashcode = System.identityHashCode(schema);
    +    }
     
    -  private Class getStringClass(Schema s) {
    -    Class c = stringClassCache.get(s);
    -    if (c == null) {
    -      c = findStringClass(s);
    -      stringClassCache.put(s, c);
    +    @Override
    +    public int hashCode() {
    +      return this.hashcode;
    +    }
    +
    +    @Override
    +    public boolean equals(Object obj) {
    +      if (obj == null || !(obj instanceof GenericDatumReader.IdentitySchemaKey)) {
    +        return false;
    +      }
    +      IdentitySchemaKey key = (IdentitySchemaKey) obj;
    +      return this == key || this.schema == key.schema;
         }
    -    return c;
       }
     
    -  private final Map stringCtorCache = new HashMap<>();
    +  // VisibleForTesting
    +  static class ReaderCache {
    +    private final Map<IdentitySchemaKey, Class> stringClassCache = new ConcurrentHashMap<>();
     
    -  @SuppressWarnings("unchecked")
    -  protected Object newInstanceFromString(Class c, String s) {
    -    try {
    -      Constructor ctor = stringCtorCache.get(c);
    -      if (ctor == null) {
    +    private final Map<Class, Function<String, Object>> stringCtorCache = new ConcurrentHashMap<>();
    +
    +    private final Function<Schema, Class> findStringClass;
    +
    +    public ReaderCache(Function<Schema, Class> findStringClass) {
    +      this.findStringClass = findStringClass;
    +    }
    +
    +    public Object newInstanceFromString(Class c, String s) {
    +      final Function<String, Object> ctor = stringCtorCache.computeIfAbsent(c, this::buildFunction);
    +      return ctor.apply(s);
    +    }
    +
    +    private Function<String, Object> buildFunction(Class c) {
    +      final Constructor ctor;
    +      try {
             ctor = c.getDeclaredConstructor(String.class);
    -        ctor.setAccessible(true);
    -        stringCtorCache.put(c, ctor);
    +      } catch (NoSuchMethodException e) {
    +        throw new AvroRuntimeException(e);
           }
    -      return ctor.newInstance(s);
    -    } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException | InstantiationException e) {
    -      throw new AvroRuntimeException(e);
    +      ctor.setAccessible(true);
    +
    +      return (String s) -> {
    +        try {
    +          return ctor.newInstance(s);
    +        } catch (ReflectiveOperationException e) {
    +          throw new AvroRuntimeException(e);
    +        }
    +      };
    +    }
    +
    +    public Class getStringClass(final Schema s) {
    +      final IdentitySchemaKey key = new IdentitySchemaKey(s);
    +      return this.stringClassCache.computeIfAbsent(key, (IdentitySchemaKey k) -> this.findStringClass.apply(k.schema));
         }
       }
     
    +  private final ReaderCache readerCache = new ReaderCache(this::findStringClass);
    +
    +  // VisibleForTesting
    +  ReaderCache getReaderCache() {
    +    return readerCache;
    +  }
    +
    +  @SuppressWarnings("unchecked")
    +  protected Object newInstanceFromString(Class c, String s) {
    +    return this.getReaderCache().newInstanceFromString(c, s);
    +  }
    +
       /**
        * Called to read byte arrays. Subclasses may override to use a different byte
        * array representation. By default, this calls
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
    index 77d01e98c2c..deeac0b1f2b 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericDatumWriter.java
    @@ -32,9 +32,17 @@
     import org.apache.avro.LogicalType;
     import org.apache.avro.Schema;
     import org.apache.avro.Schema.Field;
    +import org.apache.avro.path.TracingAvroTypeException;
     import org.apache.avro.UnresolvedUnionException;
     import org.apache.avro.io.DatumWriter;
     import org.apache.avro.io.Encoder;
    +import org.apache.avro.path.ArrayPositionPredicate;
    +import org.apache.avro.path.LocationStep;
    +import org.apache.avro.path.MapKeyPredicate;
    +import org.apache.avro.path.TracingClassCastException;
    +import org.apache.avro.path.TracingNullPointException;
    +import org.apache.avro.path.UnionTypePredicate;
    +import org.apache.avro.util.SchemaUtil;
     
     /** {@link DatumWriter} for generic Java objects. */
     public class GenericDatumWriter implements DatumWriter {
    @@ -70,7 +78,11 @@ public void setSchema(Schema root) {
     
       public void write(D datum, Encoder out) throws IOException {
         Objects.requireNonNull(out, "Encoder cannot be null");
    -    write(root, datum, out);
    +    try {
    +      write(root, datum, out);
    +    } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) {
    +      throw e.summarize(root);
    +    }
       }
     
       /** Called to write data. */
    @@ -86,7 +98,7 @@ protected void write(Schema schema, Object datum, Encoder out) throws IOExceptio
     
       /**
        * Convert a high level representation of a logical type (such as a BigDecimal)
    -   * to the its underlying representation object (such as a ByteBuffer).
    +   * to its underlying representation object (such as a ByteBuffer).
        *
        * @throws IllegalArgumentException if a null schema or logicalType is passed in
        *                                  while datum and conversion are not null.
    @@ -125,8 +137,10 @@ protected  Object convert(Schema schema, LogicalType logicalType, Conversion<
     
       /** Called to write data. */
       protected void writeWithoutConversion(Schema schema, Object datum, Encoder out) throws IOException {
    +    int unionIndex = -1;
    +    Schema.Type schemaType = schema.getType();
         try {
    -      switch (schema.getType()) {
    +      switch (schemaType) {
           case RECORD:
             writeRecord(schema, datum, out);
             break;
    @@ -140,9 +154,9 @@ protected void writeWithoutConversion(Schema schema, Object datum, Encoder out)
             writeMap(schema, datum, out);
             break;
           case UNION:
    -        int index = resolveUnion(schema, datum);
    -        out.writeIndex(index);
    -        write(schema.getTypes().get(index), datum, out);
    +        unionIndex = resolveUnion(schema, datum);
    +        out.writeIndex(unionIndex);
    +        write(schema.getTypes().get(unionIndex), datum, out);
             break;
           case FIXED:
             writeFixed(schema, datum, out);
    @@ -174,8 +188,18 @@ protected void writeWithoutConversion(Schema schema, Object datum, Encoder out)
           default:
             error(schema, datum);
           }
    +    } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) {
    +      if (schemaType == Schema.Type.UNION) {
    +        e.tracePath(new UnionTypePredicate(schema.getTypes().get(unionIndex).getName()));
    +      }
    +      // writeArray() and writeMap() have their own handling
    +      throw e;
         } catch (NullPointerException e) {
    -      throw npe(e, " of " + schema.getFullName());
    +      throw new TracingNullPointException(e, schema, false);
    +    } catch (ClassCastException e) {
    +      throw new TracingClassCastException(e, datum, schema, false);
    +    } catch (AvroTypeException e) {
    +      throw new TracingAvroTypeException(e);
         }
       }
     
    @@ -223,6 +247,9 @@ protected void writeField(Object datum, Field f, Encoder out, Object state) thro
           final UnresolvedUnionException unresolvedUnionException = new UnresolvedUnionException(f.schema(), f, value);
           unresolvedUnionException.addSuppressed(uue);
           throw unresolvedUnionException;
    +    } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) {
    +      e.tracePath(new LocationStep(".", f.name()));
    +      throw e;
         } catch (NullPointerException e) {
           throw npe(e, " in field " + f.name());
         } catch (ClassCastException cce) {
    @@ -237,8 +264,11 @@ protected void writeField(Object datum, Field f, Encoder out, Object state) thro
        * representations.
        */
       protected void writeEnum(Schema schema, Object datum, Encoder out) throws IOException {
    -    if (!data.isEnum(datum))
    -      throw new AvroTypeException("Not an enum: " + datum + " for schema: " + schema);
    +    if (!data.isEnum(datum)) {
    +      AvroTypeException cause = new AvroTypeException(
    +          "value " + SchemaUtil.describe(datum) + " is not a " + SchemaUtil.describe(schema));
    +      throw new TracingAvroTypeException(cause);
    +    }
         out.writeEnum(schema.getEnumOrdinal(datum.toString()));
       }
     
    @@ -254,7 +284,12 @@ protected void writeArray(Schema schema, Object datum, Encoder out) throws IOExc
         out.setItemCount(size);
         for (Iterator it = getArrayElements(datum); it.hasNext();) {
           out.startItem();
    -      write(element, it.next(), out);
    +      try {
    +        write(element, it.next(), out);
    +      } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) {
    +        e.tracePath(new ArrayPositionPredicate(actualSize));
    +        throw e;
    +      }
           actualSize++;
         }
         out.writeArrayEnd();
    @@ -276,18 +311,16 @@ protected int resolveUnion(Schema union, Object datum) {
        * Called by the default implementation of {@link #writeArray} to get the size
        * of an array. The default implementation is for {@link Collection}.
        */
    -  @SuppressWarnings("unchecked")
       protected long getArraySize(Object array) {
    -    return ((Collection) array).size();
    +    return ((Collection) array).size();
       }
     
       /**
        * Called by the default implementation of {@link #writeArray} to enumerate
        * array elements. The default implementation is for {@link Collection}.
        */
    -  @SuppressWarnings("unchecked")
    -  protected Iterator getArrayElements(Object array) {
    -    return ((Collection) array).iterator();
    +  protected Iterator getArrayElements(Object array) {
    +    return ((Collection) array).iterator();
       }
     
       /**
    @@ -301,8 +334,21 @@ protected void writeMap(Schema schema, Object datum, Encoder out) throws IOExcep
         out.setItemCount(size);
         for (Map.Entry entry : getMapEntries(datum)) {
           out.startItem();
    -      writeString(entry.getKey().toString(), out);
    -      write(value, entry.getValue(), out);
    +      String key;
    +      try {
    +        key = entry.getKey().toString();
    +      } catch (NullPointerException npe) {
    +        TracingNullPointException tnpe = new TracingNullPointException(npe, Schema.create(Schema.Type.STRING), false);
    +        tnpe.tracePath(new MapKeyPredicate(null));
    +        throw tnpe;
    +      }
    +      writeString(key, out);
    +      try {
    +        write(value, entry.getValue(), out);
    +      } catch (TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) {
    +        e.tracePath(new MapKeyPredicate(key));
    +        throw e;
    +      }
           actualSize++;
         }
         out.writeMapEnd();
    @@ -363,7 +409,7 @@ protected void writeFixed(Schema schema, Object datum, Encoder out) throws IOExc
       }
     
       private void error(Schema schema, Object datum) {
    -    throw new AvroTypeException("Not a " + schema + ": " + datum);
    +    throw new AvroTypeException("value " + SchemaUtil.describe(datum) + " is not a " + SchemaUtil.describe(schema));
       }
     
     }
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
    new file mode 100644
    index 00000000000..d34ce0f5bcb
    --- /dev/null
    +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
    @@ -0,0 +1,609 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     https://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.avro.generic;
    +
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.Schema;
    +
    +import java.util.Arrays;
    +import java.util.Collection;
    +
    +public class PrimitivesArrays {
    +
    +  public static class IntArray extends GenericData.AbstractArray { // ints kept unboxed in an int[]
    +    private static final int[] EMPTY = new int[0]; // placeholder used until storage is allocated
    +
    +    private int[] elements = EMPTY; // backing store; accessors only expose indexes below size
    +
    +    public IntArray(int capacity, Schema schema) { // rejects schemas whose element type is not INT
    +      super(schema);
    +      if (!Schema.Type.INT.equals(schema.getElementType().getType()))
    +        throw new AvroRuntimeException("Not a int array schema: " + schema);
    +      if (capacity != 0)
    +        elements = new int[capacity];
    +    }
    +
    +    public IntArray(Schema schema, Collection c) { // copies c; a null c leaves the array empty
    +      super(schema); // NOTE(review): unlike the capacity constructor, the element type is not checked here
    +      if (c != null) {
    +        elements = new int[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      size = 0; // logical reset only; the backing array is kept as is
    +    }
    +
    +    @Override
    +    public Integer get(int i) {
    +      return this.getInt(i); // boxed view of getInt
    +    }
    +
    +    /**
    +     * Direct primitive int access, without boxing.
    +     *
    +     * @param i index of the element; an index at or above the size is rejected.
    +     * @return value at index.
    +     */
    +    public int getInt(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      return elements[i];
    +    }
    +
    +    @Override
    +    public void add(int location, Integer o) {
    +      if (o == null) {
    +        return; // null is ignored: nothing is inserted
    +      }
    +      this.add(location, o.intValue());
    +    }
    +
    +    public void add(int location, int o) { // inserts o at location; location == size appends
    +      if (location > size || location < 0) {
    +        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
    +      }
    +      if (size == elements.length) {
    +        // Increase size by 1.5x + 1
    +        final int newSize = size + (size >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newSize);
    +      }
    +      System.arraycopy(elements, location, elements, location + 1, size - location); // open a gap
    +      elements[location] = o;
    +      size++;
    +    }
    +
    +    @Override
    +    public Integer set(int i, Integer o) {
    +      if (o == null) {
    +        return null; // null is ignored: the stored value is left untouched
    +      }
    +      return this.set(i, o.intValue());
    +    }
    +
    +    public int set(int i, int o) { // overwrites index i and returns the value it replaced
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      int response = elements[i];
    +      elements[i] = o;
    +      return response;
    +    }
    +
    +    @Override
    +    public Integer remove(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      int result = elements[i];
    +      --size;
    +      System.arraycopy(elements, i + 1, elements, i, (size - i)); // close the gap left by index i
    +      return result;
    +    }
    +
    +    @Override
    +    public Integer peek() {
    +      return (size < elements.length) ? elements[size] : null; // slot just past the end, if allocated
    +    }
    +
    +    @Override
    +    protected void swap(final int index1, final int index2) {
    +      int tmp = elements[index1]; // direct array access: no size check is made here
    +      elements[index1] = elements[index2];
    +      elements[index2] = tmp;
    +    }
    +  }
    +
    +  public static class LongArray extends GenericData.AbstractArray { // longs kept unboxed in a long[]
    +    private static final long[] EMPTY = new long[0]; // placeholder used until storage is allocated
    +
    +    private long[] elements = EMPTY; // backing store; accessors only expose indexes below size
    +
    +    public LongArray(int capacity, Schema schema) { // rejects schemas whose element type is not LONG
    +      super(schema);
    +      if (!Schema.Type.LONG.equals(schema.getElementType().getType()))
    +        throw new AvroRuntimeException("Not a long array schema: " + schema);
    +      if (capacity != 0)
    +        elements = new long[capacity];
    +    }
    +
    +    public LongArray(Schema schema, Collection c) { // copies c; a null c leaves the array empty
    +      super(schema); // NOTE(review): unlike the capacity constructor, the element type is not checked here
    +      if (c != null) {
    +        elements = new long[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      size = 0; // logical reset only; the backing array is kept as is
    +    }
    +
    +    @Override
    +    public Long get(int i) {
    +      return getLong(i); // boxed view of getLong
    +    }
    +
    +    /**
    +     * Direct primitive long access, without boxing.
    +     *
    +     * @param i index of the element; an index at or above the size is rejected.
    +     * @return value at index.
    +     */
    +    public long getLong(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      return elements[i];
    +    }
    +
    +    @Override
    +    public void add(int location, Long o) {
    +      if (o == null) {
    +        return; // null is ignored: nothing is inserted
    +      }
    +      this.add(location, o.longValue());
    +    }
    +
    +    public void add(int location, long o) { // inserts o at location; location == size appends
    +      if (location > size || location < 0) {
    +        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
    +      }
    +      if (size == elements.length) {
    +        // Increase size by 1.5x + 1
    +        final int newSize = size + (size >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newSize);
    +      }
    +      System.arraycopy(elements, location, elements, location + 1, size - location); // open a gap
    +      elements[location] = o;
    +      size++;
    +    }
    +
    +    @Override
    +    public Long set(int i, Long o) {
    +      if (o == null) {
    +        return null; // null is ignored: the stored value is left untouched
    +      }
    +      return this.set(i, o.longValue());
    +    }
    +
    +    public long set(int i, long o) { // overwrites index i and returns the value it replaced
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      long response = elements[i];
    +      elements[i] = o;
    +      return response;
    +    }
    +
    +    @Override
    +    public Long remove(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      long result = elements[i];
    +      --size;
    +      System.arraycopy(elements, i + 1, elements, i, (size - i)); // close the gap left by index i
    +      return result;
    +    }
    +
    +    @Override
    +    public Long peek() {
    +      return (size < elements.length) ? elements[size] : null; // slot just past the end, if allocated
    +    }
    +
    +    @Override
    +    protected void swap(final int index1, final int index2) {
    +      long tmp = elements[index1]; // direct array access: no size check is made here
    +      elements[index1] = elements[index2];
    +      elements[index2] = tmp;
    +    }
    +  }
    +
    +  public static class BooleanArray extends GenericData.AbstractArray { // booleans packed 8 per byte
    +    private static final byte[] EMPTY = new byte[0]; // placeholder used until storage is allocated
    +
    +    private byte[] elements = EMPTY; // element i lives in bit (i % 8) of byte (i / 8)
    +
    +    public BooleanArray(int capacity, Schema schema) { // rejects schemas whose element type is not BOOLEAN
    +      super(schema);
    +      if (!Schema.Type.BOOLEAN.equals(schema.getElementType().getType()))
    +        throw new AvroRuntimeException("Not a boolean array schema: " + schema);
    +      if (capacity != 0)
    +        elements = new byte[1 + (capacity / Byte.SIZE)];
    +    }
    +
    +    public BooleanArray(Schema schema, Collection c) { // copies c; a null c leaves the array empty
    +      super(schema);
    +      if (c != null) {
    +        elements = new byte[1 + (c.size() / 8)];
    +        if (c instanceof BooleanArray) {
    +          BooleanArray other = (BooleanArray) c;
    +          this.size = other.size;
    +          // other may own more bytes than were just allocated (spare capacity), so clamp the copy.
    +          System.arraycopy(other.elements, 0, elements, 0, Math.min(other.elements.length, elements.length));
    +        } else {
    +          addAll(c);
    +        }
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      size = 0; // logical reset only; the stored bits are left in place
    +    }
    +
    +    @Override
    +    public Boolean get(int i) {
    +      return this.getBoolean(i); // boxed view of getBoolean
    +    }
    +
    +    /**
    +     * Direct primitive boolean access, without boxing.
    +     *
    +     * @param i index of the element; an index at or above the size is rejected.
    +     * @return value at index.
    +     */
    +    public boolean getBoolean(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      return (elements[i / 8] & (1 << (i % 8))) > 0;
    +    }
    +
    +    @Override
    +    public boolean add(final Boolean o) {
    +      if (o == null) {
    +        return false; // null is ignored: nothing is appended
    +      }
    +      return this.add(o.booleanValue());
    +    }
    +
    +    public boolean add(final boolean o) { // appends o, growing the store by 1.5x + 1 bytes when full
    +      if (this.size == elements.length * 8) {
    +        final int newLength = elements.length + (elements.length >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newLength);
    +      }
    +      this.size++;
    +      this.set(this.size - 1, o); // set() both sets and clears, so a stale bit cannot leak through
    +      return true;
    +    }
    +
    +    @Override
    +    public void add(int location, Boolean o) {
    +      if (o == null) {
    +        return; // null is ignored: nothing is inserted
    +      }
    +      this.add(location, o.booleanValue());
    +    }
    +
    +    public void add(int location, boolean o) { // inserts o at location; later elements move up by one
    +      if (location > size || location < 0) {
    +        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
    +      }
    +      if (size == elements.length * 8) {
    +        // Increase size by 1.5x + 1
    +        final int newLength = elements.length + (elements.length >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newLength);
    +      }
    +      size++;
    +      final int target = location / 8;
    +      // Shift the bytes above the target left by one bit, highest byte first so each
    +      // byte can still read the not-yet-shifted top bit of its lower neighbour. The
    +      // highest byte in use is (size - 1) / 8; starting at size / 8 would overrun the
    +      // array when the new size is exactly the allocated bit count.
    +      for (int index = (size - 1) / 8; index > target; index--) {
    +        final int carry = (elements[index - 1] & 0x80) != 0 ? 1 : 0;
    +        elements[index] = (byte) ((elements[index] << 1) | carry);
    +      }
    +
    +      // Within the target byte: bits below the insertion point stay, bits at or above
    +      // it (including the displaced element) move up one; the top bit was carried above.
    +      final int bit = location % 8;
    +      final int current = elements[target] & 0xff;
    +      final int low = current & ((1 << bit) - 1);
    +      final int high = (current >>> bit) << (bit + 1);
    +      elements[target] = (byte) (low | high | (o ? 1 << bit : 0));
    +    }
    +
    +    @Override
    +    public Boolean set(int i, Boolean o) {
    +      if (o == null) {
    +        return null; // null is ignored: the stored value is left untouched
    +      }
    +      return this.set(i, o.booleanValue());
    +    }
    +
    +    public boolean set(int i, boolean o) { // overwrites index i and returns the value it replaced
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      boolean response = (elements[i / 8] & (1 << (i % 8))) > 0;
    +      if (o) {
    +        elements[i / 8] |= 1 << (i % 8);
    +      } else {
    +        elements[i / 8] &= 0xFF - (1 << (i % 8));
    +      }
    +      return response;
    +    }
    +
    +    @Override
    +    public Boolean remove(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      boolean result = (elements[(i / 8)] & (1 << (i % 8))) > 0;
    +      --size;
    +
    +      byte memo = 0; // lowest bit of the next byte, which becomes the top bit of this one
    +      if ((i / 8) + 1 < elements.length) {
    +        memo = (byte) ((1 & (elements[(i / 8) + 1])) << 7);
    +      }
    +      for (int index = (i / 8) + 1; index <= (size / 8); index++) {
    +        elements[index] = (byte) ((elements[index] & 0xff) >>> 1);
    +        if (index + 1 < elements.length && (elements[index + 1] & 1) == 1) {
    +          elements[index] |= 1 << (Byte.SIZE - 1);
    +        }
    +      }
    +      // 87654321 => 8764321 (dropping 5): keep the bits below the removed one, move
    +      // the bits above it down one place, and take the new top bit from the next byte.
    +      final int bit = i % 8;
    +      final int current = elements[i / 8] & 0xff;
    +      final int low = current & ((1 << bit) - 1); // 4321
    +      final int high = (current >>> (bit + 1)) << bit; // 876
    +      elements[i / 8] = (byte) (low | high | memo);
    +      return result;
    +    }
    +
    +    @Override
    +    public Boolean peek() {
    +      return (size < elements.length * Byte.SIZE) ? (elements[(size / 8)] & (1 << (size % 8))) > 0 : null;
    +    }
    +
    +    @Override
    +    protected void swap(final int index1, final int index2) {
    +      boolean tmp = this.get(index1); // goes through get/set, so both indexes are size-checked
    +      this.set(index1, this.get(index2));
    +      this.set(index2, tmp);
    +    }
    +  }
    +
    +  public static class FloatArray extends GenericData.AbstractArray { // floats kept unboxed in a float[]
    +    private static final float[] EMPTY = new float[0]; // placeholder used until storage is allocated
    +
    +    private float[] elements = EMPTY; // backing store; accessors only expose indexes below size
    +
    +    public FloatArray(int capacity, Schema schema) { // rejects schemas whose element type is not FLOAT
    +      super(schema);
    +      if (!Schema.Type.FLOAT.equals(schema.getElementType().getType()))
    +        throw new AvroRuntimeException("Not a float array schema: " + schema);
    +      if (capacity != 0)
    +        elements = new float[capacity];
    +    }
    +
    +    public FloatArray(Schema schema, Collection c) { // copies c; a null c leaves the array empty
    +      super(schema); // NOTE(review): unlike the capacity constructor, the element type is not checked here
    +      if (c != null) {
    +        elements = new float[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      size = 0; // logical reset only; the backing array is kept as is
    +    }
    +
    +    @Override
    +    public Float get(int i) {
    +      return this.getFloat(i); // boxed view of getFloat
    +    }
    +
    +    /**
    +     * Direct primitive float access, without boxing.
    +     *
    +     * @param i index of the element; an index at or above the size is rejected.
    +     * @return value at index.
    +     */
    +    public float getFloat(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      return elements[i];
    +    }
    +
    +    @Override
    +    public void add(int location, Float o) {
    +      if (o == null) {
    +        return; // null is ignored: nothing is inserted
    +      }
    +      this.add(location, o.floatValue());
    +    }
    +
    +    public void add(int location, float o) { // inserts o at location; location == size appends
    +      if (location > size || location < 0) {
    +        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
    +      }
    +      if (size == elements.length) {
    +        // Increase size by 1.5x + 1
    +        final int newSize = size + (size >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newSize);
    +      }
    +      System.arraycopy(elements, location, elements, location + 1, size - location); // open a gap
    +      elements[location] = o;
    +      size++;
    +    }
    +
    +    @Override
    +    public Float set(int i, Float o) {
    +      if (o == null) {
    +        return null; // null is ignored: the stored value is left untouched
    +      }
    +      return this.set(i, o.floatValue());
    +    }
    +
    +    public float set(int i, float o) { // overwrites index i and returns the value it replaced
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      float response = elements[i];
    +      elements[i] = o;
    +      return response;
    +    }
    +
    +    @Override
    +    public Float remove(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      float result = elements[i];
    +      --size;
    +      System.arraycopy(elements, i + 1, elements, i, (size - i)); // close the gap left by index i
    +      return result;
    +    }
    +
    +    @Override
    +    public Float peek() {
    +      return (size < elements.length) ? elements[size] : null; // slot just past the end, if allocated
    +    }
    +
    +    @Override
    +    protected void swap(final int index1, final int index2) {
    +      float tmp = this.get(index1); // goes through get/set, unlike the direct swap in IntArray
    +      this.set(index1, this.get(index2));
    +      this.set(index2, tmp);
    +    }
    +  }
    +
    +  public static class DoubleArray extends GenericData.AbstractArray { // doubles kept unboxed in a double[]
    +    private static final double[] EMPTY = new double[0]; // placeholder used until storage is allocated
    +
    +    private double[] elements = EMPTY; // backing store; accessors only expose indexes below size
    +
    +    public DoubleArray(int capacity, Schema schema) { // rejects schemas whose element type is not DOUBLE
    +      super(schema);
    +      if (!Schema.Type.DOUBLE.equals(schema.getElementType().getType()))
    +        throw new AvroRuntimeException("Not a double array schema: " + schema);
    +      if (capacity != 0)
    +        elements = new double[capacity];
    +    }
    +
    +    public DoubleArray(Schema schema, Collection c) { // copies c; a null c leaves the array empty
    +      super(schema);
    +      if (c != null) {
    +        elements = new double[c.size()];
    +        addAll(c);
    +      }
    +    }
    +
    +    @Override
    +    public void clear() {
    +      size = 0; // logical reset only; the backing array is kept as is
    +    }
    +
    +    @Override
    +    public Double get(int i) {
    +      return this.getDouble(i); // boxed view of getDouble
    +    }
    +
    +    /**
    +     * Direct primitive double access, without boxing.
    +     *
    +     * @param i index of the element; an index at or above the size is rejected.
    +     * @return value at index.
    +     */
    +    public double getDouble(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      return elements[i];
    +    }
    +
    +    @Override
    +    public void add(int location, Double o) {
    +      if (o == null) {
    +        return; // null is ignored: nothing is inserted
    +      }
    +      this.add(location, o.doubleValue()); // doubleValue, not floatValue: keep full precision
    +    }
    +
    +    public void add(int location, double o) { // inserts o at location; location == size appends
    +      if (location > size || location < 0) {
    +        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
    +      }
    +      if (size == elements.length) {
    +        // Increase size by 1.5x + 1
    +        final int newSize = size + (size >> 1) + 1;
    +        elements = Arrays.copyOf(elements, newSize);
    +      }
    +      System.arraycopy(elements, location, elements, location + 1, size - location); // open a gap
    +      elements[location] = o;
    +      size++;
    +    }
    +
    +    @Override
    +    public Double set(int i, Double o) {
    +      if (o == null) {
    +        return null; // null is ignored: the stored value is left untouched
    +      }
    +      return this.set(i, o.doubleValue()); // doubleValue, not floatValue: keep full precision
    +    }
    +
    +    public double set(int i, double o) { // overwrites index i and returns the value it replaced
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      double response = elements[i];
    +      elements[i] = o;
    +      return response;
    +    }
    +
    +    @Override
    +    public Double remove(int i) {
    +      if (i >= size)
    +        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
    +      double result = elements[i];
    +      --size;
    +      System.arraycopy(elements, i + 1, elements, i, (size - i)); // close the gap left by index i
    +      return result;
    +    }
    +
    +    @Override
    +    public Double peek() {
    +      return (size < elements.length) ? elements[size] : null; // slot just past the end, if allocated
    +    }
    +
    +    @Override
    +    protected void swap(final int index1, final int index2) {
    +      double tmp = this.get(index1); // goes through get/set, so both indexes are size-checked
    +      this.set(index1, this.get(index2));
    +      this.set(index2, tmp);
    +    }
    +  }
    +
    +}
    diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
    index 44d2b764009..7217be3addd 100644
    --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
    +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
    @@ -17,18 +17,17 @@
      */
     package org.apache.avro.io;
     
    +import org.apache.avro.AvroRuntimeException;
    +import org.apache.avro.InvalidNumberEncodingException;
    +import org.apache.avro.SystemLimitException;
    +import org.apache.avro.util.Utf8;
    +
     import java.io.EOFException;
     import java.io.IOException;
     import java.io.InputStream;
    -import java.nio.Buffer;
     import java.nio.ByteBuffer;
     import java.util.Arrays;
     
    -import org.apache.avro.AvroRuntimeException;
    -import org.apache.avro.InvalidNumberEncodingException;
    -import org.apache.avro.util.Utf8;
    -import org.slf4j.LoggerFactory;
    -
     /**
      * An {@link Decoder} for binary-format data.
      * 

    @@ -39,27 +38,20 @@ * can be accessed by inputStream().remaining(), if the BinaryDecoder is not * 'direct'. *

    - * To prevent this class from making large allocations when handling potentially - * pathological input data, set Java properties - * org.apache.avro.limits.string.maxLength and - * org.apache.avro.limits.bytes.maxLength before instantiating this - * class to limit the maximum sizes of string and bytes types - * handled. The default is to permit sizes up to Java's maximum array length. * * @see Encoder + * @see SystemLimitException */ public class BinaryDecoder extends Decoder { /** - * The maximum size of array to allocate. Some VMs reserve some header words in - * an array. Attempts to allocate larger arrays may result in OutOfMemoryError: - * Requested array size exceeds VM limit + * When reading a collection (MAP or ARRAY), this keeps track of the number of + * elements to ensure that the + * {@link SystemLimitException#checkMaxCollectionLength} constraint is + * respected. */ - static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L; - - private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; - private final int maxBytesLength; + private long collectionCount = 0L; private ByteSource source = null; // we keep the buffer and its state variables in this class and not in a @@ -99,17 +91,6 @@ void clearBuf() { /** protected constructor for child classes */ protected BinaryDecoder() { super(); - String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(BinaryDecoder.class) - .warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe); - } - } - maxBytesLength = i; } BinaryDecoder(InputStream in, int bufferSize) { @@ -300,17 +281,11 @@ public double readDouble() throws IOException { @Override public Utf8 readString(Utf8 old) throws IOException { - long length = readLong(); - if (length > MAX_ARRAY_SIZE) { - throw new 
UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxStringLength(readLong()); Utf8 result = (old != null ? old : new Utf8()); - result.setByteLength((int) length); - if (0L != length) { - doReadBytes(result.getBytes(), 0, (int) length); + result.setByteLength(length); + if (0 != length) { + doReadBytes(result.getBytes(), 0, length); } return result; } @@ -329,25 +304,16 @@ public void skipString() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxBytesLength(readLong()); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; - ((Buffer) result).clear(); + result.clear(); } else { result = ByteBuffer.allocate(length); } doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + result.limit(length); return result; } @@ -372,6 +338,9 @@ public int readEnum() throws IOException { } protected void doSkipBytes(long length) throws IOException { + if (length <= 0) { + return; + } int remaining = limit - pos; if (length <= remaining) { pos = (int) (pos + length); @@ -443,7 +412,6 @@ protected long doReadItemCount() throws IOException { * @return Zero if there are no more items to skip and end of array/map is * reached. Positive number if some items are found that cannot be * skipped and the client needs to skip them individually. 
- * * @throws IOException If the first byte cannot be read for any reason other * than the end of the file, if the input stream has been * closed, or if some other I/O error occurs. @@ -460,12 +428,15 @@ private long doSkipItems() throws IOException { @Override public long readArrayStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long arrayNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -475,12 +446,15 @@ public long skipArray() throws IOException { @Override public long readMapStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long mapNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -556,7 +530,7 @@ public InputStream inputStream() { /** * BufferAccessor is used by BinaryEncoder to enable {@link ByteSource}s and the - * InputStream returned by {@link BinaryDecoder.inputStream} to access the + * InputStream returned by {@link BinaryDecoder#inputStream} to access the * BinaryEncoder's buffer. When a BufferAccessor is created, it is attached to a * BinaryDecoder and its buffer. Its accessors directly reference the * BinaryDecoder's buffer. When detach() is called, it stores references to the @@ -649,15 +623,15 @@ void setBuf(byte[] buf, int offset, int length) { * stronger guarantees than InputStream, freeing client code to be simplified * and faster. *

    - * {@link skipSourceBytes} and {@link readRaw} are guaranteed to have read or + * {@link #skipSourceBytes} and {@link #readRaw} are guaranteed to have read or * skipped as many bytes as possible, or throw EOFException. - * {@link trySkipBytes} and {@link tryRead} are guaranteed to attempt to read or - * skip as many bytes as possible and never throw EOFException, while returning - * the exact number of bytes skipped or read. {@link isEof} returns true if all - * the source bytes have been read or skipped. This condition can also be - * detected by a client if an EOFException is thrown from - * {@link skipSourceBytes} or {@link readRaw}, or if {@link trySkipBytes} or - * {@link tryRead} return 0; + * {@link #trySkipBytes} and {@link #tryReadRaw} are guaranteed to attempt to + * read or skip as many bytes as possible and never throw EOFException, while + * returning the exact number of bytes skipped or read. {@link #isEof} returns + * true if all the source bytes have been read or skipped. This condition can + * also be detected by a client if an EOFException is thrown from + * {@link #skipSourceBytes} or {@link #readRaw}, or if {@link #trySkipBytes} or + * {@link #tryReadRaw} return 0; *

    * A ByteSource also implements the InputStream contract for use by APIs that * require it. The InputStream interface must take into account buffering in any @@ -932,7 +906,6 @@ public void close() throws IOException { /** * This byte source is special. It will avoid copying data by using the source's * byte[] as a buffer in the decoder. - * */ private static class ByteArrayByteSource extends ByteSource { private static final int MIN_SIZE = 16; diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java index 22d0326165c..aacb83b88f4 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryEncoder.java @@ -48,7 +48,7 @@ public void writeString(Utf8 utf8) throws IOException { @Override public void writeString(String string) throws IOException { - if (0 == string.length()) { + if (string.isEmpty()) { writeZero(); return; } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java new file mode 100644 index 00000000000..2ef2375e640 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BlockingDirectBinaryEncoder.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayDeque; +import java.util.ArrayList; + +/** + * An {@link Encoder} for Avro's binary encoding that does not buffer output. + *

    + * This encoder does not buffer writes in contrast to + * {@link BufferedBinaryEncoder}. However, it is lighter-weight and useful when: + * The buffering in BufferedBinaryEncoder is not desired because you buffer a + * different level or the Encoder is very short-lived. + *

    + * The BlockingDirectBinaryEncoder will encode the number of bytes of the Map + * and Array blocks. This will allow to postpone the decoding, or skip over it + * at all. + *

    + * To construct, use + * {@link EncoderFactory#blockingDirectBinaryEncoder(OutputStream, BinaryEncoder)} + *

    + * {@link BlockingDirectBinaryEncoder} instances returned by this method are not + * thread-safe + * + * @see BinaryEncoder + * @see EncoderFactory + * @see Encoder + * @see Decoder + */ +public class BlockingDirectBinaryEncoder extends DirectBinaryEncoder { + private final ArrayList buffers; + + private final ArrayDeque stashedBuffers; + + private int depth = 0; + + private final ArrayDeque blockItemCounts; + + /** + * Create a writer that sends its output to the underlying stream + * out. + * + * @param out The Outputstream to write to + */ + public BlockingDirectBinaryEncoder(OutputStream out) { + super(out); + this.buffers = new ArrayList<>(); + this.stashedBuffers = new ArrayDeque<>(); + this.blockItemCounts = new ArrayDeque<>(); + } + + private void startBlock() { + stashedBuffers.push(out); + if (this.buffers.size() <= depth) { + this.buffers.add(new BufferOutputStream()); + } + BufferOutputStream buf = buffers.get(depth); + buf.reset(); + this.depth += 1; + this.out = buf; + } + + private void endBlock() { + if (depth == 0) { + throw new RuntimeException("Called endBlock, while not buffering a block"); + } + this.depth -= 1; + out = stashedBuffers.pop(); + BufferOutputStream buffer = this.buffers.get(depth); + long blockItemCount = blockItemCounts.pop(); + if (blockItemCount > 0) { + try { + // Make it negative, so the reader knows that the number of bytes is coming + writeLong(-blockItemCount); + writeLong(buffer.size()); + writeFixed(buffer.toBufferWithoutCopy()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void setItemCount(long itemCount) throws IOException { + blockItemCounts.push(itemCount); + } + + @Override + public void writeArrayStart() throws IOException { + startBlock(); + } + + @Override + public void writeArrayEnd() throws IOException { + endBlock(); + // Writes another zero to indicate that this is the last block + super.writeArrayEnd(); + } + + @Override + public void writeMapStart() 
throws IOException { + startBlock(); + } + + @Override + public void writeMapEnd() throws IOException { + endBlock(); + // Writes another zero to indicate that this is the last block + super.writeMapEnd(); + } + + private static class BufferOutputStream extends ByteArrayOutputStream { + BufferOutputStream() { + } + + ByteBuffer toBufferWithoutCopy() { + return ByteBuffer.wrap(buf, 0, count); + } + + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java index 7b056556693..71f3ed593af 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java @@ -20,10 +20,10 @@ import java.io.EOFException; import java.io.IOException; import java.io.InputStream; -import java.nio.Buffer; import java.nio.ByteBuffer; import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; import org.apache.avro.util.ByteBufferInputStream; /** @@ -40,15 +40,15 @@ class DirectBinaryDecoder extends BinaryDecoder { private class ByteReader { public ByteBuffer read(ByteBuffer old, int length) throws IOException { - ByteBuffer result; + final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; - ((Buffer) result).clear(); + result.clear(); } else { result = ByteBuffer.allocate(length); } doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + result.limit(length); return result; } } @@ -68,7 +68,6 @@ public ByteBuffer read(ByteBuffer old, int length) throws IOException { return bbi.readBuffer(length); } } - } private ByteReader byteReader; @@ -156,8 +155,8 @@ public double readDouble() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - return byteReader.read(old, length); + long length = readLong(); + return 
byteReader.read(old, SystemLimitException.checkMaxBytesLength(length)); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java index 62b2a482627..df7c118b648 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryEncoder.java @@ -27,20 +27,20 @@ * This encoder does not buffer writes, and as a result is slower than * {@link BufferedBinaryEncoder}. However, it is lighter-weight and useful when * the buffering in BufferedBinaryEncoder is not desired and/or the Encoder is - * very short lived. + * very short-lived. *

    * To construct, use * {@link EncoderFactory#directBinaryEncoder(OutputStream, BinaryEncoder)} *

    * DirectBinaryEncoder is not thread-safe - * + * * @see BinaryEncoder * @see EncoderFactory * @see Encoder * @see Decoder */ public class DirectBinaryEncoder extends BinaryEncoder { - private OutputStream out; + protected OutputStream out; // the buffer is used for writing floats, doubles, and large longs. private final byte[] buf = new byte[12]; @@ -48,7 +48,7 @@ public class DirectBinaryEncoder extends BinaryEncoder { * Create a writer that sends its output to the underlying stream * out. **/ - DirectBinaryEncoder(OutputStream out) { + protected DirectBinaryEncoder(OutputStream out) { configure(out); } @@ -69,8 +69,8 @@ public void writeBoolean(boolean b) throws IOException { } /* - * buffering is slower for ints that encode to just 1 or two bytes, and and - * faster for large ones. (Sun JRE 1.6u22, x64 -server) + * buffering is slower for ints that encode to just 1 or two bytes, and faster + * for large ones. (Sun JRE 1.6u22, x64 -server) */ @Override public void writeInt(int n) throws IOException { diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java index 0188a29637d..2039f30097a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.EnumSet; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; @@ -71,11 +72,11 @@ public static EncoderFactory get() { * likely to improve performance but may be useful for the * downstream OutputStream. * @return This factory, to enable method chaining: - * + * *

        *         EncoderFactory factory = new EncoderFactory().configureBufferSize(4096);
        *         
    - * + * * @see #binaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBufferSize(int size) { @@ -90,7 +91,7 @@ public EncoderFactory configureBufferSize(int size) { /** * Returns this factory's configured default buffer size. Used when creating * Encoder instances that buffer writes. - * + * * @see #configureBufferSize(int) * @see #binaryEncoder(OutputStream, BinaryEncoder) * @return The preferred buffer size, in bytes. @@ -109,11 +110,11 @@ public int getBufferSize() { * outside this range are set to the nearest value in the range. The * encoder will require at least this amount of memory. * @return This factory, to enable method chaining: - * + * *
        *         EncoderFactory factory = new EncoderFactory().configureBlockSize(8000);
        *         
    - * + * * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBlockSize(int size) { @@ -131,7 +132,7 @@ public EncoderFactory configureBlockSize(int size) { * #blockingBinaryEncoder(OutputStream, BinaryEncoder) will have block buffers * of this size. *

    - * + * * @see #configureBlockSize(int) * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) * @return The preferred block size, in bytes. @@ -216,6 +217,49 @@ public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse) } } + /** + * Creates or reinitializes a {@link BlockingDirectBinaryEncoder} with the + * OutputStream provided as the destination for written data. If reuse is + * provided, an attempt will be made to reconfigure reuse rather than + * construct a new instance, but this is not guaranteed, a new instance may be + * returned. + *

    + * The {@link BinaryEncoder} implementation returned does not buffer its output, + * calling {@link Encoder#flush()} will simply cause the wrapped OutputStream to + * be flushed. + *

    + * The {@link BlockingDirectBinaryEncoder} will write the block sizes for the + * arrays and maps so efficient skipping can be done. + *

    + * Performance of unbuffered writes can be significantly slower than buffered + * writes. {@link #binaryEncoder(OutputStream, BinaryEncoder)} returns + * BinaryEncoder instances that are tuned for performance but may buffer output. + * The unbuffered, 'direct' encoder may be desired when buffering semantics are + * problematic, or if the lifetime of the encoder is so short that the buffer + * would not be useful. + *

    + * {@link BinaryEncoder} instances returned by this method are not thread-safe. + * + * @param out The OutputStream to initialize to. Cannot be null. + * @param reuse The BinaryEncoder to attempt to reuse given the factory + * configuration. A BinaryEncoder implementation may not be + * compatible with reuse, causing a new instance to be returned. If + * null, a new instance is returned. + * @return A BinaryEncoder that uses out as its data output. If + * reuse is null, this will be a new instance. If reuse is + * not null, then the returned instance may be a new instance or + * reuse reconfigured to use out. + * @see DirectBinaryEncoder + * @see Encoder + */ + public BinaryEncoder blockingDirectBinaryEncoder(OutputStream out, BinaryEncoder reuse) { + if (null == reuse || !reuse.getClass().equals(BlockingDirectBinaryEncoder.class)) { + return new BlockingDirectBinaryEncoder(out); + } else { + return ((DirectBinaryEncoder) reuse).configure(out); + } + } + /** * Creates or reinitializes a {@link BinaryEncoder} with the OutputStream * provided as the destination for written data. If reuse is provided, an @@ -297,6 +341,38 @@ public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty) return new JsonEncoder(schema, out, pretty); } + /** + * Creates a {@link JsonEncoder} using the OutputStream provided for writing + * data conforming to the Schema provided with optional pretty printing. + *

    + * {@link JsonEncoder} buffers its output. Data may not appear on the underlying + * OutputStream until {@link Encoder#flush()} is called. + *

    + * {@link JsonEncoder} is not thread-safe. + * + * @param schema The Schema for data written to this JsonEncoder. Cannot be + * null. + * @param out The OutputStream to write to. Cannot be null. + * @param pretty Pretty print encoding. + * @param autoflush Whether to Automatically flush the data to storage, default + * is true controls the underlying FLUSH_PASSED_TO_STREAM + * feature of JsonGenerator + * @return A JsonEncoder configured with out, schema and + * pretty + * @throws IOException + */ + public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty, boolean autoflush) + throws IOException { + EnumSet options = EnumSet.noneOf(JsonEncoder.JsonOptions.class); + if (pretty) { + options.add(JsonEncoder.JsonOptions.Pretty); + } + if (!autoflush) { + options.add(JsonEncoder.JsonOptions.NoFlushStream); + } + return new JsonEncoder(schema, out, options); + } + /** * Creates a {@link JsonEncoder} using the {@link JsonGenerator} provided for * output of data conforming to the Schema provided. diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java index c1c38511ab4..2ad496a5b87 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java @@ -86,7 +86,7 @@ private static Symbol getSymbol(Schema schema) { *

    * Otherwise, this JsonDecoder will reset its state and then reconfigure its * input. - * + * * @param in The InputStream to read from. Cannot be null. * @throws IOException * @throws NullPointerException if {@code in} is {@code null} @@ -109,7 +109,7 @@ public JsonDecoder configure(InputStream in) throws IOException { *

    * Otherwise, this JsonDecoder will reset its state and then reconfigure its * input. - * + * * @param in The String to read from. Cannot be null. * @throws IOException * @throws NullPointerException if {@code in} is {@code null} @@ -157,25 +157,39 @@ public boolean readBoolean() throws IOException { @Override public int readInt() throws IOException { advance(Symbol.INT); - if (in.getCurrentToken().isNumeric()) { + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) { int result = in.getIntValue(); in.nextToken(); return result; - } else { - throw error("int"); } + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) { + float value = in.getFloatValue(); + if (Math.abs(value - Math.round(value)) <= Float.MIN_VALUE) { + int result = Math.round(value); + in.nextToken(); + return result; + } + } + throw error("int"); } @Override public long readLong() throws IOException { advance(Symbol.LONG); - if (in.getCurrentToken().isNumeric()) { + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) { long result = in.getLongValue(); in.nextToken(); return result; - } else { - throw error("long"); } + if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) { + double value = in.getDoubleValue(); + if (Math.abs(value - Math.round(value)) <= Double.MIN_VALUE) { + long result = Math.round(value); + in.nextToken(); + return result; + } + } + throw error("long"); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java index 71cc690b8a4..46fb6b5cb58 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java @@ -22,7 +22,9 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.BitSet; +import java.util.EnumSet; import java.util.Objects; +import java.util.Set; import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; @@ -33,6 +35,7 
@@ import com.fasterxml.jackson.core.JsonEncoding; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.PrettyPrinter; import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; @@ -58,11 +61,15 @@ public class JsonEncoder extends ParsingEncoder implements Parser.ActionHandler protected BitSet isEmpty = new BitSet(); JsonEncoder(Schema sc, OutputStream out) throws IOException { - this(sc, getJsonGenerator(out, false)); + this(sc, getJsonGenerator(out, EnumSet.noneOf(JsonOptions.class))); } JsonEncoder(Schema sc, OutputStream out, boolean pretty) throws IOException { - this(sc, getJsonGenerator(out, pretty)); + this(sc, getJsonGenerator(out, pretty ? EnumSet.of(JsonOptions.Pretty) : EnumSet.noneOf(JsonOptions.class))); + } + + JsonEncoder(Schema sc, OutputStream out, Set options) throws IOException { + this(sc, getJsonGenerator(out, options)); } JsonEncoder(Schema sc, JsonGenerator out) throws IOException { @@ -78,24 +85,28 @@ public void flush() throws IOException { } } + enum JsonOptions { + Pretty, + + // Prevent underlying outputstream to be flush for optimisation purpose. + NoFlushStream + } + // by default, one object per line. // with pretty option use default pretty printer with root line separator. 
- private static JsonGenerator getJsonGenerator(OutputStream out, boolean pretty) throws IOException { + private static JsonGenerator getJsonGenerator(OutputStream out, Set options) throws IOException { Objects.requireNonNull(out, "OutputStream cannot be null"); JsonGenerator g = new JsonFactory().createGenerator(out, JsonEncoding.UTF8); - if (pretty) { - DefaultPrettyPrinter pp = new DefaultPrettyPrinter() { - @Override - public void writeRootValueSeparator(JsonGenerator jg) throws IOException { - jg.writeRaw(LINE_SEPARATOR); - } - }; - g.setPrettyPrinter(pp); + if (options.contains(JsonOptions.NoFlushStream)) { + g = g.configure(JsonGenerator.Feature.FLUSH_PASSED_TO_STREAM, false); + } + final PrettyPrinter pp; + if (options.contains(JsonOptions.Pretty)) { + pp = new DefaultPrettyPrinter(LINE_SEPARATOR); } else { - MinimalPrettyPrinter pp = new MinimalPrettyPrinter(); - pp.setRootValueSeparator(LINE_SEPARATOR); - g.setPrettyPrinter(pp); + pp = new MinimalPrettyPrinter(LINE_SEPARATOR); } + g.setPrettyPrinter(pp); return g; } @@ -122,7 +133,29 @@ public void setIncludeNamespace(final boolean includeNamespace) { * @return this JsonEncoder */ public JsonEncoder configure(OutputStream out) throws IOException { - this.configure(getJsonGenerator(out, false)); + return this.configure(out, true); + } + + /** + * Reconfigures this JsonEncoder to use the output stream provided. + *

    + * If the OutputStream provided is null, a NullPointerException is thrown. + *

    + * Otherwise, this JsonEncoder will flush its current output and then + * reconfigure its output to use a default UTF8 JsonGenerator that writes to the + * provided OutputStream. + * + * @param out The OutputStream to direct output to. Cannot be null. + * @throws IOException + * @throws NullPointerException if {@code out} is {@code null} + * @return this JsonEncoder + */ + public JsonEncoder configure(OutputStream out, boolean autoflush) throws IOException { + EnumSet jsonOptions = EnumSet.noneOf(JsonOptions.class); + if (!autoflush) { + jsonOptions.add(JsonOptions.NoFlushStream); + } + this.configure(getJsonGenerator(out, jsonOptions)); return this; } @@ -175,7 +208,7 @@ public void writeLong(long n) throws IOException { @Override public void writeFloat(float f) throws IOException { parser.advance(Symbol.FLOAT); - out.writeNumber(f); + out.writeNumber(f + 0d); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java index 77fbe1c7ad0..f1c9d139e7d 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java @@ -292,8 +292,16 @@ public static void encode(Encoder e, Schema s, JsonNode n) throws IOException { e.writeMapEnd(); break; case UNION: - e.writeIndex(0); - encode(e, s.getTypes().get(0), n); + int correctIndex = 0; + List innerTypes = s.getTypes(); + while (correctIndex < innerTypes.size() && !isCompatible(innerTypes.get(correctIndex).getType(), n)) { + correctIndex++; + } + if (correctIndex >= innerTypes.size()) { + throw new AvroTypeException("Not compatible default value for union: " + n); + } + e.writeIndex(correctIndex); + encode(e, innerTypes.get(correctIndex), n); break; case FIXED: if (!n.isTextual()) @@ -346,4 +354,29 @@ public static void encode(Encoder e, Schema s, JsonNode 
n) throws IOException { break; } } + + private static boolean isCompatible(Schema.Type stype, JsonNode value) { + switch (stype) { + case RECORD: + case ENUM: + case ARRAY: + case MAP: + case UNION: + return true; + case FIXED: + case STRING: + case BYTES: + return value.isTextual(); + case INT: + case LONG: + case FLOAT: + case DOUBLE: + return value.isNumber(); + case BOOLEAN: + return value.isBoolean(); + case NULL: + return value.isNull(); + } + return true; + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java new file mode 100644 index 00000000000..480d02614ca --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/ArrayPositionPredicate.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.path; + +/** + * Returns items by their position (numeric index) in an array + */ +public class ArrayPositionPredicate implements PositionalPathPredicate { + private final long index; + + public ArrayPositionPredicate(long index) { + this.index = index; + } + + @Override + public String toString() { + return "[" + index + "]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java b/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java new file mode 100644 index 00000000000..c8442dfaef9 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/LocationStep.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * Selects items based on their "path" (name of a property under which they are + * stored) relative to the context. + */ +public class LocationStep implements PathElement { + /** + * selector part of location step. either "." or ".." 
+ */ + private final String selector; + /** + * name of a property to select + */ + private final String propertyName; + + public LocationStep(String selector, String propertyName) { + this.selector = selector; + this.propertyName = propertyName; + } + + @Override + public String toString() { + if (propertyName == null || propertyName.isEmpty()) { + return selector; + } + return selector + propertyName; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java new file mode 100644 index 00000000000..b183d8459d3 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/MapKeyPredicate.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.path; + +/** + * Returns items by their position (string key under which they are stored) in a + * map + */ +public class MapKeyPredicate implements PositionalPathPredicate { + private final String key; + + public MapKeyPredicate(String key) { + this.key = key; + } + + public String getKey() { + return key; + } + + @Override + public String toString() { + if (key == null) { + return ""; + } + return "[\"" + key + "\"]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java new file mode 100644 index 00000000000..f3be4dc2a92 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathElement.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.path; + +/** + * root interface for all pieces of an AvroPath expression + */ +public interface PathElement { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java new file mode 100644 index 00000000000..092894652f2 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathPredicate.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * a predicate is a filter that restricts items selected by a + * {@link LocationStep} + */ +public interface PathPredicate extends PathElement { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java b/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java new file mode 100644 index 00000000000..ac9ba513722 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PathTracingException.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +import org.apache.avro.Schema; + +/** + * interface for exceptions that can trace the AvroPath of an error + * + * @param the regular (user-facing) exception that will be + * {@link #summarize(Schema)}ed out of this class + */ +public interface PathTracingException { + /** + * appends a path element to the trace. expected to be called in reverse-order + * as the exception bubbles up the stack + * + * @param step an AvroPath step tracing back from the location of the original + * exception towards the root of the data graph + */ + void tracePath(PathElement step); + + /** + * produces a user-facing exception to be thrown back out to user code + * + * @param root the root object for the operation that generated the exception + * @return an exception + */ + T summarize(Schema root); +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java new file mode 100644 index 00000000000..3c9751ef2a5 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/PositionalPathPredicate.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +/** + * filters items by their context position + */ +public interface PositionalPathPredicate extends PathPredicate { +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java new file mode 100644 index 00000000000..4aed18b91de --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingAvroTypeException.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.path; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +/** + * an {@link AvroTypeException} with extra fields used to trace back the path to + * a bad value through an object graph + */ +public class TracingAvroTypeException extends AvroTypeException implements PathTracingException { + private final List reversePath; + + public TracingAvroTypeException(AvroTypeException cause) { + super(cause.getMessage(), cause); + this.reversePath = new ArrayList<>(3); // expected to be short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public AvroTypeException summarize(Schema root) { + AvroTypeException cause = (AvroTypeException) getCause(); + + StringBuilder sb = new StringBuilder(); + sb.append(cause.getMessage()); + + if (reversePath != null && !reversePath.isEmpty()) { + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + AvroTypeException summary = new AvroTypeException(sb.toString()); + summary.initCause(cause); + return summary; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java new file mode 100644 index 00000000000..87607aee8f6 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingClassCastException.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.path; + +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +import java.util.ArrayList; +import java.util.List; + +/** + * a {@link ClassCastException} with extra fields used to trace back the path to + * a bad value through an object graph + */ +public class TracingClassCastException extends ClassCastException implements PathTracingException { + private final ClassCastException cause; + private final Object datum; + private final Schema expected; + private final boolean customCoderUsed; + private final List reversePath; + + public TracingClassCastException(ClassCastException cause, Object datum, Schema expected, boolean customCoderUsed) { + this.cause = cause; + this.datum = datum; + this.expected = expected; + this.customCoderUsed = customCoderUsed; + this.reversePath = new ArrayList<>(3); // assume short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public synchronized ClassCastException getCause() { + return cause; + } + + /** + * @return a hopefully helpful error message + */ + @Override + public ClassCastException summarize(Schema root) { + StringBuilder sb = new StringBuilder(); + sb.append("value ").append(SchemaUtil.describe(datum)); + sb.append(" cannot 
be cast to expected type ").append(SchemaUtil.describe(expected)); + if (reversePath == null || reversePath.isEmpty()) { + // very simple "shallow" NPE, no nesting at all, or custom coders used means we + // have no data + if (customCoderUsed) { + sb.append(". No further details available as custom coders were used"); + } + } else { + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + ClassCastException summary = new ClassCastException(sb.toString()); + summary.initCause(cause); + return summary; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java b/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java new file mode 100644 index 00000000000..fabfc764d51 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/TracingNullPointException.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.path; + +import org.apache.avro.Schema; +import org.apache.avro.util.SchemaUtil; + +import java.util.ArrayList; +import java.util.List; + +/** + * a {@link NullPointerException} with extra fields used to trace back the path + * to a null value through an object graph + */ +public class TracingNullPointException extends NullPointerException + implements PathTracingException { + private final NullPointerException cause; + private final Schema expected; + private final boolean customCoderUsed; + private final List reversePath; + + public TracingNullPointException(NullPointerException cause, Schema expected, boolean customCoderUsed) { + this.cause = cause; + this.expected = expected; + this.customCoderUsed = customCoderUsed; + this.reversePath = new ArrayList<>(3); // assume short + } + + @Override + public void tracePath(PathElement step) { + reversePath.add(step); + } + + @Override + public synchronized NullPointerException getCause() { + return cause; + } + + /** + * @return a hopefully helpful error message + */ + @Override + public NullPointerException summarize(Schema root) { + StringBuilder sb = new StringBuilder(); + sb.append("null value for (non-nullable) "); + if (reversePath == null || reversePath.isEmpty()) { + // very simple "shallow" NPE, no nesting at all, or custom coders used means we + // have no data + if (customCoderUsed) { + sb.append("field or map key. 
No further details available as custom coders were used"); + } else { + sb.append(SchemaUtil.describe(expected)); + } + } else { + PathElement innerMostElement = reversePath.get(0); + boolean isNullMapKey = innerMostElement instanceof MapKeyPredicate + && ((MapKeyPredicate) innerMostElement).getKey() == null; + if (isNullMapKey) { + sb.delete(0, sb.length()); // clear + sb.append("null key in map"); + } else { + sb.append(SchemaUtil.describe(expected)); + } + sb.append(" at "); + if (root != null) { + sb.append(SchemaUtil.describe(root)); + } + for (int i = reversePath.size() - 1; i >= 0; i--) { + PathElement step = reversePath.get(i); + sb.append(step.toString()); + } + } + NullPointerException summary = new NullPointerException(sb.toString()); + summary.initCause(cause); + return summary; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java b/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java new file mode 100644 index 00000000000..01e30e108f1 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/UnionTypePredicate.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.path; + +/** + * Returns items by their position (numeric index of type) in a union schema + */ +public class UnionTypePredicate implements PositionalPathPredicate { + private final String type; + + public UnionTypePredicate(String type) { + this.type = type; + } + + @Override + public String toString() { + return "[" + type + "]"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java b/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java new file mode 100644 index 00000000000..8b1dea4b56c --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/package-info.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Interfaces and base classes for AvroPath. This functionality is + * experimental, meaning these APIs are not expected to be stable any + * time soon so use at your own risk. 
Feedback, however, would be very + * appreciated :-) + */ +package org.apache.avro.path; diff --git a/lang/java/avro/src/main/java/org/apache/avro/path/package.html b/lang/java/avro/src/main/java/org/apache/avro/path/package.html new file mode 100644 index 00000000000..73ab0a71528 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/path/package.html @@ -0,0 +1,29 @@ + + + + + + Interfaces and base classes for AvroPath. + +

    + This functionality is experimental, meaning these APIs are not + expected to be stable any time soon so use at your own risk. Feedback, + however, would be very appreciated :-) +

    + + diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java index 96188495121..dce1aed98a5 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccess.java @@ -21,6 +21,22 @@ abstract class FieldAccess { + protected static final int INT_DEFAULT_VALUE = 0; + + protected static final float FLOAT_DEFAULT_VALUE = 0.0f; + + protected static final short SHORT_DEFAULT_VALUE = (short) 0; + + protected static final byte BYTE_DEFAULT_VALUE = (byte) 0; + + protected static final boolean BOOLEAN_DEFAULT_VALUE = false; + + protected static final char CHAR_DEFAULT_VALUE = '\u0000'; + + protected static final long LONG_DEFAULT_VALUE = 0L; + + protected static final double DOUBLE_DEFAULT_VALUE = 0.0d; + protected abstract FieldAccessor getAccessor(Field field); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java index c790dbfb886..5d51be054be 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessReflect.java @@ -62,7 +62,29 @@ public Object get(Object object) throws IllegalAccessException { @Override public void set(Object object, Object value) throws IllegalAccessException, IOException { - field.set(object, value); + if (value == null && field.getType().isPrimitive()) { + Object defaultValue = null; + if (int.class.equals(field.getType())) { + defaultValue = INT_DEFAULT_VALUE; + } else if (float.class.equals(field.getType())) { + defaultValue = FLOAT_DEFAULT_VALUE; + } else if (short.class.equals(field.getType())) { + defaultValue = SHORT_DEFAULT_VALUE; + } else if (byte.class.equals(field.getType())) { + defaultValue = BYTE_DEFAULT_VALUE; + } else if 
(boolean.class.equals(field.getType())) { + defaultValue = BOOLEAN_DEFAULT_VALUE; + } else if (char.class.equals(field.getType())) { + defaultValue = CHAR_DEFAULT_VALUE; + } else if (long.class.equals(field.getType())) { + defaultValue = LONG_DEFAULT_VALUE; + } else if (double.class.equals(field.getType())) { + defaultValue = DOUBLE_DEFAULT_VALUE; + } + field.set(object, defaultValue); + } else { + field.set(object, value); + } } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java deleted file mode 100644 index f555df49ae2..00000000000 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/FieldAccessUnsafe.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.avro.reflect; - -import java.io.IOException; -import java.lang.reflect.Field; - -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.io.Decoder; -import org.apache.avro.io.Encoder; - -import sun.misc.Unsafe; - -@SuppressWarnings("restriction") -class FieldAccessUnsafe extends FieldAccess { - - private static final Unsafe UNSAFE; - - static { - try { - Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); - theUnsafe.setAccessible(true); - UNSAFE = (Unsafe) theUnsafe.get(null); - // It seems not all Unsafe implementations implement the following method. - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - protected FieldAccessor getAccessor(Field field) { - AvroEncode enc = field.getAnnotation(AvroEncode.class); - if (enc != null) - try { - return new UnsafeCustomEncodedField(field, enc.using().getDeclaredConstructor().newInstance()); - } catch (Exception e) { - throw new AvroRuntimeException("Could not instantiate custom Encoding"); - } - Class c = field.getType(); - if (c == int.class) - return new UnsafeIntField(field); - else if (c == long.class) - return new UnsafeLongField(field); - else if (c == byte.class) - return new UnsafeByteField(field); - else if (c == float.class) - return new UnsafeFloatField(field); - else if (c == double.class) - return new UnsafeDoubleField(field); - else if (c == char.class) - return new UnsafeCharField(field); - else if (c == boolean.class) - return new UnsafeBooleanField(field); - else if (c == short.class) - return new UnsafeShortField(field); - else - return new UnsafeObjectField(field); - } - - abstract static class UnsafeCachedField extends FieldAccessor { - protected final long offset; - protected Field field; - protected final boolean isStringable; - - UnsafeCachedField(Field f) { - this.offset = UNSAFE.objectFieldOffset(f); - this.field = f; - this.isStringable = f.isAnnotationPresent(Stringable.class); - } - - @Override - protected Field 
getField() { - return field; - } - - @Override - protected boolean supportsIO() { - return true; - } - - @Override - protected boolean isStringable() { - return isStringable; - } - } - - final static class UnsafeIntField extends UnsafeCachedField { - UnsafeIntField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putInt(object, offset, (Integer) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getInt(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putInt(object, offset, in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getInt(object, offset)); - } - } - - final static class UnsafeFloatField extends UnsafeCachedField { - protected UnsafeFloatField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putFloat(object, offset, (Float) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getFloat(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putFloat(object, offset, in.readFloat()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeFloat(UNSAFE.getFloat(object, offset)); - } - } - - final static class UnsafeShortField extends UnsafeCachedField { - protected UnsafeShortField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putShort(object, offset, (Short) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getShort(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putShort(object, offset, (short) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - 
out.writeInt(UNSAFE.getShort(object, offset)); - } - } - - final static class UnsafeByteField extends UnsafeCachedField { - protected UnsafeByteField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putByte(object, offset, (Byte) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getByte(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putByte(object, offset, (byte) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getByte(object, offset)); - } - } - - final static class UnsafeBooleanField extends UnsafeCachedField { - protected UnsafeBooleanField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putBoolean(object, offset, (Boolean) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getBoolean(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putBoolean(object, offset, in.readBoolean()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeBoolean(UNSAFE.getBoolean(object, offset)); - } - } - - final static class UnsafeCharField extends UnsafeCachedField { - protected UnsafeCharField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putChar(object, offset, (Character) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getChar(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putChar(object, offset, (char) in.readInt()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeInt(UNSAFE.getChar(object, offset)); - } - } - - final static class UnsafeLongField 
extends UnsafeCachedField { - protected UnsafeLongField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putLong(object, offset, (Long) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getLong(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putLong(object, offset, in.readLong()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeLong(UNSAFE.getLong(object, offset)); - } - } - - final static class UnsafeDoubleField extends UnsafeCachedField { - protected UnsafeDoubleField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putDouble(object, offset, (Double) value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getDouble(object, offset); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putDouble(object, offset, in.readDouble()); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - out.writeDouble(UNSAFE.getDouble(object, offset)); - } - } - - final static class UnsafeObjectField extends UnsafeCachedField { - protected UnsafeObjectField(Field f) { - super(f); - } - - @Override - protected void set(Object object, Object value) { - UNSAFE.putObject(object, offset, value); - } - - @Override - protected Object get(Object object) { - return UNSAFE.getObject(object, offset); - } - - @Override - protected boolean supportsIO() { - return false; - } - - } - - final static class UnsafeCustomEncodedField extends UnsafeCachedField { - - private CustomEncoding encoding; - - UnsafeCustomEncodedField(Field f, CustomEncoding encoding) { - super(f); - this.encoding = encoding; - } - - @Override - protected Object get(Object object) throws IllegalAccessException { - return UNSAFE.getObject(object, offset); - } - - @Override 
- protected void set(Object object, Object value) throws IllegalAccessException, IOException { - UNSAFE.putObject(object, offset, value); - } - - @Override - protected void read(Object object, Decoder in) throws IOException { - UNSAFE.putObject(object, offset, encoding.read(in)); - } - - @Override - protected void write(Object object, Encoder out) throws IOException { - encoding.write(UNSAFE.getObject(object, offset), out); - } - - @Override - protected boolean isCustomEncoded() { - return true; - } - } -} diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java index 4ead6b888f4..0c0b10478a5 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java @@ -36,6 +36,7 @@ import org.apache.avro.specific.FixedSize; import org.apache.avro.specific.SpecificData; import org.apache.avro.util.ClassUtils; +import org.apache.avro.util.MapUtil; import java.io.IOException; import java.lang.annotation.Annotation; @@ -63,12 +64,16 @@ import java.util.Map; import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; /** Utilities to use existing Java classes and interfaces via reflection. */ public class ReflectData extends SpecificData { private static final String STRING_OUTER_PARENT_REFERENCE = "this$0"; + /** + * Always false since custom coders are not available for {@link ReflectData}. 
+ */ @Override public boolean useCustomCoders() { return false; @@ -355,8 +360,8 @@ protected ClassAccessorData computeValue(Class c) { static class ClassAccessorData { private final Class clazz; private final Map byName = new HashMap<>(); - // getAccessorsFor is already synchronized, no need to wrap - final Map bySchema = new WeakHashMap<>(); + // getAccessorsFor replaces this map with each modification + volatile Map bySchema = new WeakHashMap<>(); private ClassAccessorData(Class c) { clazz = c; @@ -374,12 +379,14 @@ private ClassAccessorData(Class c) { * Return the field accessors as an array, indexed by the field index of the * given schema. */ - private synchronized FieldAccessor[] getAccessorsFor(Schema schema) { - // if synchronized is removed from this method, adjust bySchema appropriately + private FieldAccessor[] getAccessorsFor(Schema schema) { + // to avoid synchronization, we replace the map for each modification FieldAccessor[] result = bySchema.get(schema); if (result == null) { result = createAccessorsFor(schema); + Map bySchema = new WeakHashMap<>(this.bySchema); bySchema.put(schema, result); + this.bySchema = bySchema; } return result; } @@ -422,16 +429,6 @@ private FieldAccessor getFieldAccessor(Class c, String fieldName) { return null; } - /** @deprecated Replaced by {@link SpecificData#CLASS_PROP} */ - @Deprecated - static final String CLASS_PROP = "java-class"; - /** @deprecated Replaced by {@link SpecificData#KEY_CLASS_PROP} */ - @Deprecated - static final String KEY_CLASS_PROP = "java-key-class"; - /** @deprecated Replaced by {@link SpecificData#ELEMENT_PROP} */ - @Deprecated - static final String ELEMENT_PROP = "java-element-class"; - private static final Map CLASS_CACHE = new ConcurrentHashMap<>(); static Class getClassProp(Schema schema, String prop) { @@ -569,7 +566,7 @@ private String getNameForNonStringMapRecord(Type keyType, Type valueType, Schema Package pkg2 = valueClass.getPackage(); if (pkg1 != null && 
pkg1.getName().startsWith("java") && pkg2 != null && pkg2.getName().startsWith("java")) { - return NS_MAP_ARRAY_RECORD + keyClass.getSimpleName() + valueClass.getSimpleName(); + return NS_MAP_ARRAY_RECORD + simpleName(keyClass) + simpleName(valueClass); } } @@ -612,11 +609,8 @@ protected Object createSchemaDefaultValue(Type type, Field field, Schema fieldSc AvroDefault defaultAnnotation = field.getAnnotation(AvroDefault.class); defaultValue = (defaultAnnotation == null) ? null : Schema.parseJsonToObject(defaultAnnotation.value()); - if (defaultValue == null && fieldSchema.getType() == Schema.Type.UNION) { - Schema defaultType = fieldSchema.getTypes().get(0); - if (defaultType.getType() == Schema.Type.NULL) { - defaultValue = JsonProperties.NULL_VALUE; - } + if (defaultValue == null && fieldSchema.isNullable()) { + defaultValue = JsonProperties.NULL_VALUE; } return defaultValue; } @@ -666,6 +660,9 @@ protected Schema createSchema(Type type, Map names) { return result; } else if (type instanceof Class) { // Class Class c = (Class) type; + while (c.isAnonymousClass()) { + c = c.getSuperclass(); + } if (c.isPrimitive() || // primitives c == Void.class || c == Boolean.class || c == Integer.class || c == Long.class || c == Float.class || c == Double.class || c == Byte.class || c == Short.class || c == Character.class) @@ -748,7 +745,7 @@ protected Schema createSchema(Type type, Map names) { AvroMeta[] metadata = field.getAnnotationsByType(AvroMeta.class); // add metadata for (AvroMeta meta : metadata) { - if (recordField.getObjectProps().containsKey(meta.key())) { + if (recordField.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate field prop key: " + meta.key()); } recordField.addProp(meta.key(), meta.value()); @@ -767,7 +764,7 @@ protected Schema createSchema(Type type, Map names) { schema.setFields(fields); AvroMeta[] metadata = c.getAnnotationsByType(AvroMeta.class); for (AvroMeta meta : metadata) { - if 
(schema.getObjectProps().containsKey(meta.key())) { + if (schema.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate type prop key: " + meta.key()); } schema.addProp(meta.key(), meta.value()); @@ -785,6 +782,18 @@ protected boolean isStringable(Class c) { return c.isAnnotationPresent(Stringable.class) || super.isStringable(c); } + private String simpleName(Class c) { + String simpleName = null; + if (c != null) { + while (c.isAnonymousClass()) { + c = c.getSuperclass(); + } + simpleName = c.getSimpleName(); + } + + return simpleName; + } + private static final Schema THROWABLE_MESSAGE = makeNullable(Schema.create(Schema.Type.STRING)); // if array element type is a class with a union annotation, note it @@ -826,11 +835,11 @@ public static Schema makeNullable(Schema schema) { } } - private static final Map, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>(); + private static final ConcurrentMap, Field[]> FIELDS_CACHE = new ConcurrentHashMap<>(); // Return of this class and its superclasses to serialize. private static Field[] getCachedFields(Class recordClass) { - return FIELDS_CACHE.computeIfAbsent(recordClass, rc -> getFields(rc, true)); + return MapUtil.computeIfAbsent(FIELDS_CACHE, recordClass, rc -> getFields(rc, true)); } private static Field[] getFields(Class recordClass, boolean excludeJava) { @@ -888,8 +897,7 @@ protected Schema createFieldSchema(Field field, Map names) { */ @Override public Protocol getProtocol(Class iface) { - Protocol protocol = new Protocol(iface.getSimpleName(), - iface.getPackage() == null ? "" : iface.getPackage().getName()); + Protocol protocol = new Protocol(simpleName(iface), iface.getPackage() == null ? 
"" : iface.getPackage().getName()); Map names = new LinkedHashMap<>(); Map messages = protocol.getMessages(); Map, Type> genericTypeVariableMap = ReflectionUtil.resolveTypeVariables(iface); diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java index 20be49ec408..2a8fcee9f26 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumReader.java @@ -21,8 +21,11 @@ import java.lang.reflect.Array; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.HashSet; +import java.util.HashMap; import java.util.Collection; import java.util.Map; +import java.util.Optional; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Conversion; @@ -92,8 +95,16 @@ protected Object newArray(Object old, int size, Schema schema) { ((Collection) old).clear(); return old; } + if (collectionClass.isAssignableFrom(ArrayList.class)) return new ArrayList<>(); + + if (collectionClass.isAssignableFrom(HashSet.class)) + return new HashSet<>(); + + if (collectionClass.isAssignableFrom(HashMap.class)) + return new HashMap<>(); + return SpecificData.newInstance(collectionClass, schema); } @@ -135,7 +146,7 @@ protected Object readArray(Object old, Schema expected, ResolvingDecoder in) thr return readCollection(c, expectedType, l, in); } else if (array instanceof Map) { // Only for non-string keys, we can use NS_MAP_* fields - // So we check the samee explicitly here + // So we check the same explicitly here if (ReflectData.isNonStringMapSchema(expected)) { Collection c = new ArrayList<>(); readCollection(c, expectedType, l, in); @@ -287,6 +298,15 @@ protected void readField(Object record, Field field, Object oldDatum, ResolvingD return; } } + if (Optional.class.isAssignableFrom(accessor.getField().getType())) { + try { + Object value = 
readWithoutConversion(oldDatum, field.schema(), in); + accessor.set(record, Optional.ofNullable(value)); + return; + } catch (IllegalAccessException e) { + throw new AvroRuntimeException("Failed to set " + field); + } + } try { accessor.set(record, readWithoutConversion(oldDatum, field.schema(), in)); return; diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java index 05d9366e662..25555d99e47 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectDatumWriter.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import org.apache.avro.AvroRuntimeException; @@ -81,8 +82,7 @@ protected void writeArray(Schema schema, Object datum, Encoder out) throws IOExc out.writeArrayStart(); switch (type) { case BOOLEAN: - if (elementClass.isPrimitive()) - ArrayAccessor.writeArray((boolean[]) datum, out); + ArrayAccessor.writeArray((boolean[]) datum, out); break; case DOUBLE: ArrayAccessor.writeArray((double[]) datum, out); @@ -154,6 +154,8 @@ else if (datum instanceof Map && ReflectData.isNonStringMapSchema(schema)) { entryList.add(new MapEntry(e.getKey(), e.getValue())); } datum = entryList; + } else if (datum instanceof Optional) { + datum = ((Optional) datum).orElse(null); } try { super.write(schema, datum, out); diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java index 18ad4754c7e..4fa52d0345e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectionUtil.java @@ -56,25 +56,13 @@ static void resetFieldAccess() { // so it is monomorphic and the JIT can inline FieldAccess 
access = null; try { - if (null == System.getProperty("avro.disable.unsafe")) { - FieldAccess unsafeAccess = load("org.apache.avro.reflect.FieldAccessUnsafe", FieldAccess.class); - if (validate(unsafeAccess)) { - access = unsafeAccess; - } + FieldAccess reflectAccess = new FieldAccessReflect(); + if (validate(reflectAccess)) { + fieldAccess = reflectAccess; } - } catch (Throwable ignored) { + } catch (Throwable oops) { + throw new AvroRuntimeException("Unable to load a functional FieldAccess class!"); } - if (access == null) { - try { - FieldAccess reflectAccess = load("org.apache.avro.reflect.FieldAccessReflect", FieldAccess.class); - if (validate(reflectAccess)) { - access = reflectAccess; - } - } catch (Throwable oops) { - throw new AvroRuntimeException("Unable to load a functional FieldAccess class!"); - } - } - fieldAccess = access; } private static T load(String name, Class type) throws Exception { @@ -118,10 +106,8 @@ private boolean validate(FieldAccess access) throws Exception { } private boolean validField(FieldAccess access, String name, Object original, Object toSet) throws Exception { - FieldAccessor a; - boolean valid = true; - a = accessor(access, name); - valid &= original.equals(a.get(this)); + FieldAccessor a = accessor(access, name); + boolean valid = original.equals(a.get(this)); a.set(this, toSet); valid &= !original.equals(a.get(this)); return valid; diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java index 5b539395b8e..c30616e17a3 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java @@ -30,6 +30,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.ClassUtils; +import org.apache.avro.util.MapUtil; +import org.apache.avro.util.SchemaUtil; import 
org.apache.avro.util.internal.ClassValueCache; import java.io.ObjectInput; @@ -45,9 +47,11 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.WeakHashMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.function.Function; /** Utilities for generated Java classes and interfaces. */ @@ -87,6 +91,8 @@ public class SpecificData extends GenericData { public static final String KEY_CLASS_PROP = "java-key-class"; public static final String ELEMENT_PROP = "java-element-class"; + public static final char RESERVED_WORD_ESCAPE_CHAR = '$'; + /** * Reserved words from * https://docs.oracle.com/javase/specs/jls/se16/html/jls-3.html require @@ -101,12 +107,36 @@ public class SpecificData extends GenericData { "throw", "throws", "transient", "try", "void", "volatile", "while", // Literals from Section 3.10 can't be used as identifiers. "true", "false", "null", - // Some keywords from Section 3.8 can't be used as type identifiers. - "var", "yield", "record", // Note that module-related restricted keywords can still be used. 
// Class names used internally by the avro code generator "Builder")); + /* Reserved words for accessor/mutator methods */ + public static final Set ACCESSOR_MUTATOR_RESERVED_WORDS = new HashSet<>( + Arrays.asList("class", "schema", "classSchema")); + + static { + // Add reserved words to accessor/mutator reserved words + ACCESSOR_MUTATOR_RESERVED_WORDS.addAll(RESERVED_WORDS); + } + + /* Reserved words for type identifiers */ + public static final Set TYPE_IDENTIFIER_RESERVED_WORDS = new HashSet<>( + Arrays.asList("var", "yield", "record")); + + static { + // Add reserved words to type identifier reserved words + TYPE_IDENTIFIER_RESERVED_WORDS.addAll(RESERVED_WORDS); + } + + /* Reserved words for error types */ + public static final Set ERROR_RESERVED_WORDS = new HashSet<>(Arrays.asList("message", "cause")); + + static { + // Add accessor/mutator reserved words to error reserved words + ERROR_RESERVED_WORDS.addAll(ACCESSOR_MUTATOR_RESERVED_WORDS); + } + /** * Read/write some common builtin classes as strings. Representing these as * strings isn't always best, as they aren't always ordered ideally, but at @@ -148,16 +178,16 @@ public static SpecificData get() { } /** - * For RECORD type schemas, this method returns the SpecificData instance of the - * class associated with the schema, in order to get the right conversions for - * any logical types used. + * For RECORD and UNION type schemas, this method returns the SpecificData + * instance of the class associated with the schema, in order to get the right + * conversions for any logical types used. * * @param reader the reader schema * @return the SpecificData associated with the schema's class, or the default * instance. 
*/ public static SpecificData getForSchema(Schema reader) { - if (reader != null && reader.getType() == Type.RECORD) { + if (reader != null && (reader.getType() == Type.RECORD || reader.getType() == Type.UNION)) { final Class clazz = SpecificData.get().getClass(reader); if (clazz != null) { return getForClass(clazz); @@ -190,7 +220,7 @@ public static SpecificData getForClass(Class c) { /** * Retrieve the current value of the custom-coders feature flag. Defaults to - * true, but this default can be overriden using the system + * false, but this default can be overridden using the system * property org.apache.avro.specific.use_custom_coders, and can be * set dynamically by {@link SpecificData#useCustomCoders()}. See classCache = new ConcurrentHashMap<>(); + private final ConcurrentMap classCache = new ConcurrentHashMap<>(); private static final Class NO_CLASS = new Object() { }.getClass(); private static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); + /** + * Utility to mangle the fully qualified class name into a valid symbol. + */ + public static String mangleFullyQualified(String fullName) { + int lastDot = fullName.lastIndexOf('.'); + + if (lastDot < 0) { + return mangleTypeIdentifier(fullName); + } else { + String namespace = fullName.substring(0, lastDot); + String typeName = fullName.substring(lastDot + 1); + + return mangle(namespace) + "." + mangleTypeIdentifier(typeName); + } + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word) { + return mangle(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : RESERVED_WORDS); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. 
+ */ + public static String mangleTypeIdentifier(String word) { + return mangleTypeIdentifier(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : TYPE_IDENTIFIER_RESERVED_WORDS); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, Set reservedWords) { + return mangle(word, reservedWords, false); + } + + public static String mangleMethod(String word, boolean isError) { + return mangle(word, isError ? ERROR_RESERVED_WORDS : ACCESSOR_MUTATOR_RESERVED_WORDS, true); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words. + */ + public static String mangle(String word, Set reservedWords, boolean isMethod) { + if (isBlank(word)) { + return word; + } + if (word.contains(".")) { + // If the 'word' is really a full path of a class we must mangle just the + String[] packageWords = word.split("\\."); + String[] newPackageWords = new String[packageWords.length]; + + for (int i = 0; i < packageWords.length; i++) { + String oldName = packageWords[i]; + newPackageWords[i] = mangle(oldName, reservedWords, false); + } + + return String.join(".", newPackageWords); + } + if (reservedWords.contains(word) || (isMethod && reservedWords + .contains(Character.toLowerCase(word.charAt(0)) + ((word.length() > 1) ? word.substring(1) : "")))) { + return word + "$"; + } + return word; + } + /** Undoes mangling for reserved words. */ protected static String unmangle(String word) { while (word.endsWith("$")) { @@ -242,6 +355,21 @@ protected static String unmangle(String word) { return word; } + private static boolean isBlank(CharSequence cs) { + int strLen = cs == null ? 
0 : cs.length(); + if (strLen == 0) { + return true; + } else { + for (int i = 0; i < strLen; ++i) { + if (!Character.isWhitespace(cs.charAt(i))) { + return false; + } + } + + return true; + } + } + /** Return the class that implements a schema, or null if none exists. */ public Class getClass(Schema schema) { switch (schema.getType()) { @@ -251,7 +379,7 @@ public Class getClass(Schema schema) { String name = schema.getFullName(); if (name == null) return null; - Class c = classCache.computeIfAbsent(name, n -> { + Class c = MapUtil.computeIfAbsent(classCache, name, n -> { try { return ClassUtils.forName(getClassLoader(), getClassName(schema)); } catch (ClassNotFoundException e) { @@ -328,7 +456,7 @@ public static String getClassName(Schema schema) { if (namespace == null || "".equals(namespace)) return name; String dot = namespace.endsWith("$") ? "" : "."; // back-compatibly handle $ - return namespace + dot + name; + return mangle(namespace) + dot + mangleTypeIdentifier(name); } // cache for schemas created from Class objects. 
Use ClassValue to avoid @@ -380,9 +508,11 @@ else if (type instanceof ParameterizedType) { } else if (Map.class.isAssignableFrom(raw)) { // map java.lang.reflect.Type key = params[0]; java.lang.reflect.Type value = params[1]; - if (!(key instanceof Class && CharSequence.class.isAssignableFrom((Class) key))) - throw new AvroTypeException("Map key class not CharSequence: " + key); + if (!(key instanceof Class && CharSequence.class.isAssignableFrom((Class) key))) + throw new AvroTypeException("Map key class not CharSequence: " + SchemaUtil.describe(key)); return Schema.createMap(createSchema(value, names)); + } else if (Optional.class.isAssignableFrom(raw)) { + return Schema.createUnion(Schema.create(Schema.Type.NULL), createSchema(params[0], names)); } else { return createSchema(raw, names); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java index d924c8e04b7..8950f165991 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumReader.java @@ -24,12 +24,25 @@ import org.apache.avro.io.ResolvingDecoder; import org.apache.avro.util.ClassUtils; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * {@link org.apache.avro.io.DatumReader DatumReader} for generated Java * classes. 
*/ public class SpecificDatumReader extends GenericDatumReader { + + public static final String[] SERIALIZABLE_PACKAGES; + + static { + SERIALIZABLE_PACKAGES = System.getProperty("org.apache.avro.SERIALIZABLE_PACKAGES", + "java.lang,java.math,java.io,java.net,org.apache.avro.reflect").split(","); + } + + private final List trustedPackages = new ArrayList<>(); + public SpecificDatumReader() { this(null, null, SpecificData.get()); } @@ -55,6 +68,7 @@ public SpecificDatumReader(Schema writer, Schema reader) { */ public SpecificDatumReader(Schema writer, Schema reader, SpecificData data) { super(writer, reader, data); + trustedPackages.addAll(Arrays.asList(SERIALIZABLE_PACKAGES)); } /** Construct given a {@link SpecificData}. */ @@ -101,12 +115,43 @@ private Class getPropAsClass(Schema schema, String prop) { if (name == null) return null; try { - return ClassUtils.forName(getData().getClassLoader(), name); + Class clazz = ClassUtils.forName(getData().getClassLoader(), name); + checkSecurity(clazz); + return clazz; } catch (ClassNotFoundException e) { throw new AvroRuntimeException(e); } } + private boolean trustAllPackages() { + return (trustedPackages.size() == 1 && "*".equals(trustedPackages.get(0))); + } + + private void checkSecurity(Class clazz) throws ClassNotFoundException { + if (trustAllPackages() || clazz.isPrimitive()) { + return; + } + + boolean found = false; + Package thePackage = clazz.getPackage(); + if (thePackage != null) { + for (String trustedPackage : getTrustedPackages()) { + if (thePackage.getName().equals(trustedPackage) || thePackage.getName().startsWith(trustedPackage + ".")) { + found = true; + break; + } + } + if (!found) { + throw new SecurityException("Forbidden " + clazz + + "! This class is not trusted to be included in Avro schema using java-class. 
Please set org.apache.avro.SERIALIZABLE_PACKAGES system property with the packages you trust."); + } + } + } + + public final List getTrustedPackages() { + return trustedPackages; + } + @Override protected Object readRecord(Object old, Schema expected, ResolvingDecoder in) throws IOException { SpecificData data = getSpecificData(); diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java index 46118474f1b..17214031a6e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificDatumWriter.java @@ -23,8 +23,12 @@ import org.apache.avro.Conversion; import org.apache.avro.LogicalType; import org.apache.avro.Schema; +import org.apache.avro.path.TracingAvroTypeException; import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.path.TracingClassCastException; +import org.apache.avro.path.TracingNullPointException; import org.apache.avro.io.Encoder; +import org.apache.avro.path.LocationStep; /** * {@link org.apache.avro.io.DatumWriter DatumWriter} for generated Java @@ -77,7 +81,11 @@ protected void writeRecord(Schema schema, Object datum, Encoder out) throws IOEx if (datum instanceof SpecificRecordBase && this.getSpecificData().useCustomCoders()) { SpecificRecordBase d = (SpecificRecordBase) datum; if (d.hasCustomCoders()) { - d.customEncode(out); + try { + d.customEncode(out); + } catch (NullPointerException e) { + throw new TracingNullPointException(e, null, true); + } return; } } @@ -98,10 +106,9 @@ protected void writeField(Object datum, Schema.Field f, Encoder out, Object stat try { writeWithoutConversion(fieldSchema, value, out); - } catch (NullPointerException e) { - throw npe(e, " in field '" + f.name() + "'"); - } catch (ClassCastException cce) { - throw addClassCastMsg(cce, " in field '" + f.name() + "'"); + } catch 
(TracingNullPointException | TracingClassCastException | TracingAvroTypeException e) { + e.tracePath(new LocationStep(".", f.name())); + throw e; } catch (AvroTypeException ate) { throw addAvroTypeMsg(ate, " in field '" + f.name() + "'"); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java index 82c23f129b4..64667ba2420 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificExceptionBase.java @@ -62,7 +62,7 @@ public boolean equals(Object that) { return false; // not a record if (this.getClass() != that.getClass()) return false; // not same schema - return SpecificData.get().compare(this, that, this.getSchema()) == 0; + return this.getSpecificData().compare(this, that, this.getSchema()) == 0; } @Override @@ -76,4 +76,9 @@ public int hashCode() { @Override public abstract void readExternal(ObjectInput in) throws IOException; + public SpecificData getSpecificData() { + // Default implementation for backwards compatibility, overridden in generated + // code + return SpecificData.get(); + } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java index 07df303b329..8cf7d5bfe13 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificRecordBase.java @@ -35,15 +35,6 @@ public abstract class SpecificRecordBase implements SpecificRecord, Comparable, GenericRecord, Externalizable { - @Override - public abstract Schema getSchema(); - - @Override - public abstract Object get(int field); - - @Override - public abstract void put(int field, Object value); - public SpecificData getSpecificData() { // Default implementation for backwards 
compatibility, overridden in generated // code @@ -105,12 +96,12 @@ public String toString() { @Override public void writeExternal(ObjectOutput out) throws IOException { - new SpecificDatumWriter(getSchema()).write(this, SpecificData.getEncoder(out)); + new SpecificDatumWriter<>(getSchema()).write(this, SpecificData.getEncoder(out)); } @Override public void readExternal(ObjectInput in) throws IOException { - new SpecificDatumReader(getSchema()).read(this, SpecificData.getDecoder(in)); + new SpecificDatumReader<>(getSchema()).read(this, SpecificData.getDecoder(in)); } /** diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java new file mode 100644 index 00000000000..1bdbfaf9a0a --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/MapUtil.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.util.concurrent.ConcurrentMap; +import java.util.function.Function; + +public class MapUtil { + + private MapUtil() { + super(); + } + + /** + * A temporary workaround for Java 8 specific performance issue JDK-8161372 + * .
    + * This class should be removed once we drop Java 8 support. + * + * @see
    JDK-8161372 + */ + public static V computeIfAbsent(ConcurrentMap map, K key, Function mappingFunction) { + V value = map.get(key); + if (value != null) { + return value; + } + return map.computeIfAbsent(key, mappingFunction::apply); + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java index e4623fce2ea..7ff7f477f35 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java @@ -17,27 +17,34 @@ */ package org.apache.avro.util; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericArray; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + import java.io.File; -import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.charset.Charset; -import java.util.HashMap; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.Duration; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; - -import org.apache.avro.Schema; -import org.apache.avro.file.CodecFactory; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericArray; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; +import java.util.concurrent.ThreadLocalRandom; /** Generates schema data as Java objects with random values. 
*/ public class RandomData implements Iterable { public static final String USE_DEFAULT = "use-default"; + private final GenericData genericData; + + private static final int MILLIS_IN_DAY = (int) Duration.ofDays(1).toMillis(); private final Schema root; private final long seed; @@ -57,6 +64,23 @@ public RandomData(Schema schema, int count, boolean utf8ForString) { } public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { + this(GenericData.get(), schema, count, seed, utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count) { + this(genericData, schema, count, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed) { + this(genericData, schema, count, seed, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, boolean utf8ForString) { + this(genericData, schema, count, System.currentTimeMillis(), utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed, boolean utf8ForString) { + this.genericData = genericData; this.root = schema; this.seed = seed; this.count = count; @@ -67,7 +91,7 @@ public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { public Iterator iterator() { return new Iterator() { private int n; - private Random random = new Random(seed); + private final Random random = new Random(seed); @Override public boolean hasNext() { @@ -91,26 +115,25 @@ public void remove() { private Object generate(Schema schema, Random random, int d) { switch (schema.getType()) { case RECORD: - GenericRecord record = new GenericData.Record(schema); + Object record = genericData.newRecord(null, schema); for (Schema.Field field : schema.getFields()) { Object value = (field.getObjectProp(USE_DEFAULT) == null) ? 
generate(field.schema(), random, d + 1) : GenericData.get().getDefaultValue(field); - record.put(field.name(), value); + genericData.setField(record, field.name(), field.pos(), value); } return record; case ENUM: List symbols = schema.getEnumSymbols(); - return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size()))); + return genericData.createEnum(symbols.get(random.nextInt(symbols.size())), schema); case ARRAY: - int length = (random.nextInt(5) + 2) - d; - @SuppressWarnings("rawtypes") - GenericArray array = new GenericData.Array(length <= 0 ? 0 : length, schema); + int length = Math.max(0, (random.nextInt(5) + 2) - d); + GenericArray array = (GenericArray) genericData.newArray(null, length, schema); for (int i = 0; i < length; i++) array.add(generate(schema.getElementType(), random, d + 1)); return array; case MAP: - length = (random.nextInt(5) + 2) - d; - Map map = new HashMap<>(length <= 0 ? 0 : length); + length = Math.max(0, (random.nextInt(5) + 2) - d); + Map map = (Map) genericData.newMap(null, length); for (int i = 0; i < length; i++) { map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1)); } @@ -121,15 +144,15 @@ private Object generate(Schema schema, Random random, int d) { case FIXED: byte[] bytes = new byte[schema.getFixedSize()]; random.nextBytes(bytes); - return new GenericData.Fixed(schema, bytes); + return genericData.createFixed(null, bytes, schema); case STRING: return randomString(random, 40); case BYTES: return randomBytes(random, 40); case INT: - return random.nextInt(); + return this.randomInt(random, schema.getLogicalType()); case LONG: - return random.nextLong(); + return this.randomLong(random, schema.getLogicalType()); case FLOAT: return random.nextFloat(); case DOUBLE: @@ -143,7 +166,24 @@ private Object generate(Schema schema, Random random, int d) { } } - private static final Charset UTF8 = Charset.forName("UTF-8"); + private static final Charset UTF8 = StandardCharsets.UTF_8; + + 
private int randomInt(Random random, LogicalType type) { + if (type instanceof LogicalTypes.TimeMillis) { + return random.nextInt(RandomData.MILLIS_IN_DAY - 1); + } + // LogicalTypes.Date LocalDate.MAX.toEpochDay() > Integer.MAX; + return random.nextInt(); + } + + private long randomLong(Random random, LogicalType type) { + if (type instanceof LogicalTypes.TimeMicros) { + return ThreadLocalRandom.current().nextLong(RandomData.MILLIS_IN_DAY * 1000L); + } + // For LogicalTypes.TimestampMillis, every long would be OK, + // Instant.MAX.toEpochMilli() failed and would be > Long.MAX_VALUE. + return random.nextLong(); + } private Object randomString(Random random, int maxLength) { int length = random.nextInt(maxLength); @@ -156,7 +196,7 @@ private Object randomString(Random random, int maxLength) { private static ByteBuffer randomBytes(Random rand, int maxLength) { ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength)); - ((Buffer) bytes).limit(bytes.capacity()); + bytes.limit(bytes.capacity()); rand.nextBytes(bytes.array()); return bytes; } @@ -167,15 +207,16 @@ public static void main(String[] args) throws Exception { System.exit(-1); } Schema sch = new Schema.Parser().parse(new File(args[0])); - DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()); - writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null")); - writer.create(sch, new File(args[1])); - try { + try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { + writer.setCodec(CodecFactory.fromString(args.length >= 4 ? 
args[3] : "null")); + writer.setMeta("user_metadata", "someByteArray".getBytes(StandardCharsets.UTF_8)); + File file = new File(args[1]); + Files.createDirectories(Paths.get(file.getParent())); + writer.create(sch, file); + for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) { writer.append(datum); } - } finally { - writer.close(); } } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java new file mode 100644 index 00000000000..83285d371ae --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.util; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumSet; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +import static java.util.Objects.requireNonNull; +import static org.apache.avro.Schema.Type.ARRAY; +import static org.apache.avro.Schema.Type.ENUM; +import static org.apache.avro.Schema.Type.FIXED; +import static org.apache.avro.Schema.Type.MAP; +import static org.apache.avro.Schema.Type.RECORD; +import static org.apache.avro.Schema.Type.UNION; + +/** + * Utility class to resolve schemas that are unavailable at the point they are + * referenced in a schema file. This class is meant for internal use: use at + * your own risk! + */ +public final class SchemaResolver { + + private SchemaResolver() { + } + + private static final String UR_SCHEMA_ATTR = "org.apache.avro.idl.unresolved.name"; + + private static final String UR_SCHEMA_NAME = "UnresolvedSchema"; + + private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + + private static final AtomicInteger COUNTER = new AtomicInteger(); + + /** + * Create a schema to represent an "unresolved" schema. (used to represent a + * schema whose definition does not exist, yet). + * + * @param name a schema name + * @return an unresolved schema for the given name + */ + public static Schema unresolvedSchema(final String name) { + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.emptyList()); + schema.addProp(UR_SCHEMA_ATTR, name); + return schema; + } + + /** + * Is this an unresolved schema. 
+ * + * @param schema a schema + * @return whether the schema is an unresolved schema + */ + public static boolean isUnresolvedSchema(final Schema schema) { + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); + } + + /** + * Get the unresolved schema name. + * + * @param schema an unresolved schema + * @return the name of the unresolved schema + */ + public static String getUnresolvedSchemaName(final Schema schema) { + if (!isUnresolvedSchema(schema)) { + throw new IllegalArgumentException("Not a unresolved schema: " + schema); + } + return schema.getProp(UR_SCHEMA_ATTR); + } + + /** + * Is this a fully resolved schema (one without unresolved parts)? + */ + public static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * This visitor checks if the current schema is fully resolved. + */ + public static final class IsResolvedSchemaVisitor implements SchemaVisitor { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = isUnresolvedSchema(terminal); + return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type...
+ return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } + } + + /** + * This visitor creates clones of the visited record, union, array and map + * schemas (keeping their properties), and resolves all unresolved schemas. + */ + public static final class ResolvingVisitor implements SchemaVisitor { + private static final Set CONTAINER_SCHEMA_TYPES = EnumSet.of(RECORD, ARRAY, MAP, UNION); + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(RECORD, ENUM, FIXED); + + private final Function symbolTable; + private final IdentityHashMap replace; + + public ResolvingVisitor(final Function symbolTable) { + this.replace = new IdentityHashMap<>(); + this.symbolTable = symbolTable; + } + + @Override + public SchemaVisitorAction visitTerminal(final Schema terminal) { + Schema.Type type = terminal.getType(); + if (CONTAINER_SCHEMA_TYPES.contains(type)) { + if (!replace.containsKey(terminal)) { + throw new IllegalStateException("Schema " + terminal + " must be already processed"); + } + } else { + replace.put(terminal, terminal); + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + if (type == RECORD && !replace.containsKey(nt)) { + if (isUnresolvedSchema(nt)) { + // unresolved schema will get a replacement that we already encountered, + // or we will attempt to resolve.
+ final String unresolvedSchemaName = getUnresolvedSchemaName(nt); + Schema resSchema = symbolTable.apply(unresolvedSchemaName); + if (resSchema == null) { + throw new AvroTypeException("Undefined schema: " + unresolvedSchemaName); + } + Schema replacement = replace.computeIfAbsent(resSchema, schema -> { + Schemas.visit(schema, this); + return replace.get(schema); // This is not what the visitor returns! + }); + replace.put(nt, replacement); + } else { + // Create a clone without fields or properties. They will be added in + // afterVisitNonTerminal, as they can both create circular references. + // (see org.apache.avro.TestCircularReferences as an example) + replace.put(nt, Schema.createRecord(nt.getName(), nt.getDoc(), nt.getNamespace(), nt.isError())); + } + } + return SchemaVisitorAction.CONTINUE; + } + + public void copyProperties(final Schema first, final Schema second) { + // Logical type + Optional.ofNullable(first.getLogicalType()).ifPresent(logicalType -> logicalType.addToSchema(second)); + + // Aliases (if applicable) + if (NAMED_SCHEMA_TYPES.contains(first.getType())) { + first.getAliases().forEach(second::addAlias); + } + + // Other properties + first.getObjectProps().forEach(second::addProp); + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + Schema newSchema; + switch (type) { + case RECORD: + if (!isUnresolvedSchema(nt)) { + newSchema = replace.get(nt); + // Check if we've already handled the replacement schema with a + // reentrant call to visit(...) from within the visitor. 
+ if (!newSchema.hasFields()) { + List fields = nt.getFields(); + List newFields = new ArrayList<>(fields.size()); + for (Schema.Field field : fields) { + newFields.add(new Schema.Field(field, replace.get(field.schema()))); + } + newSchema.setFields(newFields); + copyProperties(nt, newSchema); + } + } + return SchemaVisitorAction.CONTINUE; + case UNION: + List types = nt.getTypes(); + List newTypes = new ArrayList<>(types.size()); + for (Schema sch : types) { + newTypes.add(requireNonNull(replace.get(sch))); + } + newSchema = Schema.createUnion(newTypes); + break; + case ARRAY: + newSchema = Schema.createArray(requireNonNull(replace.get(nt.getElementType()))); + break; + case MAP: + newSchema = Schema.createMap(requireNonNull(replace.get(nt.getValueType()))); + break; + default: + throw new IllegalStateException("Illegal type " + type + ", schema " + nt); + } + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Void get() { + return null; + } + + public Schema getResolved(Schema schema) { + return requireNonNull(replace.get(schema), + () -> "Unknown schema: " + schema.getFullName() + ". Was it resolved before?"); + } + + @Override + public String toString() { + return "ResolvingVisitor{symbolTable=" + symbolTable + ", replace=" + replace + '}'; + } + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java new file mode 100644 index 00000000000..0661981155d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaUtil.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; + +import java.util.StringJoiner; + +public class SchemaUtil { + + private SchemaUtil() { + // utility class + } + + public static String describe(Schema schema) { + if (schema == null) { + return "unknown"; + } + switch (schema.getType()) { + case UNION: + StringJoiner csv = new StringJoiner(", "); + for (Schema branch : schema.getTypes()) { + csv.add(describe(branch)); + } + return "[" + csv + "]"; + case MAP: + return "Map"; + case ARRAY: + return "List<" + describe(schema.getElementType()) + ">"; + default: + return schema.getName(); + } + } + + public static String describe(Object datum) { + if (datum == null) { + return "null"; + } + return datum + " (a " + datum.getClass().getName() + ")"; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java new file mode 100644 index 00000000000..1ac35baeda7 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; + +public interface SchemaVisitor { + + /** + * Invoked for schemas that do not have "child" schemas (like string, int …) or + * for a previously encountered schema with children, which will be treated as a + * terminal. (to avoid circular recursion) + */ + SchemaVisitorAction visitTerminal(Schema terminal); + + /** + * Invoked for schema with children before proceeding to visit the children. + */ + SchemaVisitorAction visitNonTerminal(Schema nonTerminal); + + /** + * Invoked for schemas with children after its children have been visited. + */ + SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal); + + /** + * Invoked when visiting is complete. + * + * @return a value that will be returned by the visit method. + */ + T get(); + + enum SchemaVisitorAction { + + /** + * continue visit. + */ + CONTINUE, + /** + * terminate visit. + */ + TERMINATE, + /** + * when returned from pre non terminal visit method the children of the non + * terminal are skipped. afterVisitNonTerminal for the current schema will not + * be invoked. + */ + SKIP_SUBTREE, + /** + * Skip visiting the siblings of this schema.
+ */ + SKIP_SIBLINGS + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java new file mode 100644 index 00000000000..927a0c37b43 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.IdentityHashMap; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Avro Schema utilities, to traverse... + */ +public final class Schemas { + + private Schemas() { + } + + /** + * Depth first visit. + */ + public static T visit(final Schema start, final SchemaVisitor visitor) { + // Set of Visited Schemas + IdentityHashMap visited = new IdentityHashMap<>(); + // Stack that contains the Schemas to process and afterVisitNonTerminal + // functions. + // Deque>> + // Using Either<...> has a cost we want to avoid... 
+ Deque dq = new ArrayDeque<>(); + dq.push(start); + Object current; + while ((current = dq.poll()) != null) { + if (current instanceof Supplier) { + // We are executing a non-terminal post visit. + SchemaVisitor.SchemaVisitorAction action = ((Supplier) current).get(); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return visitor.get(); + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action); + } + } else { + Schema schema = (Schema) current; + boolean terminate; + if (visited.containsKey(schema)) { + terminate = visitTerminal(visitor, schema, dq); + } else { + Schema.Type type = schema.getType(); + switch (type) { + case ARRAY: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getElementType())); + visited.put(schema, schema); + break; + case RECORD: + terminate = visitNonTerminal(visitor, schema, dq, () -> schema.getFields().stream().map(Field::schema) + .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator()); + visited.put(schema, schema); + break; + case UNION: + terminate = visitNonTerminal(visitor, schema, dq, schema.getTypes()); + visited.put(schema, schema); + break; + case MAP: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getValueType())); + visited.put(schema, schema); + break; + default: + terminate = visitTerminal(visitor, schema, dq); + break; + } + } + if (terminate) { + return visitor.get(); + } + } + } + return visitor.get(); + } + + private static boolean visitNonTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq, + final Iterable itSupp) { + SchemaVisitor.SchemaVisitorAction action = visitor.visitNonTerminal(schema); + switch (action) { + case CONTINUE: + dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + itSupp.forEach(dq::push); + break; + case SKIP_SUBTREE: + 
dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } + + private static boolean visitTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq) { + SchemaVisitor.SchemaVisitorAction action = visitor.visitTerminal(schema); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java b/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java new file mode 100644 index 00000000000..a1f7fa4e8b5 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/TimePeriod.java @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.util; + +import java.io.Serializable; +import java.time.DateTimeException; +import java.time.Duration; +import java.time.Period; +import java.time.chrono.ChronoPeriod; +import java.time.chrono.IsoChronology; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAmount; +import java.time.temporal.TemporalUnit; +import java.time.temporal.UnsupportedTemporalTypeException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.util.Collections.unmodifiableList; +import static java.util.Objects.requireNonNull; + +/** + *

    + * A temporal amount to model an {@link org.apache.avro.LogicalTypes.Duration + * Avro duration} (the logical type). + *

    + * + *

    + * It consists of a number of months, days and milliseconds, all modelled as an + * unsigned integer. + *

    + * + *

    + * Compared to {@link Period java.time.Period}, this class has a smaller range + * ('only' supporting a little less than 358 million years), and cannot support + * negative time periods. + *

    + * + *

    + * Compared to {@link Duration java.time.Duration}, this class has less + * precision (milliseconds compared to nanoseconds), cannot support negative + * durations, and has a much smaller range. Where {@code java.time.Duration} + * supports fixed ranges up to about 68 years, {@code TimePeriod} can only + * handle about 49 days. + *

    + * + *

    + * Comparison with the regular {@code java.time} classes: + *

    + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
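    + * <table> + * <tr><th></th><th>TimePeriod</th><th>{@link Period}</th><th>{@link Duration}</th></tr>
    + * <tr><td>Precision</td><td>milliseconds</td><td>days</td><td>nanoseconds</td></tr>
    + * <tr><td>Time range (approx.)</td><td>0 - 49 days</td><td>unsupported</td><td>-68 - 68 years</td></tr>
    + * <tr><td>Date range (approx.)</td><td>0 to 358 million years</td><td>-2.3 to 2.3 billion years</td><td>unsupported</td></tr> + * </table>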
    + * + * @see Avro 1.11 + * specification on duration + */ +public final class TimePeriod implements TemporalAmount, Serializable { + private static final long MAX_UNSIGNED_INT = 0xffffffffL; + private static final long MONTHS_PER_YEAR = 12; + private static final long MONTHS_PER_DECADE = MONTHS_PER_YEAR * 10; + private static final long MONTHS_PER_CENTURY = MONTHS_PER_DECADE * 10; + private static final long MONTHS_PER_MILLENNIUM = MONTHS_PER_CENTURY * 10; + private static final long MILLIS_PER_SECOND = 1_000; + private static final long MILLIS_PER_MINUTE = MILLIS_PER_SECOND * 60; + private static final long MILLIS_PER_HOUR = MILLIS_PER_MINUTE * 60; + private static final long MILLIS_IN_HALF_DAY = MILLIS_PER_HOUR * 12; + private static final long MICROS_PER_MILLI = 1_000; + private static final long NANOS_PER_MILLI = 1_000_000; + + private final long months; + private final long days; + private final long millis; + + /** + * Create a TimePeriod from another TemporalAmount, such as a {@link Period} or + * a {@link Duration}. 
+ * + * @param amount a temporal amount + * @return the corresponding TimePeriod + */ + public static TimePeriod from(TemporalAmount amount) { + if (requireNonNull(amount, "amount") instanceof TimePeriod) { + return (TimePeriod) amount; + } + if (amount instanceof ChronoPeriod) { + if (!IsoChronology.INSTANCE.equals(((ChronoPeriod) amount).getChronology())) { + throw new DateTimeException("TimePeriod requires ISO chronology: " + amount); + } + } + long months = 0; + long days = 0; + long millis = 0; + for (TemporalUnit unit : amount.getUnits()) { + if (unit instanceof ChronoUnit) { + long unitAmount = amount.get(unit); + switch ((ChronoUnit) unit) { + case MILLENNIA: + months = unsignedInt(months + unitAmount * MONTHS_PER_MILLENNIUM); + break; + case CENTURIES: + months = unsignedInt(months + unitAmount * MONTHS_PER_CENTURY); + break; + case DECADES: + months = unsignedInt(months + unitAmount * MONTHS_PER_DECADE); + break; + case YEARS: + months = unsignedInt(months + unitAmount * MONTHS_PER_YEAR); + break; + case MONTHS: + months = unsignedInt(months + unitAmount); + break; + case WEEKS: + days = unsignedInt(days + unitAmount * 7); + break; + case DAYS: + days = unsignedInt(days + unitAmount); + break; + case HALF_DAYS: + days = unsignedInt(days + (unitAmount / 2)); // Truncates halves + if (unitAmount % 2 != 0) { + millis = unsignedInt(millis + MILLIS_IN_HALF_DAY); + } + break; + case HOURS: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_HOUR); + break; + case MINUTES: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_MINUTE); + break; + case SECONDS: + millis = unsignedInt(millis + unitAmount * MILLIS_PER_SECOND); + break; + case MILLIS: + millis = unsignedInt(millis + unitAmount); + break; + case MICROS: + if (unitAmount % MICROS_PER_MILLI != 0) { + throw new DateTimeException( + "Cannot add " + unitAmount + " microseconds: not a whole number of milliseconds"); + } + millis = unsignedInt(millis + unitAmount / MICROS_PER_MILLI); + break; + case 
NANOS: + if (unitAmount % NANOS_PER_MILLI != 0) { + throw new DateTimeException( + "Cannot add " + unitAmount + " nanoseconds: not a whole number of milliseconds"); + } + millis = unsignedInt(millis + unitAmount / NANOS_PER_MILLI); + break; + default: + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } else { + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } + return new TimePeriod(months, days, millis); + } + + /** + * Create a TimePeriod from a number of months, days and milliseconds + * + * @param months a number of months + * @param days a number of days + * @param millis a number of milliseconds + * @return the corresponding TimePeriod + * @throws ArithmeticException if any of the parameters does not fit an unsigned + * int (0..4294967295) + */ + public static TimePeriod of(long months, long days, long millis) { + return new TimePeriod(unsignedInt(months), unsignedInt(days), unsignedInt(millis)); + } + + private static long unsignedInt(long number) { + if (number != (number & MAX_UNSIGNED_INT)) { + throw new ArithmeticException("Overflow/underflow of unsigned int"); + } + return number; + } + + private TimePeriod(long months, long days, long millis) { + this.months = months; + this.days = days; + this.millis = millis; + } + + public Duration toDuration() { + return Duration.from(this); + } + + public Period toPeriod() { + if (isDateBased()) { + // We use unsigned ints, which have double the range of a signed int that + // Period uses. We can split months to years and months to ensure there's no + // overflow. But we cannot split days, as both days and months have varying + // lengths.
+ int yearsAsInt = (int) (months / MONTHS_PER_YEAR); + int monthsAsInt = (int) (months % MONTHS_PER_YEAR); + int daysAsInt = (int) days; + if (days != daysAsInt) { + throw new DateTimeException("Too many days: a Period can contain at most " + Integer.MAX_VALUE + " days."); + } + return Period.ofYears(yearsAsInt).withMonths(monthsAsInt).withDays(daysAsInt); + } + throw new DateTimeException("Cannot convert this TimePeriod to a Period: is not date based"); + } + + /** + * Determines if the TimePeriod is date based (i.e., if its milliseconds + * component is 0). + * + * @return {@code true} iff the TimePeriod is date based + */ + public boolean isDateBased() { + return millis == 0; + } + + /** + * Determines if the TimePeriod is time based (i.e., if its months and days + * components are 0). + * + * @return {@code true} iff the TimePeriod is time based + */ + public boolean isTimeBased() { + return months == 0 && days == 0; + } + + public long getMonths() { + return months; + } + + public long getDays() { + return days; + } + + public long getMillis() { + return millis; + } + + @Override + public long get(TemporalUnit unit) { + if (unit == MONTHS) { + return months; + } else if (unit == DAYS) { + return days; + } else if (unit == MILLIS) { + return millis; + } else { + throw new UnsupportedTemporalTypeException("Unsupported unit: " + unit); + } + } + + @Override + public List getUnits() { + List units = new ArrayList<>(); + // The zero-checks ensure compatibility with the Java Time classes Period and + // Duration where possible. 
+ if (months != 0) { + units.add(MONTHS); + } + if (days != 0) { + units.add(DAYS); + } + if (millis != 0) { + units.add(MILLIS); + } + return unmodifiableList(units); + } + + @Override + public Temporal addTo(Temporal temporal) { + return addTo(temporal, months, days, millis); + } + + @Override + public Temporal subtractFrom(Temporal temporal) { + return addTo(temporal, -months, -days, -millis); + } + + private Temporal addTo(Temporal temporal, long months, long days, long millis) { + // The zero-checks ensure we can add a TimePeriod to a Temporal even when it + // does not support all fields, as long as the unsupported fields are zero. + if (months != 0) { + temporal = temporal.plus(months, MONTHS); + } + if (days != 0) { + temporal = temporal.plus(days, DAYS); + } + if (millis != 0) { + temporal = temporal.plus(millis, MILLIS); + } + return temporal; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TimePeriod that = (TimePeriod) o; + return months == that.months && days == that.days && millis == that.millis; + } + + @Override + public int hashCode() { + return Objects.hash(months, days, millis); + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + buffer.append("P"); + if (months != 0) { + int years = (int) (months / MONTHS_PER_YEAR); + int monthsLeft = (int) (months % MONTHS_PER_YEAR); + if (years != 0) { + buffer.append(years).append("Y"); + } + if (monthsLeft != 0) { + buffer.append(monthsLeft).append("M"); + } + } + if (days != 0 || (months == 0 && millis == 0)) { + buffer.append(days); + } + if (millis != 0) { + long millisLeft = millis; + int hours = (int) (millisLeft / MILLIS_PER_HOUR); + millisLeft -= MILLIS_PER_HOUR * hours; + int minutes = (int) (millisLeft / MILLIS_PER_MINUTE); + millisLeft -= MILLIS_PER_MINUTE * minutes; + int seconds = (int) (millisLeft / MILLIS_PER_SECOND); + millisLeft -= 
MILLIS_PER_SECOND * seconds; + if (millisLeft != 0) { + buffer.append(String.format("T%02d:%02d:%02d.%03d", hours, minutes, seconds, millisLeft)); + } else if (seconds != 0) { + buffer.append(String.format("T%02d:%02d:%02d", hours, minutes, seconds)); + } else if (minutes != 0) { + buffer.append(String.format("T%02d:%02d", hours, minutes)); + } else { + buffer.append(String.format("T%02d", hours)); + } + } + return buffer.toString(); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index f54b6e2062b..9238fd78c65 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -24,9 +24,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import org.apache.avro.AvroRuntimeException; +import org.apache.avro.SystemLimitException; import org.apache.avro.io.BinaryData; -import org.slf4j.LoggerFactory; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -34,22 +33,8 @@ * as a single instance may be reused. 
*/ public class Utf8 implements Comparable, CharSequence, Externalizable { - private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; - private static final int MAX_LENGTH; - private static final byte[] EMPTY = new byte[0]; - static { - String o = System.getProperty(MAX_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(Utf8.class).warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe); - } - } - MAX_LENGTH = i; - } + private static final byte[] EMPTY = new byte[0]; private byte[] bytes; private int hash; @@ -63,7 +48,7 @@ public Utf8() { public Utf8(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -78,7 +63,7 @@ public Utf8(Utf8 other) { public Utf8(byte[] bytes) { int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; } @@ -121,7 +106,7 @@ public Utf8 setLength(int newLength) { * length does not change, as this also clears the cached String. 
*/ public Utf8 setByteLength(int newLength) { - checkLength(newLength); + SystemLimitException.checkMaxStringLength(newLength); if (this.bytes.length < newLength) { this.bytes = Arrays.copyOf(this.bytes, newLength); } @@ -135,7 +120,7 @@ public Utf8 setByteLength(int newLength) { public Utf8 set(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -215,12 +200,6 @@ public CharSequence subSequence(int start, int end) { return toString().subSequence(start, end); } - private static void checkLength(int length) { - if (length > MAX_LENGTH) { - throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed"); - } - } - /** Gets the UTF-8 bytes for a String */ public static byte[] getBytesFor(String str) { return str.getBytes(StandardCharsets.UTF_8); diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java new file mode 100644 index 00000000000..967a48bf364 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/UtfTextUtils.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +/** + * Text utilities especially suited for UTF encoded bytes. + * + *

    + * When the character set is unknown, methods in this class assume UTF encoded + * text and try to detect the UTF variant (8/16/32 bits, big/little endian), + * using the BOM (if present) or an educated guess assuming the first character + * is in the range U+0000-U+00FF. This heuristic works for all latin text based + * formats, which includes Avro IDL, JSON, XML, etc. If the heuristic fails, + * UTF-8 is assumed. + *

    + * + * @see XML specification, + * appendix F: Autodetection of Character Encodings (Non-Normative) + */ +public class UtfTextUtils { + private static final int TRANSFER_BUFFER_SIZE = 4096; + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32. + */ + private static final Charset UTF_32 = Charset.forName("UTF-32"); + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32BE. + */ + private static final Charset UTF_32BE = Charset.forName("UTF-32BE"); + /** + * JVM standard character set (but that doesn't have a constant in + * {@link StandardCharsets}) for UTF-32LE. + */ + private static final Charset UTF_32LE = Charset.forName("UTF-32LE"); + + public static String asString(byte[] bytes, Charset charset) { + if (charset == null) { + charset = detectUtfCharset(bytes); + } + return skipBOM(new String(bytes, charset)); + } + + /** + * Reads the specified input stream as text. If {@code charset} is {@code null}, + * the method will assume UTF encoded text and attempt to detect the appropriate + * charset. 
+ * + * @param input the input to read + * @param charset the character set of the input, if known + * @return all bytes, read into a string + * @throws IOException when reading the input fails for some reason + */ + public static String readAllBytes(InputStream input, Charset charset) throws IOException { + if (charset == null) { + input = ensureMarkSupport(input); + input.mark(4); + byte[] buffer = new byte[4]; + int bytesRead = fillBuffer(input, buffer); + input.reset(); + + charset = detectUtfCharset0(buffer, bytesRead); + + if (charset == null) { + throw new IOException("Unsupported UCS-4 variant (neither UTF-32BE nor UTF32-LE)"); + } + } + Reader reader = new InputStreamReader(input, charset); + return readAllChars(reader); + } + + private static InputStream ensureMarkSupport(InputStream input) { + if (input.markSupported()) { + return input; + } else { + return new BufferedInputStream(input); + } + } + + private static int fillBuffer(InputStream in, byte[] buf) throws IOException { + int remaining = buf.length; + int offset = 0; + while (remaining > 0) { + int bytesRead = in.read(buf, offset, remaining); + // As remaining > 0, bytesRead is either -1 or positive + if (bytesRead == -1) { + break; + } + offset += bytesRead; + remaining -= bytesRead; + } + return offset; + } + + public static String readAllChars(Reader input) throws IOException { + StringBuilder buffer = new StringBuilder(); + char[] charBuffer = new char[TRANSFER_BUFFER_SIZE]; + int charsRead; + while ((charsRead = input.read(charBuffer, 0, TRANSFER_BUFFER_SIZE)) >= 0) { + buffer.append(charBuffer, 0, charsRead); + } + return skipBOM(buffer); + } + + private static String skipBOM(CharSequence buffer) { + if (buffer.charAt(0) == '\uFEFF') { + return buffer.subSequence(1, buffer.length()).toString(); + } + return buffer.toString(); + } + + /** + * Assuming UTF encoded bytes, detect the UTF variant (8/16/32 bits, big/little + * endian). + * + *

    + * To ensure the most accurate detection, the algorithm requires at least 4 + * bytes. One should only provide less than 4 bytes of data if that is all there + * is. + *

    + * + *

    + * Detection is certain when a byte order mark (BOM) is used. Otherwise a + * heuristic is used, which works when the first character is from the first 256 + * characters from the BMP (U+0000-U+00FF). This works for all latin-based + * textual formats, like Avro IDL, JSON, YAML, XML, etc. + *

    + * + * @param firstFewBytes the first few bytes of the text to detect the character + * set of + * @return the character set to use + */ + public static Charset detectUtfCharset(byte[] firstFewBytes) { + Charset detectedCharset = detectUtfCharset0(firstFewBytes, firstFewBytes.length); + if (detectedCharset == null) { + throw new IllegalArgumentException("Unsupported UCS-4 variant (neither UTF-32BE nor UTF32-LE)"); + } + return detectedCharset; + } + + private static Charset detectUtfCharset0(byte[] firstFewBytes, int numBytes) { + // spotless:off + /* + * Lookup table, adapted from https://www.w3.org/TR/xml/#sec-guessing + * It omits non-UTF encodings (the 2nd and 3rd rows from the end). + * Note that the order (with respect to UTF-32 & UTF-16) is important! + * + * (the non-zero bytes encode the byte order mark, BOM) + * + * Match the 'magic bytes' in order, and take the first match: + * 00 00 FE FF -> UTF-32 (be) + * FF FE 00 00 -> UTF-32 (le) + * 00 00 FF FE -> unsupported UCS-4 (byte order 2143) + * FE FF 00 00 -> unsupported UCS-4 (byte order 3412) + * FE FF __ __ -> UTF-16 (be) + * FF FE __ __ -> UTF-16 (le) + * EF BB BF __ -> UTF-8 + * 00 00 00 __ -> UTF-32BE + * __ 00 00 00 -> UTF-32LE + * 00 00 __ 00 -> unsupported UCS-4 (byte order 2143) + * 00 __ 00 00 -> unsupported UCS-4 (byte order 3412) + * 00 __ __ __ -> UTF-16BE + * __ 00 __ __ -> UTF-16LE + * __ __ __ __ -> UTF-8 (fallback) + */ + // spotless:on + int quad = quad(firstFewBytes, numBytes); + int word = quad >>> 16; + if (numBytes > 3 && (quad == 0x0000FEFF || quad == 0xFFFE0000)) { + // With BOM: UTF-32 (Charset handles BOM & endianness) + return UTF_32; + } else if (numBytes > 3 && (quad == 0x0000FFFE || quad == 0xFEFF0000)) { + // With BOM: unsupported UCS-4 encoding (byte order 2143 resp. 
3412) + return null; + } else if (numBytes > 1 && (word == 0xFEFF || word == 0xFFFE)) { + // With BOM: UTF-16 (Charset handles BOM & endianness) + return StandardCharsets.UTF_16; + } else if (numBytes > 2 && quad >>> 8 == 0xEFBBBF) { + // With BOM: UTF-8 (Charset does not handle a BOM, so our caller must skip it) + return StandardCharsets.UTF_8; + } else if (numBytes > 3 && (quad & 0xFFFFFF00) == 0) { + // Without BOM (i.e., a guess) + return UTF_32BE; + } else if (numBytes > 3 && (quad & 0x00FFFFFF) == 0) { + // Without BOM (i.e., a guess) + return UTF_32LE; + } else if (numBytes > 3 && (quad & 0xFFFF00FF) == 0 || (quad & 0xFF00FFFF) == 0) { + // Without BOM (i.e., a guess): unsupported UCS-4 encoding (byte order 2143 + // resp. 3412) + return null; + } else if (numBytes > 1 && (word & 0xFF00) == 0) { + // Without BOM (i.e., a guess) + return StandardCharsets.UTF_16BE; + } else if (numBytes > 1 && (word & 0x00FF) == 0) { + // Without BOM (i.e., a guess) + return StandardCharsets.UTF_16LE; + } else { + // Fallback + return StandardCharsets.UTF_8; + } + } + + private static int quad(byte[] bytes, int length) { + int quad = 0xFFFFFFFF; + switch (length) { + default: + quad = (quad & 0xFFFFFF00) | (bytes[3] & 0xFF); + // Fallthrough + case 3: + quad = (quad & 0xFFFF00FF) | (bytes[2] & 0xFF) << 8; + // Fallthrough + case 2: + quad = (quad & 0xFF00FFFF) | (bytes[1] & 0xFF) << 16; + // Fallthrough + case 1: + quad = (quad & 0x00FFFFFF) | (bytes[0] & 0xFF) << 24; + // Fallthrough + case 0: + break; + } + return quad; + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java index a57cb49ac13..565d8e7ed36 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java @@ -22,10 +22,10 @@ import java.lang.ref.WeakReference; import java.util.Collection; import 
java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; /** * Implements a combination of WeakHashMap and IdentityHashMap. Useful for @@ -41,7 +41,7 @@ */ public class WeakIdentityHashMap implements Map { private final ReferenceQueue queue = new ReferenceQueue<>(); - private Map backingStore = new HashMap<>(); + private Map backingStore = new ConcurrentHashMap<>(); public WeakIdentityHashMap() { } diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java index 1a822899f97..02a3872d43f 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/internal/JacksonUtils.java @@ -18,6 +18,7 @@ package org.apache.avro.util.internal; import java.io.IOException; +import java.io.UncheckedIOException; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.charset.StandardCharsets; @@ -75,7 +76,7 @@ static void toJson(Object datum, JsonGenerator generator) throws IOException { } generator.writeEndArray(); } else if (datum instanceof byte[]) { // bytes, fixed - generator.writeString(new String((byte[]) datum, StandardCharsets.ISO_8859_1)); + generator.writeBinary((byte[]) datum);// writeString(new String((byte[]) datum, StandardCharsets.ISO_8859_1)); } else if (datum instanceof CharSequence || datum instanceof Enum) { // string, enum generator.writeString(datum.toString()); } else if (datum instanceof Double) { // double @@ -136,10 +137,23 @@ public static Object toObject(JsonNode jsonNode, Schema schema) { return jsonNode.asDouble(); } } else if (jsonNode.isDouble() || jsonNode.isFloat()) { - if (schema == null || schema.getType().equals(Schema.Type.DOUBLE)) { - return jsonNode.asDouble(); - } else if (schema.getType().equals(Schema.Type.FLOAT)) { - 
return (float) jsonNode.asDouble(); + if (schema != null) { + if (schema.getType().equals(Schema.Type.DOUBLE)) { + return jsonNode.doubleValue(); + } else if (schema.getType().equals(Schema.Type.FLOAT)) { + return jsonNode.floatValue(); + } + } else if (jsonNode.isDouble()) { + return jsonNode.doubleValue(); + } else { + return jsonNode.floatValue(); + } + } else if (jsonNode.isBinary()) { + try { + return jsonNode.binaryValue(); + } catch (IOException ex) { + // only for TextNode, so, can't happen with binaryNode. + throw new UncheckedIOException(ex); } } else if (jsonNode.isTextual()) { if (schema == null || schema.getType().equals(Schema.Type.STRING) || schema.getType().equals(Schema.Type.ENUM)) { @@ -175,7 +189,7 @@ public static Object toObject(JsonNode jsonNode, Schema schema) { /** * Convert an object into a map - * + * * @param datum The object * @return Its Map representation */ diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java new file mode 100644 index 00000000000..70e2e9f3b30 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java @@ -0,0 +1,121 @@ +/* + * Copyright 2002-2020 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +/** + * Assertion utility class that assists in validating arguments. + * + *

    + * Useful for identifying programmer errors early and clearly at runtime. + * + *

    + * For example, if the contract of a public method states it does not allow + * {@code null} arguments, {@code Assert} can be used to validate that contract. + * Doing this clearly indicates a contract violation when it occurs and protects + * the class's invariants. + * + *

    + * Typically used to validate method arguments rather than configuration + * properties, to check for cases that are usually programmer errors rather than + * configuration errors. In contrast to configuration initialization code, there + * is usually no point in falling back to defaults in such methods. + * + *

    + * This class is similar to JUnit's assertion library. If an argument value is + * deemed invalid, an {@link IllegalArgumentException} is thrown (typically). + * For example: + * + *

    + * Assert.notNull(clazz, "The class must not be null");
    + * Assert.isTrue(i > 0, "The value must be greater than zero");
    + * 
    + * + *

    + * Mainly for internal use within the framework; for a more comprehensive suite + * of assertion utilities consider {@code org.apache.commons.lang3.Validate} + * from Apache Commons + * Lang, Google Guava's Preconditions, + * or similar third-party libraries. + * + * @author Keith Donald + * @author Juergen Hoeller + * @author Sam Brannen + * @author Colin Sampaleanu + * @author Rob Harrop + * @since 1.1.2 + */ +class Assert { + private Assert() { + } + + /** + * Assert a boolean expression, throwing an {@code IllegalStateException} if the + * expression evaluates to {@code false}. + * + *

    +   * Assert.state(id == null, "The id property must not already be initialized");
    +   * 
    + * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalStateException if {@code expression} is {@code false} + */ + public static void state(boolean expression, String message) { + if (!expression) { + throw new IllegalStateException(message); + } + } + + /** + * Assert a boolean expression, throwing an {@code IllegalArgumentException} if + * the expression evaluates to {@code false}. + * + *
    +   * Assert.isTrue(i > 0, "The value must be greater than zero");
    +   * 
    + * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if {@code expression} is {@code false} + */ + public static void isTrue(boolean expression, String message) { + if (!expression) { + throw new IllegalArgumentException(message); + } + } + + /** + * Assert that an object is not {@code null}. + * + *
    +   * Assert.notNull(clazz, "The class must not be null");
    +   * 
    + * + * @param object the object to check + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if the object is {@code null} + */ + public static void notNull(@Nullable Object object, String message) { + if (object == null) { + throw new IllegalArgumentException(message); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..1a137cf2101 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java @@ -0,0 +1,1111 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.lang.reflect.Array; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; + +/** + * A {@link ConcurrentHashMap} that uses {@link ReferenceType#SOFT soft} or + * {@linkplain ReferenceType#WEAK weak} references for both {@code keys} and + * {@code values}. + * + *

    + * This class can be used as an alternative to + * {@code Collections.synchronizedMap(new WeakHashMap>())} in + * order to support better performance when accessed concurrently. This + * implementation follows the same design constraints as + * {@link ConcurrentHashMap} with the exception that {@code null} values and + * {@code null} keys are supported. + * + *

    + * NOTE: The use of references means that there is no guarantee that + * items placed into the map will be subsequently available. The garbage + * collector may discard references at any time, so it may appear that an + * unknown thread is silently removing entries. + * + *

    + * If not explicitly specified, this implementation will use + * {@linkplain SoftReference soft entry references}. + * + * @param the key type + * @param the value type + * @author Phillip Webb + * @author Juergen Hoeller + * @since 3.2 + */ +public class ConcurrentReferenceHashMap extends AbstractMap implements ConcurrentMap { + + private static final int DEFAULT_INITIAL_CAPACITY = 16; + + private static final float DEFAULT_LOAD_FACTOR = 0.75f; + + private static final int DEFAULT_CONCURRENCY_LEVEL = 16; + + private static final ReferenceType DEFAULT_REFERENCE_TYPE = ReferenceType.SOFT; + + private static final int MAXIMUM_CONCURRENCY_LEVEL = 1 << 16; + + private static final int MAXIMUM_SEGMENT_SIZE = 1 << 30; + + /** + * Array of segments indexed using the high order bits from the hash. + */ + private final Segment[] segments; + + /** + * When the average number of references per table exceeds this value resize + * will be attempted. + */ + private final float loadFactor; + + /** + * The reference type: SOFT or WEAK. + */ + private final ReferenceType referenceType; + + /** + * The shift value used to calculate the size of the segments array and an index + * from the hash. + */ + private final int shift; + + /** + * Late binding entry set. + */ + @Nullable + private volatile Set> entrySet; + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + */ + public ConcurrentReferenceHashMap() { + this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + */ + public ConcurrentReferenceHashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. 
+ * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of references + * per table exceeds this value resize will be attempted + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor) { + this(initialCapacity, loadFactor, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, int concurrencyLevel) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param referenceType the reference type used for entries (soft or weak) + */ + public ConcurrentReferenceHashMap(int initialCapacity, ReferenceType referenceType) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, referenceType); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of + * references per table exceeds this value, resize will + * be attempted. + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) { + this(initialCapacity, loadFactor, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. 
When the average number of + * references per table exceeds this value, resize will + * be attempted. + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + * @param referenceType the reference type used for entries (soft or weak) + */ + @SuppressWarnings("unchecked") + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel, + ReferenceType referenceType) { + + Assert.isTrue(initialCapacity >= 0, "Initial capacity must not be negative"); + Assert.isTrue(loadFactor > 0f, "Load factor must be positive"); + Assert.isTrue(concurrencyLevel > 0, "Concurrency level must be positive"); + Assert.notNull(referenceType, "Reference type must not be null"); + this.loadFactor = loadFactor; + this.shift = calculateShift(concurrencyLevel, MAXIMUM_CONCURRENCY_LEVEL); + int size = 1 << this.shift; + this.referenceType = referenceType; + int roundedUpSegmentCapacity = (int) ((initialCapacity + size - 1L) / size); + int initialSize = 1 << calculateShift(roundedUpSegmentCapacity, MAXIMUM_SEGMENT_SIZE); + Segment[] segments = (Segment[]) Array.newInstance(Segment.class, size); + int resizeThreshold = (int) (initialSize * getLoadFactor()); + for (int i = 0; i < segments.length; i++) { + segments[i] = new Segment(initialSize, resizeThreshold); + } + this.segments = segments; + } + + protected final float getLoadFactor() { + return this.loadFactor; + } + + protected final int getSegmentsSize() { + return this.segments.length; + } + + protected final Segment getSegment(int index) { + return this.segments[index]; + } + + /** + * Factory method that returns the {@link ReferenceManager}. This method will be + * called once for each {@link Segment}. + * + * @return a new reference manager + */ + protected ReferenceManager createReferenceManager() { + return new ReferenceManager(); + } + + /** + * Get the hash for a given object, apply an additional hash function to reduce + * collisions. 
This implementation uses the same Wang/Jenkins algorithm as + * {@link ConcurrentHashMap}. Subclasses can override to provide alternative + * hashing. + * + * @param o the object to hash (may be null) + * @return the resulting hash code + */ + protected int getHash(@Nullable Object o) { + int hash = (o != null ? o.hashCode() : 0); + hash += (hash << 15) ^ 0xffffcd7d; + hash ^= (hash >>> 10); + hash += (hash << 3); + hash ^= (hash >>> 6); + hash += (hash << 2) + (hash << 14); + hash ^= (hash >>> 16); + return hash; + } + + @Override + @Nullable + public V get(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? entry.getValue() : null); + } + + @Override + @Nullable + public V getOrDefault(@Nullable Object key, @Nullable V defaultValue) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? entry.getValue() : defaultValue); + } + + @Override + public boolean containsKey(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null && ObjectUtils.nullSafeEquals(entry.getKey(), key)); + } + + /** + * Return a {@link Reference} to the {@link Entry} for the specified + * {@code key}, or {@code null} if not found. 
+ * + * @param key the key (can be {@code null}) + * @param restructure types of restructure allowed during this call + * @return the reference, or {@code null} if not found + */ + @Nullable + protected final Reference getReference(@Nullable Object key, Restructure restructure) { + int hash = getHash(key); + return getSegmentForHash(hash).getReference(key, hash, restructure); + } + + @Override + @Nullable + public V put(@Nullable K key, @Nullable V value) { + return put(key, value, true); + } + + @Override + @Nullable + public V putIfAbsent(@Nullable K key, @Nullable V value) { + return put(key, value, false); + } + + @Nullable + private V put(@Nullable final K key, @Nullable final V value, final boolean overwriteExisting) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.RESIZE) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + if (entry != null) { + V oldValue = entry.getValue(); + if (overwriteExisting) { + entry.setValue(value); + } + return oldValue; + } + Assert.state(entries != null, "No entries segment"); + entries.add(value); + return null; + } + }); + } + + @Override + @Nullable + public V remove(@Nullable Object key) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + if (ref != null) { + ref.release(); + } + return entry.value; + } + return null; + } + }); + } + + @Override + public boolean remove(@Nullable Object key, final @Nullable Object value) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), value)) { + if (ref != null) { + ref.release(); + } + return true; + } + return false; + } + }); + 
return (Boolean.TRUE.equals(result)); + } + + @Override + public boolean replace(@Nullable K key, final @Nullable V oldValue, final @Nullable V newValue) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), oldValue)) { + entry.setValue(newValue); + return true; + } + return false; + } + }); + return (Boolean.TRUE.equals(result)); + } + + @Override + @Nullable + public V replace(@Nullable K key, final @Nullable V value) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + V oldValue = entry.getValue(); + entry.setValue(value); + return oldValue; + } + return null; + } + }); + } + + @Override + public void clear() { + for (Segment segment : this.segments) { + segment.clear(); + } + } + + /** + * Remove any entries that have been garbage collected and are no longer + * referenced. Under normal circumstances garbage collected entries are + * automatically purged as items are added or removed from the Map. This method + * can be used to force a purge, and is useful when the Map is read frequently + * but updated less often. 
+ */ + public void purgeUnreferencedEntries() { + for (Segment segment : this.segments) { + segment.restructureIfNecessary(false); + } + } + + @Override + public int size() { + int size = 0; + for (Segment segment : this.segments) { + size += segment.getCount(); + } + return size; + } + + @Override + public boolean isEmpty() { + for (Segment segment : this.segments) { + if (segment.getCount() > 0) { + return false; + } + } + return true; + } + + @Override + public Set> entrySet() { + Set> entrySet = this.entrySet; + if (entrySet == null) { + entrySet = new EntrySet(); + this.entrySet = entrySet; + } + return entrySet; + } + + @Nullable + private T doTask(@Nullable Object key, Task task) { + int hash = getHash(key); + return getSegmentForHash(hash).doTask(hash, key, task); + } + + private Segment getSegmentForHash(int hash) { + return this.segments[(hash >>> (32 - this.shift)) & (this.segments.length - 1)]; + } + + /** + * Calculate a shift value that can be used to create a power-of-two value + * between the specified maximum and minimum values. + * + * @param minimumValue the minimum value + * @param maximumValue the maximum value + * @return the calculated shift (use {@code 1 << shift} to obtain a value) + */ + protected static int calculateShift(int minimumValue, int maximumValue) { + int shift = 0; + int value = 1; + while (value < minimumValue && value < maximumValue) { + value <<= 1; + shift++; + } + return shift; + } + + /** + * Various reference types supported by this map. + */ + public enum ReferenceType { + + /** + * Use {@link SoftReference SoftReferences}. + */ + SOFT, + + /** + * Use {@link WeakReference WeakReferences}. + */ + WEAK + } + + /** + * A single segment used to divide the map to allow better concurrent + * performance. 
+ */ + @SuppressWarnings("serial") + protected final class Segment extends ReentrantLock { + + private final ReferenceManager referenceManager; + + private final int initialSize; + + /** + * Array of references indexed using the low order bits from the hash. This + * property should only be set along with {@code resizeThreshold}. + */ + private volatile Reference[] references; + + /** + * The total number of references contained in this segment. This includes + * chained references and references that have been garbage collected but not + * purged. + */ + private final AtomicInteger count = new AtomicInteger(); + + /** + * The threshold when resizing of the references should occur. When + * {@code count} exceeds this value references will be resized. + */ + private int resizeThreshold; + + public Segment(int initialSize, int resizeThreshold) { + this.referenceManager = createReferenceManager(); + this.initialSize = initialSize; + this.references = createReferenceArray(initialSize); + this.resizeThreshold = resizeThreshold; + } + + @Nullable + public Reference getReference(@Nullable Object key, int hash, Restructure restructure) { + if (restructure == Restructure.WHEN_NECESSARY) { + restructureIfNecessary(false); + } + if (this.count.get() == 0) { + return null; + } + // Use a local copy to protect against other threads writing + Reference[] references = this.references; + int index = getIndex(hash, references); + Reference head = references[index]; + return findInChain(head, key, hash); + } + + /** + * Apply an update operation to this segment. The segment will be locked during + * the update. 
+ * + * @param hash the hash of the key + * @param key the key + * @param task the update operation + * @return the result of the operation + */ + @Nullable + public T doTask(final int hash, @Nullable final Object key, final Task task) { + boolean resize = task.hasOption(TaskOption.RESIZE); + if (task.hasOption(TaskOption.RESTRUCTURE_BEFORE)) { + restructureIfNecessary(resize); + } + if (task.hasOption(TaskOption.SKIP_IF_EMPTY) && this.count.get() == 0) { + return task.execute(null, null, null); + } + lock(); + try { + final int index = getIndex(hash, this.references); + final Reference head = this.references[index]; + Reference ref = findInChain(head, key, hash); + Entry entry = (ref != null ? ref.get() : null); + Entries entries = value -> { + @SuppressWarnings("unchecked") + Entry newEntry = new Entry<>((K) key, value); + Reference newReference = Segment.this.referenceManager.createReference(newEntry, hash, head); + Segment.this.references[index] = newReference; + Segment.this.count.incrementAndGet(); + }; + return task.execute(ref, entry, entries); + } finally { + unlock(); + if (task.hasOption(TaskOption.RESTRUCTURE_AFTER)) { + restructureIfNecessary(resize); + } + } + } + + /** + * Clear all items from this segment. + */ + public void clear() { + if (this.count.get() == 0) { + return; + } + lock(); + try { + this.references = createReferenceArray(this.initialSize); + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + this.count.set(0); + } finally { + unlock(); + } + } + + /** + * Restructure the underlying data structure when it becomes necessary. This + * method can increase the size of the references table as well as purge any + * references that have been garbage collected. 
+ * + * @param allowResize if resizing is permitted + */ + private void restructureIfNecessary(boolean allowResize) { + int currCount = this.count.get(); + boolean needsResize = allowResize && (currCount > 0 && currCount >= this.resizeThreshold); + Reference ref = this.referenceManager.pollForPurge(); + if (ref != null || (needsResize)) { + restructure(allowResize, ref); + } + } + + private void restructure(boolean allowResize, @Nullable Reference ref) { + boolean needsResize; + lock(); + try { + int countAfterRestructure = this.count.get(); + Set> toPurge = Collections.emptySet(); + if (ref != null) { + toPurge = new HashSet<>(); + while (ref != null) { + toPurge.add(ref); + ref = this.referenceManager.pollForPurge(); + } + } + countAfterRestructure -= toPurge.size(); + + // Recalculate taking into account count inside lock and items that + // will be purged + needsResize = (countAfterRestructure > 0 && countAfterRestructure >= this.resizeThreshold); + boolean resizing = false; + int restructureSize = this.references.length; + if (allowResize && needsResize && restructureSize < MAXIMUM_SEGMENT_SIZE) { + restructureSize <<= 1; + resizing = true; + } + + // Either create a new table or reuse the existing one + Reference[] restructured = (resizing ? 
createReferenceArray(restructureSize) : this.references); + + // Restructure + for (int i = 0; i < this.references.length; i++) { + ref = this.references[i]; + if (!resizing) { + restructured[i] = null; + } + while (ref != null) { + if (!toPurge.contains(ref)) { + Entry entry = ref.get(); + if (entry != null) { + int index = getIndex(ref.getHash(), restructured); + restructured[index] = this.referenceManager.createReference(entry, ref.getHash(), restructured[index]); + } + } + ref = ref.getNext(); + } + } + + // Replace volatile members + if (resizing) { + this.references = restructured; + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + } + this.count.set(Math.max(countAfterRestructure, 0)); + } finally { + unlock(); + } + } + + @Nullable + private Reference findInChain(Reference ref, @Nullable Object key, int hash) { + Reference currRef = ref; + while (currRef != null) { + if (currRef.getHash() == hash) { + Entry entry = currRef.get(); + if (entry != null) { + K entryKey = entry.getKey(); + if (ObjectUtils.nullSafeEquals(entryKey, key)) { + return currRef; + } + } + } + currRef = currRef.getNext(); + } + return null; + } + + @SuppressWarnings({ "unchecked" }) + private Reference[] createReferenceArray(int size) { + return new Reference[size]; + } + + private int getIndex(int hash, Reference[] references) { + return (hash & (references.length - 1)); + } + + /** + * Return the size of the current references array. + */ + public int getSize() { + return this.references.length; + } + + /** + * Return the total number of references in this segment. + */ + public int getCount() { + return this.count.get(); + } + } + + /** + * A reference to an {@link Entry} contained in the map. Implementations are + * usually wrappers around specific Java reference implementations (e.g., + * {@link SoftReference}). 
+ * + * @param <K> the key type + * @param <V> the value type + */ + protected interface Reference { + + /** + * Return the referenced entry, or {@code null} if the entry is no longer + * available. + */ + @Nullable + Entry get(); + + /** + * Return the hash for the reference. + */ + int getHash(); + + /** + * Return the next reference in the chain, or {@code null} if none. + */ + @Nullable + Reference getNext(); + + /** + * Release this entry and ensure that it will be returned from + * {@code ReferenceManager#pollForPurge()}. + */ + void release(); + } + + /** + * A single map entry. + * + * @param <K> the key type + * @param <V> the value type + */ + protected static final class Entry implements Map.Entry { + + @Nullable + private final K key; + + @Nullable + private volatile V value; + + public Entry(@Nullable K key, @Nullable V value) { + this.key = key; + this.value = value; + } + + @Override + @Nullable + public K getKey() { + return this.key; + } + + @Override + @Nullable + public V getValue() { + return this.value; + } + + @Override + @Nullable + public V setValue(@Nullable V value) { + V previous = this.value; + this.value = value; + return previous; + } + + @Override + public String toString() { + return (this.key + "=" + this.value); + } + + @Override + @SuppressWarnings("rawtypes") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof Map.Entry)) { + return false; + } + Map.Entry otherEntry = (Map.Entry) other; + return (ObjectUtils.nullSafeEquals(getKey(), otherEntry.getKey()) + && ObjectUtils.nullSafeEquals(getValue(), otherEntry.getValue())); + } + + @Override + public int hashCode() { + return (ObjectUtils.nullSafeHashCode(this.key) ^ ObjectUtils.nullSafeHashCode(this.value)); + } + } + + /** + * A task that can be {@link Segment#doTask run} against a {@link Segment}. + */ + private abstract class Task { + + private final EnumSet options; + + public Task(TaskOption... 
options) { + this.options = (options.length == 0 ? EnumSet.noneOf(TaskOption.class) : EnumSet.of(options[0], options)); + } + + public boolean hasOption(TaskOption option) { + return this.options.contains(option); + } + + /** + * Execute the task. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @param entries access to the underlying entries + * @return the result of the task + * @see #execute(Reference, Entry) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + return execute(ref, entry); + } + + /** + * Convenience method that can be used for tasks that do not need access to + * {@link Entries}. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @return the result of the task + * @see #execute(Reference, Entry, Entries) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry) { + return null; + } + } + + /** + * Various options supported by a {@code Task}. + */ + private enum TaskOption { + + RESTRUCTURE_BEFORE, RESTRUCTURE_AFTER, SKIP_IF_EMPTY, RESIZE + } + + /** + * Allows a task access to {@link Segment} entries. + */ + private interface Entries { + + /** + * Add a new entry with the specified value. + * + * @param value the value to add + */ + void add(@Nullable V value); + } + + /** + * Internal entry-set implementation. + */ + private class EntrySet extends AbstractSet> { + + @Override + public Iterator> iterator() { + return new EntryIterator(); + } + + @Override + public boolean contains(@Nullable Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + Reference ref = ConcurrentReferenceHashMap.this.getReference(entry.getKey(), Restructure.NEVER); + Entry otherEntry = (ref != null ? 
ref.get() : null); + if (otherEntry != null) { + return ObjectUtils.nullSafeEquals(entry.getValue(), otherEntry.getValue()); + } + } + return false; + } + + @Override + public boolean remove(Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + return ConcurrentReferenceHashMap.this.remove(entry.getKey(), entry.getValue()); + } + return false; + } + + @Override + public int size() { + return ConcurrentReferenceHashMap.this.size(); + } + + @Override + public void clear() { + ConcurrentReferenceHashMap.this.clear(); + } + } + + /** + * Internal entry iterator implementation. + */ + private class EntryIterator implements Iterator> { + + private int segmentIndex; + + private int referenceIndex; + + @Nullable + private Reference[] references; + + @Nullable + private Reference reference; + + @Nullable + private Entry next; + + @Nullable + private Entry last; + + public EntryIterator() { + moveToNextSegment(); + } + + @Override + public boolean hasNext() { + getNextIfNecessary(); + return (this.next != null); + } + + @Override + public Entry next() { + getNextIfNecessary(); + if (this.next == null) { + throw new NoSuchElementException(); + } + this.last = this.next; + this.next = null; + return this.last; + } + + private void getNextIfNecessary() { + while (this.next == null) { + moveToNextReference(); + if (this.reference == null) { + return; + } + this.next = this.reference.get(); + } + } + + private void moveToNextReference() { + if (this.reference != null) { + this.reference = this.reference.getNext(); + } + while (this.reference == null && this.references != null) { + if (this.referenceIndex >= this.references.length) { + moveToNextSegment(); + this.referenceIndex = 0; + } else { + this.reference = this.references[this.referenceIndex]; + this.referenceIndex++; + } + } + } + + private void moveToNextSegment() { + this.reference = null; + this.references = null; + if (this.segmentIndex < ConcurrentReferenceHashMap.this.segments.length) { + 
this.references = ConcurrentReferenceHashMap.this.segments[this.segmentIndex].references; + this.segmentIndex++; + } + } + + @Override + public void remove() { + Assert.state(this.last != null, "No element to remove"); + ConcurrentReferenceHashMap.this.remove(this.last.getKey()); + this.last = null; + } + } + + /** + * The types of restructuring that can be performed. + */ + protected enum Restructure { + + WHEN_NECESSARY, NEVER + } + + /** + * Strategy class used to manage {@link Reference References}. This class can be + * overridden if alternative reference types need to be supported. + */ + protected class ReferenceManager { + + private final ReferenceQueue> queue = new ReferenceQueue<>(); + + /** + * Factory method used to create a new {@link Reference}. + * + * @param entry the entry contained in the reference + * @param hash the hash + * @param next the next reference in the chain, or {@code null} if none + * @return a new {@link Reference} + */ + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (ConcurrentReferenceHashMap.this.referenceType == ReferenceType.WEAK) { + return new WeakEntryReference<>(entry, hash, next, this.queue); + } + return new SoftEntryReference<>(entry, hash, next, this.queue); + } + + /** + * Return any reference that has been garbage collected and can be purged from + * the underlying structure or {@code null} if no references need purging. This + * method must be thread safe and ideally should not block when returning + * {@code null}. References should be returned once and only once. + * + * @return a reference to purge or {@code null} + */ + @SuppressWarnings("unchecked") + @Nullable + public Reference pollForPurge() { + return (Reference) this.queue.poll(); + } + } + + /** + * Internal {@link Reference} implementation for {@link SoftReference + * SoftReferences}. 
+ */ + private static final class SoftEntryReference extends SoftReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public SoftEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + + /** + * Internal {@link Reference} implementation for {@link WeakReference + * WeakReferences}. + */ + private static final class WeakEntryReference extends WeakReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public WeakEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java new file mode 100644 index 00000000000..a8e0c45180e --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java @@ -0,0 +1,320 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.apache.avro.util.ClassUtils; + +import java.util.Arrays; + +/** + * Miscellaneous object utility methods. + * + *

    + * Mainly for internal use within the framework. + * + *

    + * Thanks to Alex Ruiz for contributing several enhancements to this class! + * + * @author Juergen Hoeller + * @author Keith Donald + * @author Rod Johnson + * @author Rob Harrop + * @author Chris Beams + * @author Sam Brannen + * @see ClassUtils see CollectionUtils see StringUtils + * @since 19.03.2004 + */ +class ObjectUtils { + private ObjectUtils() { + } + + private static final int INITIAL_HASH = 7; + private static final int MULTIPLIER = 31; + + /** + * Determine whether the given array is empty: i.e. {@code null} or of zero + * length. + * + * @param array the array to check + */ + public static boolean isEmpty(@Nullable Object[] array) { + return (array == null || array.length == 0); + } + + // --------------------------------------------------------------------- + // Convenience methods for content-based equality/hash-code handling + // --------------------------------------------------------------------- + + /** + * Determine if the given objects are equal, returning {@code true} if both are + * {@code null} or {@code false} if only one is {@code null}. + *

    + * Compares arrays with {@code Arrays.equals}, performing an equality check + * based on the array elements rather than the array reference. + * + * @param o1 first Object to compare + * @param o2 second Object to compare + * @return whether the given objects are equal + * @see Object#equals(Object) + * @see Arrays#equals + */ + public static boolean nullSafeEquals(@Nullable Object o1, @Nullable Object o2) { + if (o1 == o2) { + return true; + } + if (o1 == null || o2 == null) { + return false; + } + if (o1.equals(o2)) { + return true; + } + if (o1.getClass().isArray() && o2.getClass().isArray()) { + return arrayEquals(o1, o2); + } + return false; + } + + /** + * Compare the given arrays with {@code Arrays.equals}, performing an equality + * check based on the array elements rather than the array reference. + * + * @param o1 first array to compare + * @param o2 second array to compare + * @return whether the given objects are equal + * @see #nullSafeEquals(Object, Object) + * @see Arrays#equals + */ + private static boolean arrayEquals(Object o1, Object o2) { + if (o1 instanceof Object[] && o2 instanceof Object[]) { + return Arrays.equals((Object[]) o1, (Object[]) o2); + } + if (o1 instanceof boolean[] && o2 instanceof boolean[]) { + return Arrays.equals((boolean[]) o1, (boolean[]) o2); + } + if (o1 instanceof byte[] && o2 instanceof byte[]) { + return Arrays.equals((byte[]) o1, (byte[]) o2); + } + if (o1 instanceof char[] && o2 instanceof char[]) { + return Arrays.equals((char[]) o1, (char[]) o2); + } + if (o1 instanceof double[] && o2 instanceof double[]) { + return Arrays.equals((double[]) o1, (double[]) o2); + } + if (o1 instanceof float[] && o2 instanceof float[]) { + return Arrays.equals((float[]) o1, (float[]) o2); + } + if (o1 instanceof int[] && o2 instanceof int[]) { + return Arrays.equals((int[]) o1, (int[]) o2); + } + if (o1 instanceof long[] && o2 instanceof long[]) { + return Arrays.equals((long[]) o1, (long[]) o2); + } + if (o1 instanceof short[] 
&& o2 instanceof short[]) { + return Arrays.equals((short[]) o1, (short[]) o2); + } + return false; + } + + /** + * Return a hash code for the given object; typically the value of + * {@code Object#hashCode()}. If the object is an array, this method will + * delegate to any of the {@code nullSafeHashCode} methods for arrays in this + * class. If the object is {@code null}, this method returns 0. + * + * @see Object#hashCode() + * @see #nullSafeHashCode(Object[]) + * @see #nullSafeHashCode(boolean[]) + * @see #nullSafeHashCode(byte[]) + * @see #nullSafeHashCode(char[]) + * @see #nullSafeHashCode(double[]) + * @see #nullSafeHashCode(float[]) + * @see #nullSafeHashCode(int[]) + * @see #nullSafeHashCode(long[]) + * @see #nullSafeHashCode(short[]) + */ + public static int nullSafeHashCode(@Nullable Object obj) { + if (obj == null) { + return 0; + } + if (obj.getClass().isArray()) { + if (obj instanceof Object[]) { + return nullSafeHashCode((Object[]) obj); + } + if (obj instanceof boolean[]) { + return nullSafeHashCode((boolean[]) obj); + } + if (obj instanceof byte[]) { + return nullSafeHashCode((byte[]) obj); + } + if (obj instanceof char[]) { + return nullSafeHashCode((char[]) obj); + } + if (obj instanceof double[]) { + return nullSafeHashCode((double[]) obj); + } + if (obj instanceof float[]) { + return nullSafeHashCode((float[]) obj); + } + if (obj instanceof int[]) { + return nullSafeHashCode((int[]) obj); + } + if (obj instanceof long[]) { + return nullSafeHashCode((long[]) obj); + } + if (obj instanceof short[]) { + return nullSafeHashCode((short[]) obj); + } + } + return obj.hashCode(); + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. 
+ */ + public static int nullSafeHashCode(@Nullable Object[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (Object element : array) { + hash = MULTIPLIER * hash + nullSafeHashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable boolean[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (boolean element : array) { + hash = MULTIPLIER * hash + Boolean.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable byte[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (byte element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable char[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (char element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable double[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (double element : array) { + hash = MULTIPLIER * hash + Double.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. 
+ */ + public static int nullSafeHashCode(@Nullable float[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (float element : array) { + hash = MULTIPLIER * hash + Float.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable int[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (int element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable long[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (long element : array) { + hash = MULTIPLIER * hash + Long.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable short[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (short element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } +} diff --git a/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory b/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory new file mode 100644 index 00000000000..06f140bde45 --- /dev/null +++ b/lang/java/avro/src/main/resources/META-INF/services/org.apache.avro.SchemaFormatterFactory @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.avro.JsonSchemaFormatterFactory +org.apache.avro.CanonicalSchemaFormatterFactory diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomType.java b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java new file mode 100644 index 00000000000..140ac901b0b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro; + +import java.util.Objects; + +public final class CustomType { + private final String name; + + public CustomType(CharSequence name) { + this.name = name.toString(); + } + + public String getName() { + return name; + } + + @Override + public int hashCode() { + return Objects.hashCode(name); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof CustomType && name.equals(((CustomType) obj).name); + } + + @Override + public String toString() { + return "CustomType{name='" + name + "'}"; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java new file mode 100644 index 00000000000..de8fea02ca4 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro; + +public class CustomTypeConverter extends Conversion { + private static final CustomTypeLogicalTypeFactory logicalTypeFactory = new CustomTypeLogicalTypeFactory(); + + @Override + public Class getConvertedType() { + return CustomType.class; + } + + @Override + public String getLogicalTypeName() { + return logicalTypeFactory.getTypeName(); + } + + @Override + public Schema getRecommendedSchema() { + return Schema.create(Schema.Type.STRING); + } + + @Override + public CustomType fromCharSequence(CharSequence value, Schema schema, LogicalType type) { + return new CustomType(value); + } + + @Override + public CharSequence toCharSequence(CustomType value, Schema schema, LogicalType type) { + return value.getName(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java new file mode 100644 index 00000000000..3e121e0242c --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.avro;
+
+/**
+ * Test-only logical type factory that always produces a logical type named
+ * {@code "custom"}.
+ */
+public class CustomTypeLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory {
+  /** Name given to every logical type created by this factory. */
+  private static final String TYPE_NAME = "custom";
+
+  /**
+   * Creates a logical type named after {@link #getTypeName()}; the schema
+   * argument is not inspected.
+   */
+  @Override
+  public LogicalType fromSchema(Schema schema) {
+    final String typeName = getTypeName();
+    return new LogicalType(typeName);
+  }
+
+  /**
+   * @return the fixed logical type name, {@code "custom"}
+   */
+  @Override
+  public String getTypeName() {
+    return TYPE_NAME;
+  }
+}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java
new file mode 100644
index 00000000000..db7dc640521
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+
+/**
+ * Schema parser stub for tests. It reacts to three magic input texts and
+ * reports every other input as unrecognized by returning {@code null}.
+ */
+public class DummySchemaParser implements FormattedSchemaParser {
+  /**
+   * Logger for this class.
+   */
+  private static final Logger LOGGER = LoggerFactory.getLogger(DummySchemaParser.class);
+
+  /** Input text that parses successfully to {@link #FIXED_SCHEMA}. */
+  public static final String SCHEMA_TEXT_ONE = "one";
+  /** Schema returned (and registered in the parse context) for {@link #SCHEMA_TEXT_ONE}. */
+  public static final Schema FIXED_SCHEMA = Schema.createFixed("DummyOne", null, "tests", 42);
+  /** Input text that triggers a {@link SchemaParseException}. */
+  public static final String SCHEMA_TEXT_ERROR = "error";
+  /** Input text that triggers an {@link IOException}. */
+  public static final String SCHEMA_TEXT_IO_ERROR = "io-error";
+  /** Message of the exception thrown for {@link #SCHEMA_TEXT_ERROR}. */
+  public static final String ERROR_MESSAGE = "Syntax error";
+  /** Message of the exception thrown for {@link #SCHEMA_TEXT_IO_ERROR}. */
+  public static final String IO_ERROR_MESSAGE = "I/O error";
+
+  /**
+   * Handles the magic inputs; anything else is not recognized.
+   *
+   * @param parseContext    context that receives {@link #FIXED_SCHEMA} on success
+   * @param baseUri         not used by this parser
+   * @param formattedSchema the text to "parse"
+   * @return {@link #FIXED_SCHEMA} for {@link #SCHEMA_TEXT_ONE}, or {@code null}
+   *         when the text is not one of the magic inputs
+   * @throws SchemaParseException for {@link #SCHEMA_TEXT_ERROR}
+   * @throws IOException          for {@link #SCHEMA_TEXT_IO_ERROR}
+   */
+  @Override
+  public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
+      throws IOException, SchemaParseException {
+    LOGGER.debug("Using DummySchemaParser for {}", formattedSchema);
+
+    if (SCHEMA_TEXT_ONE.contentEquals(formattedSchema)) {
+      parseContext.put(FIXED_SCHEMA);
+      return FIXED_SCHEMA;
+    }
+    if (SCHEMA_TEXT_ERROR.contentEquals(formattedSchema)) {
+      throw new SchemaParseException(ERROR_MESSAGE);
+    }
+    if (SCHEMA_TEXT_IO_ERROR.contentEquals(formattedSchema)) {
+      throw new IOException(IO_ERROR_MESSAGE);
+    }
+
+    // Syntax not recognized
+    return null;
+  }
+}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java
new file mode 100644
index 00000000000..d40a6cc9d83
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.apache.avro.util.SchemaResolver;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.EnumSet;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests for {@link ParseContext}: lookup of primitive and named schemas, and
+ * the rules enforced by {@code put}.
+ */
+public class ParseContextTest {
+  // Fixtures, rebuilt before every test by setUp().
+  Schema fooRecord, fooRecordCopy, barEnum, bazFixed, mehRecord;
+  // Context pre-populated with fooRecord, barEnum and bazFixed.
+  ParseContext fooBarBaz;
+
+  /** Creates fresh schemas and a context holding three of them. */
+  @BeforeEach
+  public void setUp() throws Exception {
+    fooRecord = SchemaBuilder.record("ns.Foo").fields().endRecord();
+    fooRecordCopy = SchemaBuilder.record("ns.Foo").fields().endRecord();
+    barEnum = SchemaBuilder.enumeration("ns.Bar").symbols();
+    bazFixed = SchemaBuilder.fixed("ns.Baz").size(8);
+    mehRecord = SchemaBuilder.record("ns.Meh").fields().endRecord();
+
+    fooBarBaz = new ParseContext();
+    fooBarBaz.put(fooRecord);
+    fooBarBaz.put(barEnum);
+    fooBarBaz.put(bazFixed);
+  }
+
+  /** A new context knows every primitive type name and no complex type name. */
+  @Test
+  public void checkNewNameContextContainsPrimitives() {
+    // The element type is required: iterating a raw EnumSet yields Object, not Schema.Type.
+    EnumSet<Schema.Type> complexTypes = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, Schema.Type.FIXED,
+        Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP);
+    EnumSet<Schema.Type> primitives = EnumSet.complementOf(complexTypes);
+
+    ParseContext context = new ParseContext();
+    for (Schema.Type type : complexTypes) {
+      assertFalse(context.contains(type.getName()));
+    }
+    for (Schema.Type type : primitives) {
+      assertTrue(context.contains(type.getName()));
+    }
+  }
+
+  /**
+   * Two lookups of the same primitive return equal but distinct instances, so
+   * adding a property to one does not affect the other.
+   */
+  @Test
+  public void primitivesAreNotCached() {
+    EnumSet<Schema.Type> primitives = EnumSet.complementOf(EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM,
+        Schema.Type.FIXED, Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP));
+
+    ParseContext context = new ParseContext();
+    for (Schema.Type type : primitives) {
+      Schema first = context.find(type.getName(), null);
+      Schema second = context.find(type.getName(), null);
+      assertEquals(first, second);
+      assertNotSame(first, second);
+
+      first.addProp("logicalType", "brick");
+      assertNotEquals(first, second);
+    }
+  }
+
+  /** Looking up an unknown name yields an unresolved schema carrying that name. */
+  @Test
+  public void validateSchemaRetrievalFailure() {
+    Schema unknown = Schema.createFixed("unknown", null, null, 0);
+
+    Schema unresolved = fooBarBaz.find("unknown", null);
+    assertTrue(SchemaResolver.isUnresolvedSchema(unresolved));
+    assertEquals(unknown.getFullName(), SchemaResolver.getUnresolvedSchemaName(unresolved));
+  }
+
+  /** A registered schema is found by its full name. */
+  @Test
+  public void validateSchemaRetrievalByFullName() {
+    assertSame(fooRecord, fooBarBaz.find(fooRecord.getFullName(), null));
+  }
+
+  /** A registered schema is found by simple name plus namespace. */
+  @Test
+  public void validateSchemaRetrievalBySimpleName() {
+    assertSame(fooRecord, fooBarBaz.find(fooRecord.getName(), fooRecord.getNamespace()));
+  }
+
+  /** Putting the same schema twice is accepted and leaves lookups unchanged. */
+  @Test
+  public void verifyPutIsIdempotent() {
+    ParseContext context = new ParseContext();
+    assertNotEquals(fooRecord, context.find(fooRecord.getFullName(), null));
+
+    context.put(fooRecord);
+    assertEquals(fooRecord, context.find(fooRecord.getFullName(), null));
+
+    context.put(fooRecord);
+    assertEquals(fooRecord, context.find(fooRecord.getFullName(), null));
+  }
+
+  /** Putting an unnamed (primitive) schema is rejected. */
+  @Test
+  public void verifyPutOnlyAcceptsNamedSchemas() {
+    ParseContext context = new ParseContext();
+    assertThrows(AvroRuntimeException.class, () -> context.put(Schema.create(Schema.Type.STRING)));
+  }
+
+  /** Putting a different schema under an already registered name is rejected. */
+  @Test
+  public void verifyAddDoesNotAllowChangingSchemas() {
+    Schema fooEnum = SchemaBuilder.enumeration("ns.Foo").symbols();
+
+    ParseContext context = new ParseContext();
+    context.put(fooRecord);
+    assertThrows(AvroRuntimeException.class, () -> context.put(fooEnum));
+  }
+}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java
new file mode 100644
index 00000000000..00b76e28b94
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaFormatterTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Tests for {@link SchemaFormatter} lookup by format name and for the output of
+ * the {@code json} and {@code canonical} formats.
+ */
+class SchemaFormatterTest {
+
+  /**
+   * The JSON factory reports the format name {@code "json"}; the two factories
+   * declared at the bottom of this class are expected to be rejected.
+   */
+  @Test
+  void validateDefaultNaming() {
+    assertEquals("json", new JsonSchemaFormatterFactory().formatName());
+    assertThrows(AvroRuntimeException.class, () -> new Wrongly_Named_SchemaFormatterFactory().formatName());
+    assertThrows(AvroRuntimeException.class, () -> new SchemaFormatterFactoryWithOddName().formatName());
+  }
+
+  /** {@code "json"} without a variant formats the same as {@code "json/pretty"}. */
+  @Test
+  void validateJsonFormatDefaultsToPrettyPrinting() {
+    Schema schema = Schema.createFixed("ns.Fixed", null, null, 16);
+    assertEquals(SchemaFormatter.format("json", schema), SchemaFormatter.format("json/pretty", schema));
+  }
+
+  /** {@code "json/pretty"} yields multi-line JSON with members indented by two spaces. */
+  @Test
+  void validateSupportForPrettyJsonFormat() {
+    Schema schema = Schema.createFixed("ns.Fixed", null, null, 16);
+    // Each member line is indented by exactly two spaces; with a single space the comparison fails.
+    assertEquals(
+        "{\n  \"type\" : \"fixed\",\n  \"name\" : \"Fixed\",\n  \"namespace\" : \"ns\",\n  \"size\" : 16\n}",
+        SchemaFormatter.format("json/pretty", schema));
+  }
+
+  /** {@code "json/inline"} yields single-line JSON without extra whitespace. */
+  @Test
+  void validateSupportForInlineJsonFormat() {
+    Schema schema = Schema.createFixed("ns.Fixed", null, null, 16);
+    assertEquals("{\"type\":\"fixed\",\"name\":\"Fixed\",\"namespace\":\"ns\",\"size\":16}",
+        SchemaFormatter.format("json/inline", schema));
+  }
+
+  /** Unknown variants of the JSON format are rejected. */
+  @Test
+  void checkThatJsonHasNoExtraVariant() {
+    assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("json/extra"));
+  }
+
+  /** The canonical format emits the full name and omits the doc string. */
+  @Test
+  void validateSupportForCanonicalFormat() {
+    Schema schema = Schema.createFixed("Fixed", "Another test", "ns", 16);
+    assertEquals("{\"name\":\"ns.Fixed\",\"type\":\"fixed\",\"size\":16}", SchemaFormatter.format("canonical", schema));
+  }
+
+  /** The canonical format accepts no variant. */
+  @Test
+  void checkThatCanonicalFormHasNoVariants() {
+    assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("canonical/foo"));
+  }
+
+  /** Requesting a format that does not exist is rejected. */
+  @Test
+  void checkExceptionForMissingFormat() {
+    assertThrows(AvroRuntimeException.class, () -> SchemaFormatter.getInstance("unknown"));
+  }
+
+  /**
+   * Factory with a deliberately unusual class name; {@code validateDefaultNaming}
+   * expects {@code formatName()} to throw for it.
+   */
+  private static class Wrongly_Named_SchemaFormatterFactory implements SchemaFormatterFactory {
+    @Override
+    public SchemaFormatter getDefaultFormatter() {
+      return null;
+    }
+  }
+
+  /**
+   * Factory whose class name does not end in {@code SchemaFormatterFactory};
+   * {@code validateDefaultNaming} expects {@code formatName()} to throw for it.
+   */
+  private static class SchemaFormatterFactoryWithOddName implements SchemaFormatterFactory {
+    @Override
+    public SchemaFormatter getDefaultFormatter() {
+      return null;
+    }
+  }
+}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
new file mode 100644
index 00000000000..871c172875d
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.avro; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.stream.Stream; + +class SchemaNameValidatorTest { + + @ParameterizedTest + @MethodSource("data") + void validator(NameValidator validator, String input, boolean expectedResult) { + NameValidator.Result result = validator.validate(input); + Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors()); + } + + static Stream data() { + return Stream.of(Arguments.of(NameValidator.UTF_VALIDATOR, null, false), // null not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, null, false), // null not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.UTF_VALIDATOR, "Êàçô", true), // Accept accent + Arguments.of(NameValidator.STRICT_VALIDATOR, "Êàçô", false), // Not Accept accent + 
Arguments.of(NameValidator.UTF_VALIDATOR, "5Êàçô", false), // can't start with number + Arguments.of(NameValidator.STRICT_VALIDATOR, "5Êàçô", false), // can't start with number + Arguments.of(NameValidator.UTF_VALIDATOR, "_Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "_Hello_world", true)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java b/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java new file mode 100644 index 00000000000..e781fe07bd9 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestBigDecimalConversion.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package org.apache.avro; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class TestBigDecimalConversion { + + private Conversion conversion = new Conversions.BigDecimalConversion(); + + private final LogicalType bigDecimal = LogicalTypes.bigDecimal(); + + private Schema bytesSchema = conversion.getRecommendedSchema(); + + @ParameterizedTest + @MethodSource("listBigDecimal") + void bigdec(BigDecimal d1) { + ByteBuffer d1bytes = conversion.toBytes(d1, bytesSchema, bigDecimal); + BigDecimal decimal1 = conversion.fromBytes(d1bytes, bytesSchema, bigDecimal); + Assertions.assertEquals(decimal1, d1); + } + + static Stream listBigDecimal() { + Iterator iterator = new Iterator() { + int index = 0; + + BigDecimal step = new BigDecimal(-2.7d); + + BigDecimal current = new BigDecimal(1.0d); + + @Override + public boolean hasNext() { + if (index == 50) { + // test small bigdecimal + current = new BigDecimal(1.0d); + step = new BigDecimal(-0.71d); + } + return index < 100; + } + + @Override + public BigDecimal next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + index++; + current = current.multiply(step); + return current; + } + }; + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .map(Arguments::of); + + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java b/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java index c3aa5a61063..6777722fdf2 100644 --- 
a/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestCircularReferences.java @@ -18,6 +18,9 @@ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -34,16 +37,14 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestCircularReferences { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static class Reference extends LogicalType { private static final String REFERENCE = "reference"; @@ -152,7 +153,7 @@ public String getTypeName() { } } - @BeforeClass + @BeforeAll public static void addReferenceTypes() { LogicalTypes.register(Referenceable.REFERENCEABLE, new ReferenceableTypeFactory()); LogicalTypes.register(Reference.REFERENCE, new ReferenceTypeFactory()); @@ -303,7 +304,7 @@ public Schema getSchema() { } @Test - public void test() throws IOException { + void test() throws IOException { ReferenceManager manager = new ReferenceManager(); GenericData model = new GenericData(); model.addLogicalTypeConversion(manager.getTracker()); @@ -348,17 +349,17 @@ public void test() throws IOException { Record actual = records.get(0); // because the record is a recursive structure, equals won't work - Assert.assertEquals("Should correctly read back the parent id", 1L, actual.get("id")); - Assert.assertEquals("Should correctly read back the parent data", new Utf8("parent data!"), actual.get("p")); + assertEquals(1L, actual.get("id"), "Should 
correctly read back the parent id"); + assertEquals(new Utf8("parent data!"), actual.get("p"), "Should correctly read back the parent data"); Record actualChild = (Record) actual.get("child"); - Assert.assertEquals("Should correctly read back the child data", new Utf8("child data!"), actualChild.get("c")); + assertEquals(new Utf8("child data!"), actualChild.get("c"), "Should correctly read back the child data"); Object childParent = actualChild.get("parent"); - Assert.assertTrue("Should have a parent Record object", childParent instanceof Record); + assertTrue(childParent instanceof Record, "Should have a parent Record object"); Record childParentRecord = (Record) actualChild.get("parent"); - Assert.assertEquals("Should have the right parent id", 1L, childParentRecord.get("id")); - Assert.assertEquals("Should have the right parent data", new Utf8("parent data!"), childParentRecord.get("p")); + assertEquals(1L, childParentRecord.get("id"), "Should have the right parent id"); + assertEquals(new Utf8("parent data!"), childParentRecord.get("p"), "Should have the right parent data"); } private List read(GenericData model, Schema schema, File file) throws IOException { @@ -381,7 +382,7 @@ private DatumReader newReader(GenericData model, Schema schema) { @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... 
data) throws IOException { - File file = temp.newFile(); + File file = File.createTempFile("junit", null, temp); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java index a5c0dec3efe..e411ab0effa 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFile.java @@ -17,17 +17,6 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileStream; @@ -38,33 +27,40 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DatumReader; +import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.RandomData; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) -public class TestDataFile { - private static 
final Logger LOG = LoggerFactory.getLogger(TestDataFile.class); +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.function.Function; +import java.util.stream.Stream; - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; - private final CodecFactory codec; +public class TestDataFile { + private static final Logger LOG = LoggerFactory.getLogger(TestDataFile.class); - public TestDataFile(CodecFactory codec) { - this.codec = codec; - LOG.info("Running with codec: " + codec); - } + @TempDir + public File DIR; - @Parameters - public static List codecs() { + public static Stream codecs() { List r = new ArrayList<>(); r.add(new Object[] { null }); r.add(new Object[] { CodecFactory.deflateCodec(0) }); @@ -81,7 +77,7 @@ public static List codecs() { r.add(new Object[] { CodecFactory.zstandardCodec(18, true) }); r.add(new Object[] { CodecFactory.zstandardCodec(0, false, false) }); r.add(new Object[] { CodecFactory.zstandardCodec(0, false, true) }); - return r; + return r.stream().map(Arguments::of); } private static final int COUNT = Integer.parseInt(System.getProperty("test.count", "200")); @@ -91,29 +87,50 @@ public static List codecs() { private static final String SCHEMA_JSON = "{\"type\": \"record\", \"name\": \"Test\", \"fields\": [" + "{\"name\":\"stringField\", \"type\":\"string\"}," + "{\"name\":\"longField\", \"type\":\"long\"}]}"; private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); + private static final Object LAST_RECORD; + static { + Object lastValue = null; + for (Object object : new RandomData(SCHEMA, COUNT, SEED)) { + 
lastValue = object; + } + LAST_RECORD = lastValue; + } - private File makeFile() { - return new File(DIR.getRoot().getPath(), "test-" + codec + ".avro"); + private File makeFile(CodecFactory codec) { + return new File(DIR, "test-" + codec + ".avro"); } - @Test - public void runTestsInOrder() throws Exception { - testGenericWrite(); - testGenericRead(); - testSplits(); - testSyncDiscovery(); - testGenericAppend(); - testReadWithHeader(); - testFSync(false); - testFSync(true); + @ParameterizedTest + @MethodSource("codecs") + public void runTestsInOrder(CodecFactory codec) throws Exception { + // Run for both encoders, but the MethodSource didn't really like it, + // so it is just a loop within the test + List> encoders = new ArrayList<>(); + encoders.add(b -> new EncoderFactory().directBinaryEncoder(b, null)); + encoders.add(b -> new EncoderFactory().blockingDirectBinaryEncoder(b, null)); + + for (Function encoder : encoders) { + LOG.info("Running with codec: {}", codec); + testGenericWrite(codec, encoder); + testGenericRead(codec); + testSplits(codec); + testSyncDiscovery(codec); + testReadLastRecord(codec); + testGenericAppend(codec, encoder); + testReadWithHeader(codec); + testFSync(codec, encoder, false); + testFSync(codec, encoder, true); + } } - private void testGenericWrite() throws IOException { + private void testGenericWrite(CodecFactory codec, Function encoderFunc) + throws IOException { DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()).setSyncInterval(100); if (codec != null) { writer.setCodec(codec); } - writer.create(SCHEMA, makeFile()); + writer.setEncoder(encoderFunc); + writer.create(SCHEMA, makeFile(codec)); try { int count = 0; for (Object datum : new RandomData(SCHEMA, COUNT, SEED)) { @@ -132,7 +149,7 @@ private void testGenericWrite() throws IOException { } catch (DataFileWriter.AppendWriteException e) { System.out.println("Ignoring: " + e); } - assertTrue("failed to throw when expected", threwProperly); + 
assertTrue(threwProperly, "failed to throw when expected"); } } } finally { @@ -148,11 +165,11 @@ private void testGenericWrite() throws IOException { doubleCloseEx = e; } - assertNull("Double close() threw an unexpected exception", doubleCloseEx); + assertNull(doubleCloseEx, "Double close() threw an unexpected exception"); } - private void testGenericRead() throws IOException { - try (DataFileReader reader = new DataFileReader<>(makeFile(), new GenericDatumReader<>())) { + private void testGenericRead(CodecFactory codec) throws IOException { + try (DataFileReader reader = new DataFileReader<>(makeFile(codec), new GenericDatumReader<>())) { Object datum = null; if (VALIDATE) { for (Object expected : new RandomData(SCHEMA, COUNT, SEED)) { @@ -167,8 +184,8 @@ private void testGenericRead() throws IOException { } } - private void testSplits() throws IOException { - File file = makeFile(); + private void testSplits(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { Random rand = new Random(SEED); int splits = 10; // number of splits @@ -190,8 +207,8 @@ private void testSplits() throws IOException { } } - private void testSyncDiscovery() throws IOException { - File file = makeFile(); + private void testSyncDiscovery(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { // discover the sync points ArrayList syncs = new ArrayList<>(); @@ -211,13 +228,46 @@ private void testSyncDiscovery() throws IOException { reader.seek(sync); assertNotNull(reader.next()); } + // Lastly, confirm that reading (but not decoding) all blocks results in the + // same sync points + reader.sync(0); + ArrayList syncs2 = new ArrayList<>(); + while (reader.hasNext()) { + syncs2.add(reader.previousSync()); + reader.nextBlock(); + } + assertEquals(syncs, syncs2); + } + } + + private void 
testReadLastRecord(CodecFactory codec) throws IOException { + File file = makeFile(codec); + try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { + long lastBlockStart = -1; + while (reader.hasNext()) { + // This algorithm can be made more efficient by checking if the underlying + // SeekableFileInput has been fully read: if so, the last block is in + // memory, and calls to next() will decode it. + // NOTE: this depends on the current implementation of DataFileReader. + lastBlockStart = reader.previousSync(); + reader.nextBlock(); + } + reader.seek(lastBlockStart); + + Object lastRecord = null; + while (reader.hasNext()) { + lastRecord = reader.next(lastRecord); + } + assertEquals(LAST_RECORD, lastRecord); } } - private void testGenericAppend() throws IOException { - File file = makeFile(); + private void testGenericAppend(CodecFactory codec, Function encoderFunc) + throws IOException { + File file = makeFile(codec); long start = file.length(); try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()).appendTo(file)) { + writer.setEncoder(encoderFunc); for (Object datum : new RandomData(SCHEMA, COUNT, SEED + 1)) { writer.append(datum); } @@ -238,8 +288,8 @@ private void testGenericAppend() throws IOException { } } - private void testReadWithHeader() throws IOException { - File file = makeFile(); + private void testReadWithHeader(CodecFactory codec) throws IOException { + File file = makeFile(codec); try (DataFileReader reader = new DataFileReader<>(file, new GenericDatumReader<>())) { // get a header for this file DataFileStream.Header header = reader.getHeader(); @@ -249,26 +299,23 @@ private void testReadWithHeader() throws IOException { try (DataFileReader readerTrue = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, true);) { - assertNotNull("Should be able to reopen from arbitrary point", readerTrue.next()); + assertNotNull(readerTrue.next(), "Should be able to reopen from arbitrary 
point"); long validPos = readerTrue.previousSync(); // post sync, we know of a valid sync point: re-open with seek (sync == false) sin.seek(validPos); try (DataFileReader readerFalse = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, false)) { - assertEquals("Should not move from sync point on reopen", validPos, sin.tell()); - assertNotNull("Should be able to reopen at sync point", readerFalse.next()); + assertEquals(validPos, sin.tell(), "Should not move from sync point on reopen"); + assertNotNull(readerFalse.next(), "Should be able to reopen at sync point"); } - } - } - } @Test - public void testSyncInHeader() throws IOException { - try (DataFileReader reader = new DataFileReader<>(new File("../../../share/test/data/syncInMeta.avro"), - new GenericDatumReader<>())) { + public void syncInHeader() throws IOException { + try (DataFileReader reader = new DataFileReader<>( + new File("target/test-classes/share/test/data/syncInMeta.avro"), new GenericDatumReader<>())) { reader.sync(0); for (Object datum : reader) assertNotNull(datum); @@ -277,11 +324,11 @@ public void testSyncInHeader() throws IOException { @Test public void test12() throws IOException { - readFile(new File("../../../share/test/data/test.avro12"), new GenericDatumReader<>()); + readFile(new File("target/test-classes/share/test/data/test.avro12"), new GenericDatumReader<>()); } @Test - public void testFlushCount() throws IOException { + public void flushCount() throws IOException { DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>()); writer.setFlushOnEveryBlock(false); TestingByteArrayOutputStream out = new TestingByteArrayOutputStream(); @@ -310,12 +357,14 @@ public void testFlushCount() throws IOException { assertTrue(out.flushCount < currentCount && out.flushCount >= flushCounter); } - private void testFSync(boolean useFile) throws IOException { + private void testFSync(CodecFactory codec, Function encoderFunc, boolean useFile) + throws IOException { try 
(DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { + writer.setEncoder(encoderFunc); writer.setFlushOnEveryBlock(false); TestingByteArrayOutputStream out = new TestingByteArrayOutputStream(); if (useFile) { - File f = makeFile(); + File f = makeFile(codec); try (SeekableFileInput in = new SeekableFileInput(f)) { writer.appendTo(in, out); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java index f1267ab9788..1aeebcddad5 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java @@ -17,60 +17,42 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.RandomData; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; + +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) -public class TestDataFileConcat { - private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); +import java.io.File; +import java.io.IOException; +import java.util.stream.Stream; - CodecFactory codec; - CodecFactory 
codec2; - boolean recompress; +import static org.junit.Assert.assertEquals; - public TestDataFileConcat(CodecFactory codec, CodecFactory codec2, Boolean recompress) { - this.codec = codec; - this.codec2 = codec2; - this.recompress = recompress; - LOG.info("Testing concatenating files, " + codec2 + " into " + codec + " with recompress=" + recompress); - } +public class TestDataFileConcat { + private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - @Parameters - public static List codecs() { - List r = new ArrayList<>(); - r.add(new Object[] { null, null, false }); - r.add(new Object[] { null, null, true }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true }); - r.add(new Object[] { CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true }); - r.add(new Object[] { CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false }); - return r; + @TempDir + public File DIR; + + public static Stream codecs() { + return Stream.of(Arguments.of(null, null, false), Arguments.of(null, null, true), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true), + Arguments.of(CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false), + Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true), + 
Arguments.of(CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false)); } private static final int COUNT = Integer.parseInt(System.getProperty("test.count", "200")); @@ -83,11 +65,12 @@ public static List codecs() { private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); private File makeFile(String name) { - return new File(DIR.getRoot().getPath(), "test-" + name + ".avro"); + return new File(DIR, "test-" + name + ".avro"); } - @Test - public void testConcatenateFiles() throws IOException { + @ParameterizedTest + @MethodSource("codecs") + void concatenateFiles(CodecFactory codec, CodecFactory codec2, boolean recompress) throws IOException { System.out.println("SEED = " + SEED); System.out.println("COUNT = " + COUNT); for (int k = 0; k < 5; k++) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java index 437ef6cd409..e4659488d17 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCorruption.java @@ -17,7 +17,7 @@ */ package org.apache.avro; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -31,7 +31,7 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestDataFileCorruption { @@ -42,7 +42,7 @@ private File makeFile(String name) { } @Test - public void testCorruptedFile() throws IOException { + void corruptedFile() throws IOException { Schema schema = Schema.create(Type.STRING); // Write a data file diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java index 3ba52376dd5..62d81f63782 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileCustomSync.java @@ -18,8 +18,7 @@ package org.apache.avro; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -32,7 +31,7 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestDataFileCustomSync { private byte[] createDataFile(byte[] sync) throws IOException { @@ -63,15 +62,17 @@ private static byte[] generateSync() { } } - @Test(expected = IOException.class) - public void testInvalidSync() throws IOException { - // Invalid size (must be 16): - byte[] sync = new byte[8]; - createDataFile(sync); + @Test + void invalidSync() throws IOException { + assertThrows(IOException.class, () -> { + // Invalid size (must be 16): + byte[] sync = new byte[8]; + createDataFile(sync); + }); } @Test - public void testRandomSync() throws IOException { + void randomSync() throws IOException { byte[] sync = generateSync(); byte[] randSyncFile = createDataFile(null); byte[] customSyncFile = createDataFile(sync); @@ -79,10 +80,10 @@ public void testRandomSync() throws IOException { } @Test - public void testCustomSync() throws IOException { + void customSync() throws IOException { byte[] sync = generateSync(); byte[] customSyncFile = createDataFile(sync); byte[] sameCustomSyncFile = createDataFile(sync); - assertTrue(Arrays.equals(customSyncFile, sameCustomSyncFile)); + assertArrayEquals(customSyncFile, sameCustomSyncFile); } } diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java index 1eb59931ecf..30eaf6f27e6 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileDeflate.java @@ -17,8 +17,8 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -31,12 +31,12 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** Simple test of DataFileWriter and DataFileStream with deflate codec. */ public class TestDataFileDeflate { @Test - public void testWriteAndRead() throws IOException { + void writeAndRead() throws IOException { Schema schema = Schema.create(Type.STRING); // Write it diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java index 3a70df4a6ec..30f51153c5b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileMeta.java @@ -17,8 +17,7 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -30,25 +29,24 @@ import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestDataFileMeta { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; - @Test(expected = AvroRuntimeException.class) - public void testUseReservedMeta() throws IOException { + @Test + public void useReservedMeta() throws IOException { try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { - w.setMeta("avro.foo", "bar"); + assertThrows(AvroRuntimeException.class, () -> w.setMeta("avro.foo", "bar")); } } - @Test() - public void testUseMeta() throws IOException { - File f = new File(DIR.getRoot().getPath(), "testDataFileMeta.avro"); + @Test + public void useMeta() throws IOException { + File f = new File(DIR, "testDataFileMeta.avro"); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.setMeta("hello", "bar"); w.create(Schema.create(Type.NULL), f); @@ -62,17 +60,17 @@ public void testUseMeta() throws IOException { } - @Test(expected = AvroRuntimeException.class) - public void testUseMetaAfterCreate() throws IOException { + @Test + public void useMetaAfterCreate() throws IOException { try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(Schema.create(Type.NULL), new ByteArrayOutputStream()); - w.setMeta("foo", "bar"); + assertThrows(AvroRuntimeException.class, () -> w.setMeta("foo", "bar")); } } @Test - public void testBlockSizeSetInvalid() { + public void blockSizeSetInvalid() { int exceptions = 0; for (int i = -1; i < 33; i++) { // 33 invalid, one valid diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java index 8393179cdb0..6ed6b35cc50 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java @@ -17,8 +17,7 @@ */ package org.apache.avro; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.EOFException; import java.io.File; @@ -38,14 +37,17 @@ import org.apache.avro.file.SeekableInput; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; @SuppressWarnings("restriction") public class TestDataFileReader { + @TempDir + public Path dataDir; - @Test // regression test for bug AVRO-2286 - public void testForLeakingFileDescriptors() throws IOException { + @Test + void forLeakingFileDescriptors() throws IOException { StringBuilder sb = new StringBuilder(); int maxTries = 3; for (int tries = 0; tries < maxTries; tries++) { @@ -66,8 +68,7 @@ public void testForLeakingFileDescriptors() throws IOException { return; // Sometimes the number of file descriptors is off due to other processes or - // garbage - // collection. We note each inconsistency and retry. + // garbage collection. We note each inconsistency and retry. sb.append(openFilesBeforeOperation).append("!=").append(openFilesAfterOperation).append(","); } fail("File descriptor leaked from new DataFileReader() over " + maxTries + " tries: (" @@ -82,17 +83,17 @@ private long getNumberOfOpenFileDescriptors() { return 0; } - @Test // regression test for bug AVRO-2944 - public void testThrottledInputStream() throws IOException { + @Test + void throttledInputStream() throws IOException { // AVRO-2944 describes hanging/failure in reading Avro file with performing // magic header check. This happens with throttled input stream, // where we read into buffer less bytes than requested. 
- Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); - File f = Files.createTempFile("testThrottledInputStream", ".avro").toFile(); + File f = dataDir.resolve("testThrottledInputStream.avro").toFile(); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(legacySchema, f); w.flush(); @@ -141,23 +142,25 @@ public int read(byte[] b, int off, int len) throws IOException { }; } - @Test(expected = EOFException.class) // another regression test for bug AVRO-2944, testing EOF case - public void testInputStreamEOF() throws IOException { - // AVRO-2944 describes hanging/failure in reading Avro file with performing - // magic header check. This potentially happens with a defective input stream - // where a -1 value is unexpectedly returned from a read. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) - .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " - + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); - File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile(); - try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { - w.create(legacySchema, f); - w.flush(); - } + @Test + void inputStreamEOF() throws IOException { + assertThrows(EOFException.class, () -> { + // AVRO-2944 describes hanging/failure in reading Avro file with performing + // magic header check. This potentially happens with a defective input stream + // where a -1 value is unexpectedly returned from a read. 
+ Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) + .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); + File f = dataDir.resolve("testInputStreamEOF.avro").toFile(); + try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { + w.create(legacySchema, f); + w.flush(); + } - // Should throw an EOFException - DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>()); + // Should throw an EOFException + DataFileReader.openReader(eofInputStream(f), new GenericDatumReader<>()); + }); } private SeekableInput eofInputStream(File f) throws IOException { @@ -191,16 +194,16 @@ public int read(byte[] b, int off, int len) throws IOException { } @Test - public void testIgnoreSchemaValidationOnRead() throws IOException { + void ignoreSchemaValidationOnRead() throws IOException { // This schema has an accent in the name and the default for the field doesn't // match the first type in the union. A Java SDK in the past could create a file // containing this schema. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"InvalidAccÃĢntWithInvalidNull\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); // Create a file with the legacy schema. 
- File f = Files.createTempFile("testIgnoreSchemaValidationOnRead", ".avro").toFile(); + File f = dataDir.resolve("testIgnoreSchemaValidationOnRead.avro").toFile(); try (DataFileWriter w = new DataFileWriter<>(new GenericDatumWriter<>())) { w.create(legacySchema, f); w.flush(); @@ -212,23 +215,27 @@ public void testIgnoreSchemaValidationOnRead() throws IOException { } } - @Test(expected = InvalidAvroMagicException.class) - public void testInvalidMagicLength() throws IOException { - File f = Files.createTempFile("testInvalidMagicLength", ".avro").toFile(); + @Test + void invalidMagicLength() throws IOException { + File f = dataDir.resolve("testInvalidMagicLength.avro").toFile(); try (FileWriter w = new FileWriter(f)) { w.write("-"); } - - DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); + try (SeekableFileInput fileInput = new SeekableFileInput(f)) { + assertThrows(InvalidAvroMagicException.class, + () -> DataFileReader.openReader(fileInput, new GenericDatumReader<>())); + } } - @Test(expected = InvalidAvroMagicException.class) - public void testInvalidMagicBytes() throws IOException { - File f = Files.createTempFile("testInvalidMagicBytes", ".avro").toFile(); + @Test + void invalidMagicBytes() throws IOException { + File f = dataDir.resolve("testInvalidMagicBytes.avro").toFile(); try (FileWriter w = new FileWriter(f)) { w.write("invalid"); } - - DataFileReader.openReader(new SeekableFileInput(f), new GenericDatumReader<>()); + try (SeekableFileInput fileInput = new SeekableFileInput(f)) { + assertThrows(InvalidAvroMagicException.class, + () -> DataFileReader.openReader(fileInput, new GenericDatumReader<>())); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java index 190f788c397..d6590b7c108 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java +++ 
b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReflect.java @@ -17,6 +17,9 @@ */ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; @@ -37,23 +40,22 @@ import org.apache.avro.reflect.ReflectData; import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.reflect.ReflectDatumWriter; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestDataFileReflect { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; /* * Test that using multiple schemas in a file works doing a union before writing * any records. */ @Test - public void testMultiReflectWithUnionBeforeWriting() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testMultiReflectWithUnionBeforeWriting.avro"); + void multiReflectWithUnionBeforeWriting() throws IOException { + File file = new File(DIR.getPath(), "testMultiReflectWithUnionBeforeWriting.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -80,7 +82,7 @@ public void testMultiReflectWithUnionBeforeWriting() throws IOException { for (Object datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } @@ -88,8 +90,8 @@ public void testMultiReflectWithUnionBeforeWriting() throws IOException { * Test that writing a record with a field that is null. 
*/ @Test - public void testNull() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testNull.avro"); + void testNull() throws IOException { + File file = new File(DIR.getPath(), "testNull.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -113,13 +115,13 @@ public void testNull() throws IOException { for (BarRecord datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } } @Test - public void testNew() throws IOException { + void testNew() throws IOException { ByteBuffer payload = ByteBuffer.allocateDirect(8 * 1024); for (int i = 0; i < 500; i++) { payload.putInt(1); @@ -142,15 +144,15 @@ public void testNew() throws IOException { BinaryDecoder avroDecoder = DecoderFactory.get().binaryDecoder(inputStream, null); ByteBufferRecord deserialized = datumReader.read(null, avroDecoder); - Assert.assertEquals(bbr, deserialized); + assertEquals(bbr, deserialized); } /* * Test that writing out and reading in a nested class works */ @Test - public void testNestedClass() throws IOException { - File file = new File(DIR.getRoot().getPath(), "testNull.avro"); + void nestedClass() throws IOException { + File file = new File(DIR.getPath(), "testNull.avro"); CheckList check = new CheckList<>(); try (FileOutputStream fos = new FileOutputStream(file)) { @@ -171,7 +173,7 @@ public void testNestedClass() throws IOException { for (BazRecord datum : reader) { check.assertEquals(datum, count++); } - Assert.assertEquals(count, check.size()); + assertEquals(count, check.size()); } } } @@ -188,10 +190,10 @@ T addAndReturn(T check) { } void assertEquals(Object toCheck, int i) { - Assert.assertNotNull(toCheck); + assertNotNull(toCheck); Object o = get(i); - Assert.assertNotNull(o); - Assert.assertEquals(toCheck, o); + assertNotNull(o); + Assertions.assertEquals(toCheck, o); } } diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java b/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java index 2183dd3ac11..391c886c366 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDecimalConversion.java @@ -19,32 +19,26 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import java.math.BigDecimal; import java.nio.ByteBuffer; import static java.math.RoundingMode.HALF_EVEN; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; public class TestDecimalConversion { private static final Conversion CONVERSION = new Conversions.DecimalConversion(); - @Rule - public ExpectedException expectedException = ExpectedException.none(); - private Schema smallerSchema; private LogicalType smallerLogicalType; private Schema largerSchema; private LogicalType largerLogicalType; - @Before + @BeforeEach public void setup() { smallerSchema = Schema.createFixed("smallFixed", null, null, 3); smallerSchema.addProp("logicalType", "decimal"); @@ -60,7 +54,7 @@ public void setup() { } @Test - public void testToFromBytes() { + void toFromBytes() { final BigDecimal value = BigDecimal.valueOf(10.99).setScale(15, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -68,7 +62,7 @@ public void testToFromBytes() { } @Test - public void testToFromBytesMaxPrecision() { + void toFromBytesMaxPrecision() { final BigDecimal value = new 
BigDecimal("4567335489766.99834").setScale(15, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -76,15 +70,15 @@ public void testToFromBytesMaxPrecision() { } @Test - public void testToBytesPrecisionError() { + void toBytesPrecisionError() { final BigDecimal value = new BigDecimal("1.07046455859736525E+18").setScale(15, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with precision 34 as max precision 28"); - CONVERSION.toBytes(value, largerSchema, largerLogicalType); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toBytes(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with precision 34 as max precision 28", avroTypeException.getMessage()); } @Test - public void testToBytesFixedSmallerScale() { + void toBytesFixedSmallerScale() { final BigDecimal value = new BigDecimal("99892.1234").setScale(10, HALF_EVEN); final ByteBuffer byteBuffer = CONVERSION.toBytes(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromBytes(byteBuffer, largerSchema, largerLogicalType); @@ -92,15 +86,15 @@ public void testToBytesFixedSmallerScale() { } @Test - public void testToBytesScaleError() { + void toBytesScaleError() { final BigDecimal value = new BigDecimal("4567335489766.989989998435899453").setScale(16, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 16 as scale 15 without rounding"); - CONVERSION.toBytes(value, largerSchema, largerLogicalType); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toBytes(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with scale 16 as scale 15 without rounding", 
avroTypeException.getMessage()); } @Test - public void testToFromFixed() { + void toFromFixed() { final BigDecimal value = new BigDecimal("3").setScale(15, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -108,7 +102,7 @@ public void testToFromFixed() { } @Test - public void testToFromFixedMaxPrecision() { + void toFromFixedMaxPrecision() { final BigDecimal value = new BigDecimal("4567335489766.99834").setScale(15, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -116,15 +110,16 @@ public void testToFromFixedMaxPrecision() { } @Test - public void testToFixedPrecisionError() { + void toFixedPrecisionError() { final BigDecimal value = new BigDecimal("1.07046455859736525E+18").setScale(15, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with precision 34 as max precision 28"); - CONVERSION.toFixed(value, largerSchema, largerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with precision 34 as max precision 28", avroTypeException.getMessage()); } @Test - public void testToFromFixedSmallerScale() { + void toFromFixedSmallerScale() { final BigDecimal value = new BigDecimal("99892.1234").setScale(10, HALF_EVEN); final GenericFixed fixed = CONVERSION.toFixed(value, largerSchema, largerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, largerSchema, largerLogicalType); @@ -132,15 +127,16 @@ public void testToFromFixedSmallerScale() { } @Test - public void testToFixedScaleError() { + void toFixedScaleError() { final BigDecimal value = new 
BigDecimal("4567335489766.3453453453453453453453").setScale(16, HALF_EVEN); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 16 as scale 15 without rounding"); - CONVERSION.toFixed(value, largerSchema, largerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, largerSchema, largerLogicalType)); + assertEquals("Cannot encode decimal with scale 16 as scale 15 without rounding", avroTypeException.getMessage()); } @Test - public void testToFromFixedMatchScaleAndPrecision() { + void toFromFixedMatchScaleAndPrecision() { final BigDecimal value = new BigDecimal("123.45"); final GenericFixed fixed = CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, smallerSchema, smallerLogicalType); @@ -148,7 +144,7 @@ public void testToFromFixedMatchScaleAndPrecision() { } @Test - public void testToFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { + void toFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { final BigDecimal value = new BigDecimal("123.4500"); final GenericFixed fixed = CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); final BigDecimal result = CONVERSION.fromFixed(fixed, smallerSchema, smallerLogicalType); @@ -156,24 +152,27 @@ public void testToFromFixedRepresentedInLogicalTypeAllowRoundUnneccesary() { } @Test - public void testToFromFixedPrecisionErrorAfterAdjustingScale() { + void toFromFixedPrecisionErrorAfterAdjustingScale() { final BigDecimal value = new BigDecimal("1234.560"); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage( - "Cannot encode decimal with precision 6 as max precision 5. 
This is after safely adjusting scale from 3 to required 2"); - CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, smallerSchema, smallerLogicalType)); + assertEquals( + "Cannot encode decimal with precision 6 as max precision 5. This is after safely adjusting scale from 3 to required 2", + avroTypeException.getMessage()); } @Test - public void testToFixedRepresentedInLogicalTypeErrorIfRoundingRequired() { + void toFixedRepresentedInLogicalTypeErrorIfRoundingRequired() { final BigDecimal value = new BigDecimal("123.456"); - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Cannot encode decimal with scale 3 as scale 2 without rounding"); - CONVERSION.toFixed(value, smallerSchema, smallerLogicalType); + + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> CONVERSION.toFixed(value, smallerSchema, smallerLogicalType)); + assertEquals("Cannot encode decimal with scale 3 as scale 2 without rounding", avroTypeException.getMessage()); } @Test - public void testImportanceOfEnsuringCorrectScaleWhenConvertingFixed() { + void importanceOfEnsuringCorrectScaleWhenConvertingFixed() { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) smallerLogicalType; final BigDecimal bigDecimal = new BigDecimal("1234.5"); @@ -192,7 +191,7 @@ public void testImportanceOfEnsuringCorrectScaleWhenConvertingFixed() { } @Test - public void testImportanceOfEnsuringCorrectScaleWhenConvertingBytes() { + void importanceOfEnsuringCorrectScaleWhenConvertingBytes() { LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) smallerLogicalType; final BigDecimal bigDecimal = new BigDecimal("1234.5"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java index a9f78f16899..f35c62d7a2e 100644 --- 
a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java @@ -18,19 +18,32 @@ package org.apache.avro; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; public class TestFixed { @Test - public void testFixedDefaultValueDrop() { + void fixedDefaultValueDrop() { Schema md5 = SchemaBuilder.builder().fixed("MD5").size(16); Schema frec = SchemaBuilder.builder().record("test").fields().name("hash").type(md5).withDefault(new byte[16]) .endRecord(); Schema.Field field = frec.getField("hash"); - Assert.assertNotNull(field.defaultVal()); - Assert.assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + assertNotNull(field.defaultVal()); + assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + } + + @Test + void fixedLengthOutOfLimit() { + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> Schema.createFixed("oversize", "doc", "space", Integer.MAX_VALUE)); + assertEquals(TestSystemLimitException.ERROR_VM_LIMIT_BYTES, ex.getMessage()); } + @Test + void fixedNegativeLength() { + Exception ex = assertThrows(AvroRuntimeException.class, () -> Schema.createFixed("negative", "doc", "space", -1)); + assertEquals(TestSystemLimitException.ERROR_NEGATIVE, ex.getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java index 7b1f5bf5249..733997db28f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java @@ -18,42 +18,50 @@ package org.apache.avro; +import org.hamcrest.collection.IsMapContaining; +import org.junit.jupiter.api.Test; + import java.util.Arrays; import java.util.concurrent.Callable; -import org.hamcrest.MatcherAssert; -import org.hamcrest.collection.IsMapContaining; -import 
org.junit.Assert; -import org.junit.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; public class TestLogicalType { @Test - public void testDecimalFromSchema() { + void decimalFromSchema() { Schema schema = Schema.createFixed("aFixed", null, null, 4); schema.addProp("logicalType", "decimal"); schema.addProp("precision", 9); schema.addProp("scale", 2); LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema); - Assert.assertTrue("Should be a Decimal", logicalType instanceof LogicalTypes.Decimal); + assertTrue(logicalType instanceof LogicalTypes.Decimal, "Should be a Decimal"); LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; - Assert.assertEquals("Should have correct precision", 9, decimal.getPrecision()); - Assert.assertEquals("Should have correct scale", 2, decimal.getScale()); + assertEquals(9, decimal.getPrecision(), "Should have correct precision"); + assertEquals(2, decimal.getScale(), "Should have correct scale"); } @Test - public void testInvalidLogicalTypeIgnored() { + void invalidLogicalTypeIgnored() { final Schema schema = Schema.createFixed("aFixed", null, null, 2); schema.addProp("logicalType", "decimal"); schema.addProp("precision", 9); schema.addProp("scale", 2); - Assert.assertNull("Should ignore invalid logical type", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Should ignore invalid logical type"); } @Test - public void testDecimalWithNonByteArrayTypes() { + void decimalWithNonByteArrayTypes() { 
final LogicalType decimal = LogicalTypes.decimal(5, 2); // test simple types Schema[] nonBytes = new Schema[] { Schema.createRecord("Record", null, null, false), @@ -73,25 +81,26 @@ public void testDecimalWithNonByteArrayTypes() { } @Test - public void testUnknownFromJsonNode() { + void unknownFromJsonNode() { Schema schema = Schema.create(Schema.Type.STRING); schema.addProp("logicalType", "unknown"); schema.addProp("someProperty", 34); LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema); - Assert.assertNull("Should not return a LogicalType instance", logicalType); + assertNull(logicalType, "Should not return a LogicalType instance"); } @Test - public void testDecimalBytesHasNoPrecisionLimit() { + void decimalBytesHasNoPrecisionLimit() { Schema schema = Schema.create(Schema.Type.BYTES); // precision is not limited for bytes LogicalTypes.decimal(Integer.MAX_VALUE).addToSchema(schema); - Assert.assertEquals("Precision should be an Integer.MAX_VALUE", Integer.MAX_VALUE, - ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getPrecision()); + assertEquals(Integer.MAX_VALUE, + ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getPrecision(), + "Precision should be an Integer.MAX_VALUE"); } @Test - public void testDecimalFixedPrecisionLimit() { + void decimalFixedPrecisionLimit() { // 4 bytes can hold up to 9 digits of precision final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "fixed(4) cannot store 10 digits (max 9)", @@ -99,7 +108,7 @@ public void testDecimalFixedPrecisionLimit() { LogicalTypes.decimal(10).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); // 129 bytes can hold up to 310 digits of precision 
final Schema schema129 = Schema.createFixed("aDecimal", null, null, 129); @@ -108,56 +117,55 @@ public void testDecimalFixedPrecisionLimit() { LogicalTypes.decimal(311).addToSchema(schema129); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", - LogicalTypes.fromSchemaIgnoreInvalid(schema129)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema129), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithZeroPrecision() { + void decimalFailsWithZeroPrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal precision: 0 (must be positive)", () -> { LogicalTypes.decimal(0).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithNegativePrecision() { + void decimalFailsWithNegativePrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal precision: -9 (must be positive)", () -> { LogicalTypes.decimal(-9).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalScaleBoundedByPrecision() { + void decimalScaleBoundedByPrecision() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal scale: 10 (greater than precision: 9)", () -> { LogicalTypes.decimal(9, 
10).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testDecimalFailsWithNegativeScale() { + void decimalFailsWithNegativeScale() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); assertThrows("Should reject precision", IllegalArgumentException.class, "Invalid decimal scale: -2 (must be positive)", () -> { LogicalTypes.decimal(9, -2).addToSchema(schema); return null; }); - Assert.assertNull("Invalid logical type should not be set on schema", LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertNull(LogicalTypes.fromSchemaIgnoreInvalid(schema), "Invalid logical type should not be set on schema"); } @Test - public void testSchemaRejectsSecondLogicalType() { + void schemaRejectsSecondLogicalType() { final Schema schema = Schema.createFixed("aDecimal", null, null, 4); LogicalTypes.decimal(9).addToSchema(schema); assertThrows("Should reject second logical type", AvroRuntimeException.class, "Can't overwrite property: scale", @@ -165,37 +173,61 @@ public void testSchemaRejectsSecondLogicalType() { LogicalTypes.decimal(9, 2).addToSchema(schema); return null; }); - Assert.assertEquals("First logical type should still be set on schema", LogicalTypes.decimal(9), - LogicalTypes.fromSchemaIgnoreInvalid(schema)); + assertEquals(LogicalTypes.decimal(9), LogicalTypes.fromSchemaIgnoreInvalid(schema), + "First logical type should still be set on schema"); } @Test - public void testDecimalDefaultScale() { + void decimalDefaultScale() { Schema schema = Schema.createFixed("aDecimal", null, null, 4); // 4 bytes can hold up to 9 digits of precision LogicalTypes.decimal(9).addToSchema(schema); - Assert.assertEquals("Scale should be a 0", 0, - ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getScale()); + 
assertEquals(0, ((LogicalTypes.Decimal) LogicalTypes.fromSchemaIgnoreInvalid(schema)).getScale(), + "Scale should be a 0"); } @Test - public void testFixedDecimalToFromJson() { + void fixedDecimalToFromJson() { Schema schema = Schema.createFixed("aDecimal", null, null, 4); LogicalTypes.decimal(9, 2).addToSchema(schema); Schema parsed = new Schema.Parser().parse(schema.toString(true)); - Assert.assertEquals("Constructed and parsed schemas should match", schema, parsed); + assertEquals(schema, parsed, "Constructed and parsed schemas should match"); } @Test - public void testBytesDecimalToFromJson() { + void bytesDecimalToFromJson() { Schema schema = Schema.create(Schema.Type.BYTES); LogicalTypes.decimal(9, 2).addToSchema(schema); Schema parsed = new Schema.Parser().parse(schema.toString(true)); - Assert.assertEquals("Constructed and parsed schemas should match", schema, parsed); + assertEquals(schema, parsed, "Constructed and parsed schemas should match"); + } + + @Test + void uuidExtendsString() { + Schema uuidSchema = LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)); + assertEquals(LogicalTypes.uuid(), uuidSchema.getLogicalType()); + + assertThrows("UUID requires a string", IllegalArgumentException.class, + "Uuid can only be used with an underlying string or fixed type", + () -> LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.INT))); } @Test - public void testLogicalTypeEquals() { + void durationExtendsFixed12() { + Schema durationSchema = LogicalTypes.duration().addToSchema(Schema.createFixed("f", null, null, 12)); + assertEquals(LogicalTypes.duration(), durationSchema.getLogicalType()); + + assertThrows("Duration requires a fixed(12)", IllegalArgumentException.class, + "Duration can only be used with an underlying fixed type of size 12.", + () -> LogicalTypes.duration().addToSchema(Schema.create(Schema.Type.INT))); + + assertThrows("Duration requires a fixed(12)", IllegalArgumentException.class, + "Duration can only be used with an 
underlying fixed type of size 12.", + () -> LogicalTypes.duration().addToSchema(Schema.createFixed("wrong", null, null, 42))); + } + + @Test + void logicalTypeEquals() { LogicalTypes.Decimal decimal90 = LogicalTypes.decimal(9); LogicalTypes.Decimal decimal80 = LogicalTypes.decimal(8); LogicalTypes.Decimal decimal92 = LogicalTypes.decimal(9, 2); @@ -209,12 +241,12 @@ public void testLogicalTypeEquals() { } @Test - public void testLogicalTypeInSchemaEquals() { + void logicalTypeInSchemaEquals() { Schema schema1 = Schema.createFixed("aDecimal", null, null, 4); Schema schema2 = Schema.createFixed("aDecimal", null, null, 4); Schema schema3 = Schema.createFixed("aDecimal", null, null, 4); - Assert.assertNotSame(schema1, schema2); - Assert.assertNotSame(schema1, schema3); + assertNotSame(schema1, schema2); + assertNotSame(schema1, schema3); assertEqualsTrue("No logical types", schema1, schema2); assertEqualsTrue("No logical types", schema1, schema3); @@ -229,7 +261,7 @@ public void testLogicalTypeInSchemaEquals() { } @Test - public void testRegisterLogicalTypeThrowsIfTypeNameNotProvided() { + void registerLogicalTypeThrowsIfTypeNameNotProvided() { assertThrows("Should error if type name was not provided", UnsupportedOperationException.class, "LogicalTypeFactory TypeName has not been provided", () -> { LogicalTypes.register(schema -> LogicalTypes.date()); @@ -238,7 +270,7 @@ public void testRegisterLogicalTypeThrowsIfTypeNameNotProvided() { } @Test - public void testRegisterLogicalTypeWithName() { + void registerLogicalTypeWithName() { final LogicalTypes.LogicalTypeFactory factory = new LogicalTypes.LogicalTypeFactory() { @Override public LogicalType fromSchema(Schema schema) { @@ -253,11 +285,11 @@ public String getTypeName() { LogicalTypes.register("registered", factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("registered", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), 
IsMapContaining.hasEntry("registered", factory)); } @Test - public void testRegisterLogicalTypeWithFactoryName() { + void registerLogicalTypeWithFactoryName() { final LogicalTypes.LogicalTypeFactory factory = new LogicalTypes.LogicalTypeFactory() { @Override public LogicalType fromSchema(Schema schema) { @@ -272,27 +304,32 @@ public String getTypeName() { LogicalTypes.register(factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("factory", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("factory", factory)); } @Test - public void testRegisterLogicalTypeWithFactoryNameNotProvided() { + void registerLogicalTypeWithFactoryNameNotProvided() { final LogicalTypes.LogicalTypeFactory factory = schema -> LogicalTypes.date(); LogicalTypes.register("logicalTypeName", factory); - MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), - IsMapContaining.hasEntry("logicalTypeName", factory)); + assertThat(LogicalTypes.getCustomRegisteredTypes(), IsMapContaining.hasEntry("logicalTypeName", factory)); + } + + @Test + public void testRegisterLogicalTypeFactoryByServiceLoader() { + assertThat(LogicalTypes.getCustomRegisteredTypes(), + IsMapContaining.hasEntry(equalTo("custom"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); } public static void assertEqualsTrue(String message, Object o1, Object o2) { - Assert.assertTrue("Should be equal (forward): " + message, o1.equals(o2)); - Assert.assertTrue("Should be equal (reverse): " + message, o2.equals(o1)); + assertEquals(o1, o2, "Should be equal (forward): " + message); + assertEquals(o2, o1, "Should be equal (reverse): " + message); } public static void assertEqualsFalse(String message, Object o1, Object o2) { - Assert.assertFalse("Should be equal (forward): " + message, o1.equals(o2)); - Assert.assertFalse("Should be equal (reverse): " + message, o2.equals(o1)); + assertNotEquals(o1, o2, "Should be equal (forward): " + message); 
+ assertNotEquals(o2, o1, "Should be equal (reverse): " + message); } /** @@ -305,14 +342,14 @@ public static void assertEqualsFalse(String message, Object o1, Object o2) { * @param callable A Callable that is expected to throw the exception */ public static void assertThrows(String message, Class expected, String containedInMessage, - Callable callable) { + Callable callable) { try { callable.call(); - Assert.fail("No exception was thrown (" + message + "), expected: " + expected.getName()); + fail("No exception was thrown (" + message + "), expected: " + expected.getName()); } catch (Exception actual) { - Assert.assertEquals(message, expected, actual.getClass()); - Assert.assertTrue("Expected exception message (" + containedInMessage + ") missing: " + actual.getMessage(), - actual.getMessage().contains(containedInMessage)); + assertEquals(expected, actual.getClass(), message); + assertTrue(actual.getMessage().contains(containedInMessage), + "Expected exception message (" + containedInMessage + ") missing: " + actual.getMessage()); } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java index 0e9d08e95fb..e697830c34f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestNestedRecords.java @@ -22,14 +22,13 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.JsonDecoder; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayInputStream; import java.io.IOException; import static java.nio.charset.StandardCharsets.UTF_8; import static org.hamcrest.CoreMatchers.equalTo; -import static org.junit.Assert.assertThat; +import static org.hamcrest.MatcherAssert.assertThat; /** * This test demonstrates the fix for a complex nested schema type. 
@@ -37,7 +36,7 @@ public class TestNestedRecords { @Test - public void testSingleSubRecord() throws IOException { + void singleSubRecord() throws IOException { final Schema child = SchemaBuilder.record("Child").namespace("org.apache.avro.nested").fields() .requiredString("childField").endRecord(); @@ -64,7 +63,7 @@ public void testSingleSubRecord() throws IOException { } @Test - public void testSingleSubRecordExtraField() throws IOException { + void singleSubRecordExtraField() throws IOException { final Schema child = SchemaBuilder.record("Child").namespace("org.apache.avro.nested").fields() .requiredString("childField").endRecord(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java index 3c63cca9b18..711b896a403 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java @@ -17,23 +17,115 @@ */ package org.apache.avro; -import static org.junit.Assert.*; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.io.JsonEncoder; -import org.junit.Test; +import com.fasterxml.jackson.databind.JsonNode; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; +import static org.junit.jupiter.api.Assertions.*; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.Collections; + +import org.junit.jupiter.api.Test; public class TestProtocol { @Test - public void testPropEquals() { + public void parse() throws IOException { + File fic = new File("target/test-classes/share/test/schemas/namespace.avpr"); + Protocol protocol = Protocol.parse(fic); + 
assertNotNull(protocol); + assertEquals("TestNamespace", protocol.getName()); + } + + /** + * record type 'User' contains a field of type 'Status', which contains a field + * of type 'User'. + */ + @Test + public void crossProtocol() { + String userStatus = "{ \"protocol\" : \"p1\", " + "\"types\": [" + + "{\"name\": \"User\", \"type\": \"record\", \"fields\": [{\"name\": \"current_status\", \"type\": \"Status\"}]},\n" + + "\n" + + "{\"name\": \"Status\", \"type\": \"record\", \"fields\": [{\"name\": \"author\", \"type\": \"User\"}]}" + + "]}"; + + Protocol protocol = Protocol.parse(userStatus); + Schema userSchema = protocol.getType("User"); + Schema statusSchema = protocol.getType("Status"); + assertSame(statusSchema, userSchema.getField("current_status").schema()); + assertSame(userSchema, statusSchema.getField("author").schema()); + + String parsingFormUser = SchemaNormalization.toParsingForm(userSchema); + assertEquals( + "{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":\"User\"}]}}]}", + parsingFormUser); + + String parsingFormStatus = SchemaNormalization.toParsingForm(statusSchema); + assertEquals( + "{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":\"Status\"}]}}]}", + parsingFormStatus); + } + + /** + * When a schema uses a type before it is defined, test that normalization + * emits the definition at the first use and a reference afterwards. 
+ */ + @Test + void normalization() { + final String schema = "{\n" + " \"type\":\"record\", \"name\": \"Main\", " + " \"fields\":[\n" + + " { \"name\":\"f1\", \"type\":\"Sub\" },\n" // use Sub + + " { \"name\":\"f2\", " + " \"type\":{\n" + " \"type\":\"enum\", \"name\":\"Sub\",\n" // define + // Sub + + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}"; + Schema s = new Schema.Parser().parse(schema); + assertNotNull(s); + + String parsingForm = SchemaNormalization.toParsingForm(s); + assertEquals( + "{\"name\":\"Main\",\"type\":\"record\",\"fields\":[{\"name\":\"f1\",\"type\":{\"name\":\"Sub\",\"type\":\"enum\",\"symbols\":[\"OPEN\",\"CLOSE\"]}},{\"name\":\"f2\",\"type\":\"Sub\"}]}", + parsingForm); + } + + @Test + void namespaceAndNameRules() { + Protocol p1 = new Protocol("P", null, "foo"); + Protocol p2 = new Protocol("foo.P", null, null); + Protocol p3 = new Protocol("foo.P", null, "bar"); + assertEquals(p1.getName(), p2.getName()); + assertEquals(p1.getNamespace(), p2.getNamespace()); + assertEquals(p1.getName(), p3.getName()); + assertEquals(p1.getNamespace(), p3.getNamespace()); + + // The following situation is allowed, even if confusing, because the + // specification describes this algorithm without specifying that the resulting + // namespace must be non-empty. 
+ Protocol invalidName = new Protocol(".P", null, "ignored"); + assertNull(invalidName.getNamespace()); + assertEquals("P", invalidName.getName()); + } + + @Test + void propEquals() { Protocol p1 = new Protocol("P", null, "foo"); p1.addProp("a", "1"); Protocol p2 = new Protocol("P", null, "foo"); p2.addProp("a", "2"); - assertFalse(p1.equals(p2)); + assertNotEquals(p1, p2); } @Test - public void testSplitProtocolBuild() { + void splitProtocolBuild() { Protocol p = new Protocol("P", null, "foo"); p.addProp("property", "some value"); @@ -48,4 +140,17 @@ public void testSplitProtocolBuild() { assertNotNull(parsedArrayOfStringProtocol); assertEquals(parsedStringProtocol.toString(), parsedArrayOfStringProtocol.toString()); } + + @Test + void copyMessage() { + Protocol p = new Protocol("P", "protocol", "foo"); + Schema req1 = SchemaBuilder.record("foo.req1").fields().endRecord(); + Protocol.Message m1 = p.createMessage("M", "message", singletonMap("foo", "bar"), req1); + Schema req2 = SchemaBuilder.record("foo.req2").fields().name("test").type().booleanType().noDefault().endRecord(); + + Protocol.Message m2 = p.createMessage(m1, req2); + assertEquals(m1.getName(), m2.getName()); + assertEquals(m1.getDoc(), m2.getDoc()); + assertEquals(m1.getProp("foo"), m2.getProp("foo")); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 47cafcec189..89fedc75ca7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -27,6 +27,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Stream; + import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.EnumSymbol; import 
org.apache.avro.generic.GenericData.Record; @@ -38,24 +40,17 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + public class TestReadingWritingDataInEvolvedSchemas { private static final String RECORD_A = "RecordA"; private static final String FIELD_A = "fieldA"; private static final char LATIN_SMALL_LETTER_O_WITH_DIARESIS = '\u00F6'; - @Rule - public ExpectedException expectedException = ExpectedException.none(); - private static final Schema DOUBLE_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // .name(FIELD_A).type().doubleType().noDefault() // @@ -89,13 +84,18 @@ public class TestReadingWritingDataInEvolvedSchemas { .fields() // .name(FIELD_A).type().unionOf().stringType().and().bytesType().endUnion().noDefault() // .endRecord(); + + private static final Schema ENUM_AB = SchemaBuilder.enumeration("Enum1").symbols("A", "B"); + private static final Schema ENUM_AB_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B").noDefault() // + .name(FIELD_A).type(ENUM_AB).noDefault() // .endRecord(); + + private static final Schema ENUM_ABC = SchemaBuilder.enumeration("Enum1").symbols("A", "B", "C"); private static final Schema ENUM_ABC_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B", "C").noDefault() // + .name(FIELD_A).type(ENUM_ABC).noDefault() // .endRecord(); private static final Schema UNION_INT_RECORD = SchemaBuilder.record(RECORD_A) // 
.fields() // @@ -122,221 +122,235 @@ public class TestReadingWritingDataInEvolvedSchemas { .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // .endRecord(); - @Parameters(name = "encoder = {0}") - public static Collection data() { - return Arrays.asList(new EncoderType[][] { { EncoderType.BINARY }, { EncoderType.JSON } }); - } - - public TestReadingWritingDataInEvolvedSchemas(EncoderType encoderType) { - this.encoderType = encoderType; - } - - private final EncoderType encoderType; - enum EncoderType { BINARY, JSON } - @Test - public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToDoubleSchema() 
throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + 
@ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema() throws Exception { + 
@ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0F, decoded.get(FIELD_A)); } - @Test - public void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found double, expecting float"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = 
Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found double, expecting float", exception.getMessage()); } - @Test - public void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found float, expecting long"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0f); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(LONG_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found float, expecting long", exception.getMessage()); } - @Test - public void longWrittenWithUnionSchemaIsNotConvertedToIntSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found long, expecting int"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsNotConvertedToIntSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(INT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(INT_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found long, expecting int", exception.getMessage()); } - @Test - public void 
intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded).get(FIELD_A)); + byte[] encoded = encodeGenericBlob(record, encoderType); + assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded, encoderType).get(FIELD_A)); } - @Test - public void asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, "42"); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals("42".getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void 
utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals(goeran.getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; ByteBuffer buf = ByteBuffer.wrap("42".getBytes(StandardCharsets.UTF_8)); Record record = defaultRecordWithSchema(writer, FIELD_A, buf); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals("42", read.toString()); } - @Test - public void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = 
UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals(goeran, read.toString()); } - @Test - public void enumRecordCanBeReadWithExtendedEnumSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordCanBeReadWithExtendedEnumSchema(EncoderType encoderType) throws Exception { Schema writer = ENUM_AB_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_AB, "A")); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded, encoderType); assertEquals("A", decoded.get(FIELD_A).toString()); } - @Test - public void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues(EncoderType encoderType) + throws Exception { Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "A")); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType); 
assertEquals("A", decoded.get(FIELD_A).toString()); } - @Test - public void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("No match for C"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed(EncoderType encoderType) throws Exception { Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "C")); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "C")); + byte[] encoded = encodeGenericBlob(record, encoderType); + + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("No match for C", exception.getMessage()); } - @Test - public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData(EncoderType encoderType) + throws Exception { Schema writer = SchemaBuilder.record(RECORD_A) // .fields() // .name("newTopField").type().stringType().noDefault() // @@ -344,47 +358,50 @@ public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfD .endRecord(); Record record = defaultRecordWithSchema(writer, FIELD_A, 42); record.put("newTopField", "not decoded"); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded, encoderType); assertEquals(42, decoded.get(FIELD_A)); try { decoded.get("newTopField"); - 
Assert.fail("get should throw a exception"); + Assertions.fail("get should throw a exception"); } catch (AvroRuntimeException ex) { - Assert.assertEquals("Not a valid schema field: newTopField", ex.getMessage()); + Assertions.assertEquals("Not a valid schema field: newTopField", ex.getMessage()); } } - @Test - public void readerWithoutDefaultValueThrowsException() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("missing required field newField"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithoutDefaultValueThrowsException(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newField").type().intType().noDefault() // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(reader, INT_RECORD, encoded, encoderType)); + Assertions.assertTrue(exception.getMessage().contains("missing required field newField"), exception.getMessage()); } - @Test - public void readerWithDefaultValueIsApplied() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithDefaultValueIsApplied(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newFieldWithDefault").type().intType().intDefault(314) // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = 
decodeGenericBlob(reader, INT_RECORD, encoded, encoderType); assertEquals(42, decoded.get(FIELD_A)); assertEquals(314, decoded.get("newFieldWithDefault")); } - @Test - public void aliasesInSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void aliasesInSchema(EncoderType encoderType) throws Exception { Schema writer = new Schema.Parser() .parse("{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" + "{\"name\": \"name\", \"type\": \"int\"}\n" + "]}\n"); @@ -393,8 +410,8 @@ public void aliasesInSchema() throws Exception { + "{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" ]}\n" + "]}\n"); GenericData.Record record = defaultRecordWithSchema(writer, "name", 1); - byte[] encoded = encodeGenericBlob(record); - GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded, encoderType); assertEquals(1, decoded.get("fname")); } @@ -405,7 +422,7 @@ private Record defaultRecordWithSchema(Schema schema, String key, T value) { return data; } - private byte[] encodeGenericBlob(GenericRecord data) throws IOException { + private byte[] encodeGenericBlob(GenericRecord data, EncoderType encoderType) throws IOException { DatumWriter writer = new GenericDatumWriter<>(data.getSchema()); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); Encoder encoder = encoderType == EncoderType.BINARY ? 
EncoderFactory.get().binaryEncoder(outStream, null) @@ -416,7 +433,8 @@ private byte[] encodeGenericBlob(GenericRecord data) throws IOException { return outStream.toByteArray(); } - private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException { + private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob, EncoderType encoderType) + throws IOException { if (blob == null) { return null; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java b/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java new file mode 100644 index 00000000000..1d3919319af --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestResolver.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.avro.data.TimeConversions; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.FastReaderBuilder; +import org.apache.avro.io.JsonDecoder; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class TestResolver { + + /** + * Test promote action INT -> LONG, with logical type for LONG. + */ + @Test + void resolveTime() { + final Schema writeSchema = Schema.create(Schema.Type.INT); + final Schema readSchema = new TimeConversions.TimeMicrosConversion().getRecommendedSchema(); // LONG + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.Promote.class)); + Assertions.assertEquals(action.type, Resolver.Action.Type.PROMOTE); + Assertions.assertNotNull(action.logicalType); + } + + /** + * Test union type with promote action INT -> LONG, with logical type for LONG. 
+ */ + @Test + void resolveUnion() { + final Schema schema = new TimeConversions.TimeMicrosConversion().getRecommendedSchema(); + + final Schema writeSchema = Schema.createUnion(Schema.create(Schema.Type.INT)); + final Schema readSchema = Schema.createUnion(schema); + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + Assertions.assertEquals(action.type, Resolver.Action.Type.WRITER_UNION); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.WriterUnion.class)); + + Assertions.assertEquals(1, ((Resolver.WriterUnion) action).actions.length); + Resolver.Action innerAction = ((Resolver.WriterUnion) action).actions[0]; + + MatcherAssert.assertThat("Wrong class for action", innerAction, Matchers.instanceOf(Resolver.ReaderUnion.class)); + Resolver.ReaderUnion innerUnionAction = (Resolver.ReaderUnion) innerAction; + Resolver.Action promoteAction = innerUnionAction.actualAction; + Assertions.assertEquals(promoteAction.type, Resolver.Action.Type.PROMOTE); + Assertions.assertNotNull(promoteAction.logicalType); + } + + @Test + void resolveEnum() throws IOException { + final Schema writeSchema = Schema.createEnum("myEnum", "", "n1", Arrays.asList("e1", "e3", "e4")); + final Schema readSchema = Schema.createEnum("myEnum", "", "n1", Arrays.asList("e1", "e2", "e3"), "e2"); + + Resolver.Action action = Resolver.resolve(writeSchema, readSchema); + Assertions.assertNotNull(action); + Assertions.assertEquals(action.type, Resolver.Action.Type.ENUM); + MatcherAssert.assertThat("Wrong class for action", action, Matchers.instanceOf(Resolver.EnumAdjust.class)); + Resolver.EnumAdjust adjust = (Resolver.EnumAdjust) action; + + Assertions.assertArrayEquals(new int[] { 0, 2, 1 }, adjust.adjustments); + Assertions.assertEquals("e1", adjust.values[0].toString()); + Assertions.assertEquals("e3", adjust.values[1].toString()); + Assertions.assertEquals("e2", adjust.values[2].toString()); + + 
FastReaderBuilder reader = FastReaderBuilder.get(); + Schema writeRecord = Schema.createRecord("rec1", "", "", false, + Arrays.asList(new Schema.Field("f1", writeSchema, ""))); + Schema readRecord = Schema.createRecord("rec1", "", "", false, + Arrays.asList(new Schema.Field("f1", readSchema, ""))); + DatumReader datumReader = reader.createDatumReader(writeRecord, readRecord); + JsonDecoder e2 = DecoderFactory.get().jsonDecoder(readRecord, "{ \"f1\" : \"e2\" }"); + Object read = datumReader.read(null, e2); + Assertions.assertNotNull(read); + MatcherAssert.assertThat("", read, Matchers.instanceOf(IndexedRecord.class)); + IndexedRecord result = (IndexedRecord) read; + Assertions.assertEquals("e3", result.get(0).toString()); + } + + @Test + void promoteIsValid() { + Assertions.assertThrows(IllegalArgumentException.class, + () -> Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.INT))); + + Assertions.assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.LONG))); + Assertions.assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.INT))); + + Assertions.assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.INT), Schema.create(Schema.Type.FLOAT))); + Assertions.assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.FLOAT), Schema.create(Schema.Type.INT))); + + Assertions + .assertTrue(Resolver.Promote.isValid(Schema.create(Schema.Type.FLOAT), Schema.create(Schema.Type.DOUBLE))); + Assertions + .assertFalse(Resolver.Promote.isValid(Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.FLOAT))); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 4b2a78bc8cb..9a3b14ee754 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -17,26 +17,56 @@ */ package 
org.apache.avro; -import static org.junit.Assert.*; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; - +import java.util.Set; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.IntNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.TextNode; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData; -import org.junit.Test; +import org.apache.avro.generic.GenericData.EnumSymbol; +import org.apache.avro.generic.GenericData.Record; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import static java.util.Objects.requireNonNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static 
org.junit.jupiter.api.Assertions.fail; public class TestSchema { @Test - public void testSplitSchemaBuild() { + void splitSchemaBuild() { Schema s = SchemaBuilder.record("HandshakeRequest").namespace("org.apache.avro.ipc").fields().name("clientProtocol") .type().optional().stringType().name("meta").type().optional().map().values().bytesType().endRecord(); @@ -52,7 +82,7 @@ public void testSplitSchemaBuild() { } @Test - public void testDefaultRecordWithDuplicateFieldName() { + void defaultRecordWithDuplicateFieldName() { String recordName = "name"; Schema schema = Schema.createRecord(recordName, "doc", "namespace", false); List fields = new ArrayList<>(); @@ -67,7 +97,7 @@ public void testDefaultRecordWithDuplicateFieldName() { } @Test - public void testCreateUnionVarargs() { + void createUnionVarargs() { List types = new ArrayList<>(); types.add(Schema.create(Type.NULL)); types.add(Schema.create(Type.LONG)); @@ -78,33 +108,35 @@ public void testCreateUnionVarargs() { } @Test - public void testRecordWithNullDoc() { + void recordWithNullDoc() { Schema schema = Schema.createRecord("name", null, "namespace", false); String schemaString = schema.toString(); assertNotNull(schemaString); } @Test - public void testRecordWithNullNamespace() { + void recordWithNullNamespace() { Schema schema = Schema.createRecord("name", "doc", null, false); String schemaString = schema.toString(); assertNotNull(schemaString); } @Test - public void testEmptyRecordSchema() { + void emptyRecordSchema() { Schema schema = createDefaultRecord(); String schemaString = schema.toString(); assertNotNull(schemaString); } - @Test(expected = SchemaParseException.class) - public void testParseEmptySchema() { - new Schema.Parser().parse(""); + @Test + void parseEmptySchema() { + assertThrows(SchemaParseException.class, () -> { + new Schema.Parser().parse(""); + }); } @Test - public void testSchemaWithFields() { + void schemaWithFields() { List fields = new ArrayList<>(); fields.add(new 
Field("field_name1", Schema.create(Type.NULL), null, null)); fields.add(new Field("field_name2", Schema.create(Type.INT), null, null)); @@ -115,67 +147,69 @@ public void testSchemaWithFields() { assertEquals(2, schema.getFields().size()); } - @Test(expected = NullPointerException.class) - public void testSchemaWithNullFields() { - Schema.createRecord("name", "doc", "namespace", false, null); + @Test + void schemaWithNullFields() { + assertThrows(NullPointerException.class, () -> { + Schema.createRecord("name", "doc", "namespace", false, null); + }); } @Test - public void testIsUnionOnUnionWithMultipleElements() { + void isUnionOnUnionWithMultipleElements() { Schema schema = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)); assertTrue(schema.isUnion()); } @Test - public void testIsUnionOnUnionWithOneElement() { + void isUnionOnUnionWithOneElement() { Schema schema = Schema.createUnion(Schema.create(Type.LONG)); assertTrue(schema.isUnion()); } @Test - public void testIsUnionOnRecord() { + void isUnionOnRecord() { Schema schema = createDefaultRecord(); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnArray() { + void isUnionOnArray() { Schema schema = Schema.createArray(Schema.create(Type.LONG)); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnEnum() { + void isUnionOnEnum() { Schema schema = Schema.createEnum("name", "doc", "namespace", Collections.singletonList("value")); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnFixed() { + void isUnionOnFixed() { Schema schema = Schema.createFixed("name", "doc", "space", 10); assertFalse(schema.isUnion()); } @Test - public void testIsUnionOnMap() { + void isUnionOnMap() { Schema schema = Schema.createMap(Schema.create(Type.LONG)); assertFalse(schema.isUnion()); } @Test - public void testIsNullableOnUnionWithNull() { + void isNullableOnUnionWithNull() { Schema schema = Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)); 
assertTrue(schema.isNullable()); } @Test - public void testIsNullableOnUnionWithoutNull() { + void isNullableOnUnionWithoutNull() { Schema schema = Schema.createUnion(Schema.create(Type.LONG)); assertFalse(schema.isNullable()); } @Test - public void testIsNullableOnRecord() { + void isNullableOnRecord() { Schema schema = createDefaultRecord(); assertFalse(schema.isNullable()); } @@ -185,7 +219,7 @@ private Schema createDefaultRecord() { } @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void serialization() throws IOException, ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(bos); InputStream jsonSchema = getClass().getResourceAsStream("/SchemaBuilder.avsc")) { @@ -202,7 +236,7 @@ public void testSerialization() throws IOException, ClassNotFoundException { } @Test - public void testReconstructSchemaStringWithoutInlinedChildReference() { + void reconstructSchemaStringWithoutInlinedChildReference() { String child = "{\"type\":\"record\"," + "\"name\":\"Child\"," + "\"namespace\":\"org.apache.avro.nested\"," + "\"fields\":" + "[{\"name\":\"childField\",\"type\":\"string\"}]}"; String parent = "{\"type\":\"record\"," + "\"name\":\"Parent\"," + "\"namespace\":\"org.apache.avro.nested\"," @@ -217,7 +251,7 @@ public void testReconstructSchemaStringWithoutInlinedChildReference() { } @Test - public void testIntDefaultValue() { + void intDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1, field.defaultVal()); @@ -235,7 +269,7 @@ public void testIntDefaultValue() { } @Test - public void testValidLongAsIntDefaultValue() { + void validLongAsIntDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1, field.defaultVal()); @@ -252,18 +286,22 @@ 
public void testValidLongAsIntDefaultValue() { assertEquals(Integer.MAX_VALUE, GenericData.get().getDefaultValue(field)); } - @Test(expected = AvroTypeException.class) - public void testInvalidLongAsIntDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", Integer.MAX_VALUE + 1L); + @Test + void invalidLongAsIntDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", Integer.MAX_VALUE + 1L); + }); } - @Test(expected = AvroTypeException.class) - public void testDoubleAsIntDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1.0); + @Test + void doubleAsIntDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.INT), "doc", 1.0); + }); } @Test - public void testLongDefaultValue() { + void longDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1L, field.defaultVal()); @@ -281,20 +319,22 @@ public void testLongDefaultValue() { } @Test - public void testIntAsLongDefaultValue() { + void intAsLongDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1L, field.defaultVal()); assertEquals(1L, GenericData.get().getDefaultValue(field)); } - @Test(expected = AvroTypeException.class) - public void testDoubleAsLongDefaultValue() { - new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1.0); + @Test + void doubleAsLongDefaultValue() { + assertThrows(AvroTypeException.class, () -> { + new Schema.Field("myField", Schema.create(Schema.Type.LONG), "doc", 1.0); + }); } @Test - public void testDoubleDefaultValue() { + void doubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1.0); 
assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -302,7 +342,7 @@ public void testDoubleDefaultValue() { } @Test - public void testIntAsDoubleDefaultValue() { + void intAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -310,7 +350,7 @@ public void testIntAsDoubleDefaultValue() { } @Test - public void testLongAsDoubleDefaultValue() { + void longAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -318,7 +358,7 @@ public void testLongAsDoubleDefaultValue() { } @Test - public void testFloatAsDoubleDefaultValue() { + void floatAsDoubleDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.DOUBLE), "doc", 1.0f); assertTrue(field.hasDefaultValue()); assertEquals(1.0d, field.defaultVal()); @@ -326,7 +366,7 @@ public void testFloatAsDoubleDefaultValue() { } @Test - public void testFloatDefaultValue() { + void floatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1.0f); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); @@ -334,7 +374,7 @@ public void testFloatDefaultValue() { } @Test - public void testIntAsFloatDefaultValue() { + void intAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); @@ -342,7 +382,7 @@ public void testIntAsFloatDefaultValue() { } @Test - public void testLongAsFloatDefaultValue() { + void longAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1L); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, 
field.defaultVal()); @@ -350,15 +390,286 @@ public void testLongAsFloatDefaultValue() { } @Test - public void testDoubleAsFloatDefaultValue() { + void doubleAsFloatDefaultValue() { Schema.Field field = new Schema.Field("myField", Schema.create(Schema.Type.FLOAT), "doc", 1.0d); assertTrue(field.hasDefaultValue()); assertEquals(1.0f, field.defaultVal()); assertEquals(1.0f, GenericData.get().getDefaultValue(field)); } - @Test(expected = SchemaParseException.class) - public void testEnumSymbolAsNull() { - Schema.createEnum("myField", "doc", "namespace", Collections.singletonList(null)); + @Test + void enumSymbolAsNull() { + assertThrows(SchemaParseException.class, () -> { + Schema.createEnum("myField", "doc", "namespace", Collections.singletonList(null)); + }); + } + + @Test + void schemaFieldWithoutSchema() { + assertThrows(NullPointerException.class, () -> { + new Schema.Field("f", null); + }); + } + + @Test + void parseRecordWithNameAsType() { + final String schemaString = "{\n \"type\" : \"record\",\n \"name\" : \"ns.int\",\n" + + " \"fields\" : [ \n {\"name\" : \"value\", \"type\" : \"int\"}, \n" + + " {\"name\" : \"next\", \"type\" : [ \"null\", \"ns.int\" ]}\n ]\n}"; + final Schema schema = new Schema.Parser().parse(schemaString); + String toString = schema.toString(true); + + final Schema schema2 = new Schema.Parser().parse(toString); + assertEquals(schema, schema2); + } + + @Test + void qualifiedName() { + Arrays.stream(Type.values()).forEach((Type t) -> { + final Schema.Name name = new Schema.Name(t.getName(), "space"); + assertEquals("space." + t.getName(), name.getQualified("space")); + assertEquals("space." 
+ t.getName(), name.getQualified("otherdefault")); + }); + final Schema.Name name = new Schema.Name("name", "space"); + assertEquals("name", name.getQualified("space")); + assertEquals("space.name", name.getQualified("otherdefault")); + + final Schema.Name nameInt = new Schema.Name("Int", "space"); + assertEquals("Int", nameInt.getQualified("space")); + } + + @Test + void validValue() { + // Valid null value + final Schema nullSchema = Schema.create(Type.NULL); + assertTrue(nullSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + // Valid int value + final Schema intSchema = Schema.create(Type.INT); + assertTrue(intSchema.isValidDefault(JsonNodeFactory.instance.numberNode(12))); + + // Valid Text value + final Schema strSchema = Schema.create(Type.STRING); + assertTrue(strSchema.isValidDefault(new TextNode("textNode"))); + + // Valid Array value + final Schema arraySchema = Schema.createArray(Schema.create(Type.STRING)); + final ArrayNode arrayValue = JsonNodeFactory.instance.arrayNode(); + assertTrue(arraySchema.isValidDefault(arrayValue)); // empty array + + arrayValue.add("Hello"); + arrayValue.add("World"); + assertTrue(arraySchema.isValidDefault(arrayValue)); + + arrayValue.add(5); + assertFalse(arraySchema.isValidDefault(arrayValue)); + + // Valid Union type + final Schema unionSchema = Schema.createUnion(strSchema, intSchema, nullSchema); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertTrue(unionSchema.isValidDefault(new IntNode(23))); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + assertFalse(unionSchema.isValidDefault(arrayValue)); + + // Array of union + final Schema arrayUnion = Schema.createArray(unionSchema); + final ArrayNode arrayUnionValue = JsonNodeFactory.instance.arrayNode(); + arrayUnionValue.add("Hello"); + arrayUnionValue.add(NullNode.getInstance()); + assertTrue(arrayUnion.isValidDefault(arrayUnionValue)); + + // Union String, bytes + final Schema 
unionStrBytes = Schema.createUnion(strSchema, Schema.create(Type.BYTES)); + assertTrue(unionStrBytes.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertFalse(unionStrBytes.isValidDefault(JsonNodeFactory.instance.numberNode(123))); + } + + @Test + void enumLateDefine() { + String schemaString = "{\n" + " \"type\":\"record\",\n" + " \"name\": \"Main\",\n" + " \"fields\":[\n" + + " {\n" + " \"name\":\"f1\",\n" + " \"type\":\"Sub\"\n" + " },\n" + + " {\n" + " \"name\":\"f2\",\n" + " \"type\":{\n" + + " \"type\":\"enum\",\n" + " \"name\":\"Sub\",\n" + + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}"; + + final Schema schema = new Schema.Parser().parse(schemaString); + Schema f1Schema = schema.getField("f1").schema(); + Schema f2Schema = schema.getField("f2").schema(); + assertSame(f1Schema, f2Schema); + assertEquals(Type.ENUM, f1Schema.getType()); + String stringSchema = schema.toString(); + int definitionIndex = stringSchema.indexOf("\"symbols\":[\"OPEN\",\"CLOSE\"]"); + int usageIndex = stringSchema.indexOf("\"type\":\"Sub\""); + assertTrue(definitionIndex < usageIndex, "usage is before definition"); + } + + @Test + public void testRecordInArray() { + String schemaString = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n" + + " {\n" + " \"name\": \"value\",\n" + " \"type\": {\n" + " \"type\": \"record\",\n" + + " \"name\": \"Container\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"Optional\",\n" + " \"type\": {\n" + " \"type\": \"array\",\n" + + " \"items\": [\n" + " {\n" + " \"type\": \"record\",\n" + + " \"name\": \"optional_field_0\",\n" + " \"namespace\": \"\",\n" + + " \"doc\": \"\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"optional_field_1\",\n" + " \"type\": \"long\",\n" + + " \"doc\": \"\",\n" + " \"default\": 0\n" + + " }\n" + " ]\n" + " }\n" + " ]\n" + + " }\n" + " }\n" + " ]\n" + " }\n" + " }\n" + " ]\n" + "}"; + final Schema schema = new 
Schema.Parser().parse(schemaString); + assertNotNull(schema); + } + + /* + * @Test public void testRec() { String schemaString = + * "[{\"name\":\"employees\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Pair1081149ea1d6eb80\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":{\"type\":\"record\",\"name\":\"EmployeeInfo2\",\"fields\":[{\"name\":\"companyMap\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"PairIntegerString\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":\"string\"}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}]}}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null}]"; + * final Schema schema = new Schema.Parser().parse(schemaString); + * Assert.assertNotNull(schema); + * + * } + */ + + @Test + public void testUnionFieldType() { + String schemaString = "{\"type\": \"record\", \"name\": \"Lisp\", \"fields\": [{\"name\":\"value\", \"type\":[\"null\", \"string\",{\"type\": \"record\", \"name\": \"Cons\", \"fields\": [{\"name\":\"car\", \"type\":\"Lisp\"},{\"name\":\"cdr\", \"type\":\"Lisp\"}]}]}]}"; + final Schema schema = new Schema.Parser().parse(schemaString); + Field value = schema.getField("value"); + Schema fieldSchema = value.schema(); + Schema subSchema = fieldSchema.getTypes().stream().filter((Schema s) -> s.getType() == Type.RECORD).findFirst() + .get(); + assertTrue(subSchema.hasFields()); + } + + @Test + public void parseAliases() throws JsonProcessingException { + String s1 = "{ \"aliases\" : [\"a1\", \"b1\"]}"; + ObjectMapper mapper = new ObjectMapper(); + JsonNode j1 = mapper.readTree(s1); + Set aliases = Schema.parseAliases(j1); + assertEquals(2, aliases.size()); + assertTrue(aliases.contains("a1")); + assertTrue(aliases.contains("b1")); + + String s2 = "{ \"aliases\" : {\"a1\": \"b1\"}}"; + JsonNode j2 = mapper.readTree(s2); 
+ + SchemaParseException ex = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j2)); + assertTrue(ex.getMessage().contains("aliases not an array")); + + String s3 = "{ \"aliases\" : [11, \"b1\"]}"; + JsonNode j3 = mapper.readTree(s3); + SchemaParseException ex3 = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j3)); + assertTrue(ex3.getMessage().contains("alias not a string")); + } + + @Test + void testContentAfterAvsc() { + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); + parser.setValidateDefaults(true); + assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS")); + } + + @Test + void testContentAfterAvscInInputStream() throws Exception { + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); + parser.setValidateDefaults(true); + String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS"; + ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8)); + Schema schema = parser.parse(is); + assertNotNull(schema); + } + + @Test + void testContentAfterAvscInFile() throws Exception { + File avscFile = Files.createTempFile("testContentAfterAvscInFile", null).toFile(); + try (FileWriter writer = new FileWriter(avscFile)) { + writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS"); + writer.flush(); + } + + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); + parser.setValidateDefaults(true); + assertThrows(SchemaParseException.class, () -> parser.parse(avscFile)); + } + + @Test + void testParseMultipleFile() throws IOException { + URL directory = requireNonNull(Thread.currentThread().getContextClassLoader().getResource("multipleFile")); + File f1 = new File(directory.getPath(), "ApplicationEvent.avsc"); + File f2 = new File(directory.getPath(), "DocumentInfo.avsc"); + File f3 = new File(directory.getPath(), "MyResponse.avsc"); + Assertions.assertTrue(f1.exists(), "File not exist for test " 
+ f1.getPath()); + Assertions.assertTrue(f2.exists(), "File not exist for test " + f2.getPath()); + Assertions.assertTrue(f3.exists(), "File not exist for test " + f3.getPath()); + SchemaParser parser = new SchemaParser(); + parser.parse(f1); + parser.parse(f2); + parser.parse(f3); + final List schemas = parser.getParsedNamedSchemas(); + Assertions.assertEquals(3, schemas.size()); + Schema schemaAppEvent = schemas.get(0); + Schema schemaDocInfo = schemas.get(1); + Schema schemaResponse = schemas.get(2); + Assertions.assertNotNull(schemaAppEvent); + Assertions.assertEquals(3, schemaAppEvent.getFields().size()); + Field documents = schemaAppEvent.getField("documents"); + Schema docSchema = documents.schema().getTypes().get(1).getElementType(); + Assertions.assertEquals(docSchema, schemaDocInfo); + Assertions.assertNotNull(schemaDocInfo); + Assertions.assertNotNull(schemaResponse); + } + + @Test + void add_types() { + String schemaRecord2 = "{\"type\":\"record\", \"name\":\"record2\", \"fields\": [" + + " {\"name\":\"f1\", \"type\":\"record1\" }" + "]}"; // register schema1 in schema. + Schema schemaRecord1 = Schema.createRecord("record1", "doc", "", false); + schemaRecord1.setFields(Collections.singletonList(new Field("name", Schema.create(Type.STRING)))); + Schema.Parser parser = new Schema.Parser().addTypes(Collections.singleton(schemaRecord1)); + + // parse schema for record2 that contains field for schema1. + final Schema schema = parser.parse(schemaRecord2); + final Field f1 = schema.getField("f1"); + assertNotNull(f1); + assertEquals(schemaRecord1, f1.schema()); + } + + /** + * Tests the behavior of Schema.Parser if its validation option is set to + * `null`. This is then set to the default option `NO_VALIDATION`. 
+ */ + @Test + void testParserNullValidate() { + new Schema.Parser((NameValidator) null).parse("{\"type\":\"record\",\"name\":\"\",\"fields\":[]}"); // Empty name + } + + /** + * Tests when a user tries to write a record with an invalid enum symbol value + * that the exception returned is more descriptive than just a NPE or an + * incorrect mention of an unspecified non-null field. + */ + @Test + void enumWriteUnknownField() throws IOException { + Schema schema = Schema.createRecord("record1", "doc", "", false); + String goodValue = "HELLO"; + Schema enumSchema = Schema.createEnum("enum1", "doc", "", Arrays.asList(goodValue)); + Field field1 = new Field("field1", enumSchema); + schema.setFields(Collections.singletonList(field1)); + + GenericDatumWriter datumWriter = new GenericDatumWriter<>(schema); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null); + GenericRecordBuilder builder = new GenericRecordBuilder(schema); + String badValue = "GOODBYE"; + builder.set(field1, new EnumSymbol(enumSchema, badValue)); + Record record = builder.build(); + try { + datumWriter.write(record, encoder); + fail("should have thrown"); + } catch (AvroTypeException ate) { + assertTrue(ate.getMessage().contains(goodValue)); + assertTrue(ate.getMessage().contains(badValue)); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java index 77ee588b1e3..fdb18d0f87c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java @@ -17,10 +17,13 @@ */ package org.apache.avro; +import static org.junit.jupiter.api.Assertions.*; + import com.fasterxml.jackson.databind.node.NullNode; import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import 
java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -36,93 +39,93 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecordBuilder; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestSchemaBuilder { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; @Test - public void testRecord() { + void record() { Schema schema = SchemaBuilder.record("myrecord").namespace("org.example").aliases("oldrecord").fields().name("f0") .aliases("f0alias").type().stringType().noDefault().name("f1").doc("This is f1").type().longType().noDefault() .name("f2").type().nullable().booleanType().booleanDefault(true).name("f3").type().unionOf().nullType().and() .booleanType().endUnion().nullDefault().endRecord(); - Assert.assertEquals("myrecord", schema.getName()); - Assert.assertEquals("org.example", schema.getNamespace()); - Assert.assertEquals("org.example.oldrecord", schema.getAliases().iterator().next()); - Assert.assertFalse(schema.isError()); + assertEquals("myrecord", schema.getName()); + assertEquals("org.example", schema.getNamespace()); + assertEquals("org.example.oldrecord", schema.getAliases().iterator().next()); + assertFalse(schema.isError()); List fields = schema.getFields(); - Assert.assertEquals(4, fields.size()); - Assert.assertEquals(new Schema.Field("f0", Schema.create(Schema.Type.STRING)), fields.get(0)); - Assert.assertTrue(fields.get(0).aliases().contains("f0alias")); - Assert.assertEquals(new Schema.Field("f1", Schema.create(Schema.Type.LONG), "This is f1"), fields.get(1)); + assertEquals(4, fields.size()); + assertEquals(new Schema.Field("f0", 
Schema.create(Schema.Type.STRING)), fields.get(0)); + assertTrue(fields.get(0).aliases().contains("f0alias")); + assertEquals(new Schema.Field("f1", Schema.create(Schema.Type.LONG), "This is f1"), fields.get(1)); List types = new ArrayList<>(); types.add(Schema.create(Schema.Type.BOOLEAN)); types.add(Schema.create(Schema.Type.NULL)); Schema optional = Schema.createUnion(types); - Assert.assertEquals(new Schema.Field("f2", optional, null, true), fields.get(2)); + assertEquals(new Schema.Field("f2", optional, null, true), fields.get(2)); List types2 = new ArrayList<>(); types2.add(Schema.create(Schema.Type.NULL)); types2.add(Schema.create(Schema.Type.BOOLEAN)); Schema optional2 = Schema.createUnion(types2); - Assert.assertNotEquals(new Schema.Field("f3", optional2, null, (Object) null), fields.get(3)); - Assert.assertEquals(new Schema.Field("f3", optional2, null, Schema.Field.NULL_DEFAULT_VALUE), fields.get(3)); + assertNotEquals(new Schema.Field("f3", optional2, null, (Object) null), fields.get(3)); + assertEquals(new Schema.Field("f3", optional2, null, Schema.Field.NULL_DEFAULT_VALUE), fields.get(3)); } @Test - public void testDoc() { + void doc() { Schema s = SchemaBuilder.fixed("myfixed").doc("mydoc").size(1); - Assert.assertEquals("mydoc", s.getDoc()); + assertEquals("mydoc", s.getDoc()); } @Test - public void testProps() { + void props() { Schema s = SchemaBuilder.builder().intBuilder().prop("p1", "v1").prop("p2", "v2").prop("p2", "v2real") // overwrite .endInt(); int size = s.getObjectProps().size(); - Assert.assertEquals(2, size); - Assert.assertEquals("v1", s.getProp("p1")); - Assert.assertEquals("v2real", s.getProp("p2")); + assertEquals(2, size); + assertEquals("v1", s.getProp("p1")); + assertEquals("v2real", s.getProp("p2")); } @Test - public void testObjectProps() { + void objectProps() { Schema s = SchemaBuilder.builder().intBuilder().prop("booleanProp", true).prop("intProp", Integer.MAX_VALUE) .prop("longProp", Long.MAX_VALUE).prop("floatProp", 
1.0f).prop("doubleProp", Double.MAX_VALUE) .prop("byteProp", new byte[] { 0x41, 0x42, 0x43 }).prop("stringProp", "abc").endInt(); // object properties - Assert.assertEquals(7, s.getObjectProps().size()); - Assert.assertTrue(s.getObjectProp("booleanProp") instanceof Boolean); - Assert.assertEquals(true, s.getObjectProp("booleanProp")); - Assert.assertTrue(s.getObjectProp("intProp") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, s.getObjectProp("intProp")); - Assert.assertTrue(s.getObjectProp("intProp") instanceof Integer); - Assert.assertTrue(s.getObjectProp("longProp") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, s.getObjectProp("longProp")); - Assert.assertTrue(s.getObjectProp("floatProp") instanceof Double); + assertEquals(7, s.getObjectProps().size()); + assertTrue(s.getObjectProp("booleanProp") instanceof Boolean); + assertEquals(true, s.getObjectProp("booleanProp")); + assertTrue(s.getObjectProp("intProp") instanceof Integer); + assertEquals(Integer.MAX_VALUE, s.getObjectProp("intProp")); + assertTrue(s.getObjectProp("intProp") instanceof Integer); + assertTrue(s.getObjectProp("longProp") instanceof Long); + assertEquals(Long.MAX_VALUE, s.getObjectProp("longProp")); + assertTrue(s.getObjectProp("floatProp") instanceof Float); // float converts to double - Assert.assertEquals(1.0d, s.getObjectProp("floatProp")); - Assert.assertTrue(s.getObjectProp("doubleProp") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, s.getObjectProp("doubleProp")); + assertEquals(1.0f, s.getObjectProp("floatProp")); + assertTrue(s.getObjectProp("doubleProp") instanceof Double); + assertEquals(Double.MAX_VALUE, s.getObjectProp("doubleProp")); // byte[] converts to string - Assert.assertTrue(s.getObjectProp("byteProp") instanceof String); - Assert.assertEquals("ABC", s.getObjectProp("byteProp")); - Assert.assertTrue(s.getObjectProp("stringProp") instanceof String); - Assert.assertEquals("abc", s.getObjectProp("stringProp")); + 
assertTrue(s.getObjectProp("byteProp") instanceof byte[]); + assertArrayEquals(new byte[] { 0x41, 0x42, 0x43 }, (byte[]) s.getObjectProp("byteProp")); + assertTrue(s.getObjectProp("stringProp") instanceof String); + assertEquals("abc", s.getObjectProp("stringProp")); } @Test - public void testFieldObjectProps() { + void fieldObjectProps() { Schema s = SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("booleanProp", true) .prop("intProp", Integer.MAX_VALUE).prop("longProp", Long.MAX_VALUE).prop("floatProp", 1.0f) .prop("doubleProp", Double.MAX_VALUE).prop("byteProp", new byte[] { 0x41, 0x42, 0x43 }) @@ -131,28 +134,31 @@ public void testFieldObjectProps() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertEquals(7, f.getObjectProps().size()); - Assert.assertTrue(f.getObjectProp("booleanProp") instanceof Boolean); - Assert.assertEquals(true, f.getObjectProp("booleanProp")); - Assert.assertTrue(f.getObjectProp("intProp") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, f.getObjectProp("intProp")); - Assert.assertTrue(f.getObjectProp("intProp") instanceof Integer); - Assert.assertTrue(f.getObjectProp("longProp") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, f.getObjectProp("longProp")); - Assert.assertTrue(f.getObjectProp("floatProp") instanceof Double); + assertEquals(7, f.getObjectProps().size()); + assertTrue(f.getObjectProp("booleanProp") instanceof Boolean); + assertEquals(true, f.getObjectProp("booleanProp")); + assertTrue(f.getObjectProp("intProp") instanceof Integer); + assertEquals(Integer.MAX_VALUE, f.getObjectProp("intProp")); + assertTrue(f.getObjectProp("intProp") instanceof Integer); + assertTrue(f.getObjectProp("longProp") instanceof Long); + assertEquals(Long.MAX_VALUE, f.getObjectProp("longProp")); + assertTrue(f.getObjectProp("floatProp") instanceof Float); // float converts to double - Assert.assertEquals(1.0d, f.getObjectProp("floatProp")); - 
Assert.assertTrue(f.getObjectProp("doubleProp") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, f.getObjectProp("doubleProp")); + assertEquals(1.0f, f.getObjectProp("floatProp")); + assertTrue(f.getObjectProp("doubleProp") instanceof Double); + assertEquals(Double.MAX_VALUE, f.getObjectProp("doubleProp")); // byte[] converts to string - Assert.assertTrue(f.getObjectProp("byteProp") instanceof String); - Assert.assertEquals("ABC", f.getObjectProp("byteProp")); - Assert.assertTrue(f.getObjectProp("stringProp") instanceof String); - Assert.assertEquals("abc", f.getObjectProp("stringProp")); + assertTrue(f.getObjectProp("byteProp") instanceof byte[]); + assertArrayEquals(new byte[] { 0x41, 0x42, 0x43 }, (byte[]) f.getObjectProp("byteProp")); + assertTrue(f.getObjectProp("stringProp") instanceof String); + assertEquals("abc", f.getObjectProp("stringProp")); + + assertEquals("abc", f.getObjectProp("stringProp", "default")); + assertEquals("default", f.getObjectProp("unknwon", "default")); } @Test - public void testArrayObjectProp() { + void arrayObjectProp() { List values = new ArrayList<>(); values.add(true); values.add(Integer.MAX_VALUE); @@ -165,26 +171,26 @@ public void testArrayObjectProp() { Schema s = SchemaBuilder.builder().intBuilder().prop("arrayProp", values).endInt(); // object properties - Assert.assertEquals(1, s.getObjectProps().size()); + assertEquals(1, s.getObjectProps().size()); - Assert.assertTrue(s.getObjectProp("arrayProp") instanceof Collection); + assertTrue(s.getObjectProp("arrayProp") instanceof Collection); @SuppressWarnings("unchecked") Collection valueCollection = (Collection) s.getObjectProp("arrayProp"); Iterator iter = valueCollection.iterator(); - Assert.assertEquals(7, valueCollection.size()); - Assert.assertEquals(true, iter.next()); - Assert.assertEquals(Integer.MAX_VALUE, iter.next()); - Assert.assertEquals(Long.MAX_VALUE, iter.next()); - // float converts to double - Assert.assertEquals(1.0d, iter.next()); - 
Assert.assertEquals(Double.MAX_VALUE, iter.next()); - // byte[] converts to string - Assert.assertEquals("ABC", iter.next()); - Assert.assertEquals("abc", iter.next()); + assertEquals(7, valueCollection.size()); + assertEquals(true, iter.next()); + assertEquals(Integer.MAX_VALUE, iter.next()); + assertEquals(Long.MAX_VALUE, iter.next()); + + assertEquals(1.0f, iter.next()); + assertEquals(Double.MAX_VALUE, iter.next()); + + assertArrayEquals(new byte[] { 0x41, 0x42, 0x43 }, (byte[]) iter.next()); + assertEquals("abc", iter.next()); } @Test - public void testFieldArrayObjectProp() { + void fieldArrayObjectProp() { List values = new ArrayList<>(); values.add(true); values.add(Integer.MAX_VALUE); @@ -200,26 +206,26 @@ public void testFieldArrayObjectProp() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertEquals(1, f.getObjectProps().size()); + assertEquals(1, f.getObjectProps().size()); - Assert.assertTrue(f.getObjectProp("arrayProp") instanceof Collection); + assertTrue(f.getObjectProp("arrayProp") instanceof Collection); @SuppressWarnings("unchecked") Collection valueCollection = (Collection) f.getObjectProp("arrayProp"); Iterator iter = valueCollection.iterator(); - Assert.assertEquals(7, valueCollection.size()); - Assert.assertEquals(true, iter.next()); - Assert.assertEquals(Integer.MAX_VALUE, iter.next()); - Assert.assertEquals(Long.MAX_VALUE, iter.next()); - // float converts to double - Assert.assertEquals(1.0d, iter.next()); - Assert.assertEquals(Double.MAX_VALUE, iter.next()); - // byte[] converts to string - Assert.assertEquals("ABC", iter.next()); - Assert.assertEquals("abc", iter.next()); + assertEquals(7, valueCollection.size()); + assertEquals(true, iter.next()); + assertEquals(Integer.MAX_VALUE, iter.next()); + assertEquals(Long.MAX_VALUE, iter.next()); + + assertEquals(1.0f, iter.next()); + assertEquals(Double.MAX_VALUE, iter.next()); + + assertArrayEquals(new byte[] { 0x41, 0x42, 0x43 }, (byte[]) iter.next()); + 
assertEquals("abc", iter.next()); } @Test - public void testMapObjectProp() { + void mapObjectProp() { Map values = new HashMap<>(); values.put("booleanKey", true); values.put("intKey", Integer.MAX_VALUE); @@ -232,31 +238,31 @@ public void testMapObjectProp() { Schema s = SchemaBuilder.builder().intBuilder().prop("mapProp", values).endInt(); // object properties - Assert.assertTrue(s.getObjectProp("mapProp") instanceof Map); + assertTrue(s.getObjectProp("mapProp") instanceof Map); @SuppressWarnings("unchecked") Map valueMap = (Map) s.getObjectProp("mapProp"); - Assert.assertEquals(values.size(), valueMap.size()); - - Assert.assertTrue(valueMap.get("booleanKey") instanceof Boolean); - Assert.assertEquals(true, valueMap.get("booleanKey")); - Assert.assertTrue(valueMap.get("intKey") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); - Assert.assertTrue(valueMap.get("longKey") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); - // float converts to double - Assert.assertTrue(valueMap.get("floatKey") instanceof Double); - Assert.assertEquals(1.0d, valueMap.get("floatKey")); - Assert.assertTrue(valueMap.get("doubleKey") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); - // byte[] converts to string - Assert.assertTrue(valueMap.get("byteKey") instanceof String); - Assert.assertEquals("ABC", valueMap.get("byteKey")); - Assert.assertTrue(valueMap.get("stringKey") instanceof String); - Assert.assertEquals("abc", valueMap.get("stringKey")); + assertEquals(values.size(), valueMap.size()); + + assertTrue(valueMap.get("booleanKey") instanceof Boolean); + assertEquals(true, valueMap.get("booleanKey")); + assertTrue(valueMap.get("intKey") instanceof Integer); + assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); + assertTrue(valueMap.get("longKey") instanceof Long); + assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); + + assertTrue(valueMap.get("floatKey") 
instanceof Float); + assertEquals(1.0f, valueMap.get("floatKey")); + assertTrue(valueMap.get("doubleKey") instanceof Double); + assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); + + assertTrue(valueMap.get("byteKey") instanceof byte[]); + assertArrayEquals("ABC".getBytes(StandardCharsets.UTF_8), (byte[]) valueMap.get("byteKey")); + assertTrue(valueMap.get("stringKey") instanceof String); + assertEquals("abc", valueMap.get("stringKey")); } @Test - public void testFieldMapObjectProp() { + void fieldMapObjectProp() { Map values = new HashMap<>(); values.put("booleanKey", true); values.put("intKey", Integer.MAX_VALUE); @@ -272,42 +278,46 @@ public void testFieldMapObjectProp() { Schema.Field f = s.getField("myField"); // object properties - Assert.assertTrue(f.getObjectProp("mapProp") instanceof Map); + assertTrue(f.getObjectProp("mapProp") instanceof Map); @SuppressWarnings("unchecked") Map valueMap = (Map) f.getObjectProp("mapProp"); - Assert.assertEquals(values.size(), valueMap.size()); - - Assert.assertTrue(valueMap.get("booleanKey") instanceof Boolean); - Assert.assertEquals(true, valueMap.get("booleanKey")); - Assert.assertTrue(valueMap.get("intKey") instanceof Integer); - Assert.assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); - Assert.assertTrue(valueMap.get("longKey") instanceof Long); - Assert.assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); - // float converts to double - Assert.assertTrue(valueMap.get("floatKey") instanceof Double); - Assert.assertEquals(1.0d, valueMap.get("floatKey")); - Assert.assertTrue(valueMap.get("doubleKey") instanceof Double); - Assert.assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); - // byte[] converts to string - Assert.assertTrue(valueMap.get("byteKey") instanceof String); - Assert.assertEquals("ABC", valueMap.get("byteKey")); - Assert.assertTrue(valueMap.get("stringKey") instanceof String); - Assert.assertEquals("abc", valueMap.get("stringKey")); + assertEquals(values.size(), valueMap.size()); + 
+ assertTrue(valueMap.get("booleanKey") instanceof Boolean); + assertEquals(true, valueMap.get("booleanKey")); + assertTrue(valueMap.get("intKey") instanceof Integer); + assertEquals(Integer.MAX_VALUE, valueMap.get("intKey")); + assertTrue(valueMap.get("longKey") instanceof Long); + assertEquals(Long.MAX_VALUE, valueMap.get("longKey")); + + assertTrue(valueMap.get("floatKey") instanceof Float); + assertEquals(1.0f, valueMap.get("floatKey")); + assertTrue(valueMap.get("doubleKey") instanceof Double); + assertEquals(Double.MAX_VALUE, valueMap.get("doubleKey")); + + assertTrue(valueMap.get("byteKey") instanceof byte[]); + assertEquals("ABC", new String((byte[]) valueMap.get("byteKey"))); + assertTrue(valueMap.get("stringKey") instanceof String); + assertEquals("abc", valueMap.get("stringKey")); } - @Test(expected = AvroRuntimeException.class) - public void testNullObjectProp() { - SchemaBuilder.builder().intBuilder().prop("nullProp", (Object) null).endInt(); + @Test + void nullObjectProp() { + assertThrows(AvroRuntimeException.class, () -> { + SchemaBuilder.builder().intBuilder().prop("nullProp", (Object) null).endInt(); + }); } - @Test(expected = AvroRuntimeException.class) - public void testFieldNullObjectProp() { - SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("nullProp", (Object) null).type().intType() - .noDefault().endRecord(); + @Test + void fieldNullObjectProp() { + assertThrows(AvroRuntimeException.class, () -> { + SchemaBuilder.builder().record("MyRecord").fields().name("myField").prop("nullProp", (Object) null).type() + .intType().noDefault().endRecord(); + }); } @Test - public void testNamespaces() { + void namespaces() { Schema s1 = SchemaBuilder.record("myrecord").namespace("org.example").fields().name("myint").type().intType() .noDefault().endRecord(); Schema s2 = SchemaBuilder.record("org.example.myrecord").fields().name("myint").type().intType().noDefault() @@ -317,105 +327,107 @@ public void testNamespaces() { Schema s4 = 
SchemaBuilder.builder("org.example").record("myrecord").fields().name("myint").type().intType() .noDefault().endRecord(); - Assert.assertEquals("myrecord", s1.getName()); - Assert.assertEquals("myrecord", s2.getName()); - Assert.assertEquals("myrecord", s3.getName()); - Assert.assertEquals("myrecord", s4.getName()); + assertEquals("myrecord", s1.getName()); + assertEquals("myrecord", s2.getName()); + assertEquals("myrecord", s3.getName()); + assertEquals("myrecord", s4.getName()); - Assert.assertEquals("org.example", s1.getNamespace()); - Assert.assertEquals("org.example", s2.getNamespace()); - Assert.assertEquals("org.example", s3.getNamespace()); // namespace call is ignored - Assert.assertEquals("org.example", s4.getNamespace()); + assertEquals("org.example", s1.getNamespace()); + assertEquals("org.example", s2.getNamespace()); + assertEquals("org.example", s3.getNamespace()); // namespace call is ignored + assertEquals("org.example", s4.getNamespace()); - Assert.assertEquals("org.example.myrecord", s1.getFullName()); - Assert.assertEquals("org.example.myrecord", s2.getFullName()); - Assert.assertEquals("org.example.myrecord", s3.getFullName()); - Assert.assertEquals("org.example.myrecord", s4.getFullName()); + assertEquals("org.example.myrecord", s1.getFullName()); + assertEquals("org.example.myrecord", s2.getFullName()); + assertEquals("org.example.myrecord", s3.getFullName()); + assertEquals("org.example.myrecord", s4.getFullName()); } - @Test(expected = NullPointerException.class) - public void testMissingRecordName() { - SchemaBuilder.record(null).fields() // null name - .name("f0").type().stringType().noDefault().endRecord(); + @Test + void missingRecordName() { + assertThrows(NullPointerException.class, () -> { + SchemaBuilder.record(null).fields() // null name + .name("f0").type().stringType().noDefault().endRecord(); + }); } @Test - public void testBoolean() { + void testBoolean() { Schema.Type type = Schema.Type.BOOLEAN; Schema simple = 
SchemaBuilder.builder().booleanType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().booleanBuilder().prop("p", "v").endBoolean(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testInt() { + void testInt() { Schema.Type type = Schema.Type.INT; Schema simple = SchemaBuilder.builder().intType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().intBuilder().prop("p", "v").endInt(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testLong() { + void testLong() { Schema.Type type = Schema.Type.LONG; Schema simple = SchemaBuilder.builder().longType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().longBuilder().prop("p", "v").endLong(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testFloat() { + void testFloat() { Schema.Type type = Schema.Type.FLOAT; Schema simple = SchemaBuilder.builder().floatType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().floatBuilder().prop("p", "v").endFloat(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testDuble() { + void duble() { Schema.Type type = Schema.Type.DOUBLE; Schema simple = SchemaBuilder.builder().doubleType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().doubleBuilder().prop("p", "v").endDouble(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testString() { + void string() { Schema.Type type = Schema.Type.STRING; Schema simple = SchemaBuilder.builder().stringType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().stringBuilder().prop("p", "v").endString(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void 
testBytes() { + void bytes() { Schema.Type type = Schema.Type.BYTES; Schema simple = SchemaBuilder.builder().bytesType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().bytesBuilder().prop("p", "v").endBytes(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } @Test - public void testNull() { + void testNull() { Schema.Type type = Schema.Type.NULL; Schema simple = SchemaBuilder.builder().nullType(); Schema expected = primitive(type, simple); Schema built1 = SchemaBuilder.builder().nullBuilder().prop("p", "v").endNull(); - Assert.assertEquals(expected, built1); + assertEquals(expected, built1); } private Schema primitive(Schema.Type type, Schema bare) { // test creation of bare schema by name Schema bareByName = SchemaBuilder.builder().type(type.getName()); - Assert.assertEquals(Schema.create(type), bareByName); - Assert.assertEquals(bareByName, bare); + assertEquals(Schema.create(type), bareByName); + assertEquals(bareByName, bare); // return a schema with custom prop set Schema p = Schema.create(type); p.addProp("p", "v"); @@ -434,112 +446,112 @@ private Schema primitive(Schema.Type type, Schema bare) { // } @Test - public void testRecursiveRecord() { + void recursiveRecord() { Schema schema = SchemaBuilder.record("LongList").fields().name("value").type().longType().noDefault().name("next") .type().optional().type("LongList").endRecord(); - Assert.assertEquals("LongList", schema.getName()); + assertEquals("LongList", schema.getName()); List fields = schema.getFields(); - Assert.assertEquals(2, fields.size()); - Assert.assertEquals(new Schema.Field("value", Schema.create(Schema.Type.LONG), null), fields.get(0)); + assertEquals(2, fields.size()); + assertEquals(new Schema.Field("value", Schema.create(Schema.Type.LONG), null), fields.get(0)); - Assert.assertEquals(Schema.Type.UNION, fields.get(1).schema().getType()); + assertEquals(Schema.Type.UNION, fields.get(1).schema().getType()); - 
Assert.assertEquals(Schema.Type.NULL, fields.get(1).schema().getTypes().get(0).getType()); + assertEquals(Schema.Type.NULL, fields.get(1).schema().getTypes().get(0).getType()); Schema recordSchema = fields.get(1).schema().getTypes().get(1); - Assert.assertEquals(Schema.Type.RECORD, recordSchema.getType()); - Assert.assertEquals("LongList", recordSchema.getName()); - Assert.assertEquals(NullNode.getInstance(), fields.get(1).defaultValue()); + assertEquals(Schema.Type.RECORD, recordSchema.getType()); + assertEquals("LongList", recordSchema.getName()); + assertEquals(NullNode.getInstance(), fields.get(1).defaultValue()); } @Test - public void testEnum() { + void testEnum() { List symbols = Arrays.asList("a", "b"); Schema expected = Schema.createEnum("myenum", null, null, symbols); expected.addProp("p", "v"); Schema schema = SchemaBuilder.enumeration("myenum").prop("p", "v").symbols("a", "b"); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testEnumWithDefault() { + void enumWithDefault() { List symbols = Arrays.asList("a", "b"); String enumDefault = "a"; Schema expected = Schema.createEnum("myenum", null, null, symbols, enumDefault); expected.addProp("p", "v"); Schema schema = SchemaBuilder.enumeration("myenum").prop("p", "v").defaultSymbol(enumDefault).symbols("a", "b"); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testFixed() { + void fixed() { Schema expected = Schema.createFixed("myfixed", null, null, 16); expected.addAlias("myOldFixed"); Schema schema = SchemaBuilder.fixed("myfixed").aliases("myOldFixed").size(16); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testArray() { + void array() { Schema longSchema = Schema.create(Schema.Type.LONG); Schema expected = Schema.createArray(longSchema); Schema schema1 = SchemaBuilder.array().items().longType(); - Assert.assertEquals(expected, schema1); + 
assertEquals(expected, schema1); Schema schema2 = SchemaBuilder.array().items(longSchema); - Assert.assertEquals(expected, schema2); + assertEquals(expected, schema2); Schema schema3 = SchemaBuilder.array().prop("p", "v").items().type("long"); expected.addProp("p", "v"); - Assert.assertEquals(expected, schema3); + assertEquals(expected, schema3); } @Test - public void testMap() { + void map() { Schema intSchema = Schema.create(Schema.Type.INT); Schema expected = Schema.createMap(intSchema); Schema schema1 = SchemaBuilder.map().values().intType(); - Assert.assertEquals(expected, schema1); + assertEquals(expected, schema1); Schema schema2 = SchemaBuilder.map().values(intSchema); - Assert.assertEquals(expected, schema2); + assertEquals(expected, schema2); Schema schema3 = SchemaBuilder.map().prop("p", "v").values().type("int"); expected.addProp("p", "v"); - Assert.assertEquals(expected, schema3); + assertEquals(expected, schema3); } @Test - public void testUnionAndNullable() { + void unionAndNullable() { List types = new ArrayList<>(); types.add(Schema.create(Schema.Type.LONG)); types.add(Schema.create(Schema.Type.NULL)); Schema expected = Schema.createUnion(types); Schema schema = SchemaBuilder.unionOf().longType().and().nullType().endUnion(); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); schema = SchemaBuilder.nullable().longType(); - Assert.assertEquals(expected, schema); + assertEquals(expected, schema); } @Test - public void testFields() { + void fields() { Schema rec = SchemaBuilder.record("Rec").fields().name("documented").doc("documented").type().nullType().noDefault() .name("ascending").orderAscending().type().booleanType().noDefault().name("descending").orderDescending().type() .floatType().noDefault().name("ignored").orderIgnore().type().doubleType().noDefault().name("aliased") .aliases("anAlias").type().stringType().noDefault().endRecord(); - Assert.assertEquals("documented", rec.getField("documented").doc()); - 
Assert.assertEquals(Order.ASCENDING, rec.getField("ascending").order()); - Assert.assertEquals(Order.DESCENDING, rec.getField("descending").order()); - Assert.assertEquals(Order.IGNORE, rec.getField("ignored").order()); - Assert.assertTrue(rec.getField("aliased").aliases().contains("anAlias")); + assertEquals("documented", rec.getField("documented").doc()); + assertEquals(Order.ASCENDING, rec.getField("ascending").order()); + assertEquals(Order.DESCENDING, rec.getField("descending").order()); + assertEquals(Order.IGNORE, rec.getField("ignored").order()); + assertTrue(rec.getField("aliased").aliases().contains("anAlias")); } @Test - public void testFieldShortcuts() { + void fieldShortcuts() { Schema full = SchemaBuilder.record("Blah").fields().name("rbool").type().booleanType().noDefault().name("obool") .type().optional().booleanType().name("nbool").type().nullable().booleanType().booleanDefault(true).name("rint") .type().intType().noDefault().name("oint").type().optional().intType().name("nint").type().nullable().intType() @@ -560,11 +572,11 @@ public void testFieldShortcuts() { .nullableString("nstring", "def").requiredBytes("rbytes").optionalBytes("obytes") .nullableBytes("nbytes", new byte[] { 1, 2, 3 }).endRecord(); - Assert.assertEquals(full, shortcut); + assertEquals(full, shortcut); } @Test - public void testNames() { + void names() { // no contextual namespace Schema r = SchemaBuilder.record("Rec").fields().name("f0").type().fixed("org.foo.MyFixed").size(1).noDefault() .name("f1").type("org.foo.MyFixed").noDefault().name("f2").type("org.foo.MyFixed", "").noDefault().name("f3") @@ -580,7 +592,7 @@ public void testNames() { // context namespace Schema f = SchemaBuilder.builder("").fixed("Foo").size(1); - Assert.assertEquals(Schema.createFixed("Foo", null, null, 1), f); + assertEquals(Schema.createFixed("Foo", null, null, 1), f); // context namespace from record matches r = 
SchemaBuilder.record("Rec").namespace("org.foo").fields().name("f0").type().fixed("MyFixed").size(1).noDefault() @@ -625,27 +637,33 @@ public void testNames() { } private void checkField(Schema r, Schema expected, String name) { - Assert.assertEquals(expected, r.getField(name).schema()); + assertEquals(expected, r.getField(name).schema()); } - @Test(expected = SchemaParseException.class) - public void testNamesFailRedefined() { - SchemaBuilder.record("Rec").fields().name("f0").type().enumeration("MyEnum").symbols("A", "B").enumDefault("A") - .name("f1").type().enumeration("MyEnum").symbols("X", "Y").noDefault().endRecord(); + @Test + void namesFailRedefined() { + assertThrows(SchemaParseException.class, () -> { + SchemaBuilder.record("Rec").fields().name("f0").type().enumeration("MyEnum").symbols("A", "B").enumDefault("A") + .name("f1").type().enumeration("MyEnum").symbols("X", "Y").noDefault().endRecord(); + }); } - @Test(expected = SchemaParseException.class) - public void testNamesFailAbsent() { - SchemaBuilder.builder().type("notdefined"); + @Test + void namesFailAbsent() { + assertThrows(SchemaParseException.class, () -> { + SchemaBuilder.builder().type("notdefined"); + }); } - @Test(expected = AvroTypeException.class) - public void testNameReserved() { - SchemaBuilder.fixed("long").namespace("").size(1); + @Test + void nameReserved() { + assertThrows(AvroTypeException.class, () -> { + SchemaBuilder.fixed("long").namespace("").size(1); + }); } @Test - public void testFieldTypesAndDefaultValues() { + void fieldTypesAndDefaultValues() { byte[] bytedef = new byte[] { 3 }; ByteBuffer bufdef = ByteBuffer.wrap(bytedef); String strdef = "\u0003"; @@ -689,57 +707,59 @@ public void testFieldTypesAndDefaultValues() { GenericData.Record newRec = new GenericRecordBuilder(r).build(); - Assert.assertEquals(false, newRec.get("boolF")); - Assert.assertEquals(false, newRec.get("boolU")); - Assert.assertEquals(1, newRec.get("intF")); - Assert.assertEquals(1, 
newRec.get("intU")); - Assert.assertEquals(2L, newRec.get("longF")); - Assert.assertEquals(2L, newRec.get("longU")); - Assert.assertEquals(3f, newRec.get("floatF")); - Assert.assertEquals(3f, newRec.get("floatU")); - Assert.assertEquals(4d, newRec.get("doubleF")); - Assert.assertEquals(4d, newRec.get("doubleU")); - Assert.assertEquals("def", newRec.get("stringF").toString()); - Assert.assertEquals("def", newRec.get("stringU").toString()); - Assert.assertEquals(bufdef, newRec.get("bytesF1")); - Assert.assertEquals(bufdef, newRec.get("bytesF2")); - Assert.assertEquals(bufdef, newRec.get("bytesF3")); - Assert.assertEquals(bufdef, newRec.get("bytesU")); - Assert.assertNull(newRec.get("nullF")); - Assert.assertNull(newRec.get("nullU")); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF1")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF2")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF3")).bytes()); - Assert.assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedU")).bytes()); - Assert.assertEquals("S", newRec.get("enumF").toString()); - Assert.assertEquals("SS", newRec.get("enumU").toString()); + assertEquals(false, newRec.get("boolF")); + assertEquals(false, newRec.get("boolU")); + assertEquals(1, newRec.get("intF")); + assertEquals(1, newRec.get("intU")); + assertEquals(2L, newRec.get("longF")); + assertEquals(2L, newRec.get("longU")); + assertEquals(3f, newRec.get("floatF")); + assertEquals(3f, newRec.get("floatU")); + assertEquals(4d, newRec.get("doubleF")); + assertEquals(4d, newRec.get("doubleU")); + assertEquals("def", newRec.get("stringF").toString()); + assertEquals("def", newRec.get("stringU").toString()); + assertEquals(bufdef, newRec.get("bytesF1")); + assertEquals(bufdef, newRec.get("bytesF2")); + assertEquals(bufdef, newRec.get("bytesF3")); + assertEquals(bufdef, newRec.get("bytesU")); + assertNull(newRec.get("nullF")); + 
assertNull(newRec.get("nullU")); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF1")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF2")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedF3")).bytes()); + assertArrayEquals(bytedef, ((GenericData.Fixed) newRec.get("fixedU")).bytes()); + assertEquals("S", newRec.get("enumF").toString()); + assertEquals("SS", newRec.get("enumU").toString()); @SuppressWarnings("unchecked") Map map = (Map) newRec.get("mapF"); - Assert.assertEquals(mapdef.size(), map.size()); + assertEquals(mapdef.size(), map.size()); for (Map.Entry e : map.entrySet()) { - Assert.assertEquals(mapdef.get(e.getKey().toString()), e.getValue().toString()); + assertEquals(mapdef.get(e.getKey().toString()), e.getValue().toString()); } - Assert.assertEquals(newRec.get("mapF"), newRec.get("mapU")); + assertEquals(newRec.get("mapF"), newRec.get("mapU")); @SuppressWarnings("unchecked") GenericData.Array arr = (GenericData.Array) newRec.get("arrayF"); - Assert.assertEquals(arrdef.size(), arr.size()); + assertEquals(arrdef.size(), arr.size()); for (CharSequence c : arr) { - Assert.assertTrue(arrdef.contains(c.toString())); + assertTrue(arrdef.contains(c.toString())); } - Assert.assertEquals(newRec.get("arrayF"), newRec.get("arrayU")); - Assert.assertEquals(recdef, newRec.get("recordF")); - Assert.assertEquals(recdef2, newRec.get("recordU")); - Assert.assertEquals("S", newRec.get("byName").toString()); + assertEquals(newRec.get("arrayF"), newRec.get("arrayU")); + assertEquals(recdef, newRec.get("recordF")); + assertEquals(recdef2, newRec.get("recordU")); + assertEquals("S", newRec.get("byName").toString()); } - @Test(expected = SchemaBuilderException.class) - public void testBadDefault() { - SchemaBuilder.record("r").fields().name("f").type(Schema.create(Schema.Type.INT)).withDefault(new Object()) - .endRecord(); + @Test + void badDefault() { + assertThrows(SchemaBuilderException.class, () 
-> { + SchemaBuilder.record("r").fields().name("f").type(Schema.create(Schema.Type.INT)).withDefault(new Object()) + .endRecord(); + }); } @Test - public void testUnionFieldBuild() { + void unionFieldBuild() { SchemaBuilder.record("r").fields().name("allUnion").type().unionOf().booleanType().and().intType().and().longType() .and().floatType().and().doubleType().and().stringType().and().bytesType().and().nullType().and().fixed("Fix") .size(1).and().enumeration("Enu").symbols("Q").and().array().items().intType().and().map().values().longType() @@ -748,27 +768,27 @@ public void testUnionFieldBuild() { } @Test - public void testDefaults() throws IOException { + void defaults() throws IOException { Schema writeSchema = SchemaBuilder.record("r").fields().name("requiredInt").type().intType().noDefault() .name("optionalInt").type().optional().intType().name("nullableIntWithDefault").type().nullable().intType() .intDefault(3).endRecord(); GenericData.Record rec1 = new GenericRecordBuilder(writeSchema).set("requiredInt", 1).build(); - Assert.assertEquals(1, rec1.get("requiredInt")); - Assert.assertEquals(null, rec1.get("optionalInt")); - Assert.assertEquals(3, rec1.get("nullableIntWithDefault")); + assertEquals(1, rec1.get("requiredInt")); + assertNull(rec1.get("optionalInt")); + assertEquals(3, rec1.get("nullableIntWithDefault")); GenericData.Record rec2 = new GenericRecordBuilder(writeSchema).set("requiredInt", 1).set("optionalInt", 2) .set("nullableIntWithDefault", 13).build(); - Assert.assertEquals(1, rec2.get("requiredInt")); - Assert.assertEquals(2, rec2.get("optionalInt")); - Assert.assertEquals(13, rec2.get("nullableIntWithDefault")); + assertEquals(1, rec2.get("requiredInt")); + assertEquals(2, rec2.get("optionalInt")); + assertEquals(13, rec2.get("nullableIntWithDefault")); // write to file - File file = new File(DIR.getRoot().getPath(), "testDefaults.avro"); + File file = new File(DIR.getPath(), "testDefaults.avro"); try (DataFileWriter writer = new 
DataFileWriter<>(new GenericDatumWriter<>())) { writer.create(writeSchema, file); @@ -785,24 +805,24 @@ public void testDefaults() throws IOException { new GenericDatumReader<>(writeSchema, readSchema))) { GenericData.Record rec1read = reader.iterator().next(); - Assert.assertEquals(1, rec1read.get("requiredInt")); - Assert.assertNull(rec1read.get("optionalInt")); - Assert.assertEquals(3, rec1read.get("nullableIntWithDefault")); - Assert.assertNull(rec1read.get("newOptionalInt")); - Assert.assertEquals(5, rec1read.get("newNullableIntWithDefault")); + assertEquals(1, rec1read.get("requiredInt")); + assertNull(rec1read.get("optionalInt")); + assertEquals(3, rec1read.get("nullableIntWithDefault")); + assertNull(rec1read.get("newOptionalInt")); + assertEquals(5, rec1read.get("newNullableIntWithDefault")); GenericData.Record rec2read = reader.iterator().next(); - Assert.assertEquals(1, rec2read.get("requiredInt")); - Assert.assertEquals(2, rec2read.get("optionalInt")); - Assert.assertEquals(13, rec2read.get("nullableIntWithDefault")); - Assert.assertNull(rec2read.get("newOptionalInt")); - Assert.assertEquals(5, rec2read.get("newNullableIntWithDefault")); + assertEquals(1, rec2read.get("requiredInt")); + assertEquals(2, rec2read.get("optionalInt")); + assertEquals(13, rec2read.get("nullableIntWithDefault")); + assertNull(rec2read.get("newOptionalInt")); + assertEquals(5, rec2read.get("newNullableIntWithDefault")); } } @Test - public void testDefaultTypes() { + void defaultTypes() { Integer intDef = 1; Long longDef = 2L; Float floatDef = 3F; @@ -811,34 +831,71 @@ public void testDefaultTypes() { .type().longType().longDefault(longDef).name("float").type().floatType().floatDefault(floatDef).name("double") .type().doubleType().doubleDefault(doubleDef).endRecord(); - Assert.assertEquals("int field default type or value mismatch", intDef, schema.getField("int").defaultVal()); - Assert.assertEquals("long field default type or value mismatch", longDef, 
schema.getField("long").defaultVal()); - Assert.assertEquals("float field default type or value mismatch", floatDef, schema.getField("float").defaultVal()); - Assert.assertEquals("double field default type or value mismatch", doubleDef, - schema.getField("double").defaultVal()); + assertEquals(intDef, schema.getField("int").defaultVal(), "int field default type or value mismatch"); + assertEquals(longDef, schema.getField("long").defaultVal(), "long field default type or value mismatch"); + assertEquals(floatDef, schema.getField("float").defaultVal(), "float field default type or value mismatch"); + assertEquals(doubleDef, schema.getField("double").defaultVal(), "double field default type or value mismatch"); } - @Test(expected = AvroRuntimeException.class) - public void testValidateDefaultsEnabled() { - try { - SchemaBuilder.record("ValidationRecord").fields().name("IntegerField").type("int").withDefault("Invalid") - .endRecord(); - } catch (AvroRuntimeException e) { - Assert.assertEquals("Default behavior is to raise an exception due to record having an invalid default", - "Invalid default for field IntegerField: \"Invalid\" not a \"int\"", e.getMessage()); - throw e; - } + @Test + void validateDefaultsEnabled() { + assertThrows(AvroRuntimeException.class, () -> { + try { + SchemaBuilder.record("ValidationRecord").fields().name("IntegerField").type("int").withDefault("Invalid") + .endRecord(); + } catch (AvroRuntimeException e) { + assertEquals("Invalid default for field IntegerField: \"Invalid\" not a \"int\"", e.getMessage(), + "Default behavior is to raise an exception due to record having an invalid default"); + throw e; + } + }); } @Test - public void testValidateDefaultsDisabled() { + void validateDefaultsDisabled() { final String fieldName = "IntegerField"; final String defaultValue = "foo"; Schema schema = SchemaBuilder.record("ValidationRecord").fields().name(fieldName).notValidatingDefaults() .type("int").withDefault(defaultValue) // Would throw an 
exception on endRecord() if validations enabled .endRecord(); - Assert.assertNull("Differing types, so this returns null", schema.getField(fieldName).defaultVal()); - Assert.assertEquals("Schema is able to be successfully created as is without validation", defaultValue, - schema.getField(fieldName).defaultValue().asText()); + assertNull(schema.getField(fieldName).defaultVal(), "Differing types, so this returns null"); + assertEquals(defaultValue, schema.getField(fieldName).defaultValue().asText(), + "Schema is able to be successfully created as is without validation"); + } + + /** + * https://issues.apache.org/jira/browse/AVRO-1965 + */ + @Test + void namespaceDefaulting() { + Schema d = SchemaBuilder.builder().intType(); + Schema c = SchemaBuilder.record("c").fields().name("d").type(d).noDefault().endRecord(); + Schema b = SchemaBuilder.record("b").fields().name("c").type(c).noDefault().endRecord(); + + Schema a1 = SchemaBuilder.record("default.a").fields().name("b").type(b).noDefault().endRecord(); + Schema a2 = new Schema.Parser().parse(a1.toString()); + + assertEquals(a2, a1); + } + + @Test + void namesAcceptAll() throws InterruptedException { + // Ensure that Schema.setNameValidator won't interfere with others unit tests. 
+ Runnable r = () -> { + Schema.setNameValidator(NameValidator.NO_VALIDATION); + final Schema schema = SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT)) + .noDefault().endRecord(); + Assertions.assertNotNull(schema); + Assertions.assertEquals("7name", schema.getName()); + final Schema.Field field = schema.getField("123"); + Assertions.assertEquals("123", field.name()); + }; + + final Throwable[] exception = new Throwable[] { null }; + Thread t = new Thread(r); + t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] = e); + t.start(); + t.join(); + Assertions.assertNull(exception[0], () -> exception[0].getMessage()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java new file mode 100644 index 00000000000..3997bbafa90 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Stream; + +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestSchemaCommons { + private static final Logger LOG = LoggerFactory.getLogger(TestSchemaCommons.class); + + @ParameterizedTest + @MethodSource("sharedFolders") + void runFolder(final File folder) throws IOException { + final File schemaSource = new File(folder, "schema.json"); + final File data = new File(folder, "data.avro"); + + if (!schemaSource.exists()) { + LOG.warn("No 'schema.json' file on folder {}", folder.getPath()); + return; + } + final Schema schema = new Schema.Parser().parse(schemaSource); + assertNotNull(schema); + + if (!data.exists()) { + LOG.warn("No 'data.avro' file on folder {}", folder.getPath()); + return; + } + + // output file + final String rootTest = Thread.currentThread().getContextClassLoader().getResource(".").getPath(); + final File copyData = new File(rootTest, "copy.avro"); + + // Deserialize from disk + DatumWriter datumWriter = new GenericDatumWriter<>(schema); + GenericDatumReader datumReader = new GenericDatumReader<>(schema); + try (DataFileReader dataFileReader = new DataFileReader<>(data, datumReader); + DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { + dataFileWriter.create(schema, copyData); + GenericRecord record = null; + int 
counter = 0; + while (dataFileReader.hasNext()) { + record = dataFileReader.next(); + counter++; + assertNotNull(record); + dataFileWriter.append(record); + } + assertTrue(counter > 0, "no data in file"); + } + + // Cleanup + assertTrue(copyData.delete()); + } + + public static Stream sharedFolders() { + File root = new File("target/test-classes/share/test/data/schemas"); + return Arrays.stream(root.listFiles(File::isDirectory)).map(Arguments::of); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java index 27d47d221c4..275bcfafede 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java @@ -39,6 +39,9 @@ import static org.apache.avro.TestSchemas.EMPTY_UNION_SCHEMA; import static org.apache.avro.TestSchemas.ENUM1_ABC_SCHEMA; import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_DEFAULT; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_1; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_2; import static org.apache.avro.TestSchemas.ENUM1_BC_SCHEMA; import static org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_RECORD; import static org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_SCHEMA; @@ -65,6 +68,8 @@ import static org.apache.avro.TestSchemas.LONG_UNION_SCHEMA; import static org.apache.avro.TestSchemas.NS_RECORD1; import static org.apache.avro.TestSchemas.NS_RECORD2; +import static org.apache.avro.TestSchemas.WITH_NS; +import static org.apache.avro.TestSchemas.WITHOUT_NS; import static org.apache.avro.TestSchemas.NULL_SCHEMA; import static org.apache.avro.TestSchemas.ReaderWriter; import static org.apache.avro.TestSchemas.STRING_ARRAY_SCHEMA; @@ -73,9 +78,7 @@ import static org.apache.avro.TestSchemas.STRING_UNION_SCHEMA; import 
static org.apache.avro.TestSchemas.assertSchemaContains; import static org.apache.avro.TestSchemas.list; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.*; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; @@ -100,7 +103,7 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -115,9 +118,9 @@ public class TestSchemaCompatibility { new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("oldfield2", STRING_SCHEMA, null, null))); @Test - public void testValidateSchemaPairMissingField() { + void validateSchemaPairMissingField() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -127,9 +130,9 @@ public void testValidateSchemaPairMissingField() { } @Test - public void testValidateSchemaPairMissingSecondField() { + void validateSchemaPairMissingSecondField() { final List readerFields = list(new Schema.Field("oldfield2", STRING_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, 
SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -139,10 +142,10 @@ public void testValidateSchemaPairMissingSecondField() { } @Test - public void testValidateSchemaPairAllFields() { + void validateSchemaPairAllFields() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("oldfield2", STRING_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -152,10 +155,10 @@ public void testValidateSchemaPairAllFields() { } @Test - public void testValidateSchemaNewFieldWithDefault() { + void validateSchemaNewFieldWithDefault() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("newfield1", INT_SCHEMA, null, 42)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -165,10 +168,10 @@ public void testValidateSchemaNewFieldWithDefault() { } @Test - public void testValidateSchemaNewField() { + void validateSchemaNewField() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null), new Schema.Field("newfield1", INT_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); SchemaPairCompatibility compatibility = 
checkReaderWriterCompatibility(reader, WRITER_SCHEMA); // Test new field without default value. @@ -184,7 +187,7 @@ public void testValidateSchemaNewField() { } @Test - public void testValidateArrayWriterSchema() { + void validateArrayWriterSchema() { final Schema validReader = Schema.createArray(STRING_SCHEMA); final Schema invalidReader = Schema.createMap(STRING_SCHEMA); final SchemaCompatibility.SchemaPairCompatibility validResult = new SchemaCompatibility.SchemaPairCompatibility( @@ -204,7 +207,7 @@ public void testValidateArrayWriterSchema() { } @Test - public void testValidatePrimitiveWriterSchema() { + void validatePrimitiveWriterSchema() { final Schema validReader = Schema.create(Schema.Type.STRING); final SchemaCompatibility.SchemaPairCompatibility validResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), validReader, STRING_SCHEMA, @@ -225,11 +228,27 @@ public void testValidatePrimitiveWriterSchema() { * Reader union schema must contain all writer union branches. 
*/ @Test - public void testUnionReaderWriterSubsetIncompatibility() { + void unionReaderWriterSubsetIncompatibility() { final Schema unionWriter = Schema.createUnion(list(INT_SCHEMA, STRING_SCHEMA, LONG_SCHEMA)); final Schema unionReader = Schema.createUnion(list(INT_SCHEMA, STRING_SCHEMA)); final SchemaPairCompatibility result = checkReaderWriterCompatibility(unionReader, unionWriter); assertEquals(SchemaCompatibilityType.INCOMPATIBLE, result.getType()); + assertEquals("/2", result.getResult().getIncompatibilities().get(0).getLocation()); + } + + @Test + void unionWriterSimpleReaderIncompatibility() { + Schema mandatorySchema = SchemaBuilder.record("Account").fields().name("age").type().intType().noDefault() + .endRecord(); + Schema optionalSchema = SchemaBuilder.record("Account").fields().optionalInt("age").endRecord(); + + SchemaPairCompatibility compatibility = checkReaderWriterCompatibility(mandatorySchema, optionalSchema); + + assertEquals(SchemaCompatibilityType.INCOMPATIBLE, compatibility.getType()); + + Incompatibility incompatibility = compatibility.getResult().getIncompatibilities().get(0); + assertEquals("reader type: INT not compatible with writer type: NULL", incompatibility.getMessage()); + assertEquals("/fields/0/type/0", incompatibility.getLocation()); } // ----------------------------------------------------------------------------------------------- @@ -259,6 +278,10 @@ public void testUnionReaderWriterSubsetIncompatibility() { new ReaderWriter(INT_MAP_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(LONG_MAP_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA), new ReaderWriter(ENUM1_ABC_SCHEMA, ENUM1_AB_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA_DEFAULT, ENUM1_ABC_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA_NAMESPACE_1), + new ReaderWriter(ENUM1_AB_SCHEMA_NAMESPACE_1, ENUM1_AB_SCHEMA), + new ReaderWriter(ENUM1_AB_SCHEMA_NAMESPACE_1, ENUM1_AB_SCHEMA_NAMESPACE_2), // String-to/from-bytes, introduced in Avro 1.7.7 
new ReaderWriter(STRING_SCHEMA, BYTES_SCHEMA), new ReaderWriter(BYTES_SCHEMA, STRING_SCHEMA), @@ -315,7 +338,7 @@ public void testUnionReaderWriterSubsetIncompatibility() { // This is comparing two records that have an inner array of records with // different namespaces. - new ReaderWriter(NS_RECORD1, NS_RECORD2)); + new ReaderWriter(NS_RECORD1, NS_RECORD2), new ReaderWriter(WITHOUT_NS, WITH_NS)); // ----------------------------------------------------------------------------------------------- @@ -362,14 +385,14 @@ public static void validateIncompatibleSchemas(Schema reader, Schema writer, * Tests reader/writer compatibility validation. */ @Test - public void testReaderWriterCompatibility() { + void readerWriterCompatibility() { for (ReaderWriter readerWriter : COMPATIBLE_READER_WRITER_TEST_CASES) { final Schema reader = readerWriter.getReader(); final Schema writer = readerWriter.getWriter(); LOG.debug("Testing compatibility of reader {} with writer {}.", reader, writer); final SchemaPairCompatibility result = checkReaderWriterCompatibility(reader, writer); - assertEquals(String.format("Expecting reader %s to be compatible with writer %s, but tested incompatible.", - reader, writer), SchemaCompatibilityType.COMPATIBLE, result.getType()); + assertEquals(SchemaCompatibilityType.COMPATIBLE, result.getType(), String + .format("Expecting reader %s to be compatible with writer %s, but tested incompatible.", reader, writer)); } } @@ -460,7 +483,7 @@ ENUM_AB_ENUM_DEFAULT_A_SCHEMA, new EnumSymbol(ENUM_AB_ENUM_DEFAULT_A_SCHEMA, "A" * Tests the reader/writer compatibility at decoding time. 
*/ @Test - public void testReaderWriterDecodingCompatibility() throws Exception { + void readerWriterDecodingCompatibility() throws Exception { for (DecodingTestCase testCase : DECODING_COMPATIBILITY_TEST_CASES) { final Schema readerSchema = testCase.getReaderSchema(); final Schema writerSchema = testCase.getWriterSchema(); @@ -483,10 +506,11 @@ public void testReaderWriterDecodingCompatibility() throws Exception { final DatumReader datumReader = new GenericDatumReader<>(readerSchema); final Object decodedDatum = datumReader.read(null, decoder); - assertEquals(String.format( - "Expecting decoded value %s when decoding value %s whose writer schema is %s " - + "using reader schema %s, but value was %s.", - expectedDecodedDatum, datum, writerSchema, readerSchema, decodedDatum), expectedDecodedDatum, decodedDatum); + assertEquals(expectedDecodedDatum, decodedDatum, + String.format( + "Expecting decoded value %s when decoding value %s whose writer schema is %s " + + "using reader schema %s, but value was %s.", + expectedDecodedDatum, datum, writerSchema, readerSchema, decodedDatum)); } } @@ -500,7 +524,7 @@ private Schema readSchemaFromResources(String name) throws IOException { } @Test - public void checkResolvingDecoder() throws IOException { + void checkResolvingDecoder() throws IOException { final Schema locationSchema = readSchemaFromResources("schema-location.json"); final Schema writeSchema = readSchemaFromResources("schema-location-write.json"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java index 33fc5ce3d33..ce701af12c0 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityEnumDefaults.java @@ -22,7 +22,8 @@ import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; import static 
org.apache.avro.TestSchemas.ENUM_ABC_ENUM_DEFAULT_A_SCHEMA; import static org.apache.avro.TestSchemas.ENUM_AB_ENUM_DEFAULT_A_SCHEMA; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.io.ByteArrayOutputStream; import org.apache.avro.generic.GenericData; @@ -35,18 +36,12 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class TestSchemaCompatibilityEnumDefaults { - @Rule - public ExpectedException expectedException = ExpectedException.none(); @Test - public void testEnumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found Record1, expecting Record1, missing required field field1"); + void enumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field2").type(ENUM2_AB_SCHEMA).noDefault() .endRecord(); @@ -56,11 +51,13 @@ public void testEnumDefaultNotAppliedWhenWriterFieldMissing() throws Exception { GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field2", new GenericData.EnumSymbol(writerSchema, "B")); - serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema)); + assertEquals("Found Record1, expecting Record1, missing required field field1", avroTypeException.getMessage()); } @Test - public void testEnumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { + void enumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { Schema 
writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -75,7 +72,7 @@ public void testEnumDefaultAppliedWhenNoFieldDefaultDefined() throws Exception { } @Test - public void testEnumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Exception { + void enumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -89,7 +86,7 @@ public void testEnumDefaultNotAppliedWhenCompatibleSymbolIsFound() throws Except } @Test - public void testEnumDefaultAppliedWhenFieldDefaultDefined() throws Exception { + void enumDefaultAppliedWhenFieldDefaultDefined() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM_ABC_ENUM_DEFAULT_A_SCHEMA) .noDefault().endRecord(); @@ -104,10 +101,7 @@ public void testEnumDefaultAppliedWhenFieldDefaultDefined() throws Exception { } @Test - public void testFieldDefaultNotAppliedForUnknownSymbol() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("No match for C"); - + void fieldDefaultNotAppliedForUnknownSymbol() throws Exception { Schema writerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM1_ABC_SCHEMA).noDefault() .endRecord(); Schema readerSchema = SchemaBuilder.record("Record1").fields().name("field1").type(ENUM1_AB_SCHEMA).withDefault("A") @@ -115,7 +109,9 @@ public void testFieldDefaultNotAppliedForUnknownSymbol() throws Exception { GenericRecord datum = new GenericData.Record(writerSchema); datum.put("field1", new GenericData.EnumSymbol(writerSchema, "C")); - serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema); + AvroTypeException avroTypeException = assertThrows(AvroTypeException.class, + () -> 
serializeWithWriterThenDeserializeWithReader(writerSchema, datum, readerSchema)); + assertEquals("No match for C", avroTypeException.getMessage()); } private GenericRecord serializeWithWriterThenDeserializeWithReader(Schema writerSchema, GenericRecord datum, diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java index 6ac3c68dc03..05321527cb4 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java @@ -17,44 +17,34 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_4_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_8_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FIXED_8_BYTES; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityFixedSizeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { FIXED_4_BYTES, 
FIXED_8_BYTES, "expected: 8, found: 4", "/size" }, - { FIXED_8_BYTES, FIXED_4_BYTES, "expected: 4, found: 8", "/size" }, - { A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", - "/fields/1/type/size" }, - { A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", - "/fields/1/type/size" }, }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(FIXED_4_BYTES, FIXED_8_BYTES, "expected: 8, found: 4", "/size"), + Arguments.of(FIXED_8_BYTES, FIXED_4_BYTES, "expected: 4, found: 8", "/size"), + Arguments.of(A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", + "/fields/1/type/size"), + Arguments.of(A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", + "/fields/1/type/size")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testFixedSizeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + void fixedSizeMismatchSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.FIXED_SIZE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java index 82b70fe2443..63d607cd596 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java @@ -17,19 +17,19 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import 
org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.ENUM1_ABC_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_BC_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityMissingEnumSymbols { private static final Schema RECORD1_WITH_ENUM_AB = SchemaBuilder.record("Record1").fields() // @@ -39,26 +39,15 @@ public class TestSchemaCompatibilityMissingEnumSymbols { .name("field1").type(ENUM1_ABC_SCHEMA).noDefault() // .endRecord(); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols" }, - { ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols" }, - { RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols"), + Arguments.of(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols"), + Arguments.of(RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws 
Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.MISSING_ENUM_SYMBOLS, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java index 4f947690009..3e84a5337c9 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java @@ -17,22 +17,40 @@ */ package org.apache.avro; -import static java.util.Arrays.asList; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -@RunWith(Parameterized.class) +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import static java.util.Arrays.asList; +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_STRING_UNION_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_UNION_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_UNION_SCHEMA; +import static 
org.apache.avro.TestSchemas.DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.list; + public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema RECORD1_WITH_INT = SchemaBuilder.record("Record1").fields() // @@ -50,61 +68,52 @@ public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema UNION_INT_MAP_INT = Schema.createUnion(list(INT_SCHEMA, INT_MAP_SCHEMA)); private static final Schema UNION_INT_NULL = Schema.createUnion(list(INT_SCHEMA, NULL_SCHEMA)); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1") }, - { STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, UNION_INT_RECORD1, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, + public static 
Stream data() { + return Stream.of( // + Arguments.of(INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1")), + Arguments.of(STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD1, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), // more info in the subset schemas - { UNION_INT_RECORD1, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, Collections.singletonList("reader union lacking writer type: ENUM"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, - Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_BOOLEAN, Collections.singletonList("reader union lacking writer type: BOOLEAN"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, LONG_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: LONG"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, FLOAT_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: FLOAT"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: DOUBLE"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: BYTES"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, Collections.singletonList("reader 
union lacking writer type: ARRAY"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_MAP_INT, Collections.singletonList("reader union lacking writer type: MAP"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_NULL, Collections.singletonList("reader union lacking writer type: NULL"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + Arguments.of(UNION_INT_RECORD1, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, + Collections.singletonList("reader union lacking writer type: ENUM"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, + Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_BOOLEAN, + Collections.singletonList("reader union lacking writer type: BOOLEAN"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, LONG_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: LONG"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, FLOAT_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: FLOAT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: DOUBLE"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: BYTES"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, + Collections.singletonList("reader union lacking writer type: ARRAY"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_MAP_INT, + Collections.singletonList("reader union lacking writer type: MAP"), 
Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_NULL, + Collections.singletonList("reader union lacking writer type: NULL"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, asList("reader union lacking writer type: LONG", "reader union lacking writer type: FLOAT", "reader union lacking writer type: DOUBLE"), - asList("/1", "/2", "/3") }, - { A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, + asList("/1", "/2", "/3")), + Arguments.of(A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, Collections.singletonList("reader union lacking writer type: STRING"), - Collections.singletonList("/fields/1/type/1") } }; - return Arrays.asList(fields); + Collections.singletonList("/fields/1/type/1"))); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public List details; - @Parameter(3) - public List location; - - @Test - public void testMissingUnionBranch() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testMissingUnionBranch(Schema reader, Schema writer, List details, List location) + throws Exception { List types = Collections.nCopies(details.size(), SchemaIncompatibilityType.MISSING_UNION_BRANCH); validateIncompatibleSchemas(reader, writer, types, details, location); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java index 23946755b85..456e4b9d178 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMultiple.java @@ -23,12 +23,12 @@ import java.util.List; import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchemaCompatibilityMultiple { @Test - public 
void testMultipleIncompatibilities() throws Exception { + void multipleIncompatibilities() throws Exception { Schema reader = SchemaBuilder.record("base").fields() // 0 .name("check_enum_symbols_field").type().enumeration("check_enum_symbols_type").symbols("A", "C").noDefault() diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java index 83c89ab7b76..d20561faae8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java @@ -17,44 +17,37 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_1_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_2_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD2; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; -@RunWith(Parameterized.class) public class 
TestSchemaCompatibilityNameMismatch { private static final Schema FIXED_4_ANOTHER_NAME = Schema.createFixed("AnotherName", null, null, 4); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name" }, - { EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name" }, - { FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name" }, - { A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name"), + Arguments.of(EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name"), + Arguments.of(FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name"), + Arguments.of(A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testNameMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testNameMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.NAME_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java index d367caed941..7a21c1a5fcd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java @@ -17,38 +17,29 @@ */ 
package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_B_DINT_RECORD1; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityReaderFieldMissingDefaultValue { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0" }, { A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0" } }; - return Arrays.asList(fields); - } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; + public static Stream data() { + return Stream.of(Arguments.of(A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0"), + Arguments.of(A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0")); + } - @Test - public void testReaderFieldMissingDefaultValueSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testReaderFieldMissingDefaultValueSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, 
SchemaIncompatibilityType.READER_FIELD_MISSING_DEFAULT_VALUE, details, location); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java index 63dd3ac11a7..247e40404ba 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java @@ -17,82 +17,94 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_SCHEMA; +import static org.apache.avro.TestSchemas.DOUBLE_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LIST_RECORD; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; 
+import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_LIST_RECORD; +import static org.apache.avro.TestSchemas.LONG_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityTypeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" }, - { NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/" }, - { BOOLEAN_SCHEMA, INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/" }, + public static Stream data() { + return Stream.of( + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/"), + Arguments.of(NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/"), + + Arguments.of(BOOLEAN_SCHEMA, INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/"), - { INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/" }, - { INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: BOOLEAN", "/" }, - { INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/" }, - { INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, - { INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/"), + Arguments.of(INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: 
BOOLEAN", "/"), + Arguments.of(INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/"), + Arguments.of(INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/"), + Arguments.of(INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/"), - { LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/" }, + Arguments.of(LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/"), + Arguments.of(LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/"), - { FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/"), - { DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/" }, + Arguments.of(DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/"), - { FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/" }, + Arguments.of(FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/"), - { STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", "/" }, - { STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/" }, + Arguments.of(STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", + "/"), + Arguments.of(STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/"), - { BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/" }, - { BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with 
writer type: INT", "/" }, + Arguments.of(BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/"), + Arguments.of(BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with writer type: INT", "/"), - { A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with writer type: INT", "/" }, + Arguments.of(A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with writer type: INT", "/"), - { INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/items" }, - { INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/" }, - { INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/" }, - { INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/values" }, + Arguments.of(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/items"), + Arguments.of(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/"), + Arguments.of(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/"), + Arguments.of(INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/values"), - { INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/" }, - { ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/" }, + Arguments.of(INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/"), + Arguments.of(ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/"), - { FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, - "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, - { LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { 
INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, + Arguments.of(FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + "reader type: FLOAT not compatible with writer type: DOUBLE", "/3"), + Arguments.of(LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", + "/1"), + Arguments.of(INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", + "/1"), - { INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", - "/fields/0/type" }, + Arguments.of(INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", + "/fields/0/type"), - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" } }; - return Arrays.asList(fields); + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.TYPE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java index 97b7a7803ce..4bd1c36f506 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java @@ -18,9 +18,7 @@ package org.apache.avro; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static 
org.junit.Assert.assertTrue; - +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.BufferedReader; import java.io.IOException; import java.nio.file.Files; @@ -31,51 +29,36 @@ import java.util.Locale; import org.apache.avro.util.CaseFinder; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; -@RunWith(Enclosed.class) public class TestSchemaNormalization { - @RunWith(Parameterized.class) + @Nested public static class TestCanonical { - String input, expectedOutput; - - public TestCanonical(String i, String o) { - input = i; - expectedOutput = o; - } - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "canonical", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { assertEquals(SchemaNormalization.toParsingForm(new Schema.Parser().parse(input)), expectedOutput); } } - @RunWith(Parameterized.class) + @Nested public static class TestFingerprint { - String input, expectedOutput; - public TestFingerprint(String i, String o) { - input = i; - expectedOutput = o; - } - - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "fingerprint", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { Schema s = new Schema.Parser().parse(input); long carefulFP = altFingerprint(SchemaNormalization.toParsingForm(s)); assertEquals(carefulFP, Long.parseLong(expectedOutput)); @@ -84,22 +67,16 @@ public void 
testCanonicalization() throws Exception { } // see AVRO-1493 - @RunWith(Parameterized.class) + @Nested public static class TestFingerprintInternationalization { - String input, expectedOutput; - - public TestFingerprintInternationalization(String i, String o) { - input = i; - expectedOutput = o; - } - @Parameters public static List cases() throws IOException { return CaseFinder.find(data(), "fingerprint", new ArrayList<>()); } - @Test - public void testCanonicalization() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void canonicalization(String input, String expectedOutput) { Locale originalDefaultLocale = Locale.getDefault(); Locale.setDefault(Locale.forLanguageTag("tr")); Schema s = new Schema.Parser().parse(input); @@ -152,8 +129,7 @@ private static long altExtend(long poly, int degree, long fp, byte[] b) { private static final byte[] POSTFIX = { 0, 0, 0, 0, 0, 0, 0, 0 }; private static void assertEqHex(long expected, long actual) { - String m = format("0x%016x != 0x%016x", expected, actual); - assertTrue(m, expected == actual); + assertEquals(expected, actual, () -> format("0x%016x != 0x%016x", expected, actual)); } private static String format(String f, Object... args) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java new file mode 100644 index 00000000000..29c8f65be66 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaParser.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import com.fasterxml.jackson.core.JsonParseException; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringReader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import static java.util.Collections.singletonList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestSchemaParser { + private static final Schema SCHEMA_REAL = Schema.createFixed("Real", null, "tests", 42); + private static final String SCHEMA_JSON = SCHEMA_REAL.toString(false); + private static final Charset[] UTF_CHARSETS = { StandardCharsets.UTF_8, StandardCharsets.UTF_16LE, + StandardCharsets.UTF_16BE }; + + @Test + void testParseFile() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Files.write(tempFile, singletonList(SCHEMA_JSON)); + + Schema schema = new SchemaParser().parse(tempFile.toFile()).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParsePath() throws IOException { + Path tempFile = Files.createTempFile("TestSchemaParser", null); + Files.write(tempFile, singletonList(SCHEMA_JSON)); + + Schema schema = new SchemaParser().parse(tempFile).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseURI() throws IOException { + Path tempFile = 
Files.createTempFile("TestSchemaParser", null); + Charset charset = UTF_CHARSETS[(int) Math.floor(UTF_CHARSETS.length * Math.random())]; + Files.write(tempFile, singletonList(SCHEMA_JSON), charset); + + Schema schema = new SchemaParser().parse(tempFile.toUri(), null).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseReader() throws IOException { + Schema schema = new SchemaParser().parse(new StringReader(SCHEMA_JSON)).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseStream() throws IOException { + Schema schema = new SchemaParser().parse(new ByteArrayInputStream(SCHEMA_JSON.getBytes(StandardCharsets.UTF_16))) + .mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseTextWithFallbackJsonParser() { + Schema schema = new SchemaParser().parse(SCHEMA_JSON).mainSchema(); + assertEquals(SCHEMA_REAL, schema); + } + + @Test + void testParseByCustomParser() { + Schema schema = new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_ONE).mainSchema(); + assertEquals(DummySchemaParser.FIXED_SCHEMA, schema); + } + + @Test + void testSingleParseError() { + SchemaParseException parseException = assertThrows(SchemaParseException.class, + () -> new SchemaParser().parse("foo").mainSchema()); + assertEquals(JsonParseException.class, parseException.getCause().getClass()); + assertEquals(0, parseException.getSuppressed().length); + } + + @Test + void testMultipleParseErrors() { + SchemaParseException parseException = assertThrows(SchemaParseException.class, + () -> new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_ERROR).mainSchema()); + assertTrue(parseException.getMessage().startsWith("Could not parse the schema")); + Throwable[] suppressed = parseException.getSuppressed(); + assertEquals(2, suppressed.length); + assertEquals(DummySchemaParser.ERROR_MESSAGE, suppressed[0].getMessage()); + assertEquals(JsonParseException.class, suppressed[1].getCause().getClass()); + } + + @Test + void 
testIOFailureWhileParsingText() { + AvroRuntimeException exception = assertThrows(AvroRuntimeException.class, + () -> new SchemaParser().parse(DummySchemaParser.SCHEMA_TEXT_IO_ERROR).mainSchema()); + assertEquals(IOException.class, exception.getCause().getClass()); + assertEquals(DummySchemaParser.IO_ERROR_MESSAGE, exception.getCause().getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java new file mode 100644 index 00000000000..a86519c7560 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.reflect.ReflectDatumWriter; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Objects; +import java.util.function.Function; + +public class TestSchemaValidateDefault { + + @Test + public void valueReadWithCorrectDefaultValue() throws IOException { + + ExampleRecord writtenValue = new ExampleRecord(new ComplexValue(42L), new ComplexValue(666L)); + byte[] bytes = getSerializer(ExampleRecord.SCHEMA_WITH_ONE_FIELD).apply(writtenValue); + + ReflectDatumReader reader = new ReflectDatumReader<>(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + ExampleRecord.SCHEMA_WITH_TWO_FIELDS, ReflectData.get()); + Decoder decoder = DecoderFactory.get().jsonDecoder(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + new ByteArrayInputStream(bytes)); + ExampleRecord deserializedValue = reader.read(null, decoder); + + Assertions.assertNotNull(deserializedValue.getValue2(), "Null get value2"); + Assertions.assertEquals(15L, deserializedValue.getValue2().getValue()); + } + + public static Function getSerializer(Schema writerSchema) { + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumWriter writer = new ReflectDatumWriter<>(writerSchema, new ReflectData()); + return object -> { + try { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().jsonEncoder(writerSchema, stream); + writer.write(object, encoder); + encoder.flush(); + return stream.toByteArray(); + } catch (IOException e) { + throw new 
IllegalStateException(String.format("Avro failed to encode %s to schema %s", object, writerSchema), + e); + } + }; + } + + public static Function getDeserializer(Class readClass, Schema readerSchema, Schema writerSchema) { + Objects.requireNonNull(readClass, "readClass must not be null"); + Objects.requireNonNull(readerSchema, "readerSchema must not be null"); + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumReader reader = new ReflectDatumReader<>(writerSchema, readerSchema, new ReflectData()); + return (byte[] bytes) -> { + try { + Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, new ByteArrayInputStream(bytes)); + T readValue = reader.read(null, decoder); + return readValue; + } catch (IOException e) { + throw new IllegalStateException(String.format("Avro failed to decode %s to %s", new String(bytes), readClass), + e); + } + }; + } + + static final Schema SCHEMA = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ComplexValue").fields() + .optionalLong("value").endRecord(); + + public static class ComplexValue { + + private Long value; + + public ComplexValue() { + } + + public ComplexValue(Long value) { + this.value = value; + } + + public Long getValue() { + return this.value; + } + + @Override + public String toString() { + return "{" + "\"value\": { \"long\": " + this.value + "}}"; + } + } + + public static class ExampleRecord { + public static final Schema SCHEMA_WITH_ONE_FIELD; + public static final Schema SCHEMA_WITH_TWO_FIELDS; + + static { + SCHEMA_WITH_ONE_FIELD = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + .name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().endRecord(); + + GenericData.Record record = new GenericData.Record(TestSchemaValidateDefault.SCHEMA); + record.put("value", 15L); + + SCHEMA_WITH_TWO_FIELDS = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + 
.name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().name("value2") + .type(TestSchemaValidateDefault.SCHEMA).withDefault(record).endRecord(); + } + + private ComplexValue value1; + private ComplexValue value2; + + public ExampleRecord() { + } + + public ExampleRecord(ComplexValue value1, ComplexValue value2) { + this.value1 = value1; + this.value2 = value2; + } + + public ComplexValue getValue1() { + return this.value1; + } + + public ComplexValue getValue2() { + return this.value2; + } + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java index 61c354ef143..b5b39d51648 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidation.java @@ -18,22 +18,18 @@ package org.apache.avro; import static org.apache.avro.TestSchemas.*; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.avro.reflect.ReflectData; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class TestSchemaValidation { - @Rule - public ExpectedException expectedException = ExpectedException.none(); - /** Collection of reader/writer schema pair that are compatible. 
*/ public static final List COMPATIBLE_READER_WRITER_TEST_CASES = list( new ReaderWriter(BOOLEAN_SCHEMA, BOOLEAN_SCHEMA), @@ -136,7 +132,8 @@ public class TestSchemaValidation { new ReaderWriter(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA), new ReaderWriter(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA), new ReaderWriter(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA), new ReaderWriter(INT_MAP_SCHEMA, LONG_MAP_SCHEMA), - new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA), new ReaderWriter(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA), + // new ReaderWriter(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA), + // new ReaderWriter(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA), new ReaderWriter(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA), new ReaderWriter(INT_SCHEMA, ENUM2_AB_SCHEMA), new ReaderWriter(ENUM2_AB_SCHEMA, INT_SCHEMA), @@ -147,10 +144,10 @@ public class TestSchemaValidation { new ReaderWriter(FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA), new ReaderWriter(LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA), new ReaderWriter(INT_SCHEMA, INT_FLOAT_UNION_SCHEMA), - new ReaderWriter(EMPTY_RECORD2, EMPTY_RECORD1), new ReaderWriter(A_INT_RECORD1, EMPTY_RECORD1), - new ReaderWriter(A_INT_B_DINT_RECORD1, EMPTY_RECORD1), + // new ReaderWriter(EMPTY_RECORD2, EMPTY_RECORD1), + new ReaderWriter(A_INT_RECORD1, EMPTY_RECORD1), new ReaderWriter(A_INT_B_DINT_RECORD1, EMPTY_RECORD1), - new ReaderWriter(INT_LIST_RECORD, LONG_LIST_RECORD), + // new ReaderWriter(INT_LIST_RECORD, LONG_LIST_RECORD), new ReaderWriter(NULL_SCHEMA, INT_SCHEMA)); @@ -169,7 +166,7 @@ public class TestSchemaValidation { .name("b").type().longType().noDefault().name("c").type().intType().intDefault(0).endRecord(); @Test - public void testAllTypes() throws SchemaValidationException { + void allTypes() throws SchemaValidationException { Schema s = SchemaBuilder.record("r").fields().requiredBoolean("boolF").requiredInt("intF").requiredLong("longF") .requiredFloat("floatF").requiredDouble("doubleF").requiredString("stringF").requiredBytes("bytesF") 
.name("fixedF1").type().fixed("F1").size(1).noDefault().name("enumF").type().enumeration("E1").symbols("S") @@ -180,46 +177,48 @@ public void testAllTypes() throws SchemaValidationException { } @Test - public void testReadOnePrior() throws SchemaValidationException { + void readOnePrior() throws SchemaValidationException { testValidatorPasses(builder.canReadStrategy().validateLatest(), rec3, rec); testValidatorPasses(builder.canReadStrategy().validateLatest(), rec5, rec3); testValidatorFails(builder.canReadStrategy().validateLatest(), rec4, rec); } @Test - public void testReadAllPrior() throws SchemaValidationException { + void readAllPrior() throws SchemaValidationException { testValidatorPasses(builder.canReadStrategy().validateAll(), rec3, rec, rec2); testValidatorFails(builder.canReadStrategy().validateAll(), rec4, rec, rec2, rec3); testValidatorFails(builder.canReadStrategy().validateAll(), rec5, rec, rec2, rec3); } @Test - public void testOnePriorCanRead() throws SchemaValidationException { + void onePriorCanRead() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateLatest(), rec, rec3); testValidatorFails(builder.canBeReadStrategy().validateLatest(), rec, rec4); } @Test - public void testAllPriorCanRead() throws SchemaValidationException { + void allPriorCanRead() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), rec, rec3, rec2); testValidatorFails(builder.canBeReadStrategy().validateAll(), rec, rec4, rec3, rec2); } @Test - public void testOnePriorCompatible() throws SchemaValidationException { + void onePriorCompatible() throws SchemaValidationException { testValidatorPasses(builder.mutualReadStrategy().validateLatest(), rec, rec3); testValidatorFails(builder.mutualReadStrategy().validateLatest(), rec, rec4); } @Test - public void testAllPriorCompatible() throws SchemaValidationException { + void allPriorCompatible() throws SchemaValidationException { 
testValidatorPasses(builder.mutualReadStrategy().validateAll(), rec, rec3, rec2); testValidatorFails(builder.mutualReadStrategy().validateAll(), rec, rec4, rec3, rec2); } - @Test(expected = AvroRuntimeException.class) - public void testInvalidBuild() { - builder.strategy(null).validateAll(); + @Test + void invalidBuild() { + assertThrows(AvroRuntimeException.class, () -> { + builder.strategy(null).validateAll(); + }); } public static class Point { @@ -241,33 +240,33 @@ public static class Circle { .endRecord(); @Test - public void testReflectMatchStructure() throws SchemaValidationException { + void reflectMatchStructure() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), circleSchemaDifferentNames, ReflectData.get().getSchema(Circle.class)); } @Test - public void testReflectWithAllowNullMatchStructure() throws SchemaValidationException { + void reflectWithAllowNullMatchStructure() throws SchemaValidationException { testValidatorPasses(builder.canBeReadStrategy().validateAll(), circleSchemaDifferentNames, ReflectData.AllowNull.get().getSchema(Circle.class)); } @Test - public void testUnionWithIncompatibleElements() throws SchemaValidationException { + void unionWithIncompatibleElements() throws SchemaValidationException { Schema union1 = Schema.createUnion(Collections.singletonList(rec)); Schema union2 = Schema.createUnion(Collections.singletonList(rec4)); testValidatorFails(builder.canReadStrategy().validateAll(), union2, union1); } @Test - public void testUnionWithCompatibleElements() throws SchemaValidationException { + void unionWithCompatibleElements() throws SchemaValidationException { Schema union1 = Schema.createUnion(Collections.singletonList(rec)); Schema union2 = Schema.createUnion(Collections.singletonList(rec3)); testValidatorPasses(builder.canReadStrategy().validateAll(), union2, union1); } @Test - public void testSchemaCompatibilitySuccesses() throws SchemaValidationException { + void 
schemaCompatibilitySuccesses() throws SchemaValidationException { // float-union-to-int/long-union does not work... // and neither does recursive types for (ReaderWriter tc : COMPATIBLE_READER_WRITER_TEST_CASES) { @@ -276,14 +275,19 @@ public void testSchemaCompatibilitySuccesses() throws SchemaValidationException } @Test - public void testSchemaCompatibilityFailures() throws SchemaValidationException { + void schemaCompatibilityFailures() { for (ReaderWriter tc : INCOMPATIBLE_READER_WRITER_TEST_CASES) { Schema reader = tc.getReader(); Schema writer = tc.getWriter(); - expectedException.expect(SchemaValidationException.class); - expectedException.expectMessage("Unable to read schema: \n" + writer.toString()); + + String expectedMsg = "Unable to read schema: \n" + writer.toString(false); SchemaValidator validator = builder.canReadStrategy().validateAll(); - validator.validate(reader, Collections.singleton(writer)); + SchemaValidationException exception = assertThrows(SchemaValidationException.class, + () -> validator.validate(reader, Collections.singleton(writer)), + "No or wrong exception for (" + reader.toString(false) + "; " + writer.toString(false) + ")"); + assertTrue(exception.getMessage().contains("Unable to read schema:"), + "'" + expectedMsg + "' != '" + exception.getMessage() + "'"); + } } @@ -309,7 +313,7 @@ private void testValidatorFails(SchemaValidator validator, Schema schemaFails, S } catch (SchemaValidationException sve) { threw = true; } - Assert.assertTrue(threw); + assertTrue(threw); } public static final org.apache.avro.Schema recursiveSchema = new org.apache.avro.Schema.Parser().parse( @@ -319,7 +323,7 @@ private void testValidatorFails(SchemaValidator validator, Schema schemaFails, S * Unit test to verify that recursive schemas can be validated. See AVRO-2122. 
*/ @Test - public void testRecursiveSchemaValidation() throws SchemaValidationException { + void recursiveSchemaValidation() throws SchemaValidationException { // before AVRO-2122, this would cause a StackOverflowError final SchemaValidator backwardValidator = builder.canReadStrategy().validateLatest(); backwardValidator.validate(recursiveSchema, Collections.singletonList(recursiveSchema)); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java index e14ec626b73..784d2982f1f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaWarnings.java @@ -17,9 +17,10 @@ */ package org.apache.avro; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; import java.io.ByteArrayOutputStream; import java.io.PrintStream; @@ -40,13 +41,13 @@ public class TestSchemaWarnings { */ private final ByteArrayOutputStream capturedErr = new ByteArrayOutputStream(); - @Before + @BeforeEach public void setupStdErr() { capturedErr.reset(); System.setErr(new PrintStream(capturedErr)); } - @AfterClass + @AfterAll public static void restoreStdErr() { System.setErr(originalErr); } @@ -59,7 +60,9 @@ public String getCapturedStdErr() { } @Test - public void testWarnWhenTheLogicalTypeIsOnTheField() { + // FIXME: Find a different way of capturing the output + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Redirecting stderr does not work within the invoker plugin") + void warnWhenTheLogicalTypeIsOnTheField() { // A record with a single int field. 
Schema s = SchemaBuilder.record("A").fields().requiredInt("a1").endRecord(); @@ -93,7 +96,9 @@ public void testWarnWhenTheLogicalTypeIsOnTheField() { } @Test - public void testWarnWhenTheLogicalTypeIsIgnored() { + // FIXME: Find a different way of capturing the output + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Redirecting stderr does not work within the invoker plugin") + void warnWhenTheLogicalTypeIsIgnored() { // A record with a single int field. Schema s = SchemaBuilder.record("A").fields().requiredLong("a1").endRecord(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java index 30cabadb563..a37de9bfb0d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemas.java @@ -17,14 +17,17 @@ */ package org.apache.avro; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Collections; import org.apache.avro.Schema.Field; -/** Schemas used by other tests in this package. Therefore package protected. */ +/** + * Schemas used by other tests in this package. Therefore mostly package + * protected. 
+ */ public class TestSchemas { static final Schema NULL_SCHEMA = Schema.create(Schema.Type.NULL); @@ -45,6 +48,11 @@ public class TestSchemas { static final Schema STRING_MAP_SCHEMA = Schema.createMap(STRING_SCHEMA); static final Schema ENUM1_AB_SCHEMA = Schema.createEnum("Enum1", null, null, list("A", "B")); + static final Schema ENUM1_AB_SCHEMA_DEFAULT = Schema.createEnum("Enum1", null, null, list("A", "B"), "A"); + public static final Schema ENUM1_AB_SCHEMA_NAMESPACE_1 = Schema.createEnum("Enum1", null, "namespace1", + list("A", "B")); + public static final Schema ENUM1_AB_SCHEMA_NAMESPACE_2 = Schema.createEnum("Enum1", null, "namespace2", + list("A", "B")); static final Schema ENUM1_ABC_SCHEMA = Schema.createEnum("Enum1", null, null, list("A", "B", "C")); static final Schema ENUM1_BC_SCHEMA = Schema.createEnum("Enum1", null, null, list("B", "C")); static final Schema ENUM2_AB_SCHEMA = Schema.createEnum("Enum2", null, null, list("A", "B")); @@ -108,6 +116,9 @@ public class TestSchemas { static final Schema NS_INNER_RECORD1 = Schema.createRecord("InnerRecord1", null, "ns1", false); static final Schema NS_INNER_RECORD2 = Schema.createRecord("InnerRecord1", null, "ns2", false); + static final Schema WITHOUT_NS = Schema.createRecord("Record", null, null, false); + static final Schema WITH_NS = Schema.createRecord("ns.Record", null, null, false); + static { EMPTY_RECORD1.setFields(Collections.emptyList()); EMPTY_RECORD2.setFields(Collections.emptyList()); @@ -137,6 +148,9 @@ public class TestSchemas { .setFields(list(new Schema.Field("f1", Schema.createUnion(NULL_SCHEMA, Schema.createArray(NS_INNER_RECORD1))))); NS_RECORD2 .setFields(list(new Schema.Field("f1", Schema.createUnion(NULL_SCHEMA, Schema.createArray(NS_INNER_RECORD2))))); + + WITH_NS.setFields(list(new Field("f1", INT_SCHEMA, null, null))); + WITHOUT_NS.setFields(list(new Field("f1", INT_SCHEMA, null, null))); } // Recursive records @@ -180,7 +194,7 @@ static ArrayList list(E... 
elements) { static void assertSchemaContains(Schema schemaSubset, Schema original) { String subset = schemaSubset.toString(false); String whole = original.toString(false); - assertTrue(String.format("Subset '%s' not found in '%s'", subset, whole), whole.contains(subset)); + assertTrue(whole.contains(subset), String.format("Subset '%s' not found in '%s'", subset, whole)); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java new file mode 100644 index 00000000000..0da39179506 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java @@ -0,0 +1,164 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import static org.apache.avro.SystemLimitException.*; +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import java.util.function.Function; + +public class TestSystemLimitException { + + /** Delegated here for package visibility. */ + public static final int MAX_ARRAY_VM_LIMIT = SystemLimitException.MAX_ARRAY_VM_LIMIT; + + public static final String ERROR_NEGATIVE = "Malformed data. 
Length is negative: -1"; + public static final String ERROR_VM_LIMIT_BYTES = "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + + " bytes in Java library"; + public static final String ERROR_VM_LIMIT_COLLECTION = "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + + " items in Java library"; + public static final String ERROR_VM_LIMIT_STRING = "Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"; + + /** Delegated here for package visibility. */ + public static void resetLimits() { + SystemLimitException.resetLimits(); + } + + @AfterEach + void reset() { + System.clearProperty(MAX_BYTES_LENGTH_PROPERTY); + System.clearProperty(MAX_COLLECTION_LENGTH_PROPERTY); + System.clearProperty(MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + + /** + * A helper method that tests the consistent limit handling from system + * properties. + * + * @param f The function to be tested. + * @param sysProperty The system property used to control the custom limit. + * @param errorVmLimit The error message used when the property would be + * over the VM limit. + * @param errorCustomLimit The error message used when the property would be + * over the custom limit of 1000. 
+ */ + void helpCheckSystemLimits(Function f, String sysProperty, String errorVmLimit, + String errorCustomLimit) { + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(1024, f.apply(1024L)); + assertEquals(MAX_ARRAY_VM_LIMIT, f.apply((long) MAX_ARRAY_VM_LIMIT)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, () -> f.apply(Long.MAX_VALUE)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. + System.setProperty(sysProperty, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(102, f.apply(102L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> f.apply(1024L)); + assertEquals(errorCustomLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } + + @Test + void testCheckMaxBytesLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxBytesLength, MAX_BYTES_LENGTH_PROPERTY, ERROR_VM_LIMIT_BYTES, + "Bytes length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromZero() { + helpCheckSystemLimits(l -> checkMaxCollectionLength(0L, l), MAX_COLLECTION_LENGTH_PROPERTY, + ERROR_VM_LIMIT_COLLECTION, "Collection length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxStringLength() { + 
helpCheckSystemLimits(SystemLimitException::checkMaxStringLength, MAX_STRING_LENGTH_PROPERTY, ERROR_VM_LIMIT_STRING, + "String length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromNonZero() { + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 10L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT - 10L, 10L)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 9L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(SystemLimitException.MAX_ARRAY_VM_LIMIT - 9L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE - 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE - 10L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Overflow that adds to negative + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(10L, -1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(-1L, 10L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // 
Setting the system property to provide a custom limit. + System.setProperty(MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(102, checkMaxCollectionLength(10L, 92L)); + assertEquals(102, checkMaxCollectionLength(92L, 10L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT, 1)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(1, MAX_ARRAY_VM_LIMIT)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(999, 25)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(25, 999)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java new file mode 100644 index 00000000000..7f5e48fb962 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class TestUnionError { + + @Test + void unionErrorMessage() throws IOException { + String writerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + + " \"name\" : \"C\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"c\",\n" + " \"type\" : [ {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"A\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " }, {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"B\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount1\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " } ]\n" + + " } ]\n" + " }"; + Schema writerSchema = new Schema.Parser().parse(writerSchemaJson); + + String readerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + " \"name\" : \"C1\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"c\",\n" + + " \"type\" : [ {\n" + " 
\"type\" : \"record\",\n" + + " \"name\" : \"A\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"amount\",\n" + " \"type\" : \"int\"\n" + + " } ]\n" + " }, \"float\" ]\n" + " } ]\n" + " }"; + Schema readerSchema = new Schema.Parser().parse(readerSchemaJson); + + List unionSchemas = writerSchema.getField("c").schema().getTypes(); + + GenericRecord r = new GenericData.Record(writerSchema); + GenericRecord b = new GenericData.Record(unionSchemas.get(1)); + b.put("amount1", 12); + r.put("c", b); + + ByteArrayOutputStream outs = new ByteArrayOutputStream(); + GenericDatumWriter datumWriter = new GenericDatumWriter<>(writerSchema); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outs, null); + datumWriter.write(r, encoder); + encoder.flush(); + + InputStream ins = new ByteArrayInputStream(outs.toByteArray()); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(ins, null); + + GenericDatumReader datumReader = new GenericDatumReader<>(writerSchema, readerSchema); + AvroTypeException avroException = assertThrows(AvroTypeException.class, () -> datumReader.read(null, decoder)); + assertEquals("Found B, expecting union[A, float]", avroException.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java b/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java index b9bb4dd6c65..90e19f63e58 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUnionSelfReference.java @@ -19,11 +19,11 @@ import org.slf4j.LoggerFactory; -import static org.junit.Assert.*; - import org.apache.avro.Schema.Field; + +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.avro.Schema.Type; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; public class TestUnionSelfReference { @@ -47,7 +47,7 @@ public class TestUnionSelfReference { + " \"default\": null" + " }" + " ]" + " 
}"; @Test - public void testSelfReferenceInUnion() { + void selfReferenceInUnion() { Schema schema = new Schema.Parser().parse(SIMPLE_BINARY_TREE); Field leftField = schema.getField("left"); assertEquals(JsonProperties.NULL_VALUE, leftField.defaultVal()); @@ -65,7 +65,7 @@ public void testSelfReferenceInUnion() { } @Test - public void testSelfReferenceInThreeUnion() { + void selfReferenceInThreeUnion() { Schema schema = new Schema.Parser().parse(THREE_TYPE_UNION); Field leftField = schema.getField("left"); assertEquals(JsonProperties.NULL_VALUE, leftField.defaultVal()); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java new file mode 100644 index 00000000000..640bf1a2bb5 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import org.apache.avro.generic.GenericFixed; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.math.BigInteger; +import java.util.UUID; +import java.util.stream.Stream; + +public class TestUuidConversions { + + private Conversions.UUIDConversion uuidConversion = new Conversions.UUIDConversion(); + + private Schema fixed = Schema.createFixed("fixed", "doc", "", Long.BYTES * 2); + private Schema fixedUuid = LogicalTypes.uuid().addToSchema(fixed); + + private Schema string = Schema.createFixed("fixed", "doc", "", Long.BYTES * 2); + private Schema stringUuid = LogicalTypes.uuid().addToSchema(string); + + @ParameterizedTest + @MethodSource("uuidData") + void uuidFixed(UUID uuid) { + GenericFixed value = uuidConversion.toFixed(uuid, fixedUuid, LogicalTypes.uuid()); + + byte[] b = new byte[Long.BYTES]; + System.arraycopy(value.bytes(), 0, b, 0, b.length); + Assertions.assertEquals(uuid.getMostSignificantBits(), new BigInteger(b).longValue()); + System.arraycopy(value.bytes(), Long.BYTES, b, 0, b.length); + Assertions.assertEquals(uuid.getLeastSignificantBits(), new BigInteger(b).longValue()); + + UUID uuid1 = uuidConversion.fromFixed(value, fixedUuid, LogicalTypes.uuid()); + Assertions.assertEquals(uuid, uuid1); + } + + @ParameterizedTest + @MethodSource("uuidData") + void uuidCharSequence(UUID uuid) { + CharSequence value = uuidConversion.toCharSequence(uuid, stringUuid, LogicalTypes.uuid()); + + Assertions.assertEquals(uuid.toString(), value.toString()); + + UUID uuid1 = uuidConversion.fromCharSequence(value, stringUuid, LogicalTypes.uuid()); + Assertions.assertEquals(uuid, uuid1); + } + + public static Stream uuidData() { + return Stream.of(Arguments.of(new UUID(Long.MIN_VALUE, Long.MAX_VALUE)), Arguments.of(new UUID(-1, 0)), + Arguments.of(UUID.randomUUID()), 
Arguments.of(UUID.randomUUID())); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java b/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java index 5f3012f869c..c2d161de50c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java +++ b/lang/java/avro/src/test/java/org/apache/avro/data/RecordBuilderBaseTest.java @@ -17,6 +17,9 @@ */ package org.apache.avro.data; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -24,9 +27,8 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; /** * Unit test for RecordBuilderBase. @@ -35,7 +37,7 @@ public class RecordBuilderBaseTest { private static Set primitives; private static Set nonNullPrimitives; - @BeforeClass() + @BeforeAll() public static void setUpBeforeClass() { primitives = new HashSet<>(Arrays.asList(Type.values())); primitives.removeAll(Arrays.asList(Type.RECORD, Type.ENUM, Type.ARRAY, Type.MAP, Type.UNION, Type.FIXED)); @@ -45,39 +47,39 @@ public static void setUpBeforeClass() { } @Test - public void testIsValidValueWithPrimitives() { + void isValidValueWithPrimitives() { // Verify that a non-null value is valid for all primitives: for (Type type : primitives) { Field f = new Field("f", Schema.create(type), null, null); - Assert.assertTrue(RecordBuilderBase.isValidValue(f, new Object())); + assertTrue(RecordBuilderBase.isValidValue(f, new Object())); } // Verify that null is not valid for all non-null primitives: for (Type type : nonNullPrimitives) { Field f = new Field("f", Schema.create(type), null, null); - Assert.assertFalse(RecordBuilderBase.isValidValue(f, 
null)); + assertFalse(RecordBuilderBase.isValidValue(f, null)); } } @Test - public void testIsValidValueWithNullField() { + void isValidValueWithNullField() { // Verify that null is a valid value for null fields: - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", Schema.create(Type.NULL), null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", Schema.create(Type.NULL), null, null), null)); } @Test - public void testIsValidValueWithUnion() { + void isValidValueWithUnion() { // Verify that null values are not valid for a union with no null type: Schema unionWithoutNull = Schema .createUnion(Arrays.asList(Schema.create(Type.STRING), Schema.create(Type.BOOLEAN))); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), new Object())); - Assert.assertFalse(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), new Object())); + assertFalse(RecordBuilderBase.isValidValue(new Field("f", unionWithoutNull, null, null), null)); // Verify that null values are valid for a union with a null type: Schema unionWithNull = Schema.createUnion(Arrays.asList(Schema.create(Type.STRING), Schema.create(Type.NULL))); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), new Object())); - Assert.assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), null)); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), new Object())); + assertTrue(RecordBuilderBase.isValidValue(new Field("f", unionWithNull, null, null), null)); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java b/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java index 0cf4454d580..089915803a0 100644 --- 
a/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java +++ b/lang/java/avro/src/test/java/org/apache/avro/data/TestTimeConversions.java @@ -18,6 +18,8 @@ package org.apache.avro.data; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.time.Instant; import java.time.LocalDate; import java.time.LocalTime; @@ -32,9 +34,8 @@ import org.apache.avro.data.TimeConversions.TimestampMicrosConversion; import org.apache.avro.data.TimeConversions.TimestampMillisConversion; import org.apache.avro.reflect.ReflectData; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; public class TestTimeConversions { @@ -44,7 +45,7 @@ public class TestTimeConversions { public static Schema TIMESTAMP_MILLIS_SCHEMA; public static Schema TIMESTAMP_MICROS_SCHEMA; - @BeforeClass + @BeforeAll public static void createSchemas() { TestTimeConversions.DATE_SCHEMA = LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)); TestTimeConversions.TIME_MILLIS_SCHEMA = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT)); @@ -56,78 +57,74 @@ public static void createSchemas() { } @Test - public void testDateConversion() throws Exception { + void dateConversion() throws Exception { DateConversion conversion = new DateConversion(); LocalDate Jan_6_1970 = LocalDate.of(1970, 1, 6); // 5 LocalDate Jan_1_1970 = LocalDate.of(1970, 1, 1); // 0 LocalDate Dec_27_1969 = LocalDate.of(1969, 12, 27); // -5 - Assert.assertEquals("6 Jan 1970 should be 5", 5, - (int) conversion.toInt(Jan_6_1970, DATE_SCHEMA, LogicalTypes.date())); - Assert.assertEquals("1 Jan 1970 should be 0", 0, - (int) conversion.toInt(Jan_1_1970, DATE_SCHEMA, LogicalTypes.date())); - Assert.assertEquals("27 Dec 1969 should be -5", -5, - (int) conversion.toInt(Dec_27_1969, DATE_SCHEMA, LogicalTypes.date())); - - Assert.assertEquals("6 Jan 1970 should be 5", conversion.fromInt(5, 
DATE_SCHEMA, LogicalTypes.date()), Jan_6_1970); - Assert.assertEquals("1 Jan 1970 should be 0", conversion.fromInt(0, DATE_SCHEMA, LogicalTypes.date()), Jan_1_1970); - Assert.assertEquals("27 Dec 1969 should be -5", conversion.fromInt(-5, DATE_SCHEMA, LogicalTypes.date()), - Dec_27_1969); + assertEquals(5, (int) conversion.toInt(Jan_6_1970, DATE_SCHEMA, LogicalTypes.date()), "6 Jan 1970 should be 5"); + assertEquals(0, (int) conversion.toInt(Jan_1_1970, DATE_SCHEMA, LogicalTypes.date()), "1 Jan 1970 should be 0"); + assertEquals(-5, (int) conversion.toInt(Dec_27_1969, DATE_SCHEMA, LogicalTypes.date()), "27 Dec 1969 should be -5"); + + assertEquals(conversion.fromInt(5, DATE_SCHEMA, LogicalTypes.date()), Jan_6_1970, "6 Jan 1970 should be 5"); + assertEquals(conversion.fromInt(0, DATE_SCHEMA, LogicalTypes.date()), Jan_1_1970, "1 Jan 1970 should be 0"); + assertEquals(conversion.fromInt(-5, DATE_SCHEMA, LogicalTypes.date()), Dec_27_1969, "27 Dec 1969 should be -5"); } @Test - public void testTimeMillisConversion() { + void timeMillisConversion() { TimeMillisConversion conversion = new TimeMillisConversion(); LocalTime oneAM = LocalTime.of(1, 0); LocalTime afternoon = LocalTime.of(15, 14, 15, 926_000_000); int afternoonMillis = ((15 * 60 + 14) * 60 + 15) * 1000 + 926; - Assert.assertEquals("Midnight should be 0", 0, - (int) conversion.toInt(LocalTime.MIDNIGHT, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("01:00 should be 3,600,000", 3_600_000, - (int) conversion.toInt(oneAM, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("15:14:15.926 should be " + afternoonMillis, afternoonMillis, - (int) conversion.toInt(afternoon, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - - Assert.assertEquals("Midnight should be 0", LocalTime.MIDNIGHT, - conversion.fromInt(0, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); - Assert.assertEquals("01:00 should be 3,600,000", oneAM, - conversion.fromInt(3600000, TIME_MILLIS_SCHEMA, 
LogicalTypes.timeMillis())); - Assert.assertEquals("15:14:15.926 should be " + afternoonMillis, afternoon, - conversion.fromInt(afternoonMillis, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis())); + assertEquals(0, (int) conversion.toInt(LocalTime.MIDNIGHT, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "Midnight should be 0"); + assertEquals(3_600_000, (int) conversion.toInt(oneAM, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "01:00 should be 3,600,000"); + assertEquals(afternoonMillis, (int) conversion.toInt(afternoon, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "15:14:15.926 should be " + afternoonMillis); + + assertEquals(LocalTime.MIDNIGHT, conversion.fromInt(0, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "Midnight should be 0"); + assertEquals(oneAM, conversion.fromInt(3600000, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "01:00 should be 3,600,000"); + assertEquals(afternoon, conversion.fromInt(afternoonMillis, TIME_MILLIS_SCHEMA, LogicalTypes.timeMillis()), + "15:14:15.926 should be " + afternoonMillis); } @Test - public void testTimeMicrosConversion() throws Exception { + void timeMicrosConversion() throws Exception { TimeMicrosConversion conversion = new TimeMicrosConversion(); LocalTime oneAM = LocalTime.of(1, 0); LocalTime afternoon = LocalTime.of(15, 14, 15, 926_551_000); long afternoonMicros = ((long) (15 * 60 + 14) * 60 + 15) * 1_000_000 + 926_551; - Assert.assertEquals("Midnight should be 0", LocalTime.MIDNIGHT, - conversion.fromLong(0L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("01:00 should be 3,600,000,000", oneAM, - conversion.fromLong(3_600_000_000L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("15:14:15.926551 should be " + afternoonMicros, afternoon, - conversion.fromLong(afternoonMicros, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - - Assert.assertEquals("Midnight should be 0", 0, - (long) conversion.toLong(LocalTime.MIDNIGHT, TIME_MICROS_SCHEMA, 
LogicalTypes.timeMicros())); - Assert.assertEquals("01:00 should be 3,600,000,000", 3_600_000_000L, - (long) conversion.toLong(oneAM, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); - Assert.assertEquals("15:14:15.926551 should be " + afternoonMicros, afternoonMicros, - (long) conversion.toLong(afternoon, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros())); + assertEquals(LocalTime.MIDNIGHT, conversion.fromLong(0L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "Midnight should be 0"); + assertEquals(oneAM, conversion.fromLong(3_600_000_000L, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "01:00 should be 3,600,000,000"); + assertEquals(afternoon, conversion.fromLong(afternoonMicros, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "15:14:15.926551 should be " + afternoonMicros); + + assertEquals(0, (long) conversion.toLong(LocalTime.MIDNIGHT, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "Midnight should be 0"); + assertEquals(3_600_000_000L, (long) conversion.toLong(oneAM, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "01:00 should be 3,600,000,000"); + assertEquals(afternoonMicros, (long) conversion.toLong(afternoon, TIME_MICROS_SCHEMA, LogicalTypes.timeMicros()), + "15:14:15.926551 should be " + afternoonMicros); } @Test - public void testTimestampMillisConversion() throws Exception { + void timestampMillisConversion() throws Exception { TimestampMillisConversion conversion = new TimestampMillisConversion(); long nowInstant = Instant.now().toEpochMilli(); // ms precision // round trip Instant now = conversion.fromLong(nowInstant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()); long roundTrip = conversion.toLong(now, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()); - Assert.assertEquals("Round-trip conversion should work", nowInstant, roundTrip); + assertEquals(nowInstant, roundTrip, "Round-trip conversion should work"); long May_28_2015_21_46_53_221_instant = 1432849613221L; Instant May_28_2015_21_46_53_221 = 
ZonedDateTime.of(2015, 5, 28, 21, 46, 53, 221_000_000, ZoneOffset.UTC) @@ -135,30 +132,33 @@ public void testTimestampMillisConversion() throws Exception { // known dates from https://www.epochconverter.com/ // > Epoch - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221, - conversion.fromLong(May_28_2015_21_46_53_221_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_instant, - (long) conversion.toLong(May_28_2015_21_46_53_221, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(May_28_2015_21_46_53_221, + conversion.fromLong(May_28_2015_21_46_53_221_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); + assertEquals(May_28_2015_21_46_53_221_instant, + (long) conversion.toLong(May_28_2015_21_46_53_221, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); // Epoch - Assert.assertEquals("1970-01-01 should be 0", Instant.EPOCH, - conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("1970-01-01 should be 0", 0L, - (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), - TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Instant.EPOCH, conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "1970-01-01 should be 0"); + assertEquals(0L, (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "1970-01-01 should be 0"); // < Epoch long Jul_01_1969_12_00_00_123_instant = -15854400000L + 123; Instant Jul_01_1969_12_00_00_123 = ZonedDateTime.of(1969, 7, 1, 12, 0, 0, 123_000_000, ZoneOffset.UTC).toInstant(); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_123, - 
conversion.fromLong(Jul_01_1969_12_00_00_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_123_instant, - (long) conversion.toLong(Jul_01_1969_12_00_00_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Jul_01_1969_12_00_00_123, + conversion.fromLong(Jul_01_1969_12_00_00_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); + assertEquals(Jul_01_1969_12_00_00_123_instant, + (long) conversion.toLong(Jul_01_1969_12_00_00_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); } @Test - public void testTimestampMicrosConversion() throws Exception { + void timestampMicrosConversion() throws Exception { TimestampMicrosConversion conversion = new TimestampMicrosConversion(); // known dates from https://www.epochconverter.com/ @@ -167,57 +167,58 @@ public void testTimestampMicrosConversion() throws Exception { Instant May_28_2015_21_46_53_221_843 = ZonedDateTime.of(2015, 5, 28, 21, 46, 53, 221_843_000, ZoneOffset.UTC) .toInstant(); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_843, conversion - .fromLong(May_28_2015_21_46_53_221_843_instant, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMicros())); + assertEquals(May_28_2015_21_46_53_221_843, conversion.fromLong(May_28_2015_21_46_53_221_843_instant, + TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMicros()), "Known date should be correct"); - Assert.assertEquals("Known date should be correct", May_28_2015_21_46_53_221_843_instant, (long) conversion - .toLong(May_28_2015_21_46_53_221_843, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(May_28_2015_21_46_53_221_843_instant, + (long) conversion.toLong(May_28_2015_21_46_53_221_843, TIMESTAMP_MICROS_SCHEMA, LogicalTypes.timestampMillis()), + "Known date should be correct"); // Epoch - 
Assert.assertEquals("1970-01-01 should be 0", Instant.EPOCH, - conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("1970-01-01 should be 0", 0L, - (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), - TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Instant.EPOCH, conversion.fromLong(0L, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "1970-01-01 should be 0"); + assertEquals(0L, (long) conversion.toLong(ZonedDateTime.ofInstant(Instant.EPOCH, ZoneOffset.UTC).toInstant(), + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "1970-01-01 should be 0"); // < Epoch long Jul_01_1969_12_00_00_000_123_instant = -15854400000L * 1000 + 123; Instant Jul_01_1969_12_00_00_000_123 = ZonedDateTime.of(1969, 7, 1, 12, 0, 0, 123_000, ZoneOffset.UTC).toInstant(); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_000_123, conversion - .fromLong(Jul_01_1969_12_00_00_000_123_instant, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); - Assert.assertEquals("Pre 1970 date should be correct", Jul_01_1969_12_00_00_000_123_instant, (long) conversion - .toLong(Jul_01_1969_12_00_00_000_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis())); + assertEquals(Jul_01_1969_12_00_00_000_123, conversion.fromLong(Jul_01_1969_12_00_00_000_123_instant, + TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), "Pre 1970 date should be correct"); + assertEquals(Jul_01_1969_12_00_00_000_123_instant, + (long) conversion.toLong(Jul_01_1969_12_00_00_000_123, TIMESTAMP_MILLIS_SCHEMA, LogicalTypes.timestampMillis()), + "Pre 1970 date should be correct"); } @Test - public void testDynamicSchemaWithDateConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalDate", new TimeConversions.DateConversion()); - 
Assert.assertEquals("Reflected schema should be logicalType date", DATE_SCHEMA, schema); + assertEquals(DATE_SCHEMA, schema, "Reflected schema should be logicalType date"); } @Test - public void testDynamicSchemaWithTimeConversion() throws ClassNotFoundException { + void dynamicSchemaWithTimeConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalTime", new TimeConversions.TimeMillisConversion()); - Assert.assertEquals("Reflected schema should be logicalType timeMillis", TIME_MILLIS_SCHEMA, schema); + assertEquals(TIME_MILLIS_SCHEMA, schema, "Reflected schema should be logicalType timeMillis"); } @Test - public void testDynamicSchemaWithTimeMicrosConversion() throws ClassNotFoundException { + void dynamicSchemaWithTimeMicrosConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.LocalTime", new TimeConversions.TimeMicrosConversion()); - Assert.assertEquals("Reflected schema should be logicalType timeMicros", TIME_MICROS_SCHEMA, schema); + assertEquals(TIME_MICROS_SCHEMA, schema, "Reflected schema should be logicalType timeMicros"); } @Test - public void testDynamicSchemaWithDateTimeConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateTimeConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.Instant", new TimeConversions.TimestampMillisConversion()); - Assert.assertEquals("Reflected schema should be logicalType timestampMillis", TIMESTAMP_MILLIS_SCHEMA, schema); + assertEquals(TIMESTAMP_MILLIS_SCHEMA, schema, "Reflected schema should be logicalType timestampMillis"); } @Test - public void testDynamicSchemaWithDateTimeMicrosConversion() throws ClassNotFoundException { + void dynamicSchemaWithDateTimeMicrosConversion() throws ClassNotFoundException { Schema schema = getReflectedSchemaByName("java.time.Instant", new TimeConversions.TimestampMicrosConversion()); - Assert.assertEquals("Reflected schema should be logicalType 
timestampMicros", TIMESTAMP_MICROS_SCHEMA, schema); + assertEquals(TIMESTAMP_MICROS_SCHEMA, schema, "Reflected schema should be logicalType timestampMicros"); } private Schema getReflectedSchemaByName(String className, Conversion conversion) throws ClassNotFoundException { diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java index 491a7e3f713..ef928db6f47 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java @@ -18,43 +18,27 @@ package org.apache.avro.file; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import static org.junit.Assert.assertTrue; - -@RunWith(Parameterized.class) public class TestAllCodecs { - @Parameterized.Parameters(name = "{index}: codec={0}") - public static Collection data() { - return Arrays.asList(new Object[][] { { "bzip2", BZip2Codec.class }, { "zstandard", ZstandardCodec.class }, - { "null", NullCodec.class }, { "xz", XZCodec.class }, { "snappy", SnappyCodec.class }, - { "deflate", DeflateCodec.class }, }); - } - - @Parameterized.Parameter(0) - public String codec; - - @Parameterized.Parameter(1) - public Class codecClass; - - @Test - public void testCodec() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codec(String codec, Class codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = 
CodecFactory.fromString(codec).createInstance(); - assertTrue(codecClass.isInstance(codecInstance)); - assertTrue(codecInstance.getName().equals(codec)); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); + Assertions.assertTrue(codecInstance.getName().equals(codec)); ByteBuffer inputByteBuffer = ByteBuffer.wrap(input); ByteBuffer compressedBuffer = codecInstance.compress(inputByteBuffer); @@ -62,28 +46,30 @@ public void testCodec() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // While the compressed size could in many real cases // *increase* compared to the input size, our input data // is extremely easy to compress and all Avro's compression algorithms // should have a compression ratio greater than 1 (except 'null'). - assertTrue(compressedSize < inputSize || codec.equals("null")); + Assertions.assertTrue(compressedSize < inputSize || codec.equals("null")); // Decompress the data ByteBuffer decompressedBuffer = codecInstance.decompress(compressedBuffer); // Validate the the input and output are equal. 
inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); } - @Test - public void testCodecSlice() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codecSlice(String codec, Class codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = CodecFactory.fromString(codec).createInstance(); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); ByteBuffer partialBuffer = ByteBuffer.wrap(input); partialBuffer.position(17); @@ -94,7 +80,7 @@ public void testCodecSlice() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // Create a slice from the compressed buffer ByteBuffer sliceBuffer = ByteBuffer.allocate(compressedSize + 100); @@ -108,7 +94,13 @@ public void testCodecSlice() throws IOException { // Validate the the input and output are equal. 
inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); + } + + public static Stream codecTypes() { + return Stream.of(Arguments.of("bzip2", BZip2Codec.class), Arguments.of("zstandard", ZstandardCodec.class), + Arguments.of("null", NullCodec.class), Arguments.of("xz", XZCodec.class), + Arguments.of("snappy", SnappyCodec.class), Arguments.of("deflate", DeflateCodec.class)); } // Generate some test data that will compress easily diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java index a7b0ef02e8f..930b0d34a90 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestCustomCodec.java @@ -19,22 +19,23 @@ package org.apache.avro.file; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import java.io.IOException; import java.nio.ByteBuffer; import org.apache.avro.file.codec.CustomCodec; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestCustomCodec { @Test - public void testCustomCodec() { + void customCodec() { CustomCodec customCodec = new CustomCodec(); Codec snappyCodec = new SnappyCodec.Option().createInstance(); - assertTrue(customCodec.equals(new CustomCodec())); - assertFalse(customCodec.equals(snappyCodec)); + assertEquals(customCodec, new CustomCodec()); + assertNotEquals(customCodec, snappyCodec); String testString = "Testing 123"; ByteBuffer original = ByteBuffer.allocate(testString.getBytes(UTF_8).length); diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java index 
eba142a5cf9..089497d7bfe 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestIOExceptionDuringWrite.java @@ -17,7 +17,7 @@ */ package org.apache.avro.file; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.io.OutputStream; @@ -25,7 +25,7 @@ import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.RandomData; -import org.junit.Test; +import org.junit.jupiter.api.Test; /* * Tests if we not write any garbage to the end of the file after any exception occurred @@ -57,7 +57,7 @@ public void write(int b) throws IOException { private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); @Test - public void testNoWritingAfterException() throws IOException { + void noWritingAfterException() throws IOException { try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { writer.create(SCHEMA, new FailingOutputStream(100000)); int recordCnt = 0; diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java index 5c8b3a8ddb5..2e6b46e5d1f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableByteArrayInput.java @@ -17,10 +17,6 @@ */ package org.apache.avro.file; -import java.io.ByteArrayOutputStream; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; @@ -30,8 +26,19 @@ import org.apache.avro.generic.IndexedRecord; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.Test; +import 
org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; public class TestSeekableByteArrayInput { @@ -53,7 +60,7 @@ private Schema getTestSchema() throws Exception { } @Test - public void testSerialization() throws Exception { + void serialization() throws Exception { Schema testSchema = getTestSchema(); GenericRecord message = new Record(testSchema); message.put("name", "testValue"); @@ -66,8 +73,35 @@ public void testSerialization() throws Exception { FileReader dfr = DataFileReader.openReader(in, reader)) { result = dfr.next(); } - Assert.assertNotNull(result); - Assert.assertTrue(result instanceof GenericRecord); - Assert.assertEquals(new Utf8("testValue"), ((GenericRecord) result).get("name")); + assertNotNull(result); + assertInstanceOf(GenericRecord.class, result); + assertEquals(new Utf8("testValue"), ((GenericRecord) result).get("name")); + } + + @Test + void readingData() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + byte[] result = new byte[16]; + try (SeekableInput in = new SeekableByteArrayInput(data)) { + in.read(result, 0, 8); + in.seek(4); + in.read(result, 8, 8); + assertEquals(12, in.tell()); + assertEquals(data.length, in.length()); + assertEquals("01234567456789AB", new String(result, StandardCharsets.UTF_8)); + } + } + + @Test + void illegalSeeks() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + try (SeekableInput in = new SeekableByteArrayInput(data)) { + byte[] buf = new byte[2]; + in.read(buf, 0, buf.length); + in.seek(-4); + assertEquals(2, 
in.tell()); + + assertThrows(EOFException.class, () -> in.seek(64)); + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java new file mode 100644 index 00000000000..34dbf298215 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestSeekableInputStream.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.file; + +import org.junit.Assert; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestSeekableInputStream { + @Test + void readingData() throws IOException { + byte[] data = "0123456789ABCD".getBytes(StandardCharsets.UTF_8); + try (DataFileReader.SeekableInputStream sin = new DataFileReader.SeekableInputStream( + new SeekableByteArrayInput(data))) { + byte[] first8 = new byte[8]; + assertEquals(first8.length, sin.read(first8, 0, 8)); + assertArrayEquals("01234567".getBytes(StandardCharsets.UTF_8), first8); + sin.seek(4); + assertEquals(10, sin.available()); + assertEquals(2, sin.skip(2)); + assertEquals((byte) '6', sin.read()); + byte[] next4 = new byte[4]; + assertEquals(next4.length, sin.read(next4)); + assertArrayEquals("789A".getBytes(StandardCharsets.UTF_8), next4); + assertEquals(11, sin.tell()); + assertEquals(data.length, sin.length()); + } + } + + @Test + void illegalSeek() throws IOException { + try (SeekableInput in = new SeekableByteArrayInput("".getBytes(StandardCharsets.UTF_8)); + DataFileReader.SeekableInputStream sin = new DataFileReader.SeekableInputStream(in)) { + Assert.assertThrows(IOException.class, () -> sin.seek(-5)); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java index aa002608342..7242996fae0 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestZstandardCodec.java @@ -17,19 +17,20 @@ */ package org.apache.avro.file; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import 
java.io.IOException; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestZstandardCodec { @Test - public void testZstandardToStringAndName() throws IOException { + void zstandardToStringAndName() throws IOException { Codec codec = CodecFactory.zstandardCodec(3).createInstance(); assertTrue(codec instanceof ZstandardCodec); - assertTrue(codec.getName().equals("zstandard")); - assertTrue(codec.toString().equals("zstandard[3]")); + assertEquals(codec.getName(), "zstandard"); + assertEquals(codec.toString(), "zstandard[3]"); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java new file mode 100644 index 00000000000..a4ffebac02d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class GenericDataArrayTest { + + @Test + void test() { + GenericData.Array array = new GenericData.Array<>(10, + Schema.createArray(Schema.create(Schema.Type.STRING))); + array.add("One"); + array.add("Two"); + array.add("Two"); + array.add("Three"); + array.add(4, "Four"); + array.remove(1); + Assertions.assertEquals(4, array.size()); + Assertions.assertEquals("One", array.get(0)); + Assertions.assertEquals("Two", array.get(1)); + Assertions.assertEquals("Three", array.get(2)); + Assertions.assertEquals("Four", array.get(3)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java new file mode 100644 index 00000000000..7d199bf92c8 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PrimitivesArraysTest { + + @Test + void booleanArray() { + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + + Assertions.assertEquals(0, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + ba.add(true); + } else { + ba.add(false); + } + } + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + Assertions.assertEquals(98, ba.size()); + for (int i = 13; i < 99; i++) { + if ((i + 1) % 3 == 0 || (i + 1) % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "After delete, Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "After delete, Error for " + i); + } + } + + ba.add(12, false); + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + ba.add(12, true); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + ba.add(99, true); + Assertions.assertTrue(ba.get(99), "Error for 99"); + ba.remove(99); + ba.reverse(); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(99 - i), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(99 - i), "Error for " + i); + } + } + } + + @Test + void booleanArrayIterator() 
{ + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + boolean[] model = new boolean[] { true, false, false, true, true, true, false, false, true, false, false }; + for (boolean x : model) { + ba.add(x); + } + Assertions.assertEquals(model.length, ba.size()); + int index = 0; + for (Boolean b : ba) { + Assertions.assertEquals(model[index], b); + index++; + } + } + + @Test + void intArray() { + final PrimitivesArrays.IntArray intArray = new PrimitivesArrays.IntArray(4, + Schema.createArray(Schema.create(Schema.Type.INT))); + for (int i = 1; i <= 100; i++) { + intArray.add(i); + } + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + + int expectedValue = 1; + for (Integer value : intArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + intArray.remove(40); + Assertions.assertEquals(99, intArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, intArray.get(i - 1)); + } + } + intArray.add(40, 41); + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + intArray.set(40, 25); + Assertions.assertEquals(25, intArray.get(40)); + + Assertions.assertEquals(0, intArray.peek()); + intArray.set(40, 41); + intArray.reverse(); + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, intArray.get(i - 1)); + } + } + + @Test + void longArray() { + final PrimitivesArrays.LongArray longArray = new PrimitivesArrays.LongArray(4, + Schema.createArray(Schema.create(Schema.Type.LONG))); + for (long i = 1; i <= 100; i++) { + longArray.add(i); + } + Assertions.assertEquals(100l, longArray.size()); + for (int i = 1; i <= 100; i++) { + 
Assertions.assertEquals(i, longArray.get(i - 1)); + } + + int expectedValue = 1; + for (Long value : longArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + longArray.remove(40); + Assertions.assertEquals(99, longArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, longArray.get(i - 1)); + } + } + longArray.add(40, 41); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } + longArray.set(40, 25); + Assertions.assertEquals(25, longArray.get(40)); + + Assertions.assertEquals(0, longArray.peek()); + longArray.set(40, 41); + longArray.reverse(); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, longArray.get(i - 1)); + } + } + + @Test + void floatArray() { + final PrimitivesArrays.FloatArray floatArray = new PrimitivesArrays.FloatArray(4, + Schema.createArray(Schema.create(Schema.Type.FLOAT))); + for (int i = 1; i <= 100; i++) { + floatArray.add(i * 3.3f); + } + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + + float expectedValue = 1.0f; + for (Float value : floatArray) { + Assertions.assertEquals(expectedValue * 3.3f, value); + expectedValue++; + } + + floatArray.remove(40); + Assertions.assertEquals(99, floatArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.3f, floatArray.get(i - 1)); + } + } + floatArray.add(40, 41 * 3.3f); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + floatArray.set(40, 25.2f); + Assertions.assertEquals(25.2f, 
floatArray.get(40)); + + Assertions.assertEquals(0.0f, floatArray.peek()); + floatArray.set(40, 41 * 3.3f); + floatArray.reverse(); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.3f, floatArray.get(i - 1)); + } + } + + @Test + void doubleArray() { + final PrimitivesArrays.DoubleArray doubleArray = new PrimitivesArrays.DoubleArray(4, + Schema.createArray(Schema.create(Schema.Type.DOUBLE))); + for (int i = 1; i <= 100; i++) { + doubleArray.add(i * 3.0d); + } + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + + double expectedValue = 1.0f; + for (Double value : doubleArray) { + Assertions.assertEquals(expectedValue * 3.0d, value); + expectedValue++; + } + + doubleArray.remove(40); + Assertions.assertEquals(99, doubleArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.0d, doubleArray.get(i - 1)); + } + } + doubleArray.add(40, 41 * 3.0d); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + doubleArray.set(40, 25.2d); + Assertions.assertEquals(25.2d, doubleArray.get(40)); + + Assertions.assertEquals(0.0d, doubleArray.peek()); + doubleArray.set(40, 41 * 3.0d); + doubleArray.reverse(); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.0d, doubleArray.get(i - 1)); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java index c01e32fdfb9..bf56d0ca4e1 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java +++ 
b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericConcreteEnum.java @@ -24,14 +24,13 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.specific.SpecificDatumReader; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayOutputStream; + +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; import java.util.Collections; -import static org.junit.Assert.assertEquals; - /** * See AVRO-1810: GenericDatumWriter broken with Enum */ @@ -47,7 +46,7 @@ private static byte[] serializeRecord(FooBarSpecificRecord fooBarSpecificRecord) } @Test - public void testGenericWriteAndRead() throws IOException { + void genericWriteAndRead() throws IOException { FooBarSpecificRecord specificRecord = getRecord(); byte[] bytes = serializeRecord(specificRecord); @@ -62,7 +61,7 @@ public void testGenericWriteAndRead() throws IOException { } @Test - public void testGenericWriteSpecificRead() throws IOException { + void genericWriteSpecificRead() throws IOException { FooBarSpecificRecord specificRecord = getRecord(); byte[] bytes = serializeRecord(specificRecord); diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java index c8690ea9265..20c82179561 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java @@ -19,12 +19,7 @@ import static org.apache.avro.TestCircularReferences.Reference; import static org.apache.avro.TestCircularReferences.Referenceable; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static 
org.junit.jupiter.api.Assertions.*; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParseException; @@ -45,6 +40,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; + import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -56,61 +53,77 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestGenericData { - @Test(expected = AvroRuntimeException.class) - public void testrecordConstructorNullSchema() throws Exception { - new GenericData.Record(null); + @Test + void recordConstructorNullSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Record(null); + }); } - @Test(expected = AvroRuntimeException.class) - public void testrecordConstructorWrongSchema() throws Exception { - new GenericData.Record(Schema.create(Schema.Type.INT)); + @Test + void recordConstructorWrongSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Record(Schema.create(Schema.Type.INT)); + }); } - @Test(expected = AvroRuntimeException.class) - public void testArrayConstructorNullSchema() throws Exception { - new GenericData.Array<>(1, null); + @Test + void arrayConstructorNullSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Array<>(1, null); + }); } - @Test(expected = AvroRuntimeException.class) - public void testArrayConstructorWrongSchema() throws Exception { - new GenericData.Array<>(1, Schema.create(Schema.Type.INT)); + @Test + void arrayConstructorWrongSchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + new GenericData.Array<>(1, Schema.create(Schema.Type.INT)); + }); } - @Test(expected = AvroRuntimeException.class) - public void 
testRecordCreateEmptySchema() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - new GenericData.Record(s); + @Test + void recordCreateEmptySchema() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + new GenericData.Record(s); + }); } - @Test(expected = AvroRuntimeException.class) - public void testGetEmptySchemaFields() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - s.getFields(); + @Test + void getEmptySchemaFields() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + s.getFields(); + }); } - @Test(expected = AvroRuntimeException.class) - public void testGetEmptySchemaField() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - s.getField("foo"); + @Test + void getEmptySchemaField() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + s.getField("foo"); + }); } - @Test(expected = AvroRuntimeException.class) - public void testRecordPutInvalidField() throws Exception { - Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - List fields = new ArrayList<>(); - fields.add(new Schema.Field("someFieldName", s, "docs", null)); - s.setFields(fields); - Record r = new GenericData.Record(s); - r.put("invalidFieldName", "someValue"); + @Test + void recordPutInvalidField() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); + List fields = new ArrayList<>(); + fields.add(new Schema.Field("someFieldName", s, "docs", null)); + s.setFields(fields); + Record r = new GenericData.Record(s); + 
r.put("invalidFieldName", "someValue"); + }); } - @Test /** Make sure that even with nulls, hashCode() doesn't throw NPE. */ - public void testHashCode() { + @Test + void testHashCode() { GenericData.get().hashCode(null, Schema.create(Type.NULL)); GenericData.get().hashCode(null, Schema.createUnion(Arrays.asList(Schema.create(Type.BOOLEAN), Schema.create(Type.STRING)))); @@ -123,7 +136,7 @@ public void testHashCode() { } @Test - public void testEquals() { + void testEquals() { Schema s = recordSchema(); GenericRecord r0 = new GenericData.Record(s); GenericRecord r1 = new GenericData.Record(s); @@ -143,6 +156,157 @@ public void testEquals() { assertEquals(r1, r2); } + @Test + public void testMapKeyEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put(new Utf8("keyOne"), "valueOne"); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testMapValuesEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put("keyOne", new Utf8("valueOne")); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + 
assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMaps() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + r1.put("my_map", new HashMap<>()); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMapAndNonEmptyMap() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r1.put("my_map", pair1); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSubset() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + m1.put("keyTwo", "valueTwo"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueOne"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentKeys() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = 
Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyTwo", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentValues() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testArrayValuesEqualsStringAndUtf8Compatibility() { + Field myArrayField = new Field("my_array", Schema.createArray(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myArrayField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + List array1 = Arrays.asList("valueOne"); + r0.put("my_array", array1); + + List array2 = Arrays.asList(new Utf8("valueOne")); + r1.put("my_array", array2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + private Schema recordSchema() { List fields = new ArrayList<>(); fields.add(new Field("anArray", Schema.createArray(Schema.create(Type.STRING)), null, null)); @@ -153,7 +317,7 @@ private Schema recordSchema() { } @Test - public void testEquals2() { + void 
equals2() { Schema schema1 = Schema.createRecord("r", null, "x", false); List fields1 = new ArrayList<>(); fields1.add(new Field("a", Schema.create(Schema.Type.STRING), null, null, Field.Order.IGNORE)); @@ -171,19 +335,21 @@ public void testEquals2() { GenericRecord record2 = new GenericData.Record(schema2); record2.put("a", "2"); - assertFalse(record2.equals(record1)); - assertFalse(record1.equals(record2)); + assertNotEquals(record2, record1); + assertNotEquals(record1, record2); } - @Test(expected = AvroRuntimeException.class) - public void testRecordGetFieldDoesntExist() throws Exception { - Schema schema = Schema.createRecord("test", "doc", "test", false, Collections.EMPTY_LIST); - GenericData.Record record = new GenericData.Record(schema); - record.get("does not exist"); + @Test + void recordGetFieldDoesntExist() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + Schema schema = Schema.createRecord("test", "doc", "test", false, Collections.EMPTY_LIST); + GenericData.Record record = new GenericData.Record(schema); + record.get("does not exist"); + }); } @Test - public void testArrayReversal() { + void arrayReversal() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray forward = new GenericData.Array<>(10, schema); GenericArray backward = new GenericData.Array<>(10, schema); @@ -194,11 +360,11 @@ public void testArrayReversal() { backward.add(i); } forward.reverse(); - assertTrue(forward.equals(backward)); + assertEquals(forward, backward); } @Test - public void testArrayListInterface() { + void arrayListInterface() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(1, schema); array.add(99); @@ -224,7 +390,7 @@ public void testArrayListInterface() { } @Test - public void testArrayAddAtLocation() { + void arrayAddAtLocation() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(6, 
schema); array.clear(); @@ -254,7 +420,7 @@ public void testArrayAddAtLocation() { } @Test - public void testArrayRemove() { + void arrayRemove() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); @@ -297,7 +463,7 @@ public void testArrayRemove() { } @Test - public void testArraySet() { + void arraySet() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); @@ -313,7 +479,7 @@ public void testArraySet() { } @Test - public void testToStringIsJson() throws JsonParseException, IOException { + void toStringIsJson() throws JsonParseException, IOException { Field stringField = new Field("string", Schema.create(Type.STRING), null, null); Field enumField = new Field("enum", Schema.createEnum("my_enum", "doc", null, Arrays.asList("a", "b", "c")), null, null); @@ -335,7 +501,7 @@ public void testToStringIsJson() throws JsonParseException, IOException { } @Test - public void testMapWithNonStringKeyToStringIsJson() throws Exception { + void mapWithNonStringKeyToStringIsJson() throws Exception { Schema intMapSchema = new Schema.Parser() .parse("{\"type\": \"map\", \"values\": \"string\", \"java-key-class\" : \"java.lang.Integer\"}"); Field intMapField = new Field("intMap", Schema.createMap(intMapSchema), null, null); @@ -384,7 +550,7 @@ public void testMapWithNonStringKeyToStringIsJson() throws Exception { } @Test - public void testToStringEscapesControlCharsInBytes() throws Exception { + void toStringEscapesControlCharsInBytes() throws Exception { GenericData data = GenericData.get(); ByteBuffer bytes = ByteBuffer.wrap(new byte[] { 'a', '\n', 'b' }); assertEquals("\"a\\nb\"", data.toString(bytes)); @@ -392,7 +558,7 @@ public void testToStringEscapesControlCharsInBytes() throws Exception { } @Test - public void testToStringEscapesControlCharsInMap() { + void toStringEscapesControlCharsInMap() { 
GenericData data = GenericData.get(); Map m = new HashMap<>(); m.put("a\n\\b", "a\n\\b"); @@ -400,20 +566,20 @@ public void testToStringEscapesControlCharsInMap() { } @Test - public void testToStringFixed() throws Exception { + void toStringFixed() throws Exception { GenericData data = GenericData.get(); assertEquals("[97, 10, 98]", data.toString(new GenericData.Fixed(Schema.createFixed("test", null, null, 3), new byte[] { 'a', '\n', 'b' }))); } @Test - public void testToStringDoesNotEscapeForwardSlash() throws Exception { + void toStringDoesNotEscapeForwardSlash() throws Exception { GenericData data = GenericData.get(); assertEquals("\"/\"", data.toString("/")); } @Test - public void testToStringNanInfinity() throws Exception { + void toStringNanInfinity() throws Exception { GenericData data = GenericData.get(); assertEquals("\"Infinity\"", data.toString(Float.POSITIVE_INFINITY)); assertEquals("\"-Infinity\"", data.toString(Float.NEGATIVE_INFINITY)); @@ -424,7 +590,7 @@ public void testToStringNanInfinity() throws Exception { } @Test - public void testToStringConvertsDatesAsStrings() throws Exception { + void toStringConvertsDatesAsStrings() throws Exception { GenericData data = GenericData.get(); assertEquals("\"1961-04-12T06:07:10Z\"", data.toString(Instant.parse("1961-04-12T06:07:10Z"))); assertEquals("\"1961-04-12\"", data.toString(LocalDate.parse("1961-04-12"))); @@ -433,7 +599,14 @@ public void testToStringConvertsDatesAsStrings() throws Exception { } @Test - public void testCompare() { + void ToStringConvertsUuidsAsStrings() throws Exception { + GenericData data = GenericData.get(); + assertEquals("\"abf2f1e8-cece-4fdc-290a-babaca09ec74\"", + data.toString(UUID.fromString("abf2f1e8-cece-4fdc-290a-babaca09ec74"))); + } + + @Test + void compare() { // Prepare a schema for testing. 
Field integerField = new Field("test", Schema.create(Type.INT), null, null); List fields = new ArrayList<>(); @@ -475,7 +648,7 @@ public void testCompare() { } @Test - public void testEnumCompare() { + void enumCompare() { Schema s = Schema.createEnum("Kind", null, null, Arrays.asList("Z", "Y", "X")); GenericEnumSymbol z = new GenericData.EnumSymbol(s, "Z"); GenericEnumSymbol z2 = new GenericData.EnumSymbol(s, "Z"); @@ -486,7 +659,7 @@ public void testEnumCompare() { } @Test - public void testByteBufferDeepCopy() { + void byteBufferDeepCopy() { // Test that a deep copy of a byte buffer respects the byte buffer // limits and capacity. byte[] buffer_value = { 0, 1, 2, 3, 0, 0, 0 }; @@ -505,7 +678,7 @@ public void testByteBufferDeepCopy() { } @Test - public void testValidateNullableEnum() { + void validateNullableEnum() { List unionTypes = new ArrayList<>(); Schema schema; Schema nullSchema = Schema.create(Type.NULL); @@ -542,10 +715,10 @@ public void testValidateNullableEnum() { private enum anEnum { ONE, TWO, THREE - }; + } @Test - public void validateRequiresGenericSymbolForEnumSchema() { + void validateRequiresGenericSymbolForEnumSchema() { final Schema schema = Schema.createEnum("my_enum", "doc", "namespace", Arrays.asList("ONE", "TWO", "THREE")); final GenericData gd = GenericData.get(); @@ -554,12 +727,12 @@ public void validateRequiresGenericSymbolForEnumSchema() { assertTrue(gd.validate(schema, new GenericData.EnumSymbol(schema, anEnum.ONE))); /* negative cases */ - assertFalse("We don't expect GenericData to allow a String datum for an enum schema", gd.validate(schema, "ONE")); - assertFalse("We don't expect GenericData to allow a Java Enum for an enum schema", gd.validate(schema, anEnum.ONE)); + assertFalse(gd.validate(schema, "ONE"), "We don't expect GenericData to allow a String datum for an enum schema"); + assertFalse(gd.validate(schema, anEnum.ONE), "We don't expect GenericData to allow a Java Enum for an enum schema"); } @Test - public void 
testValidateUnion() { + void validateUnion() { Schema type1Schema = SchemaBuilder.record("Type1").fields().requiredString("myString").requiredInt("myInt") .endRecord(); @@ -578,7 +751,7 @@ public void testValidateUnion() { * Record, Map and Array this is correct, for the rest is is not. */ @Test - public void testToStringSameValues() throws IOException { + void toStringSameValues() throws IOException { List fields = new ArrayList<>(); fields.add(new Field("nullstring1", Schema.create(Type.STRING), null, null)); fields.add(new Field("nullstring2", Schema.create(Type.STRING), null, null)); @@ -667,14 +840,14 @@ public void testToStringSameValues() throws IOException { testRecord.put("map2", map); String testString = testRecord.toString(); - assertFalse("Record with duplicated values results in wrong 'toString()'", - testString.contains("CIRCULAR REFERENCE")); + assertFalse(testString.contains("CIRCULAR REFERENCE"), + "Record with duplicated values results in wrong 'toString()'"); } // Test copied from Apache Parquet: // org.apache.parquet.avro.TestCircularReferences @Test - public void testToStringRecursive() throws IOException { + void toStringRecursive() throws IOException { ReferenceManager manager = new ReferenceManager(); GenericData model = new GenericData(); model.addLogicalTypeConversion(manager.getTracker()); @@ -725,12 +898,12 @@ public void testToStringRecursive() throws IOException { } } - @Test /** * check that GenericArray.reset() retains reusable elements and that * GenericArray.prune() cleans them up properly. 
*/ - public void testGenericArrayPeek() { + @Test + void genericArrayPeek() { Schema elementSchema = SchemaBuilder.record("element").fields().requiredString("value").endRecord(); Schema arraySchema = Schema.createArray(elementSchema); diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java new file mode 100644 index 00000000000..f74dab95b0f --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumReader.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.generic; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.avro.Schema; +import org.junit.jupiter.api.Test; + +public class TestGenericDatumReader { + + private static final Random r = new Random(System.currentTimeMillis()); + + @Test + void readerCache() { + final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass); + List threads = IntStream.rangeClosed(1, 200).mapToObj((int index) -> { + final Schema schema = TestGenericDatumReader.this.build(index); + final WithSchema s = new WithSchema(schema, cache); + return (Runnable) () -> s.test(); + }).map(Thread::new).collect(Collectors.toList()); + threads.forEach(Thread::start); + threads.forEach((Thread t) -> { + try { + t.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + void newInstanceFromString() { + final GenericDatumReader.ReaderCache cache = new GenericDatumReader.ReaderCache(this::findStringClass); + + Object object = cache.newInstanceFromString(StringBuilder.class, "Hello"); + assertEquals(StringBuilder.class, object.getClass()); + StringBuilder builder = (StringBuilder) object; + assertEquals("Hello", builder.toString()); + + } + + static class WithSchema { + private final Schema schema; + + private final GenericDatumReader.ReaderCache cache; + + public WithSchema(Schema schema, GenericDatumReader.ReaderCache cache) { + this.schema = schema; + this.cache = cache; + } + + public void test() { + this.cache.getStringClass(schema); + } + } + + private List list = new ArrayList<>(); + + private Schema build(int index) { + int schemaNum = (index - 1) % 50; + if (index <= 50) { + Schema schema = Schema.createRecord("record_" + schemaNum, "doc", "namespace", false, + 
Arrays.asList(new Schema.Field("field" + schemaNum, Schema.create(Schema.Type.STRING)))); + list.add(schema); + } + + return list.get(schemaNum); + } + + private Class findStringClass(Schema schema) { + this.sleep(); + if (schema.getType() == Schema.Type.INT) { + return Integer.class; + } + if (schema.getType() == Schema.Type.STRING) { + return String.class; + } + if (schema.getType() == Schema.Type.LONG) { + return Long.class; + } + if (schema.getType() == Schema.Type.FLOAT) { + return Float.class; + } + return String.class; + } + + private void sleep() { + long timeToSleep = r.nextInt(30) + 10L; + if (timeToSleep > 25) { + try { + Thread.sleep(timeToSleep); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java index 2d5bf202d7f..dc3661f467b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericDatumWriter.java @@ -17,14 +17,14 @@ */ package org.apache.avro.generic; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.HashMap; @@ -43,16 +43,15 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestGenericDatumWriter { @Test - public void testUnionUnresolvedExceptionExplicitWhichField() throws IOException { + void 
unionUnresolvedExceptionExplicitWhichField() throws IOException { Schema s = schemaWithExplicitNullDefault(); GenericRecord r = new GenericData.Record(s); r.put("f", 100); ByteArrayOutputStream bao = new ByteArrayOutputStream(); - EncoderFactory.get().jsonEncoder(s, bao); try { new GenericDatumWriter<>(s).write(r, EncoderFactory.get().jsonEncoder(s, bao)); fail(); @@ -62,7 +61,7 @@ public void testUnionUnresolvedExceptionExplicitWhichField() throws IOException } @Test - public void testWrite() throws IOException { + void write() throws IOException { String json = "{\"type\": \"record\", \"name\": \"r\", \"fields\": [" + "{ \"name\": \"f1\", \"type\": \"long\" }" + "]}"; Schema s = new Schema.Parser().parse(json); @@ -80,7 +79,7 @@ public void testWrite() throws IOException { } @Test - public void testArrayConcurrentModification() throws Exception { + void arrayConcurrentModification() throws Exception { String json = "{\"type\": \"array\", \"items\": \"int\" }"; Schema s = new Schema.Parser().parse(json); final GenericArray a = new GenericData.Array<>(1, s); @@ -113,7 +112,7 @@ public void testArrayConcurrentModification() throws Exception { } @Test - public void testMapConcurrentModification() throws Exception { + void mapConcurrentModification() throws Exception { String json = "{\"type\": \"map\", \"values\": \"int\" }"; Schema s = new Schema.Parser().parse(json); final Map m = new HashMap<>(); @@ -146,7 +145,7 @@ public void testMapConcurrentModification() throws Exception { } @Test - public void testAllowWritingPrimitives() throws IOException { + void allowWritingPrimitives() throws IOException { Schema doubleType = Schema.create(Schema.Type.DOUBLE); Schema.Field field = new Schema.Field("double", doubleType); List fields = Collections.singletonList(field); @@ -282,56 +281,195 @@ public void writeMapEnd() throws IOException { public void writeIndex(int unionIndex) throws IOException { e.writeIndex(unionIndex); } - }; - - @Test(expected = 
AvroTypeException.class) - public void writeDoesNotAllowStringForGenericEnum() throws IOException { - final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " - + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " - + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; - Schema schema = new Schema.Parser().parse(json); - GenericRecord record = new GenericData.Record(schema); - record.put("field", "ONE"); - - ByteArrayOutputStream bao = new ByteArrayOutputStream(); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + } - writer.write(record, encoder); + @Test + void writeDoesNotAllowStringForGenericEnum() throws IOException { + assertThrows(AvroTypeException.class, () -> { + final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " + + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " + + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; + Schema schema = new Schema.Parser().parse(json); + GenericRecord record = new GenericData.Record(schema); + record.put("field", "ONE"); + + ByteArrayOutputStream bao = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + + writer.write(record, encoder); + }); } private enum AnEnum { ONE, TWO, THREE - }; - - @Test(expected = AvroTypeException.class) - public void writeDoesNotAllowJavaEnumForGenericEnum() throws IOException { - final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " - + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " - + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; - Schema schema = new Schema.Parser().parse(json); - GenericRecord record = new GenericData.Record(schema); - 
record.put("field", AnEnum.ONE); - - ByteArrayOutputStream bao = new ByteArrayOutputStream(); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + } - writer.write(record, encoder); + @Test + void writeDoesNotAllowJavaEnumForGenericEnum() throws IOException { + assertThrows(AvroTypeException.class, () -> { + final String json = "{\"type\": \"record\", \"name\": \"recordWithEnum\"," + "\"fields\": [ " + + "{\"name\": \"field\", \"type\": " + "{\"type\": \"enum\", \"name\": \"enum\", \"symbols\": " + + "[\"ONE\",\"TWO\",\"THREE\"] " + "}" + "}" + "]}"; + Schema schema = new Schema.Parser().parse(json); + GenericRecord record = new GenericData.Record(schema); + record.put("field", AnEnum.ONE); + + ByteArrayOutputStream bao = new ByteArrayOutputStream(); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + Encoder encoder = EncoderFactory.get().jsonEncoder(schema, bao); + + writer.write(record, encoder); + }); } @Test - public void writeFieldWithDefaultWithExplicitNullDefaultInSchema() throws Exception { + void writeFieldWithDefaultWithExplicitNullDefaultInSchema() throws Exception { Schema schema = schemaWithExplicitNullDefault(); GenericRecord record = createRecordWithDefaultField(schema); - writeObject(schema, record); + writeObject(record); } @Test - public void writeFieldWithDefaultWithoutExplicitNullDefaultInSchema() throws Exception { + void writeFieldWithDefaultWithoutExplicitNullDefaultInSchema() throws Exception { Schema schema = schemaWithoutExplicitNullDefault(); GenericRecord record = createRecordWithDefaultField(schema); - writeObject(schema, record); + writeObject(record); + } + + @Test + void nestedNPEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + 
map.get("a").put("strField", null); + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].strField"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nPEForMapKeyErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.put(null, map.get("a")); // value is valid, but key is null + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue( + expected.getMessage() + .contains("null key in map at RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void shortPathNPEErrorClarity() throws Exception { + try { + writeObject(Schema.create(Schema.Type.STRING), null); + fail("expected to throw"); + } catch (NullPointerException expected) { + assertTrue(expected.getMessage().contains("null value for (non-nullable) string"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nestedCCEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.get("a").put("strField", 42); // not a string + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (ClassCastException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].strField"), + "unexpected message " + expected.getMessage()); + } + } 
+ + @Test + void shortPathCCEErrorClarity() throws Exception { + try { + writeObject(Schema.create(Schema.Type.STRING), 42); + fail("expected to throw"); + } catch (ClassCastException expected) { + assertTrue( + expected.getMessage().contains("value 42 (a java.lang.Integer) cannot be cast to expected type string"), + "unexpected message " + expected.getMessage()); + } + } + + @Test + void nestedATEErrorClarity() throws Exception { + GenericData.Record topLevelRecord = buildComplexRecord(); + @SuppressWarnings("unchecked") + Map map = (Map) ((List) ((GenericData.Record) topLevelRecord + .get("unionField")).get("arrayField")).get(0).get("mapField"); + map.get("a").put("enumField", 42); // not an enum + try { + writeObject(topLevelRecord); + fail("expected to throw"); + } catch (AvroTypeException expected) { + assertTrue( + expected.getMessage() + .contains("RecordWithRequiredFields.unionField[UnionRecord].arrayField[0].mapField[\"a\"].enumField"), + "unexpected message " + expected.getMessage()); + assertTrue(expected.getMessage().contains("42 (a java.lang.Integer) is not a MapRecordEnum"), + "unexpected message " + expected.getMessage()); + } + } + + private GenericData.Record buildComplexRecord() throws IOException { + + Schema schema = new Schema.Parser() + .parse(new File("target/test-classes/share/test/schemas/RecordWithRequiredFields.avsc")); + + GenericData.Record topLevelRecord = new GenericData.Record(schema); + GenericData.Record unionRecord = new GenericData.Record(schema.getField("unionField").schema().getTypes().get(1)); + Schema arraySchema = unionRecord.getSchema().getField("arrayField").schema(); + GenericData.Record arrayRecord1 = new GenericData.Record(arraySchema.getElementType()); + GenericData.Record arrayRecord2 = new GenericData.Record(arraySchema.getElementType()); + GenericData.Array array = new GenericData.Array<>(arraySchema, + Arrays.asList(arrayRecord1, arrayRecord2)); + Schema mapRecordSchema = 
arraySchema.getElementType().getField("mapField").schema().getValueType(); + GenericData.Record mapRecordA = new GenericData.Record(mapRecordSchema); + Schema mapRecordEnumSchema = mapRecordSchema.getField("enumField").schema(); + + mapRecordA.put("enumField", new GenericData.EnumSymbol(mapRecordEnumSchema, "B")); + mapRecordA.put("strField", "4"); + + arrayRecord1.put("strField", "2"); + HashMap map1 = new HashMap<>(); + map1.put("a", mapRecordA); + arrayRecord1.put("mapField", map1); + + arrayRecord2.put("strField", "2"); + HashMap map2 = new HashMap<>(); + map2.put("a", mapRecordA); + arrayRecord2.put("mapField", map2); + + unionRecord.put(unionRecord.getSchema().getField("strField").pos(), "1"); + unionRecord.put(unionRecord.getSchema().getField("arrayField").pos(), array); // BOOM + + topLevelRecord.put(topLevelRecord.getSchema().getField("strField").pos(), "0"); + topLevelRecord.put(topLevelRecord.getSchema().getField("unionField").pos(), unionRecord); + + return topLevelRecord; } private Schema schemaWithExplicitNullDefault() { @@ -347,10 +485,15 @@ private Schema schemaWithoutExplicitNullDefault() { return new Schema.Parser().parse(schema); } - private void writeObject(Schema schema, GenericRecord datum) throws Exception { + private void writeObject(GenericRecord datum) throws Exception { + writeObject(datum.getSchema(), datum); + } + + private void writeObject(Schema schema, Object datum) throws Exception { BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(new ByteArrayOutputStream(), null); - GenericDatumWriter writer = new GenericDatumWriter<>(schema); - writer.write(schema, datum, encoder); + GenericDatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(datum, encoder); + encoder.flush(); } private GenericRecord createRecordWithDefaultField(Schema schema) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java 
b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java index 4da31ea5a8f..6df4a8af6a2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java @@ -18,13 +18,28 @@ package org.apache.avro.generic; -import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertThat; +import org.apache.avro.Conversion; +import org.apache.avro.Conversions; +import org.apache.avro.CustomType; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.data.TimeConversions; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.file.FileReader; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.util.TimePeriod; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import java.io.File; import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.time.Instant; import java.time.LocalDateTime; import java.time.ZoneOffset; @@ -32,41 +47,32 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Random; import java.util.UUID; -import org.apache.avro.Conversion; -import org.apache.avro.Conversions; -import org.apache.avro.LogicalType; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.data.TimeConversions; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.file.FileReader; -import org.apache.avro.io.DatumReader; -import org.apache.avro.io.DatumWriter; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import 
org.junit.rules.TemporaryFolder; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; public class TestGenericLogicalTypes { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static final GenericData GENERIC = new GenericData(); - @BeforeClass + @BeforeAll public static void addLogicalTypes() { GENERIC.addLogicalTypeConversion(new Conversions.DecimalConversion()); GENERIC.addLogicalTypeConversion(new Conversions.UUIDConversion()); + GENERIC.addLogicalTypeConversion(new Conversions.DurationConversion()); GENERIC.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion()); GENERIC.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion()); } @Test - public void testReadUUID() throws IOException { + public void readUUID() throws IOException { Schema uuidSchema = Schema.create(Schema.Type.STRING); LogicalTypes.uuid().addToSchema(uuidSchema); @@ -75,11 +81,11 @@ public void testReadUUID() throws IOException { List expected = Arrays.asList(u1, u2); File test = write(Schema.create(Schema.Type.STRING), u1.toString(), u2.toString()); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(GENERIC.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); } @Test - public void testWriteUUID() throws IOException { + public void writeUUID() throws IOException { Schema stringSchema = Schema.create(Schema.Type.STRING); stringSchema.addProp(GenericData.STRING_PROP, "String"); Schema uuidSchema = Schema.create(Schema.Type.STRING); @@ -90,12 +96,12 @@ public void testWriteUUID() throws IOException { List expected = Arrays.asList(u1.toString(), u2.toString()); File test = write(GENERIC, uuidSchema, u1, u2); - 
Assert.assertEquals("Should read UUIDs as Strings", expected, - read(GenericData.get().createDatumReader(stringSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read UUIDs as Strings"); } @Test - public void testWriteNullableUUID() throws IOException { + public void writeNullableUUID() throws IOException { Schema stringSchema = Schema.create(Schema.Type.STRING); stringSchema.addProp(GenericData.STRING_PROP, "String"); Schema nullableStringSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), stringSchema); @@ -109,12 +115,43 @@ public void testWriteNullableUUID() throws IOException { List expected = Arrays.asList(u1.toString(), u2.toString()); File test = write(GENERIC, nullableUuidSchema, u1, u2); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(GenericData.get().createDatumReader(nullableStringSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(nullableStringSchema), test), + "Should read UUIDs as Strings"); + } + + @Test + public void readWriteDuration() throws IOException { + Schema fixedSchema = Schema.createFixed("bare.Fixed", null, null, 12); + + Schema durationSchema = Schema.createFixed("time.Duration", null, null, 12); + LogicalTypes.duration().addToSchema(durationSchema); + + // These two are necessary for schema evolution! 
+ fixedSchema.addAlias(durationSchema.getFullName()); + durationSchema.addAlias(fixedSchema.getFullName()); + + Random rng = new Random(); + TimePeriod d1 = TimePeriod.of(rng.nextInt(1000), rng.nextInt(1000), rng.nextInt(1000)); + ByteBuffer b1 = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN).putInt((int) d1.getMonths()) + .putInt((int) d1.getDays()).putInt((int) d1.getMillis()); + GenericFixed f1 = new GenericData.Fixed(fixedSchema, b1.array()); + + TimePeriod d2 = TimePeriod.of(rng.nextInt(1000), rng.nextInt(1000), rng.nextInt(1000)); + ByteBuffer b2 = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN).putInt((int) d2.getMonths()) + .putInt((int) d2.getDays()).putInt((int) d2.getMillis()); + GenericFixed f2 = new GenericData.Fixed(fixedSchema, b2.array()); + + File test = write(fixedSchema, f1, f2); + assertEquals(Arrays.asList(d1, d2), read(GENERIC.createDatumReader(durationSchema), test), + "Should convert fixed bytes to durations"); + + test = write(GENERIC, durationSchema, d2, d1); + assertEquals(Arrays.asList(f2, f1), read(GenericData.get().createDatumReader(fixedSchema), test), + "Should convert durations to fixed bytes"); } @Test - public void testReadDecimalFixed() throws IOException { + public void readDecimalFixed() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema decimalSchema = decimal.addToSchema(Schema.createFixed("aFixed", null, null, 4)); @@ -130,12 +167,11 @@ public void testReadDecimalFixed() throws IOException { GenericFixed d2fixed = conversion.toFixed(d2, fixedSchema, decimal); File test = write(fixedSchema, d1fixed, d2fixed); - Assert.assertEquals("Should convert fixed to BigDecimals", expected, - read(GENERIC.createDatumReader(decimalSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(decimalSchema), test), "Should convert fixed to BigDecimals"); } @Test - public void testWriteDecimalFixed() throws IOException { + 
public void writeDecimalFixed() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema decimalSchema = decimal.addToSchema(Schema.createFixed("aFixed", null, null, 4)); @@ -150,16 +186,16 @@ public void testWriteDecimalFixed() throws IOException { List expected = Arrays.asList(d1fixed, d2fixed); File test = write(GENERIC, decimalSchema, d1, d2); - Assert.assertEquals("Should read BigDecimals as fixed", expected, - read(GenericData.get().createDatumReader(fixedSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(fixedSchema), test), + "Should read BigDecimals as fixed"); } @Test - public void testDecimalToFromBytes() throws IOException { + public void decimalToFromBytes() { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); - // Check that the round trip to and from bytes + // Check the round trip to and from bytes BigDecimal d1 = new BigDecimal("-34.34"); BigDecimal d2 = new BigDecimal("117230.00"); @@ -176,11 +212,11 @@ public void testDecimalToFromBytes() throws IOException { } @Test - public void testDecimalToFromFixed() throws IOException { + public void decimalToFromFixed() { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); - // Check that the round trip to and from fixed data. + // Check the round trip to and from fixed data. 
BigDecimal d1 = new BigDecimal("-34.34"); BigDecimal d2 = new BigDecimal("117230.00"); @@ -193,7 +229,7 @@ public void testDecimalToFromFixed() throws IOException { } @Test - public void testReadDecimalBytes() throws IOException { + public void readDecimalBytes() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema decimalSchema = decimal.addToSchema(Schema.create(Schema.Type.BYTES)); @@ -209,12 +245,11 @@ public void testReadDecimalBytes() throws IOException { ByteBuffer d2bytes = conversion.toBytes(d2, bytesSchema, decimal); File test = write(bytesSchema, d1bytes, d2bytes); - Assert.assertEquals("Should convert bytes to BigDecimals", expected, - read(GENERIC.createDatumReader(decimalSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(decimalSchema), test), "Should convert bytes to BigDecimals"); } @Test - public void testWriteDecimalBytes() throws IOException { + public void writeDecimalBytes() throws IOException { LogicalType decimal = LogicalTypes.decimal(9, 2); Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema decimalSchema = decimal.addToSchema(Schema.create(Schema.Type.BYTES)); @@ -230,8 +265,8 @@ public void testWriteDecimalBytes() throws IOException { List expected = Arrays.asList(d1bytes, d2bytes); File test = write(GENERIC, decimalSchema, d1bytes, d2bytes); - Assert.assertEquals("Should read BigDecimals as bytes", expected, - read(GenericData.get().createDatumReader(bytesSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(bytesSchema), test), + "Should read BigDecimals as bytes"); } private List read(DatumReader reader, File file) throws IOException { @@ -246,13 +281,14 @@ private List read(DatumReader reader, File file) throws IOException { return data; } - private File write(Schema schema, D... data) throws IOException { + @SafeVarargs + private final File write(Schema schema, D... 
data) throws IOException { return write(GenericData.get(), schema, data); } @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... data) throws IOException { - File file = temp.newFile(); + File file = new File(temp, "out.avro"); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { @@ -266,12 +302,12 @@ private File write(GenericData model, Schema schema, D... data) throws IOExc } @Test - public void testCopyUuid() { + public void copyUuid() { testCopy(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)), UUID.randomUUID(), GENERIC); } @Test - public void testCopyUuidRaw() { + public void copyUuidRaw() { testCopy(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)), UUID.randomUUID().toString(), // use // raw // type @@ -279,13 +315,13 @@ public void testCopyUuidRaw() { } @Test - public void testCopyDecimal() { + public void copyDecimal() { testCopy(LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES)), new BigDecimal("-34.34"), GENERIC); } @Test - public void testCopyDecimalRaw() { + public void copyDecimalRaw() { testCopy(LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES)), ByteBuffer.wrap(new BigDecimal("-34.34").unscaledValue().toByteArray()), GenericData.get()); // no conversions } @@ -307,28 +343,28 @@ private void testCopy(Schema schema, Object value, GenericData model) { // test nested in array Schema arraySchema = Schema.createArray(schema); - ArrayList array = new ArrayList(Collections.singletonList(value)); + ArrayList array = new ArrayList<>(Collections.singletonList(value)); checkCopy(array, model.deepCopy(arraySchema, array), true); // test record nested in array Schema recordArraySchema = Schema.createArray(recordSchema); - ArrayList recordArray = new ArrayList(Collections.singletonList(record)); + ArrayList recordArray = new ArrayList<>(Collections.singletonList(record)); 
checkCopy(recordArray, model.deepCopy(recordArraySchema, recordArray), true); } private void checkCopy(Object original, Object copy, boolean notSame) { if (notSame) - Assert.assertNotSame(original, copy); - Assert.assertEquals(original, copy); + assertNotSame(original, copy); + assertEquals(original, copy); } @Test - public void testReadLocalTimestampMillis() throws IOException { + public void readLocalTimestampMillis() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMillis(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 42000000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 42000000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochMilli(0), ZoneOffset.UTC); List expected = Arrays.asList(i1, i2); @@ -339,17 +375,17 @@ public void testReadLocalTimestampMillis() throws IOException { Long i2long = 0L; File test = write(longSchema, i1long, i2long); - Assert.assertEquals("Should convert long to LocalDateTime", expected, - read(GENERIC.createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(timestampSchema), test), + "Should convert long to LocalDateTime"); } @Test - public void testWriteLocalTimestampMillis() throws IOException { + public void writeLocalTimestampMillis() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMillis(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 42000000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 42000000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochMilli(0), ZoneOffset.UTC); Conversion conversion = new TimeConversions.LocalTimestampMillisConversion(); @@ -359,17 +395,17 @@ public void 
testWriteLocalTimestampMillis() throws IOException { List expected = Arrays.asList(d1long, d2long); File test = write(GENERIC, timestampSchema, i1, i2); - Assert.assertEquals("Should read LocalDateTime as longs", expected, - read(GenericData.get().createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), + "Should read LocalDateTime as longs"); } @Test - public void testReadLocalTimestampMicros() throws IOException { + public void readLocalTimestampMicros() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMicros(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 420000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 420000); LocalDateTime i2 = LocalDateTime.ofInstant(Instant.ofEpochSecond(0, 4000), ZoneOffset.UTC); List expected = Arrays.asList(i1, i2); @@ -380,17 +416,17 @@ public void testReadLocalTimestampMicros() throws IOException { Long i2long = conversion.toLong(i2, longSchema, timestamp); File test = write(longSchema, i1long, i2long); - Assert.assertEquals("Should convert long to LocalDateTime", expected, - read(GENERIC.createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GENERIC.createDatumReader(timestampSchema), test), + "Should convert long to LocalDateTime"); } @Test - public void testWriteLocalTimestampMicros() throws IOException { + public void writeLocalTimestampMicros() throws IOException { LogicalType timestamp = LogicalTypes.localTimestampMicros(); Schema longSchema = Schema.create(Schema.Type.LONG); Schema timestampSchema = timestamp.addToSchema(Schema.create(Schema.Type.LONG)); - LocalDateTime i1 = LocalDateTime.of(1986, 06, 26, 12, 07, 11, 420000); + LocalDateTime i1 = LocalDateTime.of(1986, 6, 26, 12, 7, 11, 420000); LocalDateTime i2 = 
LocalDateTime.ofInstant(Instant.ofEpochSecond(0, 4000), ZoneOffset.UTC); Conversion conversion = new TimeConversions.LocalTimestampMicrosConversion(); @@ -400,7 +436,56 @@ public void testWriteLocalTimestampMicros() throws IOException { List expected = Arrays.asList(d1long, d2long); File test = write(GENERIC, timestampSchema, i1, i2); - Assert.assertEquals("Should read LocalDateTime as longs", expected, - read(GenericData.get().createDatumReader(timestampSchema), test)); + assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), + "Should read LocalDateTime as longs"); + } + + @Test + public void testReadAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + List expected = Arrays.asList(ct1, ct2); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + + File test = write(stringSchema, ct1String, ct2String); + assertEquals(expected, read(GENERIC.createDatumReader(customTypeSchema), test), + "Should convert string to CustomType"); + } + + @Test + public void testWriteAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = 
customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + List expected = Arrays.asList(ct1String, ct2String); + + File test = write(GENERIC, customTypeSchema, ct1, ct2); + + // Note that this test still cannot read strings using the logical type + // schema, as all GenericData instances have the logical type and the + // conversions loaded. That's why this final assert is slightly different. + + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read CustomType as strings"); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java index 5fa321a3b27..d4eece27bd8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericRecordBuilder.java @@ -17,6 +17,8 @@ */ package org.apache.avro.generic; +import static org.junit.jupiter.api.Assertions.*; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -27,67 +29,70 @@ import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData.Record; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Unit test for the GenericRecordBuilder class. 
*/ public class TestGenericRecordBuilder { @Test - public void testGenericBuilder() { + void genericBuilder() { Schema schema = recordSchema(); GenericRecordBuilder builder = new GenericRecordBuilder(schema); // Verify that builder has no fields set after initialization: for (Field field : schema.getFields()) { - Assert.assertFalse("RecordBuilder should not have field " + field.name(), builder.has(field.name())); - Assert.assertNull("Field " + field.name() + " should be null", builder.get(field.name())); + assertFalse(builder.has(field.name()), "RecordBuilder should not have field " + field.name()); + assertNull(builder.get(field.name()), "Field " + field.name() + " should be null"); } // Set field in builder: builder.set("intField", 1); List anArray = Arrays.asList("one", "two", "three"); builder.set("anArray", anArray); - Assert.assertTrue("anArray should be set", builder.has("anArray")); - Assert.assertEquals(anArray, builder.get("anArray")); - Assert.assertFalse("id should not be set", builder.has("id")); - Assert.assertNull(builder.get("id")); + assertTrue(builder.has("anArray"), "anArray should be set"); + assertEquals(anArray, builder.get("anArray")); + assertFalse(builder.has("id"), "id should not be set"); + assertNull(builder.get("id")); // Build the record, and verify that fields are set: Record record = builder.build(); - Assert.assertEquals(1, record.get("intField")); - Assert.assertEquals(anArray, record.get("anArray")); - Assert.assertNotNull(record.get("id")); - Assert.assertEquals("0", record.get("id").toString()); + assertEquals(1, record.get("intField")); + assertEquals(anArray, record.get("anArray")); + assertNotNull(record.get("id")); + assertEquals("0", record.get("id").toString()); // Test copy constructors: - Assert.assertEquals(builder, new GenericRecordBuilder(builder)); - Assert.assertEquals(record, new GenericRecordBuilder(record).build()); + assertEquals(builder, new GenericRecordBuilder(builder)); + assertEquals(record, new 
GenericRecordBuilder(record).build()); // Test clear: builder.clear("intField"); - Assert.assertFalse(builder.has("intField")); - Assert.assertNull(builder.get("intField")); + assertFalse(builder.has("intField")); + assertNull(builder.get("intField")); } - @Test(expected = org.apache.avro.AvroRuntimeException.class) - public void attemptToSetNonNullableFieldToNull() { - new GenericRecordBuilder(recordSchema()).set("intField", null); + @Test + void attemptToSetNonNullableFieldToNull() { + assertThrows(org.apache.avro.AvroRuntimeException.class, () -> { + new GenericRecordBuilder(recordSchema()).set("intField", null); + }); } - @Test(expected = org.apache.avro.AvroRuntimeException.class) - public void buildWithoutSettingRequiredFields1() { - new GenericRecordBuilder(recordSchema()).build(); + @Test + void buildWithoutSettingRequiredFields1() { + assertThrows(org.apache.avro.AvroRuntimeException.class, () -> { + new GenericRecordBuilder(recordSchema()).build(); + }); } - @Test() - public void buildWithoutSettingRequiredFields2() { + @Test + void buildWithoutSettingRequiredFields2() { try { new GenericRecordBuilder(recordSchema()).set("anArray", Collections.singletonList("one")).build(); - Assert.fail("Should have thrown " + AvroRuntimeException.class.getCanonicalName()); + fail("Should have thrown " + AvroRuntimeException.class.getCanonicalName()); } catch (AvroRuntimeException e) { - Assert.assertTrue(e.getMessage().contains("intField")); + assertTrue(e.getMessage().contains("intField")); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java index b05c7b8552d..aae1af73860 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestSkipEnumSchema.java @@ -24,8 +24,7 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import 
org.apache.avro.io.EncoderFactory; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -34,7 +33,7 @@ */ public class TestSkipEnumSchema { @Test - public void testSkipEnum() throws IOException { + void skipEnum() throws IOException { Schema enumSchema = SchemaBuilder.builder().enumeration("enum").symbols("en1", "en2"); EnumSymbol enumSymbol = new EnumSymbol(enumSchema, "en1"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java index edbcd0bcbe3..167cd724630 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java @@ -18,8 +18,10 @@ package org.apache.avro.io; -import org.junit.Assert; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; public class TestBinaryData { @@ -29,13 +31,33 @@ public class TestBinaryData { * normal 8). When skipping it, the next byte should be 10. 
*/ @Test - public void testSkipLong() { + void skipLong() { byte[] b = new byte[10]; BinaryData.encodeLong(Long.MAX_VALUE, b, 0); final int nextIndex = BinaryData.skipLong(b, 0); - Assert.assertEquals(nextIndex, 10); + assertEquals(nextIndex, 10); } + @Test + void testIntLongVleEquality() { + byte[] intResult = new byte[9]; + byte[] longResult = new byte[9]; + BinaryData.encodeInt(0, intResult, 0); + BinaryData.encodeLong(0, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(42, intResult, 0); + BinaryData.encodeLong(42, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(-24, intResult, 0); + BinaryData.encodeLong(-24, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MAX_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MAX_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MIN_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java index e4bf8f89ce3..b9437bd8a0e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java @@ -17,117 +17,159 @@ */ package org.apache.avro.io; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; +import org.apache.avro.SystemLimitException; +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; import 
org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; import org.apache.avro.util.ByteBufferInputStream; import org.apache.avro.util.ByteBufferOutputStream; import org.apache.avro.util.RandomData; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.apache.avro.TestSystemLimitException.*; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class TestBinaryDecoder { // prime number buffer size so that looping tests hit the buffer edge // at different points in the loop. 
DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(521); - private boolean useDirect = false; + static EncoderFactory e_factory = EncoderFactory.get(); - public TestBinaryDecoder(boolean useDirect) { - this.useDirect = useDirect; + private Decoder newDecoderWithNoData(boolean useDirect) { + return newDecoder(new byte[0], useDirect); } - @Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { true }, { false }, }); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, boolean useDirect) { + return this.newDecoder(bytes, start, len, null, useDirect); } - private Decoder newDecoderWithNoData() throws IOException { - return newDecoder(new byte[0]); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { + final ByteArrayInputStream input = new ByteArrayInputStream(bytes, start, len); + return factory.directBinaryDecoder(input, reuse); + } else { + return factory.binaryDecoder(bytes, start, len, reuse); + } } - private Decoder newDecoder(byte[] bytes, int start, int len) throws IOException { - return factory.binaryDecoder(bytes, start, len, null); + private BinaryDecoder newDecoder(InputStream in, boolean useDirect) { + return this.newDecoder(in, null, useDirect); + } + private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { + return factory.directBinaryDecoder(in, reuse); + } else { + return factory.binaryDecoder(in, reuse); + } } - private Decoder newDecoder(InputStream in) { + private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { - return factory.directBinaryDecoder(in, null); + return this.factory.directBinaryDecoder(new ByteArrayInputStream(bytes), reuse); } else { - return factory.binaryDecoder(in, null); + return factory.binaryDecoder(bytes, reuse); } } - private Decoder newDecoder(byte[] bytes) throws IOException { 
- return factory.binaryDecoder(bytes, null); + private BinaryDecoder newDecoder(byte[] bytes, boolean useDirect) { + return this.newDecoder(bytes, null, useDirect); + } + + /** + * Create a decoder for simulating reading corrupt, unexpected or out-of-bounds + * data. + * + * @return a {@link org.apache.avro.io.BinaryDecoder} that has been initialized + * on a byte array containing the sequence of encoded longs in order. + */ + private BinaryDecoder newDecoder(boolean useDirect, long... values) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + for (long v : values) + encoder.writeLong(v); + encoder.flush(); + return newDecoder(baos.toByteArray(), useDirect); + } } /** Verify EOFException throw at EOF */ - @Test(expected = EOFException.class) - public void testEOFBoolean() throws IOException { - newDecoderWithNoData().readBoolean(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBoolean(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBoolean()); } - @Test(expected = EOFException.class) - public void testEOFInt() throws IOException { - newDecoderWithNoData().readInt(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofInt(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testEOFLong() throws IOException { - newDecoderWithNoData().readLong(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofLong(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testEOFFloat() throws IOException { - newDecoderWithNoData().readFloat(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + 
void eofFloat(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFloat()); } - @Test(expected = EOFException.class) - public void testEOFDouble() throws IOException { - newDecoderWithNoData().readDouble(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofDouble(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readDouble()); } - @Test(expected = EOFException.class) - public void testEOFBytes() throws IOException { - newDecoderWithNoData().readBytes(null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBytes(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBytes(null)); } - @Test(expected = EOFException.class) - public void testEOFString() throws IOException { - newDecoderWithNoData().readString(new Utf8("a")); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofString(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readString(new Utf8("a"))); } - @Test(expected = EOFException.class) - public void testEOFFixed() throws IOException { - newDecoderWithNoData().readFixed(new byte[1]); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFixed(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFixed(new byte[1])); } - @Test(expected = EOFException.class) - public void testEOFEnum() throws IOException { - newDecoderWithNoData().readEnum(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofEnum(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readEnum()); } @Test - public void testReuse() throws IOException { + void reuse() throws IOException { ByteBufferOutputStream bbo1 = new ByteBufferOutputStream(); 
ByteBufferOutputStream bbo2 = new ByteBufferOutputStream(); byte[] b1 = new byte[] { 1, 2 }; @@ -142,20 +184,20 @@ public void testReuse() throws IOException { DirectBinaryDecoder d = new DirectBinaryDecoder(new ByteBufferInputStream(bbo1.getBufferList())); ByteBuffer bb1 = d.readBytes(null); - Assert.assertEquals(b1.length, bb1.limit() - bb1.position()); + Assertions.assertEquals(b1.length, bb1.limit() - bb1.position()); d.configure(new ByteBufferInputStream(bbo2.getBufferList())); ByteBuffer bb2 = d.readBytes(null); - Assert.assertEquals(b1.length, bb2.limit() - bb2.position()); + Assertions.assertEquals(b1.length, bb2.limit() - bb2.position()); } private static byte[] data = null; private static Schema schema = null; - private static int count = 200; - private static ArrayList records = new ArrayList<>(count); + private static final int count = 200; + private static final ArrayList records = new ArrayList<>(count); - @BeforeClass + @BeforeAll public static void generateData() throws IOException { int seed = (int) System.currentTimeMillis(); // note some tests (testSkipping) rely on this explicitly @@ -179,8 +221,9 @@ public static void generateData() throws IOException { data = baos.toByteArray(); } - @Test - public void testDecodeFromSources() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void decodeFromSources(boolean useDirect) throws IOException { GenericDatumReader reader = new GenericDatumReader<>(); reader.setSchema(schema); @@ -188,81 +231,82 @@ public void testDecodeFromSources() throws IOException { ByteArrayInputStream is2 = new ByteArrayInputStream(data); ByteArrayInputStream is3 = new ByteArrayInputStream(data); - Decoder fromInputStream = newDecoder(is); - Decoder fromArray = newDecoder(data); + Decoder fromInputStream = newDecoder(is, useDirect); + Decoder fromArray = newDecoder(data, useDirect); byte[] data2 = new byte[data.length + 30]; Arrays.fill(data2, (byte) 0xff); System.arraycopy(data, 0, data2, 15, 
data.length); - Decoder fromOffsetArray = newDecoder(data2, 15, data.length); + Decoder fromOffsetArray = newDecoder(data2, 15, data.length, useDirect); - BinaryDecoder initOnInputStream = factory.binaryDecoder(new byte[50], 0, 30, null); - initOnInputStream = factory.binaryDecoder(is2, initOnInputStream); - BinaryDecoder initOnArray = factory.binaryDecoder(is3, null); - initOnArray = factory.binaryDecoder(data, 0, data.length, initOnArray); + BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30, useDirect); + initOnInputStream = newDecoder(is2, initOnInputStream, useDirect); + BinaryDecoder initOnArray = this.newDecoder(is3, null, useDirect); + initOnArray = this.newDecoder(data, initOnArray, useDirect); for (Object datum : records) { - Assert.assertEquals("InputStream based BinaryDecoder result does not match", datum, - reader.read(null, fromInputStream)); - Assert.assertEquals("Array based BinaryDecoder result does not match", datum, reader.read(null, fromArray)); - Assert.assertEquals("offset Array based BinaryDecoder result does not match", datum, - reader.read(null, fromOffsetArray)); - Assert.assertEquals("InputStream initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnInputStream)); - Assert.assertEquals("Array initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnArray)); + Assertions.assertEquals(datum, reader.read(null, fromInputStream), + "InputStream based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromArray), "Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromOffsetArray), + "offset Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnInputStream), + "InputStream initialized BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnArray), + "Array initialized BinaryDecoder result does 
not match"); } } - @Test - public void testInputStreamProxy() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxy(boolean useDirect) throws IOException { + BinaryDecoder d = newDecoder(data, useDirect); + if (d != null) { + BinaryDecoder bd = d; InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = factory.binaryDecoder(data, bd); + bd = this.newDecoder(data, bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); // with input stream sources - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); } } - @Test - public void testInputStreamProxyDetached() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - InputStream test = bd.inputStream(); - InputStream check = new ByteArrayInputStream(data); - // detach input stream and decoder from old source - factory.binaryDecoder(new byte[56], null); - InputStream bad = bd.inputStream(); - InputStream check2 = new ByteArrayInputStream(data); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxyDetached(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + + InputStream test = bd.inputStream(); + InputStream check = new ByteArrayInputStream(data); + // detach input 
stream and decoder from old source + this.newDecoder(new byte[56], useDirect); + try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { validateInputStreamReads(test, check); - Assert.assertFalse(bad.read() == check2.read()); + Assertions.assertNotEquals(bad.read(), check2.read()); } } - @Test - public void testInputStreamPartiallyUsed() throws IOException { - BinaryDecoder bd = factory.binaryDecoder(new ByteArrayInputStream(data), null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamPartiallyUsed(boolean useDirect) throws IOException { + BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data), useDirect); InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); // triggers buffer fill if unused and tests isEnd() try { - Assert.assertFalse(bd.isEnd()); + Assertions.assertFalse(bd.isEnd()); } catch (UnsupportedOperationException e) { // this is ok if its a DirectBinaryDecoder. 
if (bd.getClass() != DirectBinaryDecoder.class) { @@ -280,25 +324,28 @@ private void validateInputStreamReads(InputStream test, InputStream check) throw while (true) { int t = test.read(); int c = check.read(); - Assert.assertEquals(c, t); - if (-1 == t) + Assertions.assertEquals(c, t); + if (-1 == t) { break; + } t = test.read(bt); c = check.read(bc); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } t = test.read(bt, 1, 4); c = check.read(bc, 1, 4); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } } - Assert.assertEquals(0, test.skip(5)); - Assert.assertEquals(0, test.available()); - Assert.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); + Assertions.assertEquals(0, test.skip(5)); + Assertions.assertEquals(0, test.available()); + Assertions.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); test.close(); } @@ -306,154 +353,300 @@ private void validateInputStreamSkips(InputStream test, InputStream check) throw while (true) { long t2 = test.skip(19); long c2 = check.skip(19); - Assert.assertEquals(c2, t2); - if (0 == t2) + Assertions.assertEquals(c2, t2); + if (0 == t2) { break; + } } - Assert.assertEquals(-1, test.read()); + Assertions.assertEquals(-1, test.read()); } - @Test - public void testBadIntEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badIntEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[5]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = factory.binaryDecoder(badint, null); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readInt(); } catch (IOException ioe) { message = ioe.getMessage(); } - 
Assert.assertEquals("Invalid int encoding", message); + Assertions.assertEquals("Invalid int encoding", message); } - @Test - public void testBadLongEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badLongEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[10]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = factory.binaryDecoder(badint, null); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readLong(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid long encoding", message); + Assertions.assertEquals("Invalid long encoding", message); } - @Test - public void testNegativeStringLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, this.newDecoder(useDirect, -1L)::readString); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } - Assert.assertThrows("Malformed data. 
Length is negative: -1", AvroRuntimeException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1L)::readString); + Assertions.assertEquals(ERROR_VM_LIMIT_STRING, ex.getMessage()); } - @Test - public void testStringMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, newDecoder(useDirect, 129)::readString); + Assertions.assertEquals("String length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + } - Assert.assertThrows("Cannot read strings longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, + () -> this.newDecoder(useDirect, -1).readBytes(null)); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); } - @Test - public void testNegativeBytesLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesVmMaxSize(boolean useDirect) throws IOException { + Exception ex = 
Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readBytes(null)); + Assertions.assertEquals(ERROR_VM_LIMIT_BYTES, ex.getMessage()); + } - Assert.assertThrows("Malformed data. Length is negative: -1", AvroRuntimeException.class, () -> bd.readBytes(null)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, + () -> newDecoder(useDirect, 129).readBytes(null)); + Assertions.assertEquals("Bytes length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = factory.binaryDecoder(bad, null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).arrayNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // An OK reads followed by an overflow + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, Long.MAX_VALUE); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + 
Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. + Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. 
For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Cannot read arrays longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, () -> bd.readBytes(null)); + } finally { + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxLengthProperty() throws IOException { - int maxLength = 128; - byte[] bad = new byte[10]; - BinaryData.encodeLong(maxLength + 1, bad, 0); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).mapNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. 
For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapMaxCustom(boolean useDirect) throws IOException { try { - System.setProperty("org.apache.avro.limits.bytes.maxLength", Long.toString(maxLength)); - Decoder bd = factory.binaryDecoder(bad, null); + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. + Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. 
+ bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Bytes length " + (maxLength + 1) + " exceeds maximum allowed", AvroRuntimeException.class, - () -> bd.readBytes(null)); } finally { - System.clearProperty("org.apache.avro.limits.bytes.maxLength"); + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); } } - @Test(expected = UnsupportedOperationException.class) - public void testLongLengthEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longLengthEncoding(boolean useDirect) { // Size equivalent to Integer.MAX_VALUE + 1 byte[] bad = new byte[] { (byte) -128, (byte) -128, (byte) -128, (byte) -128, (byte) 16 }; - Decoder bd = factory.binaryDecoder(bad, null); - bd.readString(); + Decoder bd = this.newDecoder(bad, useDirect); + Assertions.assertThrows(UnsupportedOperationException.class, bd::readString); } - @Test(expected = EOFException.class) - public void testIntTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void intTooShort(boolean useDirect) { byte[] badint = new byte[4]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testLongTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longTooShort(boolean useDirect) { byte[] badint = new byte[9]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test(expected = 
EOFException.class) - public void testFloatTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void floatTooShort(boolean useDirect) { byte[] badint = new byte[3]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testDoubleTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void doubleTooShort(boolean useDirect) { byte[] badint = new byte[7]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test - public void testSkipping() throws IOException { - Decoder d = newDecoder(data); - skipGenerated(d); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void skipping(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + skipGenerated(bd); + + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } - bd = factory.binaryDecoder(new ByteArrayInputStream(data), bd); - skipGenerated(bd); - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. 
- if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + } + bd = this.newDecoder(new ByteArrayInputStream(data), bd, useDirect); + skipGenerated(bd); + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } } + } private void skipGenerated(Decoder bd) throws IOException { @@ -468,6 +661,7 @@ private void skipGenerated(Decoder bd) throws IOException { // booleans are one byte, array trailer is one byte bd.skipFixed((int) leftover + 1); bd.skipFixed(0); + bd.skipFixed(-8); // Should be a no-op; see AVRO-3635 bd.readLong(); } EOFException eof = null; @@ -476,19 +670,42 @@ private void skipGenerated(Decoder bd) throws IOException { } catch (EOFException e) { eof = e; } - Assert.assertTrue(null != eof); + Assertions.assertNotNull(eof); } - @Test(expected = EOFException.class) - public void testEOF() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eof(boolean useDirect) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = EncoderFactory.get().binaryEncoder(baos, null); e.writeLong(0x10000000000000L); e.flush(); - Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray())); - Assert.assertEquals(0x10000000000000L, d.readLong()); - d.readInt(); + Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray()), useDirect); + Assertions.assertEquals(0x10000000000000L, d.readLong()); + Assertions.assertThrows(EOFException.class, () -> d.readInt()); + } + + @Test + void testFloatPrecision() throws Exception { + String def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" + "[{\"type\":\"float\",\"name\":\"n\"}]}"; + Schema schema = new Schema.Parser().parse(def); + DatumReader reader = new GenericDatumReader<>(schema); + + float value = 33.33000183105469f; + + GenericData.Record record = new GenericData.Record(schema); + 
record.put(0, value); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null); + + DatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(record, encoder); + encoder.flush(); + + Decoder decoder = DecoderFactory.get().directBinaryDecoder(new ByteArrayInputStream(out.toByteArray()), null); + GenericRecord r = reader.read(null, decoder); + assertEquals(value + 0d, ((float) r.get("n")) + 0d); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java index f452c8b29b7..1f699ea8266 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryEncoderFidelity.java @@ -17,15 +17,17 @@ */ package org.apache.avro.io; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Random; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; public class TestBinaryEncoderFidelity { @@ -138,7 +140,7 @@ static void generateComplexData(Encoder e) throws IOException { e.flush(); } - @BeforeClass + @BeforeAll public static void generateLegacyData() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = new LegacyBinaryEncoder(baos); @@ -150,49 +152,91 @@ public static void generateLegacyData() throws IOException { } @Test - public void testBinaryEncoder() throws IOException { + void binaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.binaryEncoder(baos, null); 
generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); generateComplexData(e); byte[] result2 = baos.toByteArray(); - Assert.assertEquals(complexdata.length, result2.length); - Assert.assertArrayEquals(complexdata, result2); + assertEquals(complexdata.length, result2.length); + assertArrayEquals(complexdata, result2); } @Test - public void testDirectBinaryEncoder() throws IOException { + void directBinaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.directBinaryEncoder(baos, null); generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); + baos.reset(); + generateComplexData(e); + byte[] result2 = baos.toByteArray(); + assertEquals(complexdata.length, result2.length); + assertArrayEquals(complexdata, result2); + } + + @Test + void blockingDirectBinaryEncoder() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryEncoder e = factory.blockingDirectBinaryEncoder(baos, null); + generateData(e, true); + + byte[] result = baos.toByteArray(); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); + generateComplexData(e); byte[] result2 = baos.toByteArray(); - Assert.assertEquals(complexdata.length, result2.length); - Assert.assertArrayEquals(complexdata, result2); + // blocking will cause different length, should be two bytes larger + assertEquals(complexdata.length + 2, result2.length); + // the first byte is the array start, with the count of items negative + assertEquals(complexdata[0] >>> 1, result2[0]); + 
baos.reset(); + + e.writeArrayStart(); + e.setItemCount(1); + e.startItem(); + e.writeInt(1); + e.writeArrayEnd(); + + // 1: 1 element in the array + // 2: 1 byte for the int + // 3: zigzag encoded int + // 4: 0 elements in the next block + assertArrayEquals(baos.toByteArray(), new byte[] { 1, 2, 2, 0 }); + baos.reset(); + + e.writeArrayStart(); + e.setItemCount(0); + e.writeArrayEnd(); + + // This is correct + // 0: 0 elements in the block + assertArrayEquals(baos.toByteArray(), new byte[] { 0 }); + baos.reset(); } @Test - public void testBlockingBinaryEncoder() throws IOException { + void blockingBinaryEncoder() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder e = factory.blockingBinaryEncoder(baos, null); generateData(e, true); byte[] result = baos.toByteArray(); - Assert.assertEquals(legacydata.length, result.length); - Assert.assertArrayEquals(legacydata, result); + assertEquals(legacydata.length, result.length); + assertArrayEquals(legacydata, result); baos.reset(); generateComplexData(e); byte[] result2 = baos.toByteArray(); // blocking will cause different length, should be two bytes larger - Assert.assertEquals(complexdata.length + 2, result2.length); + assertEquals(complexdata.length + 2, result2.length); // the first byte is the array start, with the count of items negative - Assert.assertEquals(complexdata[0] >>> 1, result2[0]); + assertEquals(complexdata[0] >>> 1, result2[0]); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java new file mode 100644 index 00000000000..fee4c23e198 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingDirectBinaryEncoder.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.io; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaNormalization; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.specific.TestRecordWithMapsAndArrays; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.Map; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class TestBlockingDirectBinaryEncoder { + + private void writeToArray(BinaryEncoder encoder, int[] numbers) throws IOException { + encoder.writeArrayStart(); + encoder.setItemCount(numbers.length); + for (int number : numbers) { + encoder.startItem(); + encoder.writeString(Integer.toString(number)); + } + encoder.writeArrayEnd(); + } + + private void writeToMap(BinaryEncoder encoder, long[] numbers) throws IOException { + encoder.writeMapStart(); + encoder.setItemCount(numbers.length); + for (long 
number : numbers) { + encoder.startItem(); + encoder.writeString(Long.toString(number)); + encoder.writeLong(number); + } + encoder.writeMapEnd(); + } + + @Test + void blockingDirectBinaryEncoder() throws IOException, NoSuchAlgorithmException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().blockingDirectBinaryEncoder(baos, null); + + // This is needed because there is no BlockingDirectBinaryEncoder + // BinaryMessageWriter + // available out of the box + encoder.writeFixed(new byte[] { (byte) 0xC3, (byte) 0x01 }); + encoder.writeFixed(SchemaNormalization.parsingFingerprint("CRC-64-AVRO", TestRecordWithMapsAndArrays.SCHEMA$)); + + // Array + this.writeToArray(encoder, new int[] { 1, 2, 3, 4, 5 }); + + // Map + writeToMap(encoder, new long[] { 1L, 2L, 3L, 4L, 5L }); + + // Nested Array + + encoder.writeArrayStart(); + encoder.setItemCount(2); + this.writeToArray(encoder, new int[] { 1, 2 }); + this.writeToArray(encoder, new int[] { 3, 4, 5 }); + encoder.writeArrayEnd(); + + // Nested Map + + encoder.writeMapStart(); + encoder.setItemCount(2); + encoder.writeString("first"); + this.writeToMap(encoder, new long[] { 1L, 2L }); + encoder.writeString("second"); + this.writeToMap(encoder, new long[] { 3L, 4L, 5L }); + encoder.writeMapEnd(); + + // Read + + encoder.flush(); + + BinaryMessageDecoder decoder = TestRecordWithMapsAndArrays.getDecoder(); + TestRecordWithMapsAndArrays r = decoder.decode(baos.toByteArray()); + + assertThat(r.getArr(), is(Arrays.asList("1", "2", "3", "4", "5"))); + Map map = r.getMap(); + assertThat(map.size(), is(5)); + for (long i = 1; i <= 5; i++) { + assertThat(map.get(Long.toString(i)), is(i)); + } + + assertThat(r.getNestedArr(), is(Arrays.asList(Arrays.asList("1", "2"), Arrays.asList("3", "4", "5")))); + + Map> nestedMap = r.getNestedMap(); + assertThat(nestedMap.size(), is(2)); + + assertThat(nestedMap.get("first").size(), is(2)); + assertThat(nestedMap.get("first").get("1"), 
is(1L)); + assertThat(nestedMap.get("first").get("2"), is(2L)); + + assertThat(nestedMap.get("second").size(), is(3)); + assertThat(nestedMap.get("second").get("3"), is(3L)); + assertThat(nestedMap.get("second").get("4"), is(4L)); + assertThat(nestedMap.get("second").get("5"), is(5L)); + } + + @Test + void testSkippingUsingBlocks() throws IOException, NoSuchAlgorithmException { + // Create an empty schema for read, so we skip over all the fields + Schema emptySchema = new Schema.Parser().parse( + "{\"type\":\"record\",\"name\":\"TestRecordWithMapsAndArrays\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[]}"); + + GenericDatumReader in = new GenericDatumReader<>(TestRecordWithMapsAndArrays.SCHEMA$, emptySchema); + Decoder mockDecoder = mock(BinaryDecoder.class); + + for (long i = 0; i < 1; i++) { + in.read(null, mockDecoder); + } + + verify(mockDecoder, times(2)).skipMap(); + verify(mockDecoder, times(2)).skipArray(); + verify(mockDecoder, times(0)).readString(); + verify(mockDecoder, times(0)).readLong(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java index 6beda2ae66e..d107b9d82d7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java @@ -17,7 +17,12 @@ */ package org.apache.avro.io; -import static org.junit.Assert.*; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -25,28 +30,14 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; 
-import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonParser; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -@RunWith(Parameterized.class) public class TestBlockingIO { - private final int iSize; - private final int iDepth; - private final String sInput; - - public TestBlockingIO(int sz, int dp, String inp) { - this.iSize = sz; - this.iDepth = dp; - this.sInput = inp; - } - private static class Tests { private final JsonParser parser; private final Decoder input; @@ -206,25 +197,29 @@ public S(long count, boolean isArray) { } } - @Test - public void testScan() throws IOException { - Tests t = new Tests(iSize, iDepth, sInput); + @ParameterizedTest + @MethodSource("data") + public void testScan(int size, int depth, String input) throws IOException { + Tests t = new Tests(size, depth, input); t.scan(); } - @Test - public void testSkip1() throws IOException { - testSkip(iSize, iDepth, sInput, 0); + @ParameterizedTest + @MethodSource("data") + public void testSkip1(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 0); } - @Test - public void testSkip2() throws IOException { - testSkip(iSize, iDepth, sInput, 1); + @ParameterizedTest + @MethodSource("data") + public void testSkip2(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 1); } - @Test - public void testSkip3() throws IOException { - testSkip(iSize, iDepth, sInput, 2); + @ParameterizedTest + @MethodSource("data") + public void testSkip3(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 2); } private void testSkip(int bufferSize, int depth, String input, int skipLevel) throws IOException { @@ -323,9 +318,8 @@ private static void 
serialize(Encoder cos, JsonParser p, ByteArrayOutputStream o } } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, { 64, 0, jss(64, 'a') }, { 64, 0, jss(65, 'a') }, { 64, 0, jss(100, 'a') }, { 64, 1, "[]" }, { 64, 1, "[" + jss(0, 'a') + "]" }, { 64, 1, "[" + jss(3, 'a') + "]" }, { 64, 1, "[" + jss(61, 'a') + "]" }, { 64, 1, "[" + jss(62, 'a') + "]" }, { 64, 1, "[" + jss(64, 'a') + "]" }, { 64, 1, "[" + jss(65, 'a') + "]" }, @@ -387,7 +381,8 @@ public static Collection data() { { 100, 2, "[[\"pqr\", \"ab\", \"mnopqrstuvwx\"]]" }, { 64, 2, "[[[\"pqr\"]], [[\"ab\"], [\"mnopqrstuvwx\"]]]" }, { 64, 1, "{}" }, { 64, 1, "{\"n\": \"v\"}" }, { 64, 1, "{\"n1\": \"v\", \"n2\": []}" }, - { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }); + { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }) + .map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java index 3a91bb96dea..378e17ee613 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java @@ -17,14 +17,13 @@ */ package org.apache.avro.io; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import java.util.stream.Stream; /** * This class has more 
exhaustive tests for Blocking IO. The reason we have both @@ -32,38 +31,29 @@ * TestBlockingIO2, it is hard to test skip() operations. and with the test * infrastructure of TestBlockingIO, it is hard to test enums, unions etc. */ -@RunWith(Parameterized.class) public class TestBlockingIO2 { - private final Decoder decoder; - private final String calls; - private Object[] values; - private String msg; - - public TestBlockingIO2(int bufferSize, int skipLevel, String calls) throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testScan(int bufferSize, int skipLevel, String calls) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); EncoderFactory factory = new EncoderFactory().configureBlockSize(bufferSize); Encoder encoder = factory.blockingBinaryEncoder(os, null); - this.values = TestValidatingIO.randomValues(calls); + Object[] values = TestValidatingIO.randomValues(calls); TestValidatingIO.generate(encoder, calls, values); encoder.flush(); byte[] bb = os.toByteArray(); - decoder = DecoderFactory.get().binaryDecoder(bb, null); - this.calls = calls; - this.msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - } + Decoder decoder = DecoderFactory.get().binaryDecoder(bb, null); + String msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - @Test - public void testScan() throws IOException { TestValidatingIO.check(msg, decoder, calls, values, -1); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, { 64, 0, "S65" }, { 64, 0, "S100" }, { 64, 1, "[]" }, { 64, 1, "[c1sS0]" }, { 64, 1, "[c1sS3]" }, { 64, 1, "[c1sS61]" }, { 64, 1, "[c1sS62]" }, { 64, 1, "[c1sS64]" }, { 64, 1, "[c1sS65]" }, { 64, 1, "[c2sS0sS0]" }, { 64, 1, 
"[c2sS0sS10]" }, { 64, 1, "[c2sS0sS63]" }, { 64, 1, "[c2sS0sS64]" }, @@ -99,6 +89,6 @@ public static Collection data() { { 100, 1, "{c1sK5e10}" }, { 100, 1, "{c1sK5U1S10}" }, { 100, 1, "{c1sK5f10S10}" }, { 100, 1, "{c1sK5NS10}" }, { 100, 1, "{c1sK5BS10}" }, { 100, 1, "{c1sK5IS10}" }, { 100, 1, "{c1sK5LS10}" }, { 100, 1, "{c1sK5FS10}" }, - { 100, 1, "{c1sK5DS10}" }, }); + { 100, 1, "{c1sK5DS10}" }, }).map(Arguments::of); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java index f3a0760d82e..51ef375e307 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java @@ -27,12 +27,12 @@ import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -47,89 +47,114 @@ import static java.util.Arrays.asList; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertThat; +import static org.junit.jupiter.api.Assertions.*; +import static org.hamcrest.MatcherAssert.assertThat; public class TestEncoders { private static final int ENCODER_BUFFER_SIZE = 32; private static final int EXAMPLE_DATA_SIZE = 17; - private static EncoderFactory factory = EncoderFactory.get(); + private static final EncoderFactory FACTORY = EncoderFactory.get(); - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public Path dataDir; @Test - public void testBinaryEncoderInit() throws 
IOException { + void binaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); - BinaryEncoder enc = factory.binaryEncoder(out, null); - Assert.assertSame(enc, factory.binaryEncoder(out, enc)); + BinaryEncoder enc = FACTORY.binaryEncoder(out, null); + assertSame(enc, FACTORY.binaryEncoder(out, enc)); } - @Test(expected = NullPointerException.class) - public void testBadBinaryEncoderInit() { - factory.binaryEncoder(null, null); + @Test + void badBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.binaryEncoder(null, null); + }); } @Test - public void testBlockingBinaryEncoderInit() throws IOException { + void blockingBinaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); BinaryEncoder reuse = null; - reuse = factory.blockingBinaryEncoder(out, reuse); - Assert.assertSame(reuse, factory.blockingBinaryEncoder(out, reuse)); + reuse = FACTORY.blockingBinaryEncoder(out, reuse); + assertSame(reuse, FACTORY.blockingBinaryEncoder(out, reuse)); // comparison } - @Test(expected = NullPointerException.class) - public void testBadBlockintBinaryEncoderInit() { - factory.binaryEncoder(null, null); + @Test + void badBlockintBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.binaryEncoder(null, null); + }); } @Test - public void testDirectBinaryEncoderInit() throws IOException { + void directBinaryEncoderInit() throws IOException { OutputStream out = new ByteArrayOutputStream(); - BinaryEncoder enc = factory.directBinaryEncoder(out, null); - Assert.assertSame(enc, factory.directBinaryEncoder(out, enc)); + BinaryEncoder enc = FACTORY.directBinaryEncoder(out, null); + assertSame(enc, FACTORY.directBinaryEncoder(out, enc)); } - @Test(expected = NullPointerException.class) - public void testBadDirectBinaryEncoderInit() { - factory.directBinaryEncoder(null, null); + @Test + void badDirectBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + 
FACTORY.directBinaryEncoder(null, null); + }); } @Test - public void testJsonEncoderInit() throws IOException { + void blockingDirectBinaryEncoderInit() throws IOException { + OutputStream out = new ByteArrayOutputStream(); + BinaryEncoder enc = FACTORY.blockingDirectBinaryEncoder(out, null); + assertSame(enc, FACTORY.blockingDirectBinaryEncoder(out, enc)); + } + + @Test + void badBlockingDirectBinaryEncoderInit() { + assertThrows(NullPointerException.class, () -> { + FACTORY.blockingDirectBinaryEncoder(null, null); + }); + } + + @Test + void jsonEncoderInit() throws IOException { Schema s = new Schema.Parser().parse("\"int\""); OutputStream out = new ByteArrayOutputStream(); - factory.jsonEncoder(s, out); - JsonEncoder enc = factory.jsonEncoder(s, new JsonFactory().createGenerator(out, JsonEncoding.UTF8)); + FACTORY.jsonEncoder(s, out); + JsonEncoder enc = FACTORY.jsonEncoder(s, new JsonFactory().createGenerator(out, JsonEncoding.UTF8)); enc.configure(out); } - @Test(expected = NullPointerException.class) - public void testBadJsonEncoderInitOS() throws IOException { - factory.jsonEncoder(Schema.create(Type.INT), (OutputStream) null); + @Test + void badJsonEncoderInitOS() throws IOException { + assertThrows(NullPointerException.class, () -> { + FACTORY.jsonEncoder(Schema.create(Type.INT), (OutputStream) null); + }); } - @Test(expected = NullPointerException.class) - public void testBadJsonEncoderInit() throws IOException { - factory.jsonEncoder(Schema.create(Type.INT), (JsonGenerator) null); + @Test + void badJsonEncoderInit() throws IOException { + assertThrows(NullPointerException.class, () -> { + FACTORY.jsonEncoder(Schema.create(Type.INT), (JsonGenerator) null); + }); } @Test - public void testJsonEncoderNewlineDelimited() throws IOException { + void jsonEncoderNewlineDelimited() throws IOException { OutputStream out = new ByteArrayOutputStream(); Schema ints = Schema.create(Type.INT); - Encoder e = factory.jsonEncoder(ints, out); + Encoder e = 
FACTORY.jsonEncoder(ints, out); String separator = System.getProperty("line.separator"); GenericDatumWriter writer = new GenericDatumWriter<>(ints); writer.write(1, e); writer.write(2, e); e.flush(); - Assert.assertEquals("1" + separator + "2", out.toString()); + assertEquals("1" + separator + "2", out.toString()); } @Test - public void testJsonEncoderWhenIncludeNamespaceOptionIsFalse() throws IOException { + void jsonEncoderWhenIncludeNamespaceOptionIsFalse() throws IOException { String value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; String schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + "]}"; @@ -137,12 +162,12 @@ public void testJsonEncoderWhenIncludeNamespaceOptionIsFalse() throws IOExceptio byte[] avroBytes = fromJsonToAvro(value, schema); ObjectMapper mapper = new ObjectMapper(); - Assert.assertEquals(mapper.readTree("{\"b\":\"myVal\",\"a\":1}"), + assertEquals(mapper.readTree("{\"b\":\"myVal\",\"a\":1}"), mapper.readTree(fromAvroToJson(avroBytes, schema, false))); } @Test - public void testJsonEncoderWhenIncludeNamespaceOptionIsTrue() throws IOException { + void jsonEncoderWhenIncludeNamespaceOptionIsTrue() throws IOException { String value = "{\"b\": {\"string\":\"myVal\"}, \"a\": 1}"; String schemaStr = "{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": [\"null\", \"string\"]}" + "]}"; @@ -150,45 +175,47 @@ public void testJsonEncoderWhenIncludeNamespaceOptionIsTrue() throws IOException byte[] avroBytes = fromJsonToAvro(value, schema); ObjectMapper mapper = new ObjectMapper(); - Assert.assertEquals(mapper.readTree("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), + assertEquals(mapper.readTree("{\"b\":{\"string\":\"myVal\"},\"a\":1}"), mapper.readTree(fromAvroToJson(avroBytes, schema, true))); } @Test - public void testValidatingEncoderInit() throws IOException { + void 
validatingEncoderInit() throws IOException { Schema s = new Schema.Parser().parse("\"int\""); OutputStream out = new ByteArrayOutputStream(); - Encoder e = factory.directBinaryEncoder(out, null); - factory.validatingEncoder(s, e).configure(e); + Encoder e = FACTORY.directBinaryEncoder(out, null); + FACTORY.validatingEncoder(s, e).configure(e); } @Test - public void testJsonRecordOrdering() throws IOException { + void jsonRecordOrdering() throws IOException { String value = "{\"b\": 2, \"a\": 1}"; Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [" + "{\"name\": \"a\", \"type\": \"int\"}, {\"name\": \"b\", \"type\": \"int\"}" + "]}"); GenericDatumReader reader = new GenericDatumReader<>(schema); Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": 1, \"b\": 2}", o.toString()); + assertEquals("{\"a\": 1, \"b\": 2}", o.toString()); } - @Test(expected = AvroTypeException.class) - public void testJsonExcessFields() throws IOException { - String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a0\": 45, \"a2\":true, \"a1\": null}}"; - Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" - + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" - + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" - + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" - + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" - + "]}"); - GenericDatumReader reader = new GenericDatumReader<>(schema); - Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); - reader.read(null, decoder); + @Test + void jsonExcessFields() throws IOException { + assertThrows(AvroTypeException.class, () -> { + String value = "{\"b\": { 
\"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a0\": 45, \"a2\":true, \"a1\": null}}"; + Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" + + "[{\"name\":\"a1\", \"type\":\"null\"}, {\"name\":\"a2\", \"type\":\"boolean\"}]}},\n" + + "{\"name\": \"b\", \"type\": {\"type\":\"record\",\"name\":\"B\",\"fields\":\n" + + "[{\"name\":\"b1\", \"type\":\"string\"}, {\"name\":\"b2\", \"type\":\"float\"}, {\"name\":\"b3\", \"type\":\"double\"}]}}\n" + + "]}"); + GenericDatumReader reader = new GenericDatumReader<>(schema); + Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); + reader.read(null, decoder); + }); } @Test - public void testJsonRecordOrdering2() throws IOException { + void jsonRecordOrdering2() throws IOException { String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; Schema schema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" @@ -199,12 +226,12 @@ public void testJsonRecordOrdering2() throws IOException { GenericDatumReader reader = new GenericDatumReader<>(schema); Decoder decoder = DecoderFactory.get().jsonDecoder(schema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}, \"b\": {\"b1\": \"h\", \"b2\": 3.14, \"b3\": 1.4}}", + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}, \"b\": {\"b1\": \"h\", \"b2\": 3.14, \"b3\": 1.4}}", o.toString()); } @Test - public void testJsonRecordOrderingWithProjection() throws IOException { + void jsonRecordOrderingWithProjection() throws IOException { String value = "{\"b\": { \"b3\": 1.4, \"b2\": 3.14, \"b1\": \"h\"}, \"a\": {\"a2\":true, \"a1\": null}}"; Schema writerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": 
\"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" @@ -218,11 +245,11 @@ public void testJsonRecordOrderingWithProjection() throws IOException { GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); } @Test - public void testJsonRecordOrderingWithProjection2() throws IOException { + void jsonRecordOrderingWithProjection2() throws IOException { String value = "{\"b\": { \"b1\": \"h\", \"b2\": [3.14, 3.56], \"b3\": 1.4}, \"a\": {\"a2\":true, \"a1\": null}}"; Schema writerSchema = new Schema.Parser().parse("{\"type\": \"record\", \"name\": \"ab\", \"fields\": [\n" + "{\"name\": \"a\", \"type\": {\"type\":\"record\",\"name\":\"A\",\"fields\":\n" @@ -236,19 +263,19 @@ public void testJsonRecordOrderingWithProjection2() throws IOException { GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, value); Object o = reader.read(null, decoder); - Assert.assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); + assertEquals("{\"a\": {\"a1\": null, \"a2\": true}}", o.toString()); } @Test - public void testArrayBackedByteBuffer() throws IOException { + void arrayBackedByteBuffer() throws IOException { ByteBuffer buffer = ByteBuffer.wrap(someBytes(EXAMPLE_DATA_SIZE)); testWithBuffer(buffer); } @Test - public void testMappedByteBuffer() throws IOException { - Path file = Paths.get(DIR.getRoot().getPath() + "testMappedByteBuffer.avro"); + void mappedByteBuffer() throws IOException { + Path file = dataDir.resolve("testMappedByteBuffer.avro"); Files.write(file, someBytes(EXAMPLE_DATA_SIZE)); MappedByteBuffer buffer = FileChannel.open(file, 
StandardOpenOption.READ).map(FileChannel.MapMode.READ_ONLY, 0, EXAMPLE_DATA_SIZE); @@ -311,7 +338,7 @@ private String fromAvroToJson(byte[] avroBytes, Schema schema, boolean includeNa DatumWriter writer = new GenericDatumWriter<>(schema); ByteArrayOutputStream output = new ByteArrayOutputStream(); - JsonEncoder encoder = factory.jsonEncoder(schema, output); + JsonEncoder encoder = FACTORY.jsonEncoder(schema, output); encoder.setIncludeNamespace(includeNamespace); Decoder decoder = DecoderFactory.get().binaryDecoder(avroBytes, null); Object datum = reader.read(null, decoder); @@ -321,4 +348,35 @@ private String fromAvroToJson(byte[] avroBytes, Schema schema, boolean includeNa return new String(output.toByteArray(), StandardCharsets.UTF_8.name()); } + + @Test + public void testJsonEncoderInitAutoFlush() throws IOException { + Schema s = new Schema.Parser().parse("\"int\""); + OutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + JsonEncoder enc = FACTORY.jsonEncoder(s, out, false); + enc.configure(out, false); + enc.writeInt(24); + enc.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("24", baos.toString()); + } + + @Test + public void testJsonEncoderInitAutoFlushDisabled() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + Schema ints = Schema.create(Type.INT); + Encoder e = FACTORY.jsonEncoder(ints, out, false, false); + String separator = System.getProperty("line.separator"); + GenericDatumWriter writer = new GenericDatumWriter(ints); + writer.write(1, e); + writer.write(2, e); + e.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("1" + separator + "2", baos.toString()); + out.close(); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java index 54fc4203080..1f44344e3aa 100644 
--- a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java @@ -17,31 +17,41 @@ */ package org.apache.avro.io; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; -import org.junit.Assert; -import org.junit.Test; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; public class TestJsonDecoder { @Test - public void testInt() throws Exception { + void testInt() throws Exception { checkNumeric("int", 1); } @Test - public void testLong() throws Exception { + void testLong() throws Exception { checkNumeric("long", 1L); } @Test - public void testFloat() throws Exception { + void testFloat() throws Exception { checkNumeric("float", 1.0F); } @Test - public void testDouble() throws Exception { + void testDouble() throws Exception { checkNumeric("double", 1.0); } @@ -55,24 +65,57 @@ private void checkNumeric(String type, Object value) throws Exception { for (String record : records) { Decoder decoder = DecoderFactory.get().jsonDecoder(schema, record); GenericRecord r = reader.read(null, decoder); - Assert.assertEquals(value, r.get("n")); + assertEquals(value, r.get("n")); } } + @Test + void testFloatPrecision() throws Exception { + String def = "{\"type\":\"record\",\"name\":\"X\",\"fields\":" + "[{\"type\":\"float\",\"name\":\"n\"}]}"; + Schema schema = new Schema.Parser().parse(def); + DatumReader reader = new GenericDatumReader<>(schema); + + float value = 33.33000183105469f; + GenericData.Record record = new GenericData.Record(schema); + record.put(0, value); + 
ByteArrayOutputStream out = new ByteArrayOutputStream(); + JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out); + + DatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(record, encoder); + encoder.flush(); + // check the whole float precision is kept. + assertEquals("{\"n\":33.33000183105469}", out.toString()); + + Decoder decoder = DecoderFactory.get().jsonDecoder(schema, out.toString()); + GenericRecord r = reader.read(null, decoder); + assertEquals(value + 0d, ((float) r.get("n")) + 0d); + } + // Ensure that even if the order of fields in JSON is different from the order // in schema, // it works. @Test - public void testReorderFields() throws Exception { + void reorderFields() throws Exception { String w = "{\"type\":\"record\",\"name\":\"R\",\"fields\":" + "[{\"type\":\"long\",\"name\":\"l\"}," + "{\"type\":{\"type\":\"array\",\"items\":\"int\"},\"name\":\"a\"}" + "]}"; Schema ws = new Schema.Parser().parse(w); DecoderFactory df = DecoderFactory.get(); String data = "{\"a\":[1,2],\"l\":100}{\"l\": 200, \"a\":[1,2]}"; JsonDecoder in = df.jsonDecoder(ws, data); - Assert.assertEquals(100, in.readLong()); + assertEquals(100, in.readLong()); in.skipArray(); - Assert.assertEquals(200, in.readLong()); + assertEquals(200, in.readLong()); in.skipArray(); } + + @Test + void testIntWithError() throws IOException { + Schema schema = SchemaBuilder.builder("test").record("example").fields().requiredInt("id").endRecord(); + String record = "{ \"id\": -1.2 }"; + + GenericDatumReader reader = new GenericDatumReader<>(schema, schema); + JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, record); + Assertions.assertThrows(AvroTypeException.class, () -> reader.read(null, decoder)); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java index c880d9fd55a..8a960427922 100644 --- 
a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java @@ -17,48 +17,34 @@ */ package org.apache.avro.io; +import org.apache.avro.Schema; +import org.apache.avro.io.TestValidatingIO.Encoding; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import org.apache.avro.Schema; -import org.apache.avro.io.TestValidatingIO.Encoding; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) public class TestResolvingIO { - protected final Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; - - public TestResolvingIO(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, - String jsonReaderSchema, String readerCalls) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - } - - @Test - public void testIdentical() throws IOException { - performTest(eEnc, iSkipL, sJsWrtSchm, sWrtCls, sJsWrtSchm, sWrtCls); + @ParameterizedTest + @MethodSource("data2") + public void testIdentical(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonWriterSchema, writerCalls); } private static final int COUNT = 10; - @Test - public void testCompatible() throws IOException { - performTest(eEnc, iSkipL, 
sJsWrtSchm, sWrtCls, sJsRdrSchm, sRdrCls); + @ParameterizedTest + @MethodSource("data2") + public void testCompatible(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonReaderSchema, readerCalls); } private void performTest(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, @@ -100,9 +86,8 @@ static void check(Schema wsc, Schema rsc, byte[] bytes, String calls, Object[] v TestValidatingIO.check(msg, vi, calls, values, skipLevel); } - @Parameterized.Parameters - public static Collection data2() { - return Arrays.asList(TestValidatingIO.convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data2() { + return TestValidatingIO.convertTo2dStream(encodings, skipLevels, testSchemas()); } static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, { Encoding.JSON } }; diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java index 8e3dc8e53d7..0a55d18a742 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java @@ -17,53 +17,32 @@ */ package org.apache.avro.io; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - import org.apache.avro.Schema; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) -public class TestResolvingIOResolving { - protected TestValidatingIO.Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; +import 
org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; - protected final Object[] oaWrtVals; - protected final Object[] oaRdrVals; +import java.io.IOException; +import java.util.stream.Stream; - public TestResolvingIOResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, - String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.oaWrtVals = writerValues; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - this.oaRdrVals = readerValues; - } +public class TestResolvingIOResolving { - @Test - public void testResolving() throws IOException { - Schema writerSchema = new Schema.Parser().parse(sJsWrtSchm); - byte[] bytes = TestValidatingIO.make(writerSchema, sWrtCls, oaWrtVals, eEnc); - Schema readerSchema = new Schema.Parser().parse(sJsRdrSchm); - TestValidatingIO.print(eEnc, iSkipL, writerSchema, readerSchema, oaWrtVals, oaRdrVals); - TestResolvingIO.check(writerSchema, readerSchema, bytes, sRdrCls, oaRdrVals, eEnc, iSkipL); + @ParameterizedTest + @MethodSource("data3") + public void testResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, + String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) + throws IOException { + Schema writerSchema = new Schema.Parser().parse(jsonWriterSchema); + byte[] bytes = TestValidatingIO.make(writerSchema, writerCalls, writerValues, encoding); + Schema readerSchema = new Schema.Parser().parse(jsonReaderSchema); + TestValidatingIO.print(encoding, skipLevel, writerSchema, readerSchema, writerValues, readerValues); + TestResolvingIO.check(writerSchema, readerSchema, bytes, readerCalls, readerValues, encoding, skipLevel); } - 
@Parameterized.Parameters - public static Collection data3() { - Collection ret = Arrays.asList(TestValidatingIO.convertTo2dArray(TestResolvingIO.encodings, - TestResolvingIO.skipLevels, dataForResolvingTests())); - return ret; + public static Stream data3() { + return TestValidatingIO.convertTo2dStream(TestResolvingIO.encodings, TestResolvingIO.skipLevels, + dataForResolvingTests()); } private static Object[][] dataForResolvingTests() { @@ -101,7 +80,7 @@ private static Object[][] dataForResolvingTests() { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g1\", " + "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" + "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," + "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - + "{\"name\": \"g2\", \"type\": \"long\"}]}}", + + "{\"name\": \"g2\", \"type\": \"long\"}]}", "RRIIL", new Object[] { 10, 101, 11L } }, // Default value for a record. { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g2\", \"type\": \"long\"}]}", "L", diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java index 3056d5430af..063414fbb43 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java @@ -17,9 +17,15 @@ */ package org.apache.avro.io; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import org.apache.avro.Schema; +import org.apache.avro.util.Utf8; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -27,20 +33,14 @@ 
import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Random; -import org.apache.avro.Schema; -import org.apache.avro.util.Utf8; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; -@RunWith(Parameterized.class) public class TestValidatingIO { enum Encoding { BINARY, BLOCKING_BINARY, JSON, @@ -48,30 +48,19 @@ enum Encoding { private static final Logger LOG = LoggerFactory.getLogger(TestValidatingIO.class); - private Encoding eEnc; - private int iSkipL; - private String sJsSch; - private String sCl; - - public TestValidatingIO(Encoding enc, int skip, String js, String cls) { - this.eEnc = enc; - this.iSkipL = skip; - this.sJsSch = js; - this.sCl = cls; - } - private static final int COUNT = 1; - @Test - public void testMain() throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testMain(Encoding enc, int skip, String js, String cls) throws IOException { for (int i = 0; i < COUNT; i++) { - testOnce(new Schema.Parser().parse(sJsSch), sCl, iSkipL, eEnc); + testOnce(new Schema.Parser().parse(js), cls, skip, enc); } } private void testOnce(Schema schema, String calls, int skipLevel, Encoding encoding) throws IOException { Object[] values = randomValues(calls); - print(eEnc, iSkipL, schema, schema, values, values); + print(encoding, skipLevel, schema, schema, values, values); byte[] bytes = make(schema, calls, values, encoding); check(schema, bytes, calls, values, skipLevel, encoding); } @@ -204,7 +193,7 @@ public static void generate(Encoder vw, String calls, Object[] values) throws IO break; } default: - fail(); + Assertions.fail(); break; } } @@ -254,7 
+243,7 @@ public static Object[] randomValues(String calls) { case 's': break; default: - fail(); + Assertions.fail(); break; } } @@ -324,25 +313,25 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, vi.readNull(); break; case 'B': - assertEquals(msg, values[p++], vi.readBoolean()); + Assertions.assertEquals(values[p++], vi.readBoolean(), msg); break; case 'I': - assertEquals(msg, values[p++], vi.readInt()); + Assertions.assertEquals(values[p++], vi.readInt(), msg); break; case 'L': - assertEquals(msg, values[p++], vi.readLong()); + Assertions.assertEquals(values[p++], vi.readLong(), msg); break; case 'F': if (!(values[p] instanceof Float)) - fail(); + Assertions.fail(); float f = (Float) values[p++]; - assertEquals(msg, f, vi.readFloat(), Math.abs(f / 1000)); + Assertions.assertEquals(f, vi.readFloat(), Math.abs(f / 1000)); break; case 'D': if (!(values[p] instanceof Double)) - fail(); + Assertions.fail(); double d = (Double) values[p++]; - assertEquals(msg, d, vi.readDouble(), Math.abs(d / 1000)); + Assertions.assertEquals(d, vi.readDouble(), Math.abs(d / 1000), msg); break; case 'S': extractInt(cs); @@ -351,7 +340,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'K': @@ -361,7 +350,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'b': @@ -374,7 +363,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, ByteBuffer bb2 = vi.readBytes(null); byte[] actBytes = new byte[bb2.remaining()]; System.arraycopy(bb2.array(), bb2.position(), actBytes, 0, bb2.remaining()); - 
assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } break; case 'f': { @@ -386,7 +375,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, byte[] bb = (byte[]) values[p++]; byte[] actBytes = new byte[len]; vi.readFixed(actBytes); - assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } } break; @@ -395,7 +384,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, if (level == skipLevel) { vi.readEnum(); } else { - assertEquals(msg, e, vi.readEnum()); + Assertions.assertEquals(e, vi.readEnum(), msg); } } break; @@ -422,16 +411,16 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; } case ']': - assertEquals(msg, 0, counts[level]); + Assertions.assertEquals(0, counts[level], msg); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.arrayNext()); + Assertions.assertEquals(0, vi.arrayNext(), msg); } level--; break; case '}': - assertEquals(0, counts[level]); + Assertions.assertEquals(0, counts[level]); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.mapNext()); + Assertions.assertEquals(0, vi.mapNext(), msg); } level--; break; @@ -450,28 +439,28 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; case 'U': { int idx = extractInt(cs); - assertEquals(msg, idx, vi.readIndex()); + Assertions.assertEquals(idx, vi.readIndex(), msg); continue; } case 'R': ((ResolvingDecoder) vi).readFieldOrder(); continue; default: - fail(msg); + Assertions.fail(msg); } } catch (RuntimeException e) { throw new RuntimeException(msg, e); } } - assertEquals(msg, values.length, p); + Assertions.assertEquals(values.length, p, msg); } private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray) throws IOException { final char end = isArray ? 
']' : '}'; if (isArray) { - assertEquals(msg, 0, vi.skipArray()); + Assertions.assertEquals(0, vi.skipArray(), msg); } else if (end == '}') { - assertEquals(msg, 0, vi.skipMap()); + Assertions.assertEquals(0, vi.skipMap(), msg); } int level = 0; int p = 0; @@ -507,9 +496,8 @@ private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray throw new RuntimeException("Don't know how to skip"); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data() { + return convertTo2dStream(encodings, skipLevels, testSchemas()); } private static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, @@ -517,19 +505,11 @@ public static Collection data() { private static Object[][] skipLevels = new Object[][] { { -1 }, { 0 }, { 1 }, { 2 }, }; - public static Object[][] convertTo2dArray(final Object[][]... values) { - ArrayList ret = new ArrayList<>(); - + public static Stream convertTo2dStream(final Object[][]... 
values) { Iterator iter = cartesian(values); - while (iter.hasNext()) { - Object[] objects = iter.next(); - ret.add(objects); - } - Object[][] retArrays = new Object[ret.size()][]; - for (int i = 0; i < ret.size(); i++) { - retArrays[i] = ret.get(i); - } - return retArrays; + Stream stream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), + false); + return stream.map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java index c7d0213e61c..0dfe26c2258 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/SymbolTest.java @@ -15,14 +15,14 @@ */ package org.apache.avro.io.parsing; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.util.HashSet; import java.util.Set; import org.apache.avro.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Unit test to verify that recursive schemas are flattened correctly. 
See @@ -41,7 +41,7 @@ public class SymbolTest { + " ]}},\n" + " {\"name\":\"node\",\"type\":\"SampleNode\"}]}}}]}"; @Test - public void testSomeMethod() throws IOException { + void someMethod() throws IOException { Schema schema = new Schema.Parser().parse(SCHEMA); Symbol root = new ResolvingGrammarGenerator().generate(schema, schema); validateNonNull(root, new HashSet<>()); @@ -57,8 +57,7 @@ private static void validateNonNull(final Symbol symb, Set seen) { for (Symbol s : symb.production) { if (s == null) { fail("invalid parsing tree should not contain nulls"); - } - if (s.kind != Symbol.Kind.ROOT) { + } else if (s.kind != Symbol.Kind.ROOT) { validateNonNull(s, seen); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java index 3587055b96d..c6d8856733b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java @@ -21,8 +21,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.StringReader; +import java.io.UncheckedIOException; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Stream; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonNode; @@ -38,26 +40,21 @@ import org.apache.avro.generic.GenericRecordBuilder; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) -public class TestResolvingGrammarGenerator { - private final Schema schema; - private final JsonNode data; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import 
org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; - public TestResolvingGrammarGenerator(String jsonSchema, String jsonData) throws IOException { - this.schema = new Schema.Parser().parse(jsonSchema); - JsonFactory factory = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(factory); +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_1; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_2; - this.data = mapper.readTree(new StringReader(jsonData)); - } +public class TestResolvingGrammarGenerator { - @Test - public void test() throws IOException { + @ParameterizedTest + @MethodSource("data") + void test(Schema schema, JsonNode data) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); EncoderFactory factory = EncoderFactory.get(); Encoder e = factory.validatingEncoder(schema, factory.binaryEncoder(baos, null)); @@ -67,7 +64,7 @@ public void test() throws IOException { } @Test - public void testRecordMissingRequiredFieldError() throws Exception { + void recordMissingRequiredFieldError() throws Exception { Schema schemaWithoutField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() .stringType().noDefault().endRecord(); Schema schemaWithField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() @@ -76,27 +73,51 @@ public void testRecordMissingRequiredFieldError() throws Exception { byte[] data = writeRecord(schemaWithoutField, record); try { readRecord(schemaWithField, data); - Assert.fail("Expected exception not thrown"); + Assertions.fail("Expected exception not thrown"); } catch (AvroTypeException typeException) { - Assert.assertEquals("Incorrect exception message", - "Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", typeException.getMessage()); + Assertions.assertEquals("Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", + 
typeException.getMessage(), "Incorrect exception message"); } } - @Parameterized.Parameters - public static Collection data() { - Collection ret = Arrays.asList(new Object[][] { + @Test + void differingEnumNamespaces() throws Exception { + Schema schema1 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_1) + .noDefault().endRecord(); + Schema schema2 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_2) + .noDefault().endRecord(); + GenericData.EnumSymbol genericEnumSymbol = new GenericData.EnumSymbol(ENUM1_AB_SCHEMA_NAMESPACE_1, "A"); + GenericData.Record record = new GenericRecordBuilder(schema1).set("field", genericEnumSymbol).build(); + byte[] data = writeRecord(schema1, record); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema1, data).get("field")); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema2, data).get("field")); + } + + public static Stream data() { + Collection ret = Arrays.asList(new String[][] { { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { \"name\" : \"f1\", \"type\": \"int\" }, " - + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{ \"f2\": 10.4, \"f1\": 10 } " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s1\" " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s2\" " }, + + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ \"f2\": 10.4, \"f1\": 10 } " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s1\" " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s2\" " }, { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"" }, { "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" }, { "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\": 20, \"k3\": 30 }" }, { "[ \"int\", \"long\" ]", "10" }, { 
"\"string\"", "\"hello\"" }, { "\"bytes\"", "\"hello\"" }, { "\"int\"", "10" }, { "\"long\"", "10" }, { "\"float\"", "10.0" }, { "\"double\"", "10.0" }, { "\"boolean\"", "true" }, { "\"boolean\"", "false" }, { "\"null\"", "null" }, }); - return ret; + + final JsonFactory factory = new JsonFactory(); + final ObjectMapper mapper = new ObjectMapper(factory); + + return ret.stream().map((String[] args) -> { + Schema schema = new Schema.Parser().parse(args[0]); + try { + JsonNode data = mapper.readTree(new StringReader(args[1])); + return Arguments.of(schema, data); + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + }); } private byte[] writeRecord(Schema schema, GenericData.Record record) throws Exception { diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java index fc698014d53..8508ae2802b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java @@ -32,17 +32,17 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; +import static org.junit.jupiter.api.Assertions.*; /** ResolvingGrammarGenerator tests that are not Parameterized. 
*/ public class TestResolvingGrammarGenerator2 { @Test - public void testFixed() throws java.io.IOException { + void fixed() throws java.io.IOException { new ResolvingGrammarGenerator().generate(Schema.createFixed("MyFixed", null, null, 10), Schema.create(Schema.Type.BYTES)); new ResolvingGrammarGenerator().generate(Schema.create(Schema.Type.BYTES), @@ -63,69 +63,71 @@ public void testFixed() throws java.io.IOException { Schema point3dMatchName = SchemaBuilder.record("Point").fields().requiredDouble("x").requiredDouble("y").name("z") .type().doubleType().doubleDefault(0.0).endRecord(); - @Test(expected = SchemaValidationException.class) - public void testUnionResolutionNoStructureMatch() throws Exception { - // there is a short name match, but the structure does not match - Schema read = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault)); - - new SchemaValidatorBuilder().canBeReadStrategy().validateAll().validate(point2dFullname, - Collections.singletonList(read)); + @Test + void unionResolutionNoStructureMatch() throws Exception { + assertThrows(SchemaValidationException.class, () -> { + // there is a short name match, but the structure does not match + Schema read = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault)); + + new SchemaValidatorBuilder().canBeReadStrategy().validateAll().validate(point2dFullname, + Collections.singletonList(read)); + }); } @Test - public void testUnionResolutionFirstStructureMatch2d() throws Exception { + void unionResolutionFirstStructureMatch2d() throws Exception { // multiple structure matches with no short or full name matches Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault, point2d, point3d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof 
Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionFirstStructureMatch3d() throws Exception { + void unionResolutionFirstStructureMatch3d() throws Exception { // multiple structure matches with no short or full name matches Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point3dNoDefault, point3d, point2d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionNamedStructureMatch() throws Exception { + void unionResolutionNamedStructureMatch() throws Exception { // multiple structure matches with a short name match Schema read = Schema .createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), point2d, point3dMatchName, point3d)); Symbol grammar = new ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(2, action.rindex); + assertEquals(2, action.rindex); } @Test - public void testUnionResolutionFullNameMatch() throws Exception { + void unionResolutionFullNameMatch() throws Exception { // there is a full name match, so it should be chosen Schema read = Schema.createUnion( Arrays.asList(Schema.create(Schema.Type.NULL), point2d, point3dMatchName, point3d, point2dFullname)); Symbol grammar = new 
ResolvingGrammarGenerator().generate(point2dFullname, read); - Assert.assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); + assertTrue(grammar.production[1] instanceof Symbol.UnionAdjustAction); Symbol.UnionAdjustAction action = (Symbol.UnionAdjustAction) grammar.production[1]; - Assert.assertEquals(4, action.rindex); + assertEquals(4, action.rindex); } @Test - public void testAvro2702StringProperties() throws IOException { + void avro2702StringProperties() throws IOException { // Create a nested record schema with string fields at two levels. Schema inner = SchemaBuilder.builder().record("B").fields().requiredString("b1").endRecord(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java index 7c6bf1a180b..0e8583bf6c2 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestBinaryMessageEncoding.java @@ -19,6 +19,8 @@ package org.apache.avro.message; +import static org.junit.jupiter.api.Assertions.*; + import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; @@ -31,8 +33,7 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.Record; import org.apache.avro.generic.GenericRecordBuilder; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBinaryMessageEncoding { private static final Schema SCHEMA_V1 = SchemaBuilder.record("TestRecord").fields().requiredInt("id") @@ -56,18 +57,18 @@ public class TestBinaryMessageEncoding { V2_BUILDER.set("id", 8L).set("message", "m-8").set("data", 35.6).build()); @Test - public void testByteBufferRoundTrip() throws Exception { + void byteBufferRoundTrip() throws Exception { MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); MessageDecoder decoder 
= new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); Record copy = decoder.decode(encoder.encode(V2_RECORDS.get(0))); - Assert.assertNotSame("Copy should not be the same object", copy, V2_RECORDS.get(0)); - Assert.assertEquals("Record should be identical after round-trip", V2_RECORDS.get(0), copy); + assertNotSame(copy, V2_RECORDS.get(0), "Copy should not be the same object"); + assertEquals(V2_RECORDS.get(0), copy, "Record should be identical after round-trip"); } @Test - public void testSchemaEvolution() throws Exception { + void schemaEvolution() throws Exception { List buffers = new ArrayList<>(); List records = new ArrayList<>(); @@ -100,21 +101,23 @@ public void testSchemaEvolution() throws Exception { decodedUsingV2.add(v2Decoder.decode(buffer)); } - Assert.assertEquals(allAsV2, decodedUsingV2); + assertEquals(allAsV2, decodedUsingV2); } - @Test(expected = MissingSchemaException.class) - public void testCompatibleReadFailsWithoutSchema() throws Exception { - MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); - BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void compatibleReadFailsWithoutSchema() throws Exception { + assertThrows(MissingSchemaException.class, () -> { + MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); + BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer v1Buffer = v1Encoder.encode(V1_RECORDS.get(3)); + ByteBuffer v1Buffer = v1Encoder.encode(V1_RECORDS.get(3)); - v2Decoder.decode(v1Buffer); + v2Decoder.decode(v1Buffer); + }); } @Test - public void testCompatibleReadWithSchema() throws Exception { + void compatibleReadWithSchema() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); BinaryMessageDecoder v2Decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); v2Decoder.addSchema(SCHEMA_V1); @@ -123,11 
+126,11 @@ public void testCompatibleReadWithSchema() throws Exception { Record record = v2Decoder.decode(v1Buffer); - Assert.assertEquals(V2_BUILDER.set("id", 6L).set("message", "m-6").clear("data").build(), record); + assertEquals(V2_BUILDER.set("id", 6L).set("message", "m-6").clear("data").build(), record); } @Test - public void testCompatibleReadWithSchemaFromLookup() throws Exception { + void compatibleReadWithSchemaFromLookup() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); SchemaStore.Cache schemaCache = new SchemaStore.Cache(); @@ -138,11 +141,11 @@ public void testCompatibleReadWithSchemaFromLookup() throws Exception { Record record = v2Decoder.decode(v1Buffer); - Assert.assertEquals(V2_BUILDER.set("id", 4L).set("message", "m-4").clear("data").build(), record); + assertEquals(V2_BUILDER.set("id", 4L).set("message", "m-4").clear("data").build(), record); } @Test - public void testIdenticalReadWithSchemaFromLookup() throws Exception { + void identicalReadWithSchemaFromLookup() throws Exception { MessageEncoder v1Encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); SchemaStore.Cache schemaCache = new SchemaStore.Cache(); @@ -155,11 +158,11 @@ public void testIdenticalReadWithSchemaFromLookup() throws Exception { Record record = genericDecoder.decode(v1Buffer); - Assert.assertEquals(V1_RECORDS.get(2), record); + assertEquals(V1_RECORDS.get(2), record); } @Test - public void testBufferReuse() throws Exception { + void bufferReuse() throws Exception { // This test depends on the serialized version of record 1 being smaller or // the same size as record 0 so that the reused ByteArrayOutputStream won't // expand its internal buffer. 
@@ -168,81 +171,91 @@ public void testBufferReuse() throws Exception { ByteBuffer b0 = encoder.encode(V1_RECORDS.get(0)); ByteBuffer b1 = encoder.encode(V1_RECORDS.get(1)); - Assert.assertEquals(b0.array(), b1.array()); + assertEquals(b0.array(), b1.array()); MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V1); - Assert.assertEquals("Buffer was reused, decode(b0) should be record 1", V1_RECORDS.get(1), decoder.decode(b0)); + assertEquals(V1_RECORDS.get(1), decoder.decode(b0), "Buffer was reused, decode(b0) should be record 1"); } @Test - public void testBufferCopy() throws Exception { + void bufferCopy() throws Exception { MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V1); ByteBuffer b0 = encoder.encode(V1_RECORDS.get(0)); ByteBuffer b1 = encoder.encode(V1_RECORDS.get(1)); - Assert.assertNotEquals(b0.array(), b1.array()); + assertNotEquals(b0.array(), b1.array()); MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V1); // bytes are not changed by reusing the encoder - Assert.assertEquals("Buffer was copied, decode(b0) should be record 0", V1_RECORDS.get(0), decoder.decode(b0)); + assertEquals(V1_RECORDS.get(0), decoder.decode(b0), "Buffer was copied, decode(b0) should be record 0"); } - @Test(expected = AvroRuntimeException.class) - public void testByteBufferMissingPayload() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferMissingPayload() throws Exception { + assertThrows(AvroRuntimeException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.limit(12); + 
buffer.limit(12); - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferMissingFullHeader() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferMissingFullHeader() throws Exception { + assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.limit(8); + buffer.limit(8); - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferBadMarkerByte() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferBadMarkerByte() throws Exception { + assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[0] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[0] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = BadHeaderException.class) - public void testByteBufferBadVersionByte() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferBadVersionByte() throws Exception { + 
assertThrows(BadHeaderException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[1] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[1] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } - @Test(expected = MissingSchemaException.class) - public void testByteBufferUnknownSchema() throws Exception { - MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); - MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); + @Test + void byteBufferUnknownSchema() throws Exception { + assertThrows(MissingSchemaException.class, () -> { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA_V2); + MessageDecoder decoder = new BinaryMessageDecoder<>(GenericData.get(), SCHEMA_V2); - ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); - buffer.array()[4] = 0x00; + ByteBuffer buffer = encoder.encode(V2_RECORDS.get(0)); + buffer.array()[4] = 0x00; - decoder.decode(buffer); + decoder.decode(buffer); + }); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java new file mode 100644 index 00000000000..0292b14c705 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestGenerateInteropSingleObjectEncoding.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.avro.message; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecordBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * Generates test_message.bin - a single + * object encoded Avro message. 
+ */ +public class TestGenerateInteropSingleObjectEncoding { + private static final String RESOURCES_FOLDER = System.getProperty("share.dir", "target/test-classes/share") + + "/test/data/messageV1"; + private static final File SCHEMA_FILE = new File(RESOURCES_FOLDER + "/test_schema.avsc"); + private static final File MESSAGE_FILE = new File(RESOURCES_FOLDER + "/test_message.bin"); + private static Schema SCHEMA; + private static GenericRecordBuilder BUILDER; + + @BeforeAll + public static void setup() throws IOException { + try (FileInputStream fileInputStream = new FileInputStream(SCHEMA_FILE)) { + SCHEMA = new Schema.Parser().parse(fileInputStream); + BUILDER = new GenericRecordBuilder(SCHEMA); + } + } + + @Test + void generateData() throws IOException { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA); + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build(); + ByteBuffer buffer = encoder.encode( + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build()); + new FileOutputStream(MESSAGE_FILE).write(buffer.array()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java b/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java new file mode 100644 index 00000000000..dccd3c425a4 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/message/TestInteropSingleObjectEncoding.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.avro.message; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecordBuilder; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.File; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Files; +import java.util.Arrays; + +/** + * Tests that test_message.bin is properly encoded single + * object + */ +public class TestInteropSingleObjectEncoding { + private static final String RESOURCES_FOLDER = System.getProperty("share.dir", "target/test-classes/share") + + "/test/data/messageV1"; + private static final File SCHEMA_FILE = new File(RESOURCES_FOLDER + "/test_schema.avsc"); + private static final File MESSAGE_FILE = new File(RESOURCES_FOLDER + "/test_message.bin"); + private static Schema SCHEMA; + private static GenericRecordBuilder BUILDER; + + @BeforeAll + public static void setup() throws IOException { + try (FileInputStream fileInputStream = new FileInputStream(SCHEMA_FILE)) { + SCHEMA = new Schema.Parser().parse(fileInputStream); + BUILDER = new GenericRecordBuilder(SCHEMA); + } + } + + @Test + void checkSingleObjectEncoding() throws IOException { + MessageEncoder encoder = new BinaryMessageEncoder<>(GenericData.get(), SCHEMA); + ByteBuffer buffer = encoder.encode( + BUILDER.set("id", 42L).set("name", "Bill").set("tags", Arrays.asList("dog_lover", "cat_hater")).build()); + 
byte[] fileBuffer = Files.readAllBytes(MESSAGE_FILE.toPath()); + assertArrayEquals(fileBuffer, buffer.array()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java index d0ef4312969..ae7869d1ccb 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestByteBuffer.java @@ -19,9 +19,7 @@ package org.apache.avro.reflect; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.File; @@ -41,15 +39,14 @@ import org.apache.avro.file.FileReader; import org.apache.avro.file.SeekableByteArrayInput; import org.apache.avro.io.DatumWriter; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; public class TestByteBuffer { - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); + @TempDir + public File DIR; static class X { String name = ""; @@ -58,9 +55,9 @@ static class X { File content; - @Before + @BeforeEach public void before() throws IOException { - content = new File(DIR.getRoot().getPath(), "test-content"); + content = new File(DIR.getPath(), "test-content"); try (FileOutputStream out = new FileOutputStream(content)) { for (int i = 0; i < 100000; i++) { out.write("hello world\n".getBytes(UTF_8)); @@ -69,7 +66,7 @@ public void before() throws IOException { } @Test - public void test() throws Exception { + void test() throws Exception { Schema schema = ReflectData.get().getSchema(X.class); ByteArrayOutputStream bout = new ByteArrayOutputStream(); 
writeOneXAsAvro(schema, bout); @@ -77,7 +74,7 @@ public void test() throws Exception { String expected = getmd5(content); String actual = getmd5(record.content); - assertEquals("md5 for result differed from input", expected, actual); + assertEquals(expected, actual, "md5 for result differed from input"); } private X readOneXFromAvro(Schema schema, ByteArrayOutputStream bout) throws IOException { @@ -85,9 +82,9 @@ private X readOneXFromAvro(Schema schema, ByteArrayOutputStream bout) throws IOE ReflectDatumReader datumReader = new ReflectDatumReader<>(schema); FileReader reader = DataFileReader.openReader(input, datumReader); Iterator it = reader.iterator(); - assertTrue("missing first record", it.hasNext()); + assertTrue(it.hasNext(), "missing first record"); X record = it.next(); - assertFalse("should be no more records - only wrote one out", it.hasNext()); + assertFalse(it.hasNext(), "should be no more records - only wrote one out"); return record; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java index 70cb7b65aa3..6b031fb2186 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestNonStringMapKeys.java @@ -18,10 +18,7 @@ package org.apache.avro.reflect; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -45,7 +42,7 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test serialization and de-serialization of non-string 
map-keys @@ -53,7 +50,7 @@ public class TestNonStringMapKeys { @Test - public void testNonStringMapKeys() throws Exception { + void nonStringMapKeys() throws Exception { Company entityObj1 = buildCompany(); Company entityObj2 = buildCompany(); @@ -65,7 +62,7 @@ public void testNonStringMapKeys() throws Exception { GenericRecord record = records.get(0); Object employees = record.get("employees"); - assertTrue("Unable to read 'employees' map", employees instanceof GenericArray); + assertTrue(employees instanceof GenericArray, "Unable to read 'employees' map"); GenericArray arrayEmployees = ((GenericArray) employees); Object employeeRecord = arrayEmployees.get(0); assertTrue(employeeRecord instanceof GenericRecord); @@ -90,13 +87,13 @@ public void testNonStringMapKeys() throws Exception { } byte[] jsonBytes = testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record, jsonRecord); + assertEquals(record, jsonRecord, "JSON decoder output not same as Binary Decoder"); } @Test - public void testNonStringMapKeysInNestedMaps() throws Exception { + void nonStringMapKeysInNestedMaps() throws Exception { Company2 entityObj1 = buildCompany2(); @@ -107,7 +104,7 @@ public void testNonStringMapKeysInNestedMaps() throws Exception { GenericRecord record = records.get(0); Object employees = record.get("employees"); - assertTrue("Unable to read 'employees' map", employees instanceof GenericArray); + assertTrue(employees instanceof GenericArray, "Unable to read 'employees' map"); GenericArray employeesMapArray = ((GenericArray) employees); Object employeeMapElement = employeesMapArray.get(0); @@ -146,13 +143,13 @@ public void testNonStringMapKeysInNestedMaps() throws Exception { } byte[] jsonBytes = 
testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record, jsonRecord); + assertEquals(record, jsonRecord, "JSON decoder output not same as Binary Decoder"); } @Test - public void testRecordNameInvariance() throws Exception { + void recordNameInvariance() throws Exception { SameMapSignature entityObj1 = buildSameMapSignature(); @@ -163,7 +160,7 @@ public void testRecordNameInvariance() throws Exception { GenericRecord record = records.get(0); Object map1obj = record.get("map1"); - assertTrue("Unable to read map1", map1obj instanceof GenericArray); + assertTrue(map1obj instanceof GenericArray, "Unable to read map1"); GenericArray map1array = ((GenericArray) map1obj); Object map1element = map1array.get(0); @@ -207,10 +204,10 @@ public void testRecordNameInvariance() throws Exception { assertEquals(map1schema, map4schema); byte[] jsonBytes = testJsonEncoder(testType, entityObj1); - assertNotNull("Unable to serialize using jsonEncoder", jsonBytes); + assertNotNull(jsonBytes, "Unable to serialize using jsonEncoder"); GenericRecord jsonRecord = testJsonDecoder(testType, jsonBytes, entityObj1); - assertEquals("JSON decoder output not same as Binary Decoder", record.get("map1"), jsonRecord.get("map1")); - assertEquals("JSON decoder output not same as Binary Decoder", record.get("map2"), jsonRecord.get("map2")); + assertEquals(record.get("map1"), jsonRecord.get("map1"), "JSON decoder output not same as Binary Decoder"); + assertEquals(record.get("map2"), jsonRecord.get("map2"), "JSON decoder output not same as Binary Decoder"); } /** @@ -223,7 +220,7 @@ public byte[] testSerialization(String testType, T... 
entityObjs) throws Exc ReflectData rdata = ReflectData.AllowNull.get(); Schema schema = rdata.getSchema(entityObj1.getClass()); - assertNotNull("Unable to get schema for " + testType, schema); + assertNotNull(schema, "Unable to get schema for " + testType); log(schema.toString(true)); ReflectDatumWriter datumWriter = new ReflectDatumWriter(entityObj1.getClass(), rdata); @@ -252,7 +249,7 @@ private List testGenericDatumRead(String testType, byte[] byt try (DataFileReader fileReader = new DataFileReader<>(avroInputStream, datumReader)) { Schema schema = fileReader.getSchema(); - assertNotNull("Unable to get schema for " + testType, schema); + assertNotNull(schema, "Unable to get schema for " + testType); GenericRecord record = null; while (fileReader.hasNext()) { try { diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index e3065d59b87..50121b5a0dd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -20,9 +20,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -35,10 +33,12 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Random; import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTypeException; import org.apache.avro.JsonProperties; +import org.apache.avro.NameValidator; import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -50,7 +50,10 @@ import 
org.apache.avro.io.EncoderFactory; import org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord; import org.apache.avro.util.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; public class TestReflect { @@ -58,86 +61,86 @@ public class TestReflect { // test primitive type inference @Test - public void testVoid() { + void testVoid() { check(Void.TYPE, "\"null\""); check(Void.class, "\"null\""); } @Test - public void testBoolean() { + void testBoolean() { check(Boolean.TYPE, "\"boolean\""); check(Boolean.class, "\"boolean\""); } @Test - public void testInt() { + void testInt() { check(Integer.TYPE, "\"int\""); check(Integer.class, "\"int\""); } @Test - public void testByte() { + void testByte() { check(Byte.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Byte\"}"); check(Byte.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Byte\"}"); } @Test - public void testShort() { + void testShort() { check(Short.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Short\"}"); check(Short.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Short\"}"); } @Test - public void testChar() { + void testChar() { check(Character.TYPE, "{\"type\":\"int\",\"java-class\":\"java.lang.Character\"}"); check(Character.class, "{\"type\":\"int\",\"java-class\":\"java.lang.Character\"}"); } @Test - public void testLong() { + void testLong() { check(Long.TYPE, "\"long\""); check(Long.class, "\"long\""); } @Test - public void testFloat() { + void testFloat() { check(Float.TYPE, "\"float\""); check(Float.class, "\"float\""); } @Test - public void testDouble() { + void testDouble() { check(Double.TYPE, "\"double\""); check(Double.class, "\"double\""); } @Test - public void testString() { + void string() { check("Foo", "\"string\""); } @Test - public void testBytes() { + void bytes() { 
check(ByteBuffer.allocate(0), "\"bytes\""); check(new byte[0], "{\"type\":\"bytes\",\"java-class\":\"[B\"}"); } @Test - public void testUnionWithCollection() { + void unionWithCollection() { Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"array\",\"items\":\"float\"}]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, new ArrayList())); } @Test - public void testUnionWithMap() { + void unionWithMap() { Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, new HashMap())); } @Test - public void testUnionWithMapWithUtf8Keys() { + void unionWithMapWithUtf8Keys() { Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"map\",\"values\":\"float\"}]"); GenericData data = ReflectData.get(); HashMap map = new HashMap<>(); @@ -146,7 +149,7 @@ public void testUnionWithMapWithUtf8Keys() { } @Test - public void testUnionWithFixed() { + void unionWithFixed() { Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"fixed\",\"name\":\"f\",\"size\":1}]"); Schema f = new Schema.Parser().parse("{\"type\":\"fixed\",\"name\":\"f\",\"size\":1}"); GenericData data = ReflectData.get(); @@ -154,7 +157,7 @@ public void testUnionWithFixed() { } @Test - public void testUnionWithEnum() { + void unionWithEnum() { Schema s = new Schema.Parser().parse("[\"null\", {\"type\":\"enum\",\"name\":\"E\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"symbols\":[\"A\",\"B\"]}]"); GenericData data = ReflectData.get(); @@ -162,7 +165,7 @@ public void testUnionWithEnum() { } @Test - public void testUnionWithBytes() { + void unionWithBytes() { Schema s = new Schema.Parser().parse("[\"null\", \"bytes\"]"); GenericData data = ReflectData.get(); assertEquals(1, data.resolveUnion(s, ByteBuffer.wrap(new byte[] { 1 }))); @@ -190,24 +193,24 @@ public boolean equals(Object o) { } @Test - public void testMap() throws Exception { + void 
map() throws Exception { check(R1.class.getDeclaredField("mapField").getGenericType(), "{\"type\":\"map\",\"values\":\"string\"}"); } @Test - public void testArray() throws Exception { + void array() throws Exception { check(R1.class.getDeclaredField("arrayField").getGenericType(), "{\"type\":\"array\",\"items\":\"string\",\"java-class\":\"[Ljava.lang.String;\"}"); } @Test - public void testList() throws Exception { + void list() throws Exception { check(R1.class.getDeclaredField("listField").getGenericType(), "{\"type\":\"array\",\"items\":\"string\"" + ",\"java-class\":\"java.util.List\"}"); } @Test - public void testR1() throws Exception { + void r1() throws Exception { checkReadWrite(new R1()); } @@ -226,7 +229,7 @@ public boolean equals(Object o) { } @Test - public void testR2() throws Exception { + void r2() throws Exception { R2 r2 = new R2(); r2.arrayField = new String[] { "foo" }; r2.collectionField = new ArrayList<>(); @@ -248,7 +251,7 @@ public boolean equals(Object o) { } @Test - public void testR3() throws Exception { + void r3() throws Exception { R3 r3 = new R3(); r3.intArray = new int[] { 1 }; checkReadWrite(r3); @@ -275,7 +278,7 @@ public static class R5 extends R4 { } @Test - public void testR5() throws Exception { + void r5() throws Exception { R5 r5 = new R5(); r5.value = 1; r5.shorts = new short[] { 3, 255, 256, Short.MAX_VALUE, Short.MIN_VALUE }; @@ -324,7 +327,7 @@ public boolean equals(Object o) { } @Test - public void testR6() throws Exception { + void r6() throws Exception { R7 r7 = new R7(); r7.value = 1; checkReadWrite(r7, ReflectData.get().getSchema(R6.class)); @@ -352,7 +355,7 @@ public boolean equals(Object o) { } @Test - public void testR6_1() throws Exception { + void r6_1() throws Exception { R7 r7 = new R7(); r7.value = 1; checkReadWrite(r7, ReflectData.get().getSchema(R6.class)); @@ -375,7 +378,8 @@ public static interface P0 { } @Test - public void testP0() throws Exception { + @DisabledIfEnvironmentVariable(named = 
"WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p0() throws Exception { Protocol p0 = ReflectData.get().getProtocol(P0.class); Protocol.Message message = p0.getMessages().get("foo"); // check response schema is union @@ -386,7 +390,11 @@ public void testP0() throws Exception { // check request schema is union Schema request = message.getRequest(); Field field = request.getField("s"); - assertNotNull("field 's' should not be null", field); + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p0:393 field 's' should not be null ==> expected: not + // + assertNotNull(field, "field 's' should not be null"); Schema param = field.schema(); assertEquals(Schema.Type.UNION, param.getType()); assertEquals(Schema.Type.NULL, param.getTypes().get(0).getType()); @@ -419,7 +427,7 @@ public boolean equals(Object o) { } @Test - public void testR10() throws Exception { + void r10() throws Exception { Schema r10Schema = ReflectData.get().getSchema(R10.class); assertEquals(Schema.Type.STRING, r10Schema.getType()); assertEquals(R10.class.getName(), r10Schema.getProp("java-class")); @@ -443,7 +451,7 @@ public boolean equals(Object o) { } @Test - public void testR11() throws Exception { + void r11() throws Exception { Schema r11Record = ReflectData.get().getSchema(R11.class); assertEquals(Schema.Type.RECORD, r11Record.getType()); Field r11Field = r11Record.getField("text"); @@ -466,7 +474,8 @@ public static interface P1 { } @Test - public void testP1() throws Exception { + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p1() throws Exception { Protocol p1 = ReflectData.get().getProtocol(P1.class); Protocol.Message message = p1.getMessages().get("foo"); // check response schema is union @@ -477,7 +486,11 @@ public void testP1() throws Exception { // check request schema is union 
Schema request = message.getRequest(); Field field = request.getField("s"); - assertNotNull("field 's' should not be null", field); + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p1:484 field 's' should not be null ==> expected: not + // + assertNotNull(field, "field 's' should not be null"); Schema param = field.schema(); assertEquals(Schema.Type.UNION, param.getType()); assertEquals(Schema.Type.NULL, param.getTypes().get(0).getType()); @@ -497,7 +510,7 @@ public static class R12 { // fields } @Test - public void testR12() throws Exception { + void r12() throws Exception { Schema s = ReflectData.get().getSchema(R12.class); assertEquals(Schema.Type.INT, s.getField("x").schema().getType()); assertEquals(new Schema.Parser().parse("{\"type\":\"array\",\"items\":[\"null\",\"string\"]}"), @@ -505,11 +518,11 @@ public void testR12() throws Exception { } @AvroSchema("\"null\"") // record - public class R13 { + public static class R13 { } @Test - public void testR13() throws Exception { + void r13() throws Exception { Schema s = ReflectData.get().getSchema(R13.class); assertEquals(Schema.Type.NULL, s.getType()); } @@ -520,7 +533,11 @@ public interface P4 { } @Test - public void testP4() throws Exception { + // FIXME: Figure out why this test fails under the invoker plugin and succeeds + // while normal testing + // [ERROR] TestReflect.p4:532 NullPointer + @DisabledIfEnvironmentVariable(named = "WithinInvokerPlugin", matches = "true", disabledReason = "Doesn't work, no clue why") + void p4() throws Exception { Protocol p = ReflectData.get().getProtocol(P4.class); Protocol.Message message = p.getMessages().get("foo"); assertEquals(Schema.Type.INT, message.getResponse().getType()); @@ -537,13 +554,41 @@ public static interface P2 { void error() throws E1; } + private static class NullableDefaultTest { + @Nullable + @AvroDefault("1") + int foo; + } + + @Test + public void 
testAvroNullableDefault() { + check(NullableDefaultTest.class, + "{\"type\":\"record\",\"name\":\"NullableDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":1}]}"); + } + + private static class UnionDefaultTest { + @Union({ Integer.class, String.class }) + @AvroDefault("1") + Object foo; + } + @Test - public void testP2() throws Exception { + public void testAvroUnionDefault() { + check(UnionDefaultTest.class, + "{\"type\":\"record\",\"name\":\"UnionDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"int\",\"string\"],\"default\":1}]}"); + } + + @Test + void p2() throws Exception { Schema e1 = ReflectData.get().getSchema(E1.class); assertEquals(Schema.Type.RECORD, e1.getType()); assertTrue(e1.isError()); Field message = e1.getField("detailMessage"); - assertNotNull("field 'detailMessage' should not be null", message); + assertNotNull(message, "field 'detailMessage' should not be null"); Schema messageSchema = message.schema(); assertEquals(Schema.Type.UNION, messageSchema.getType()); assertEquals(Schema.Type.NULL, messageSchema.getTypes().get(0).getType()); @@ -559,7 +604,7 @@ public void testP2() throws Exception { } @Test - public void testNoPackage() throws Exception { + void noPackage() throws Exception { Class noPackage = Class.forName("NoPackage"); Schema s = ReflectData.get().getSchema(noPackage); assertEquals(noPackage.getName(), ReflectData.getClassName(s)); @@ -584,16 +629,16 @@ void checkReadWrite(Object object, Schema s) throws Exception { Object val = ReflectData.get().getField(object, f.name(), f.pos()); ReflectData.get().setField(copy, f.name(), f.pos(), val); } - assertEquals("setField", object, copy); + assertEquals(object, copy, "setField"); } } public static enum E { A, B - }; + } @Test - public void testEnum() throws Exception { + void testEnum() throws Exception { 
check(E.class, "{\"type\":\"enum\",\"name\":\"E\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"symbols\":[\"A\",\"B\"]}"); } @@ -604,7 +649,7 @@ public static class R { } @Test - public void testRecord() throws Exception { + void record() throws Exception { check(R.class, "{\"type\":\"record\",\"name\":\"R\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"int\"}," + "{\"name\":\"b\",\"type\":\"long\"}]}"); @@ -616,7 +661,7 @@ public static class RAvroIgnore { } @Test - public void testAnnotationAvroIgnore() throws Exception { + void annotationAvroIgnore() throws Exception { check(RAvroIgnore.class, "{\"type\":\"record\",\"name\":\"RAvroIgnore\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); } @@ -628,7 +673,7 @@ public static class RAvroMeta { } @Test - public void testAnnotationAvroMeta() throws Exception { + void annotationAvroMeta() throws Exception { check(RAvroMeta.class, "{\"type\":\"record\",\"name\":\"RAvroMeta\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"int\",\"K\":\"V\"}]" @@ -644,7 +689,7 @@ public static class RAvroMultiMeta { } @Test - public void testAnnotationMultiAvroMeta() { + void annotationMultiAvroMeta() { check(RAvroMultiMeta.class, "{\"type\":\"record\",\"name\":\"RAvroMultiMeta\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" @@ -657,9 +702,11 @@ public static class RAvroDuplicateFieldMeta { int a; } - @Test(expected = AvroTypeException.class) - public void testAnnotationDuplicateFieldAvroMeta() { - ReflectData.get().getSchema(RAvroDuplicateFieldMeta.class); + @Test + void annotationDuplicateFieldAvroMeta() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(RAvroDuplicateFieldMeta.class); + }); } @AvroMeta(key = "K", value = "V") @@ -668,9 +715,11 @@ public static class RAvroDuplicateTypeMeta { int a; } - @Test(expected = 
AvroTypeException.class) - public void testAnnotationDuplicateTypeAvroMeta() { - ReflectData.get().getSchema(RAvroDuplicateTypeMeta.class); + @Test + void annotationDuplicateTypeAvroMeta() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(RAvroDuplicateTypeMeta.class); + }); } public static class RAvroName { @@ -679,7 +728,7 @@ public static class RAvroName { } @Test - public void testAnnotationAvroName() throws Exception { + void annotationAvroName() throws Exception { check(RAvroName.class, "{\"type\":\"record\",\"name\":\"RAvroName\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}]}"); } @@ -690,12 +739,14 @@ public static class RAvroNameCollide { int b; } - @Test(expected = Exception.class) - public void testAnnotationAvroNameCollide() throws Exception { - check(RAvroNameCollide.class, - "{\"type\":\"record\",\"name\":\"RAvroNameCollide\",\"namespace\":" - + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}," - + "{\"name\":\"b\",\"type\":\"int\"}]}"); + @Test + void annotationAvroNameCollide() throws Exception { + assertThrows(Exception.class, () -> { + check(RAvroNameCollide.class, + "{\"type\":\"record\",\"name\":\"RAvroNameCollide\",\"namespace\":" + + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"b\",\"type\":\"int\"}," + + "{\"name\":\"b\",\"type\":\"int\"}]}"); + }); } public static class RAvroStringableField { @@ -704,7 +755,7 @@ public static class RAvroStringableField { } @Test - public void testAnnotationAvroStringableFields() throws Exception { + void annotationAvroStringableFields() throws Exception { check(RAvroStringableField.class, "{\"type\":\"record\",\"name\":\"RAvroStringableField\",\"namespace\":" + "\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + "{\"name\":\"a\",\"type\":\"string\"}]}"); } @@ -718,7 +769,7 @@ private void check(java.lang.reflect.Type type, String schemaJson) 
{ } @Test - public void testRecordIO() throws IOException { + void recordIO() throws IOException { Schema schm = ReflectData.get().getSchema(SampleRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -792,7 +843,7 @@ public static class multipleAnnotationRecord { } @Test - public void testMultipleAnnotations() throws IOException { + void multipleAnnotations() throws IOException { Schema schm = ReflectData.get().getSchema(multipleAnnotationRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -813,21 +864,21 @@ public void testMultipleAnnotations() throws IOException { ReflectDatumReader reader = new ReflectDatumReader<>(schm); multipleAnnotationRecord decoded = reader.read(new multipleAnnotationRecord(), DecoderFactory.get().binaryDecoder(out.toByteArray(), null)); - assertTrue(decoded.i1 == null); - assertTrue(decoded.i2 == null); - assertTrue(decoded.i3 == null); - assertTrue(decoded.i4 == null); - assertTrue(decoded.i5 == 5); - assertTrue(decoded.i6 == 6); - assertTrue(decoded.i7.getTime() == 7); - assertTrue(decoded.i8 == 8); - assertTrue(decoded.i9.getTime() == 9); - assertTrue(decoded.i10.getTime() == 10); - assertTrue(decoded.i11.getTime() == 11); + assertNull(decoded.i1); + assertNull(decoded.i2); + assertNull(decoded.i3); + assertNull(decoded.i4); + assertEquals(decoded.i5, 5); + assertEquals(decoded.i6, 6); + assertEquals(decoded.i7.getTime(), 7); + assertEquals(decoded.i8, 8); + assertEquals(decoded.i9.getTime(), 9); + assertEquals(decoded.i10.getTime(), 10); + assertEquals(decoded.i11.getTime(), 11); } @Test - public void testAvroEncodeInducing() throws IOException { + void avroEncodeInducing() throws IOException { Schema schm = ReflectData.get().getSchema(AvroEncRecord.class); assertEquals(schm.toString(), "{\"type\":\"record\",\"name\":\"AvroEncRecord\",\"namespace" @@ -836,7 +887,7 @@ 
public void testAvroEncodeInducing() throws IOException { } @Test - public void testAvroEncodeIO() throws IOException { + void avroEncodeIO() throws IOException { Schema schm = ReflectData.get().getSchema(AvroEncRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -850,7 +901,7 @@ public void testAvroEncodeIO() throws IOException { } @Test - public void testRecordWithNullIO() throws IOException { + void recordWithNullIO() throws IOException { ReflectData reflectData = ReflectData.AllowNull.get(); Schema schm = reflectData.getSchema(AnotherSampleRecord.class); ReflectDatumWriter writer = new ReflectDatumWriter<>(schm); @@ -871,27 +922,6 @@ public void testRecordWithNullIO() throws IOException { assertEquals(b, decoded); } - @Test - public void testDisableUnsafe() throws Exception { - String saved = System.getProperty("avro.disable.unsafe"); - try { - System.setProperty("avro.disable.unsafe", "true"); - ReflectData.ACCESSOR_CACHE.remove(multipleAnnotationRecord.class); - ReflectData.ACCESSOR_CACHE.remove(AnotherSampleRecord.class); - ReflectionUtil.resetFieldAccess(); - testMultipleAnnotations(); - testRecordWithNullIO(); - } finally { - if (saved == null) - System.clearProperty("avro.disable.unsafe"); - else - System.setProperty("avro.disable.unsafe", saved); - ReflectData.ACCESSOR_CACHE.remove(multipleAnnotationRecord.class); - ReflectData.ACCESSOR_CACHE.remove(AnotherSampleRecord.class); - ReflectionUtil.resetFieldAccess(); - } - } - public static class SampleRecord { public int x = 1; private int y = 2; @@ -969,10 +999,11 @@ public static interface C { } @Test - public void testForwardReference() { + void forwardReference() { ReflectData data = ReflectData.get(); Protocol reflected = data.getProtocol(C.class); - Protocol reparsed = Protocol.parse(reflected.toString()); + String ref = reflected.toString(); + Protocol reparsed = Protocol.parse(ref); assertEquals(reflected, reparsed); 
assert (reparsed.getTypes().contains(data.getSchema(A.class))); assert (reparsed.getTypes().contains(data.getSchema(B1.class))); @@ -986,18 +1017,20 @@ public static interface P3 { void m1(int x); } - @Test(expected = AvroTypeException.class) - public void testOverloadedMethod() { - ReflectData.get().getProtocol(P3.class); + @Test + void overloadedMethod() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getProtocol(P3.class); + }); } @Test - public void testNoPackageSchema() throws Exception { + void noPackageSchema() throws Exception { ReflectData.get().getSchema(Class.forName("NoPackage")); } @Test - public void testNoPackageProtocol() throws Exception { + void noPackageProtocol() throws Exception { ReflectData.get().getProtocol(Class.forName("NoPackage")); } @@ -1005,9 +1038,9 @@ public static class Y { int i; } - @Test /** Test nesting of reflect data within generic. */ - public void testReflectWithinGeneric() throws Exception { + @Test + void reflectWithinGeneric() throws Exception { ReflectData data = ReflectData.get(); // define a record with a field that's a specific Y Schema schema = Schema.createRecord("Foo", "", "x.y.z", false); @@ -1026,12 +1059,12 @@ public void testReflectWithinGeneric() throws Exception { } @Test - public void testPrimitiveArray() throws Exception { + void primitiveArray() throws Exception { testPrimitiveArrays(false); } @Test - public void testPrimitiveArrayBlocking() throws Exception { + void primitiveArrayBlocking() throws Exception { testPrimitiveArrays(true); } @@ -1080,7 +1113,7 @@ private Object randomFor(Class c, Random r) { /** Test union of null and an array. */ @Test - public void testNullArray() throws Exception { + void nullArray() throws Exception { String json = "[{\"type\":\"array\", \"items\": \"long\"}, \"null\"]"; Schema schema = new Schema.Parser().parse(json); checkBinary(schema, null); @@ -1088,7 +1121,7 @@ public void testNullArray() throws Exception { /** Test stringable classes. 
*/ @Test - public void testStringables() throws Exception { + void stringables() throws Exception { checkStringable(java.math.BigDecimal.class, "10"); checkStringable(java.math.BigInteger.class, "20"); checkStringable(java.net.URI.class, "foo://bar:9000/baz"); @@ -1113,7 +1146,7 @@ public static class M1 { /** Test Map with stringable key classes. */ @Test - public void testStringableMapKeys() throws Exception { + void stringableMapKeys() throws Exception { M1 record = new M1(); record.integerKeyMap = new HashMap<>(1); record.integerKeyMap.put(10, "foo"); @@ -1137,7 +1170,7 @@ public static class NullableStringable { } @Test - public void testNullableStringableField() throws Exception { + void nullableStringableField() throws Exception { NullableStringable datum = new NullableStringable(); datum.number = java.math.BigDecimal.TEN; @@ -1174,7 +1207,24 @@ public static void checkBinary(Schema schema, Object datum) throws IOException { /** Test that the error message contains the name of the class. */ @Test - public void testReflectFieldError() throws Exception { + @EnabledForJreRange(min = JRE.JAVA_8, max = JRE.JAVA_11, disabledReason = "Java 11 announced: All illegal access operations will be denied in a future release") + // Java 11: + // - WARNING: An illegal reflective access operation has occurred + // - WARNING: Illegal reflective access by + // org.apache.avro.reflect.FieldAccessReflect$ReflectionBasedAccessor to field + // java.lang.String.coder + // - WARNING: Please consider reporting this to the maintainers of + // org.apache.avro.reflect.FieldAccessReflect$ReflectionBasedAccessor + // - WARNING: Use --illegal-access=warn to enable warnings of further illegal + // reflective access operations + // - WARNING: All illegal access operations will be denied in a future release + // Java 17: + // - [ERROR] org.apache.avro.reflect.TestReflect.reflectFieldError -- Time + // elapsed: 0.015 s <<< ERROR! 
+ // - java.lang.reflect.InaccessibleObjectException: Unable to make field private + // final byte java.lang.String.coder accessible: module java.base does not + // "opens java.lang" to unnamed module @5a6d67c3 + void reflectFieldError() throws Exception { Object datum = ""; try { ReflectData.get().getField(datum, "notAFieldOfString", 0); @@ -1196,7 +1246,7 @@ private static class AliasC { } @Test - public void testAvroAliasOnClass() { + void avroAliasOnClass() { check(AliasA.class, "{\"type\":\"record\",\"name\":\"AliasA\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[],\"aliases\":[\"b.a\"]}"); check(AliasB.class, @@ -1212,7 +1262,7 @@ private static class MultipleAliasRecord { } @Test - public void testMultipleAliasAnnotationsOnClass() { + void multipleAliasAnnotationsOnClass() { check(MultipleAliasRecord.class, "{\"type\":\"record\",\"name\":\"MultipleAliasRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[],\"aliases\":[\"space1.alias1\",\"space2.alias2\"]}"); @@ -1222,19 +1272,19 @@ private static class Z { } @Test - public void testDollarTerminatedNamespaceCompatibility() { + void dollarTerminatedNamespaceCompatibility() { ReflectData data = ReflectData.get(); - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}"); assertEquals(data.getSchema(data.getClass(s)).toString(), "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); } @Test - public void testDollarTerminatedNestedStaticClassNamespaceCompatibility() { + void dollarTerminatedNestedStaticClassNamespaceCompatibility() { ReflectData data = ReflectData.get(); // Older versions of Avro generated this namespace on nested records. 
- Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}"); assertThat(data.getSchema(data.getClass(s)).getFullName(), is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord")); @@ -1257,7 +1307,7 @@ private static class ClassWithAliasAndNamespaceOnField { } @Test - public void testAvroAliasOnField() { + void avroAliasOnField() { Schema expectedSchema = SchemaBuilder.record(ClassWithAliasOnField.class.getSimpleName()) .namespace("org.apache.avro.reflect.TestReflect").fields().name("primitiveField").aliases("aliasName") @@ -1266,19 +1316,35 @@ public void testAvroAliasOnField() { check(ClassWithAliasOnField.class, expectedSchema.toString()); } - @Test(expected = AvroRuntimeException.class) - public void namespaceDefinitionOnFieldAliasMustThrowException() { - ReflectData.get().getSchema(ClassWithAliasAndNamespaceOnField.class); + @Test + void namespaceDefinitionOnFieldAliasMustThrowException() { + assertThrows(AvroRuntimeException.class, () -> { + ReflectData.get().getSchema(ClassWithAliasAndNamespaceOnField.class); + }); } @Test public void testMultipleFieldAliases() { + Field field = new Field("primitiveField", Schema.create(Schema.Type.INT)); + field.addAlias("alias1"); + field.addAlias("alias2"); + Schema avroMultiMeta = Schema.createRecord("ClassWithMultipleAliasesOnField", null, + "org.apache.avro.reflect.TestReflect", false, Arrays.asList(field)); - Schema expectedSchema = SchemaBuilder.record(ClassWithMultipleAliasesOnField.class.getSimpleName()) - .namespace("org.apache.avro.reflect.TestReflect").fields().name("primitiveField").aliases("alias1", "alias2") - .type(Schema.create(org.apache.avro.Schema.Type.INT)).noDefault().endRecord(); + Schema schema = ReflectData.get().getSchema(ClassWithMultipleAliasesOnField.class); + 
assertEquals(avroMultiMeta, schema); + } - check(ClassWithMultipleAliasesOnField.class, expectedSchema.toString()); + private static class OptionalTest { + Optional foo; + } + + @Test + public void testOptional() { + check(OptionalTest.class, + "{\"type\":\"record\",\"name\":\"OptionalTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":null}]}"); } private static class DefaultTest { @@ -1287,7 +1353,7 @@ private static class DefaultTest { } @Test - public void testAvroDefault() { + void avroDefault() { check(DefaultTest.class, "{\"type\":\"record\",\"name\":\"DefaultTest\"," + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" @@ -1312,12 +1378,12 @@ public boolean equals(Object obj) { } @Test - public void testNullableByteArrayNotNullValue() throws Exception { + void nullableByteArrayNotNullValue() throws Exception { checkReadWrite(new NullableBytesTest("foo".getBytes(UTF_8))); } @Test - public void testNullableByteArrayNullValue() throws Exception { + void nullableByteArrayNullValue() throws Exception { checkReadWrite(new NullableBytesTest()); } @@ -1338,7 +1404,7 @@ private static class DocTest { } @Test - public void testAvroDoc() { + void avroDoc() { check(DocTest.class, "{\"type\":\"record\",\"name\":\"DocTest\",\"namespace\":\"org.apache.avro.reflect.TestReflect\"," + "\"doc\":\"DocTest class docs\"," + "\"fields\":[" diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java index acbd4fb96b1..5c138857739 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectAllowNulls.java @@ -17,11 +17,12 @@ */ package org.apache.avro.reflect; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.util.Arrays; import 
org.apache.avro.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestReflectAllowNulls { @@ -66,56 +67,56 @@ private static class AllowNullWithNullable { } @Test - public void testPrimitives() { + void primitives() { // AllowNull only makes fields nullable, so testing must use a base record Schema primitives = ReflectData.AllowNull.get().getSchema(Primitives.class); - Assert.assertEquals(requiredSchema(boolean.class), primitives.getField("aBoolean").schema()); - Assert.assertEquals(requiredSchema(byte.class), primitives.getField("aByte").schema()); - Assert.assertEquals(requiredSchema(short.class), primitives.getField("aShort").schema()); - Assert.assertEquals(requiredSchema(int.class), primitives.getField("anInt").schema()); - Assert.assertEquals(requiredSchema(long.class), primitives.getField("aLong").schema()); - Assert.assertEquals(requiredSchema(float.class), primitives.getField("aFloat").schema()); - Assert.assertEquals(requiredSchema(double.class), primitives.getField("aDouble").schema()); + assertEquals(requiredSchema(boolean.class), primitives.getField("aBoolean").schema()); + assertEquals(requiredSchema(byte.class), primitives.getField("aByte").schema()); + assertEquals(requiredSchema(short.class), primitives.getField("aShort").schema()); + assertEquals(requiredSchema(int.class), primitives.getField("anInt").schema()); + assertEquals(requiredSchema(long.class), primitives.getField("aLong").schema()); + assertEquals(requiredSchema(float.class), primitives.getField("aFloat").schema()); + assertEquals(requiredSchema(double.class), primitives.getField("aDouble").schema()); } @Test - public void testWrappers() { + void wrappers() { // AllowNull only makes fields nullable, so testing must use a base record Schema wrappers = ReflectData.AllowNull.get().getSchema(Wrappers.class); - Assert.assertEquals(nullableSchema(boolean.class), wrappers.getField("aBoolean").schema()); - 
Assert.assertEquals(nullableSchema(byte.class), wrappers.getField("aByte").schema()); - Assert.assertEquals(nullableSchema(short.class), wrappers.getField("aShort").schema()); - Assert.assertEquals(nullableSchema(int.class), wrappers.getField("anInt").schema()); - Assert.assertEquals(nullableSchema(long.class), wrappers.getField("aLong").schema()); - Assert.assertEquals(nullableSchema(float.class), wrappers.getField("aFloat").schema()); - Assert.assertEquals(nullableSchema(double.class), wrappers.getField("aDouble").schema()); - Assert.assertEquals(nullableSchema(Primitives.class), wrappers.getField("anObject").schema()); + assertEquals(nullableSchema(boolean.class), wrappers.getField("aBoolean").schema()); + assertEquals(nullableSchema(byte.class), wrappers.getField("aByte").schema()); + assertEquals(nullableSchema(short.class), wrappers.getField("aShort").schema()); + assertEquals(nullableSchema(int.class), wrappers.getField("anInt").schema()); + assertEquals(nullableSchema(long.class), wrappers.getField("aLong").schema()); + assertEquals(nullableSchema(float.class), wrappers.getField("aFloat").schema()); + assertEquals(nullableSchema(double.class), wrappers.getField("aDouble").schema()); + assertEquals(nullableSchema(Primitives.class), wrappers.getField("anObject").schema()); } @Test - public void testAllowNullWithNullableAnnotation() { + void allowNullWithNullableAnnotation() { Schema withNullable = ReflectData.AllowNull.get().getSchema(AllowNullWithNullable.class); - Assert.assertEquals("Should produce a nullable double", nullableSchema(double.class), - withNullable.getField("aDouble").schema()); + assertEquals(nullableSchema(double.class), withNullable.getField("aDouble").schema(), + "Should produce a nullable double"); Schema nullableDoubleOrLong = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.LONG))); - Assert.assertEquals("Should add null to a non-null union", 
nullableDoubleOrLong, - withNullable.getField("doubleOrLong").schema()); + assertEquals(nullableDoubleOrLong, withNullable.getField("doubleOrLong").schema(), + "Should add null to a non-null union"); - Assert.assertEquals("Should add null to a non-null union", nullableDoubleOrLong, - withNullable.getField("doubleOrLongOrNull1").schema()); + assertEquals(nullableDoubleOrLong, withNullable.getField("doubleOrLongOrNull1").schema(), + "Should add null to a non-null union"); Schema doubleOrLongOrNull = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.DOUBLE), Schema.create(Schema.Type.LONG), Schema.create(Schema.Type.NULL))); - Assert.assertEquals("Should add null to a non-null union", doubleOrLongOrNull, - withNullable.getField("doubleOrLongOrNull2").schema()); + assertEquals(doubleOrLongOrNull, withNullable.getField("doubleOrLongOrNull2").schema(), + "Should add null to a non-null union"); - Assert.assertEquals("Should add null to a non-null union", doubleOrLongOrNull, - withNullable.getField("doubleOrLongOrNull3").schema()); + assertEquals(doubleOrLongOrNull, withNullable.getField("doubleOrLongOrNull3").schema(), + "Should add null to a non-null union"); } private Schema requiredSchema(Class type) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java index 59009883663..46bfd7b74a1 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectData.java @@ -22,20 +22,27 @@ import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.util.internal.JacksonUtils; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledForJreRange; +import org.junit.jupiter.api.condition.JRE; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import 
static org.hamcrest.Matchers.*; -import static org.junit.Assert.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; public class TestReflectData { @Test @SuppressWarnings("unchecked") - public void testWeakSchemaCaching() throws Exception { + void weakSchemaCaching() throws Exception { int numSchemas = 1000000; for (int i = 0; i < numSchemas; i++) { // Create schema @@ -54,7 +61,7 @@ public void testWeakSchemaCaching() throws Exception { } @Test - public void testGenericProtocol() { + void genericProtocol() { Protocol protocol = ReflectData.get().getProtocol(FooBarProtocol.class); Schema recordSchema = ReflectData.get().getSchema(FooBarReflectiveRecord.class); @@ -107,7 +114,7 @@ static class Meta { } @Test - public void testCreateSchemaDefaultValue() { + void createSchemaDefaultValue() { Meta meta = new Meta(); validateSchema(meta); @@ -127,7 +134,7 @@ private void validateSchema(Meta meta) { Map testCases = JacksonUtils.objectToMap(meta); for (Schema.Field field : cloneSchema.getFields()) { - assertEquals("Invalid field " + field.name(), field.defaultVal(), testCases.get(field.name())); + assertEquals(field.defaultVal(), testCases.get(field.name()), "Invalid field " + field.name()); } } @@ -135,13 +142,17 @@ public class Definition { public Map tokens; } - @Test(expected = AvroTypeException.class) - public void testNonStaticInnerClasses() { - ReflectData.get().getSchema(Definition.class); + @Test + // FIXME: Why does this test fail under JDK 21? 
+ @EnabledForJreRange(min = JRE.JAVA_8, max = JRE.JAVA_17, disabledReason = "Doesn't work under JRE 21, no clue why") + void nonStaticInnerClasses() { + assertThrows(AvroTypeException.class, () -> { + ReflectData.get().getSchema(Definition.class); + }); } @Test - public void testStaticInnerClasses() { + void staticInnerClasses() { ReflectData.get().getSchema(Meta.class); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java index e431f8f5599..52b40b87b36 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumReader.java @@ -18,18 +18,24 @@ package org.apache.avro.reflect; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Arrays; +import java.util.HashSet; +import java.util.HashMap; import java.util.List; +import java.util.Set; +import java.util.Map; +import java.util.Optional; +import org.apache.avro.Schema; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestReflectDatumReader { @@ -44,7 +50,7 @@ private static byte[] serializeWithReflectDatumWriter(T toSerialize, Class relatedIds = new HashSet<>(); + relatedIds.add(1); + relatedIds.add(2); + relatedIds.add(3); + pojoWithSet.setRelatedIds(relatedIds); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithSet, PojoWithSet.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithSet.class); + + PojoWithSet deserialized = new PojoWithSet(); + 
reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithSet, deserialized); + + } + + @Test + public void testRead_PojoWithMap() throws IOException { + PojoWithMap pojoWithMap = new PojoWithMap(); + pojoWithMap.setId(42); + Map relatedIds = new HashMap<>(); + relatedIds.put(1, 11); + relatedIds.put(2, 22); + relatedIds.put(3, 33); + pojoWithMap.setRelatedIds(relatedIds); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithMap, PojoWithMap.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithMap.class); + + PojoWithMap deserialized = new PojoWithMap(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithMap, deserialized); + } + + @Test + public void testRead_PojoWithOptional() throws IOException { + PojoWithOptional pojoWithOptional = new PojoWithOptional(); + pojoWithOptional.setId(42); + pojoWithOptional.setRelatedId(Optional.of(13)); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithOptional, PojoWithOptional.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithOptional.class); + + PojoWithOptional deserialized = new PojoWithOptional(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithOptional, deserialized); + } + + @Test + public void testRead_PojoWithEmptyOptional() throws IOException { + PojoWithOptional pojoWithOptional = new PojoWithOptional(); + pojoWithOptional.setId(42); + pojoWithOptional.setRelatedId(Optional.empty()); + + byte[] serializedBytes = serializeWithReflectDatumWriter(pojoWithOptional, PojoWithOptional.class); + + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(PojoWithOptional.class); + + PojoWithOptional deserialized = new 
PojoWithOptional(); + reflectDatumReader.read(deserialized, decoder); + + assertEquals(pojoWithOptional, deserialized); + } + + @Test + public void testRead_PojoWithNullableAnnotation() throws IOException { + PojoWithBasicTypeNullableAnnotationV1 v1Pojo = new PojoWithBasicTypeNullableAnnotationV1(); + int idValue = 1; + v1Pojo.setId(idValue); + byte[] serializedBytes = serializeWithReflectDatumWriter(v1Pojo, PojoWithBasicTypeNullableAnnotationV1.class); + Decoder decoder = DecoderFactory.get().binaryDecoder(serializedBytes, null); + + ReflectData reflectData = ReflectData.get(); + Schema schemaV1 = reflectData.getSchema(PojoWithBasicTypeNullableAnnotationV1.class); + Schema schemaV2 = reflectData.getSchema(PojoWithBasicTypeNullableAnnotationV2.class); + + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(schemaV1, + schemaV2); + + PojoWithBasicTypeNullableAnnotationV2 v2Pojo = new PojoWithBasicTypeNullableAnnotationV2(); + reflectDatumReader.read(v2Pojo, decoder); + + assertEquals(v1Pojo.id, v2Pojo.id); + assertEquals(v2Pojo.id, idValue); + assertEquals(v2Pojo.intId, FieldAccess.INT_DEFAULT_VALUE); + assertEquals(v2Pojo.floatId, FieldAccess.FLOAT_DEFAULT_VALUE); + assertEquals(v2Pojo.shortId, FieldAccess.SHORT_DEFAULT_VALUE); + assertEquals(v2Pojo.byteId, FieldAccess.BYTE_DEFAULT_VALUE); + assertEquals(v2Pojo.booleanId, FieldAccess.BOOLEAN_DEFAULT_VALUE); + assertEquals(v2Pojo.charId, FieldAccess.CHAR_DEFAULT_VALUE); + assertEquals(v2Pojo.longId, FieldAccess.LONG_DEFAULT_VALUE); + assertEquals(v2Pojo.doubleId, FieldAccess.DOUBLE_DEFAULT_VALUE); + } + public static class PojoWithList { private int id; private List relatedIds; @@ -167,6 +280,325 @@ public boolean equals(Object obj) { return false; return Arrays.equals(relatedIds, other.relatedIds); } + } + public static class PojoWithSet { + private int id; + private Set relatedIds; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Set 
getRelatedIds() { + return relatedIds; + } + + public void setRelatedIds(Set relatedIds) { + this.relatedIds = relatedIds; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedIds == null) ? 0 : relatedIds.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithSet other = (PojoWithSet) obj; + if (id != other.id) + return false; + if (relatedIds == null) { + return other.relatedIds == null; + } else + return relatedIds.equals(other.relatedIds); + } + } + + public static class PojoWithMap { + private int id; + private Map relatedIds; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Map getRelatedIds() { + return relatedIds; + } + + public void setRelatedIds(Map relatedIds) { + this.relatedIds = relatedIds; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedIds == null) ? 
0 : relatedIds.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithMap other = (PojoWithMap) obj; + if (id != other.id) + return false; + if (relatedIds == null) { + return other.relatedIds == null; + } else + return relatedIds.equals(other.relatedIds); + } + } + + public static class PojoWithOptional { + private int id; + + private Optional relatedId; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public Optional getRelatedId() { + return relatedId; + } + + public void setRelatedId(Optional relatedId) { + this.relatedId = relatedId; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + result = prime * result + ((relatedId == null) ? 0 : relatedId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithOptional other = (PojoWithOptional) obj; + if (id != other.id) + return false; + if (relatedId == null) { + return other.relatedId == null; + } else + return relatedId.equals(other.relatedId); + } + } + + public static class PojoWithBasicTypeNullableAnnotationV1 { + + private int id; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + id; + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PojoWithBasicTypeNullableAnnotationV1 other = (PojoWithBasicTypeNullableAnnotationV1) obj; + return id == other.id; + } + } + + public static class 
PojoWithBasicTypeNullableAnnotationV2 { + + private int id; + + @Nullable + private int intId; + + @Nullable + private float floatId; + + @Nullable + private short shortId; + + @Nullable + private byte byteId; + + @Nullable + private boolean booleanId; + + @Nullable + private char charId; + + @Nullable + private long longId; + + @Nullable + private double doubleId; + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public int getIntId() { + return intId; + } + + public void setIntId(int intId) { + this.intId = intId; + } + + public float getFloatId() { + return floatId; + } + + public void setFloatId(float floatId) { + this.floatId = floatId; + } + + public short getShortId() { + return shortId; + } + + public void setShortId(short shortId) { + this.shortId = shortId; + } + + public byte getByteId() { + return byteId; + } + + public void setByteId(byte byteId) { + this.byteId = byteId; + } + + public boolean isBooleanId() { + return booleanId; + } + + public void setBooleanId(boolean booleanId) { + this.booleanId = booleanId; + } + + public char getCharId() { + return charId; + } + + public void setCharId(char charId) { + this.charId = charId; + } + + public long getLongId() { + return longId; + } + + public void setLongId(long longId) { + this.longId = longId; + } + + public double getDoubleId() { + return doubleId; + } + + public void setDoubleId(double doubleId) { + this.doubleId = doubleId; + } + + @Override + public int hashCode() { + final int prime = 31; + long temp; + int result = 1; + result = prime * result + id; + result = prime * result + intId; + result = prime * result + (floatId != 0.0f ? Float.floatToIntBits(floatId) : 0); + result = prime * result + (int) shortId; + result = prime * result + (int) byteId; + result = prime * result + (booleanId ? 
1 : 0); + result = prime * result + (int) charId; + result = prime * result + (int) (longId ^ (longId >>> 32)); + temp = Double.doubleToLongBits(doubleId); + result = 31 * result + (int) (temp ^ (temp >>> 32)); + return result; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + PojoWithBasicTypeNullableAnnotationV2 that = (PojoWithBasicTypeNullableAnnotationV2) o; + if (id != that.id) + return false; + if (intId != that.intId) + return false; + if (Float.compare(that.floatId, floatId) != 0) + return false; + if (shortId != that.shortId) + return false; + if (byteId != that.byteId) + return false; + if (booleanId != that.booleanId) + return false; + if (charId != that.charId) + return false; + if (longId != that.longId) + return false; + return Double.compare(that.doubleId, doubleId) == 0; + } } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java new file mode 100644 index 00000000000..a076593a56f --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectDatumWithAnonymousInstances.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.reflect; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * https://issues.apache.org/jira/browse/AVRO-1851 + */ +public class TestReflectDatumWithAnonymousInstances { + private static Pojo pojo; + + @BeforeAll + public static void init() { + // 1. Anonymous instance + pojo = new Pojo() { + { + // 2. Anonymous instance + Person person = new Person() { + { + setAddress("Address"); + } + }; + setPerson(person); + // 3. 
Anonymous instance + setTestEnum(TestEnum.V); + } + }; + } + + // Properly serializes and deserializes a POJO with an enum instance + // (TestEnum#V) + @Test + void handleProperlyEnumInstances() throws IOException { + byte[] output = serialize(pojo); + Pojo deserializedPojo = deserialize(output); + assertEquals(pojo, deserializedPojo); + assertTrue(deserializedPojo.getTestEnum().is_V()); + } + + private Pojo deserialize(byte[] input) throws IOException { + ByteArrayInputStream inputStream = new ByteArrayInputStream(input); + Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null); + ReflectData reflectData = ReflectData.AllowNull.get(); + ReflectDatumReader reflectDatumReader = new ReflectDatumReader<>(reflectData); + Schema schema = reflectData.getSchema(Pojo.class); + reflectDatumReader.setSchema(schema); + return reflectDatumReader.read(null, decoder); + } + + private byte[] serialize(Pojo input) throws IOException { + // Reflect data that supports nulls + ReflectData reflectData = ReflectData.AllowNull.get(); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null); + ReflectDatumWriter datumWriter = new ReflectDatumWriter<>(Pojo.class, reflectData); + datumWriter.write(input, encoder); + encoder.flush(); + return outputStream.toByteArray(); + } + + private static class Pojo { + private TestEnum testEnum; + private Person person; + + public TestEnum getTestEnum() { + return testEnum; + } + + public void setTestEnum(TestEnum testEnum) { + this.testEnum = testEnum; + } + + public Person getPerson() { + return person; + } + + public void setPerson(Person person) { + this.person = person; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + + if (o == null) + return false; + + Class thisClass = getClass(); + while (thisClass.isAnonymousClass()) { + thisClass = thisClass.getSuperclass(); + } + + Class oClass = o.getClass(); + while 
(oClass.isAnonymousClass()) { + oClass = oClass.getSuperclass(); + } + + if (thisClass != oClass) + return false; + + Pojo pojo = (Pojo) o; + + if (testEnum != pojo.testEnum) + return false; + return person != null ? person.equals(pojo.person) : pojo.person == null; + } + + @Override + public int hashCode() { + int result = testEnum != null ? testEnum.hashCode() : 0; + result = 31 * result + (person != null ? person.hashCode() : 0); + return result; + } + + @Override + public String toString() { + return "Pojo{" + "testEnum=" + testEnum + ", person=" + person + '}'; + } + } + + private static class Person { + private String name; + private String address; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getAddress() { + return address; + } + + public void setAddress(String address) { + this.address = address; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + + if (o == null) + return false; + + Class thisClass = getClass(); + while (thisClass.isAnonymousClass()) { + thisClass = thisClass.getSuperclass(); + } + + Class oClass = o.getClass(); + while (oClass.isAnonymousClass()) { + oClass = oClass.getSuperclass(); + } + + if (thisClass != oClass) + return false; + + Person person = (Person) o; + + if (name != null ? !name.equals(person.name) : person.name != null) + return false; + return address != null ? address.equals(person.address) : person.address == null; + } + + @Override + public int hashCode() { + int result = name != null ? name.hashCode() : 0; + result = 31 * result + (address != null ? 
address.hashCode() : 0); + return result; + } + + @Override + public String toString() { + return "Person{" + "name='" + name + '\'' + ", address='" + address + '\'' + '}'; + } + } + + enum TestEnum { + V { + @Override + public boolean is_V() { + return true; + } + }; + + public boolean is_V() { + return false; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java index c23a2f7369b..851ab95e3ea 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java @@ -18,6 +18,8 @@ package org.apache.avro.reflect; +import static org.junit.jupiter.api.Assertions.*; + import java.io.File; import java.io.IOException; import java.math.BigDecimal; @@ -44,24 +46,22 @@ import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.SpecificData; -import org.junit.Assert; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; /** * Tests various logical types * string => UUID * fixed and bytes => Decimal * * record => Pair */ public class TestReflectLogicalTypes { - @Rule - public TemporaryFolder temp = new TemporaryFolder(); + @TempDir + public File temp; public static final ReflectData REFLECT = new ReflectData(); - @BeforeClass + @BeforeAll public static void addUUID() { REFLECT.addLogicalTypeConversion(new Conversions.UUIDConversion()); REFLECT.addLogicalTypeConversion(new Conversions.DecimalConversion()); @@ -69,7 +69,7 @@ public static void addUUID() { } @Test - public void testReflectedSchema() { + void reflectedSchema() { Schema 
expected = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type().array() .items().stringType().noDefault().endRecord(); expected.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -77,7 +77,7 @@ public void testReflectedSchema() { Schema actual = REFLECT.getSchema(RecordWithUUIDList.class); - Assert.assertEquals("Should use the UUID logical type", expected, actual); + assertEquals(expected, actual, "Should use the UUID logical type"); } // this can be static because the schema only comes from reflection @@ -112,20 +112,20 @@ public int hashCode() { } @Test - public void testDecimalBytes() throws IOException { + void decimalBytes() throws IOException { Schema schema = REFLECT.getSchema(DecimalRecordBytes.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "DecimalRecordBytes", schema.getName()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.decimal(9, 2), - LogicalTypes.fromSchema(schema.getField("decimal").schema())); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("DecimalRecordBytes", schema.getName(), "Should have the correct record name"); + assertEquals(LogicalTypes.decimal(9, 2), LogicalTypes.fromSchema(schema.getField("decimal").schema()), + "Should have the correct logical type"); DecimalRecordBytes record = new DecimalRecordBytes(); record.decimal = new BigDecimal("3.14"); File test = write(REFLECT, schema, record); - Assert.assertEquals("Should match the decimal after round trip", Collections.singletonList(record), - read(REFLECT.createDatumReader(schema), test)); + assertEquals(Collections.singletonList(record), read(REFLECT.createDatumReader(schema), test), + "Should match the decimal after round trip"); } // 
this can be static because the schema only comes from reflection @@ -160,20 +160,20 @@ public int hashCode() { } @Test - public void testDecimalFixed() throws IOException { + void decimalFixed() throws IOException { Schema schema = REFLECT.getSchema(DecimalRecordFixed.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "DecimalRecordFixed", schema.getName()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.decimal(9, 2), - LogicalTypes.fromSchema(schema.getField("decimal").schema())); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("DecimalRecordFixed", schema.getName(), "Should have the correct record name"); + assertEquals(LogicalTypes.decimal(9, 2), LogicalTypes.fromSchema(schema.getField("decimal").schema()), + "Should have the correct logical type"); DecimalRecordFixed record = new DecimalRecordFixed(); record.decimal = new BigDecimal("3.14"); File test = write(REFLECT, schema, record); - Assert.assertEquals("Should match the decimal after round trip", Collections.singletonList(record), - read(REFLECT.createDatumReader(schema), test)); + assertEquals(Collections.singletonList(record), read(REFLECT.createDatumReader(schema), test), + "Should match the decimal after round trip"); } public static class Pair { @@ -230,7 +230,7 @@ public static class PairRecord { @Test @SuppressWarnings("unchecked") - public void testPairRecord() throws IOException { + void pairRecord() throws IOException { ReflectData model = new ReflectData(); model.addLogicalTypeConversion(new Conversion() { @Override @@ -258,11 +258,11 @@ public IndexedRecord toRecord(Pair value, Schema schema, LogicalType type) { }); LogicalTypes.register("pair", new LogicalTypes.LogicalTypeFactory() { - private final LogicalType PAIR 
= new LogicalType("pair"); + private final LogicalType pair = new LogicalType("pair"); @Override public LogicalType fromSchema(Schema schema) { - return PAIR; + return pair; } @Override @@ -272,11 +272,11 @@ public String getTypeName() { }); Schema schema = model.getSchema(PairRecord.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect.TestReflectLogicalTypes", - schema.getNamespace()); - Assert.assertEquals("Should have the correct record name", "PairRecord", schema.getName()); - Assert.assertEquals("Should have the correct logical type", "pair", - LogicalTypes.fromSchema(schema.getField("pair").schema()).getName()); + assertEquals("org.apache.avro.reflect.TestReflectLogicalTypes", schema.getNamespace(), + "Should have the correct record name"); + assertEquals("PairRecord", schema.getName(), "Should have the correct record name"); + assertEquals("pair", LogicalTypes.fromSchema(schema.getField("pair").schema()).getName(), + "Should have the correct logical type"); PairRecord record = new PairRecord(); record.pair = Pair.of(34L, 35L); @@ -286,12 +286,12 @@ public String getTypeName() { File test = write(model, schema, record); Pair actual = ((PairRecord) TestReflectLogicalTypes .read(model.createDatumReader(schema), test).get(0)).pair; - Assert.assertEquals("Data should match after serialization round-trip", 34L, (long) actual.first); - Assert.assertEquals("Data should match after serialization round-trip", 35L, (long) actual.second); + assertEquals(34L, (long) actual.first, "Data should match after serialization round-trip"); + assertEquals(35L, (long) actual.second, "Data should match after serialization round-trip"); } @Test - public void testReadUUID() throws IOException { + void readUUID() throws IOException { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -310,19 +310,19 @@ 
public void testReadUUID() throws IOException { File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1, r2); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(REFLECT.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema()); - Assert.assertEquals("Should not convert to UUID if accessor is String", Arrays.asList(r1, r2), - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(Arrays.asList(r1, r2), read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should not convert to UUID if accessor is String"); } @Test - public void testWriteUUID() throws IOException { + void writeUUID() throws IOException { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -345,16 +345,16 @@ public void testWriteUUID() throws IOException { Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID conversion"); LogicalTypes.uuid().addToSchema(uuidStringSchema.getField("uuid").schema()); - Assert.assertEquals("Should read uuid as String without UUID logical type", expected, - read(ReflectData.get().createDatumReader(uuidStringSchema), test)); + assertEquals(expected, 
read(ReflectData.get().createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID logical type"); } @Test - public void testWriteNullableUUID() throws IOException { + void writeNullableUUID() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().optionalString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); @@ -377,12 +377,12 @@ public void testWriteNullableUUID() throws IOException { Schema nullableUuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields() .optionalString("uuid").endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test)); + assertEquals(expected, read(ReflectData.get().createDatumReader(nullableUuidStringSchema), test), + "Should read uuid as String without UUID conversion"); } @Test - public void testWriteNullableUUIDReadRequiredString() throws IOException { + void writeNullableUUIDReadRequiredString() throws IOException { Schema nullableUuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().optionalString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(nullableUuidSchema.getField("uuid").schema().getTypes().get(1)); @@ -405,94 +405,58 @@ public void testWriteNullableUUIDReadRequiredString() throws IOException { Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); - Assert.assertEquals("Should read uuid as String without UUID conversion", expected, - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should read uuid as String without UUID conversion"); } @Test - public void testReadUUIDMissingLogicalTypeUnsafe() throws IOException { - String unsafeValue = 
System.getProperty("avro.disable.unsafe"); - try { - // only one FieldAccess can be set per JVM - System.clearProperty("avro.disable.unsafe"); - Assume.assumeTrue(ReflectionUtil.getFieldAccess() instanceof FieldAccessUnsafe); - - Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") - .endRecord(); - LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); + void readUUIDMissingLogicalTypeReflect() throws IOException { + Assumptions.assumeTrue(ReflectionUtil.getFieldAccess() instanceof FieldAccessReflect); - UUID u1 = UUID.randomUUID(); + Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") + .endRecord(); + LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - RecordWithStringUUID r1 = new RecordWithStringUUID(); - r1.uuid = u1.toString(); + UUID u1 = UUID.randomUUID(); - File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); + RecordWithStringUUID r1 = new RecordWithStringUUID(); + r1.uuid = u1.toString(); - RecordWithUUID datum = (RecordWithUUID) read(ReflectData.get().createDatumReader(uuidSchema), test).get(0); - Object uuid = datum.uuid; - Assert.assertTrue("UUID should be a String (unsafe)", uuid instanceof String); - } finally { - if (unsafeValue != null) { - System.setProperty("avro.disable.unsafe", unsafeValue); - } - } + File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); + assertThrows(IllegalArgumentException.class, + () -> read(ReflectData.get().createDatumReader(uuidSchema), test).get(0)); } - @Test(expected = IllegalArgumentException.class) - public void testReadUUIDMissingLogicalTypeReflect() throws IOException { - String unsafeValue = System.getProperty("avro.disable.unsafe"); - try { - // only one FieldAccess can be set per JVM - System.setProperty("avro.disable.unsafe", "true"); - Assume.assumeTrue(ReflectionUtil.getFieldAccess() instanceof 
FieldAccessReflect); - + @Test + void writeUUIDMissingLogicalType() throws IOException { + assertThrows(DataFileWriter.AppendWriteException.class, () -> { Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); - RecordWithStringUUID r1 = new RecordWithStringUUID(); - r1.uuid = u1.toString(); + RecordWithUUID r1 = new RecordWithUUID(); + r1.uuid = u1; + RecordWithUUID r2 = new RecordWithUUID(); + r2.uuid = u2; - File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1); + // write without using REFLECT, which has the logical type + File test = write(uuidSchema, r1, r2); - read(ReflectData.get().createDatumReader(uuidSchema), test).get(0); - } finally { - if (unsafeValue != null) { - System.setProperty("avro.disable.unsafe", unsafeValue); - } - } - } + // verify that the field's type overrides the logical type + Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields() + .requiredString("uuid").endRecord(); - @Test(expected = DataFileWriter.AppendWriteException.class) - public void testWriteUUIDMissingLogicalType() throws IOException { - Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName()).fields().requiredString("uuid") - .endRecord(); - LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - - UUID u1 = UUID.randomUUID(); - UUID u2 = UUID.randomUUID(); - - RecordWithUUID r1 = new RecordWithUUID(); - r1.uuid = u1; - RecordWithUUID r2 = new RecordWithUUID(); - r2.uuid = u2; - - // write without using REFLECT, which has the logical type - File test = write(uuidSchema, r1, r2); - - // verify that the field's type overrides the logical type - Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") - .endRecord(); - - // this 
fails with an AppendWriteException wrapping ClassCastException - // because the UUID isn't converted to a CharSequence expected internally - read(ReflectData.get().createDatumReader(uuidStringSchema), test); + // this fails with an AppendWriteException wrapping ClassCastException + // because the UUID isn't converted to a CharSequence expected internally + read(ReflectData.get().createDatumReader(uuidStringSchema), test); + }); } @Test - public void testReadUUIDGenericRecord() throws IOException { + void readUUIDGenericRecord() throws IOException { Schema uuidSchema = SchemaBuilder.record("RecordWithUUID").fields().requiredString("uuid").endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); @@ -511,19 +475,19 @@ public void testReadUUIDGenericRecord() throws IOException { File test = write(ReflectData.get().getSchema(RecordWithStringUUID.class), r1, r2); - Assert.assertEquals("Should convert Strings to UUIDs", expected, read(REFLECT.createDatumReader(uuidSchema), test)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidSchema), test), "Should convert Strings to UUIDs"); // verify that the field's type overrides the logical type Schema uuidStringSchema = SchemaBuilder.record(RecordWithStringUUID.class.getName()).fields().requiredString("uuid") .endRecord(); LogicalTypes.uuid().addToSchema(uuidSchema.getField("uuid").schema()); - Assert.assertEquals("Should not convert to UUID if accessor is String", Arrays.asList(r1, r2), - read(REFLECT.createDatumReader(uuidStringSchema), test)); + assertEquals(Arrays.asList(r1, r2), read(REFLECT.createDatumReader(uuidStringSchema), test), + "Should not convert to UUID if accessor is String"); } @Test - public void testReadUUIDArray() throws IOException { + void readUUIDArray() throws IOException { Schema uuidArraySchema = SchemaBuilder.record(RecordWithUUIDArray.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); 
LogicalTypes.uuid().addToSchema(uuidArraySchema.getField("uuids").schema().getElementType()); @@ -539,12 +503,12 @@ public void testReadUUIDArray() throws IOException { File test = write(uuidArraySchema, r); - Assert.assertEquals("Should convert Strings to UUIDs", expected, - read(REFLECT.createDatumReader(uuidArraySchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidArraySchema), test).get(0), + "Should convert Strings to UUIDs"); } @Test - public void testWriteUUIDArray() throws IOException { + void writeUUIDArray() throws IOException { Schema uuidArraySchema = SchemaBuilder.record(RecordWithUUIDArray.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); LogicalTypes.uuid().addToSchema(uuidArraySchema.getField("uuids").schema().getElementType()); @@ -567,12 +531,12 @@ public void testWriteUUIDArray() throws IOException { File test = write(REFLECT, uuidArraySchema, r); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(ReflectData.get().createDatumReader(stringArraySchema), test).get(0)); + assertEquals(expected, read(ReflectData.get().createDatumReader(stringArraySchema), test).get(0), + "Should read UUIDs as Strings"); } @Test - public void testReadUUIDList() throws IOException { + void readUUIDList() throws IOException { Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); uuidListSchema.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -589,12 +553,12 @@ public void testReadUUIDList() throws IOException { File test = write(uuidListSchema, r); - Assert.assertEquals("Should convert Strings to UUIDs", expected, - read(REFLECT.createDatumReader(uuidListSchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(uuidListSchema), test).get(0), + "Should convert Strings to UUIDs"); } @Test - public void 
testWriteUUIDList() throws IOException { + void writeUUIDList() throws IOException { Schema uuidListSchema = SchemaBuilder.record(RecordWithUUIDList.class.getName()).fields().name("uuids").type() .array().items().stringType().noDefault().endRecord(); uuidListSchema.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName()); @@ -615,20 +579,20 @@ public void testWriteUUIDList() throws IOException { File test = write(REFLECT, uuidListSchema, r); - Assert.assertEquals("Should read UUIDs as Strings", expected, - read(REFLECT.createDatumReader(stringArraySchema), test).get(0)); + assertEquals(expected, read(REFLECT.createDatumReader(stringArraySchema), test).get(0), + "Should read UUIDs as Strings"); } @Test - public void testReflectedSchemaLocalDateTime() { + void reflectedSchemaLocalDateTime() { Schema actual = REFLECT.getSchema(RecordWithTimestamps.class); - Assert.assertEquals("Should have the correct record name", "org.apache.avro.reflect", actual.getNamespace()); - Assert.assertEquals("Should have the correct record name", "RecordWithTimestamps", actual.getName()); - Assert.assertEquals("Should have the correct physical type", Schema.Type.LONG, - actual.getField("localDateTime").schema().getType()); - Assert.assertEquals("Should have the correct logical type", LogicalTypes.localTimestampMillis(), - LogicalTypes.fromSchema(actual.getField("localDateTime").schema())); + assertEquals("org.apache.avro.reflect", actual.getNamespace(), "Should have the correct record name"); + assertEquals("RecordWithTimestamps", actual.getName(), "Should have the correct record name"); + assertEquals(Schema.Type.LONG, actual.getField("localDateTime").schema().getType(), + "Should have the correct physical type"); + assertEquals(LogicalTypes.localTimestampMillis(), + LogicalTypes.fromSchema(actual.getField("localDateTime").schema()), "Should have the correct logical type"); } private static List read(DatumReader reader, File file) throws IOException { @@ -649,7 
+613,7 @@ private File write(Schema schema, D... data) throws IOException { @SuppressWarnings("unchecked") private File write(GenericData model, Schema schema, D... data) throws IOException { - File file = temp.newFile(); + File file = File.createTempFile("junit", null, temp); DatumWriter writer = model.createDatumWriter(schema); try (DataFileWriter fileWriter = new DataFileWriter<>(writer)) { @@ -764,6 +728,6 @@ public boolean equals(Object obj) { return false; } RecordWithTimestamps that = (RecordWithTimestamps) obj; - return Objects.equals(that.localDateTime, that.localDateTime); + return Objects.equals(localDateTime, that.localDateTime); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java deleted file mode 100644 index 515f9f345eb..00000000000 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectionUtil.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.avro.reflect; - -import java.io.IOException; -import java.io.InputStream; - -import org.junit.Test; - -public class TestReflectionUtil { - - @Test - public void testUnsafeUtil() { - new Tester().checkUnsafe(); - } - - @Test - public void testUnsafeWhenNotExists() throws Exception { - ClassLoader cl = new NoUnsafe(); - Class testerClass = cl.loadClass(Tester.class.getName()); - testerClass.getDeclaredMethod("checkUnsafe").invoke(testerClass.getDeclaredConstructor().newInstance()); - } - - public static final class Tester { - public Tester() { - } - - public void checkUnsafe() { - ReflectionUtil.getFieldAccess(); - } - - } - - private static final class NoUnsafe extends ClassLoader { - private ClassLoader parent = TestReflectionUtil.class.getClassLoader(); - - @Override - public java.lang.Class loadClass(String name) throws ClassNotFoundException { - Class clazz = findLoadedClass(name); - if (clazz != null) { - return clazz; - } - if ("sun.misc.Unsafe".equals(name)) { - throw new ClassNotFoundException(name); - } - if (!name.startsWith("org.apache.avro.")) { - return parent.loadClass(name); - } - - InputStream data = parent.getResourceAsStream(name.replace('.', '/') + ".class"); - byte[] buf = new byte[10240]; // big enough, too lazy to loop - int size; - try { - size = data.read(buf); - } catch (IOException e) { - throw new ClassNotFoundException(); - } - clazz = defineClass(name, buf, 0, size); - resolveClass(clazz); - return clazz; - } - - } -} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java index b7a89db6e59..1763a73144c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java @@ -1,10 +1,23 @@ -/** - * Autogenerated by Avro +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * DO NOT EDIT DIRECTLY + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.avro.specific; +import org.apache.avro.Conversions; import org.apache.avro.data.TimeConversions; import org.apache.avro.message.BinaryMessageDecoder; import org.apache.avro.message.BinaryMessageEncoder; @@ -16,7 +29,7 @@ public class TestRecordWithLogicalTypes extends org.apache.avro.specific.Specifi implements org.apache.avro.specific.SpecificRecord { private static final long serialVersionUID = 3313339903648295220L; public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( - 
"{\"type\":\"record\",\"name\":\"TestRecordWithLogicalTypes\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"b\",\"type\":\"boolean\"},{\"name\":\"i32\",\"type\":\"int\"},{\"name\":\"i64\",\"type\":\"long\"},{\"name\":\"f32\",\"type\":\"float\"},{\"name\":\"f64\",\"type\":\"double\"},{\"name\":\"s\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"d\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"t\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"ts\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"dec\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":9,\"scale\":2}}]}"); + "{\"type\":\"record\",\"name\":\"TestRecordWithLogicalTypes\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"b\",\"type\":\"boolean\"},{\"name\":\"i32\",\"type\":\"int\"},{\"name\":\"i64\",\"type\":\"long\"},{\"name\":\"f32\",\"type\":\"float\"},{\"name\":\"f64\",\"type\":\"double\"},{\"name\":\"s\",\"type\":[\"null\",\"string\"],\"default\":null},{\"name\":\"d\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"t\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},{\"name\":\"ts\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},{\"name\":\"dec\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":9,\"scale\":2}},{\"name\":\"bd\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"big-decimal\"}}]}"); public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; @@ -77,6 +90,8 @@ public static TestRecordWithLogicalTypes fromByteBuffer(java.nio.ByteBuffer b) t public java.time.Instant ts; @Deprecated public java.math.BigDecimal dec; + @Deprecated + public java.math.BigDecimal bd; /** * Default constructor. 
Note that this does not initialize fields to their @@ -99,10 +114,11 @@ public TestRecordWithLogicalTypes() { * @param t The new value for t * @param ts The new value for ts * @param dec The new value for dec + * @param bd The new value for bd */ public TestRecordWithLogicalTypes(java.lang.Boolean b, java.lang.Integer i32, java.lang.Long i64, java.lang.Float f32, java.lang.Double f64, java.lang.CharSequence s, java.time.LocalDate d, java.time.LocalTime t, - java.time.Instant ts, java.math.BigDecimal dec) { + java.time.Instant ts, java.math.BigDecimal dec, java.math.BigDecimal bd) { this.b = b; this.i32 = i32; this.i64 = i64; @@ -113,6 +129,7 @@ public TestRecordWithLogicalTypes(java.lang.Boolean b, java.lang.Integer i32, ja this.t = t; this.ts = ts; this.dec = dec; + this.bd = bd; } @Override @@ -144,18 +161,24 @@ public java.lang.Object get(int field$) { return ts; case 9: return dec; + case 10: + return bd; default: - throw new org.apache.avro.AvroRuntimeException("Bad index"); + throw new org.apache.avro.AvroRuntimeException("Bad index " + field$); } } protected static final org.apache.avro.Conversions.DecimalConversion DECIMAL_CONVERSION = new org.apache.avro.Conversions.DecimalConversion(); + + protected static final Conversions.BigDecimalConversion BIG_DECIMAL_CONVERSION = new org.apache.avro.Conversions.BigDecimalConversion(); + protected static final TimeConversions.DateConversion DATE_CONVERSION = new TimeConversions.DateConversion(); protected static final TimeConversions.TimeMillisConversion TIME_CONVERSION = new TimeConversions.TimeMillisConversion(); protected static final TimeConversions.TimestampMillisConversion TIMESTAMP_CONVERSION = new TimeConversions.TimestampMillisConversion(); private static final org.apache.avro.Conversion[] conversions = new org.apache.avro.Conversion[] { null, null, - null, null, null, null, DATE_CONVERSION, TIME_CONVERSION, TIMESTAMP_CONVERSION, DECIMAL_CONVERSION, null }; + null, null, null, null, DATE_CONVERSION, 
TIME_CONVERSION, TIMESTAMP_CONVERSION, DECIMAL_CONVERSION, + BIG_DECIMAL_CONVERSION }; @Override public org.apache.avro.Conversion getConversion(int field) { @@ -197,6 +220,9 @@ public void put(int field$, java.lang.Object value$) { case 9: dec = (java.math.BigDecimal) value$; break; + case 10: + bd = (java.math.BigDecimal) value$; + break; default: throw new org.apache.avro.AvroRuntimeException("Bad index"); } @@ -438,6 +464,8 @@ public static class Builder extends org.apache.avro.specific.SpecificRecordBuild private java.time.Instant ts; private java.math.BigDecimal dec; + private java.math.BigDecimal bd; + /** Creates a new Builder */ private Builder() { super(SCHEMA$); @@ -490,6 +518,10 @@ private Builder(TestRecordWithLogicalTypes.Builder other) { this.dec = data().deepCopy(fields()[9].schema(), other.dec); fieldSetFlags()[9] = other.fieldSetFlags()[9]; } + if (isValidValue(fields()[10], other.bd)) { + this.bd = data().deepCopy(fields()[10].schema(), other.bd); + fieldSetFlags()[10] = other.fieldSetFlags()[10]; + } } /** @@ -539,6 +571,10 @@ private Builder(TestRecordWithLogicalTypes other) { this.dec = data().deepCopy(fields()[9].schema(), other.dec); fieldSetFlags()[9] = true; } + if (isValidValue(fields()[10], other.bd)) { + this.bd = data().deepCopy(fields()[10].schema(), other.bd); + fieldSetFlags()[10] = true; + } } /** @@ -968,6 +1004,7 @@ public TestRecordWithLogicalTypes build() { record.t = fieldSetFlags()[7] ? this.t : (java.time.LocalTime) defaultValue(fields()[7]); record.ts = fieldSetFlags()[8] ? this.ts : (java.time.Instant) defaultValue(fields()[8]); record.dec = fieldSetFlags()[9] ? this.dec : (java.math.BigDecimal) defaultValue(fields()[9]); + record.bd = fieldSetFlags()[10] ? 
this.bd : (java.math.BigDecimal) defaultValue(fields()[10]); return record; } catch (java.lang.Exception e) { throw new org.apache.avro.AvroRuntimeException(e); diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java new file mode 100644 index 00000000000..81572bc22c5 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithMapsAndArrays.java @@ -0,0 +1,875 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.specific; + +import org.apache.avro.generic.GenericArray; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.util.Utf8; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.SchemaStore; + +@AvroGenerated +public class TestRecordWithMapsAndArrays extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = -3823801533006425147L; + + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( + "{\"type\":\"record\",\"name\":\"TestRecordWithMapsAndArrays\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"arr\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":[]}},{\"name\":\"map\",\"type\":{\"type\":\"map\",\"values\":\"long\",\"avro.java.string\":\"String\",\"default\":{}}},{\"name\":\"nested_arr\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"array\",\"items\":{\"type\":\"string\",\"avro.java.string\":\"String\"},\"default\":[]},\"default\":[]}},{\"name\":\"nested_map\",\"type\":{\"type\":\"map\",\"values\":{\"type\":\"map\",\"values\":\"long\",\"avro.java.string\":\"String\",\"default\":{}},\"avro.java.string\":\"String\",\"default\":{}}}]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + + private static final BinaryMessageEncoder ENCODER = new BinaryMessageEncoder<>(MODEL$, + SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder<>(MODEL$, + SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. 
+ * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. + * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this TestRecordWithMapsAndArrays to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a TestRecordWithMapsAndArrays from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a TestRecordWithMapsAndArrays instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static TestRecordWithMapsAndArrays fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + private java.util.List<String> arr; + private java.util.Map<String, Long> map; + private java.util.List<java.util.List<String>> nested_arr; + private java.util.Map<String, java.util.Map<String, Long>> nested_map; + + /** + * Default constructor. Note that this does not initialize fields to their + * default values from the schema. If that is desired then one should use + * newBuilder(). + */ + public TestRecordWithMapsAndArrays() { + } + + /** + * All-args constructor. 
+ * + * @param arr The new value for arr + * @param map The new value for map + * @param nested_arr The new value for nested_arr + * @param nested_map The new value for nested_map + */ + public TestRecordWithMapsAndArrays(java.util.List<String> arr, java.util.Map<String, Long> map, + java.util.List<java.util.List<String>> nested_arr, + java.util.Map<String, java.util.Map<String, Long>> nested_map) { + this.arr = arr; + this.map = map; + this.nested_arr = nested_arr; + this.nested_map = nested_map; + } + + @Override + public SpecificData getSpecificData() { + return MODEL$; + } + + @Override + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. + @Override + public Object get(int field$) { + switch (field$) { + case 0: + return arr; + case 1: + return map; + case 2: + return nested_arr; + case 3: + return nested_map; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. + @Override + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + case 0: + arr = (java.util.List<String>) value$; + break; + case 1: + map = (java.util.Map<String, Long>) value$; + break; + case 2: + nested_arr = (java.util.List<java.util.List<String>>) value$; + break; + case 3: + nested_map = (java.util.Map<String, java.util.Map<String, Long>>) value$; + break; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Gets the value of the 'arr' field. + * + * @return The value of the 'arr' field. + */ + public java.util.List<String> getArr() { + return arr; + } + + /** + * Sets the value of the 'arr' field. + * + * @param value the value to set. 
+ */ + public void setMap(java.util.Map value) { + this.map = value; + } + + /** + * Gets the value of the 'nested_arr' field. + * + * @return The value of the 'nested_arr' field. + */ + public java.util.List> getNestedArr() { + return nested_arr; + } + + /** + * Sets the value of the 'nested_arr' field. + * + * @param value the value to set. + */ + public void setNestedArr(java.util.List> value) { + this.nested_arr = value; + } + + /** + * Gets the value of the 'nested_map' field. + * + * @return The value of the 'nested_map' field. + */ + public java.util.Map> getNestedMap() { + return nested_map; + } + + /** + * Sets the value of the 'nested_map' field. + * + * @param value the value to set. + */ + public void setNestedMap(java.util.Map> value) { + this.nested_map = value; + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder. + * + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder by copying an + * existing Builder. + * + * @param other The existing builder to copy. + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new TestRecordWithMapsAndArrays RecordBuilder by copying an + * existing TestRecordWithMapsAndArrays instance. + * + * @param other The existing instance to copy. + * @return A new TestRecordWithMapsAndArrays RecordBuilder + */ + public static Builder newBuilder(TestRecordWithMapsAndArrays other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for TestRecordWithMapsAndArrays instances. 
+ */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.util.List arr; + private java.util.Map map; + private java.util.List> nested_arr; + private java.util.Map> nested_map; + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. + */ + private Builder(Builder other) { + super(other); + if (isValidValue(fields()[0], other.arr)) { + this.arr = data().deepCopy(fields()[0].schema(), other.arr); + fieldSetFlags()[0] = other.fieldSetFlags()[0]; + } + if (isValidValue(fields()[1], other.map)) { + this.map = data().deepCopy(fields()[1].schema(), other.map); + fieldSetFlags()[1] = other.fieldSetFlags()[1]; + } + if (isValidValue(fields()[2], other.nested_arr)) { + this.nested_arr = data().deepCopy(fields()[2].schema(), other.nested_arr); + fieldSetFlags()[2] = other.fieldSetFlags()[2]; + } + if (isValidValue(fields()[3], other.nested_map)) { + this.nested_map = data().deepCopy(fields()[3].schema(), other.nested_map); + fieldSetFlags()[3] = other.fieldSetFlags()[3]; + } + } + + /** + * Creates a Builder by copying an existing TestRecordWithMapsAndArrays instance + * + * @param other The existing instance to copy. 
+ */ + private Builder(TestRecordWithMapsAndArrays other) { + super(SCHEMA$, MODEL$); + if (isValidValue(fields()[0], other.arr)) { + this.arr = data().deepCopy(fields()[0].schema(), other.arr); + fieldSetFlags()[0] = true; + } + if (isValidValue(fields()[1], other.map)) { + this.map = data().deepCopy(fields()[1].schema(), other.map); + fieldSetFlags()[1] = true; + } + if (isValidValue(fields()[2], other.nested_arr)) { + this.nested_arr = data().deepCopy(fields()[2].schema(), other.nested_arr); + fieldSetFlags()[2] = true; + } + if (isValidValue(fields()[3], other.nested_map)) { + this.nested_map = data().deepCopy(fields()[3].schema(), other.nested_map); + fieldSetFlags()[3] = true; + } + } + + /** + * Gets the value of the 'arr' field. + * + * @return The value. + */ + public java.util.List getArr() { + return arr; + } + + /** + * Sets the value of the 'arr' field. + * + * @param value The value of 'arr'. + * @return This builder. + */ + public Builder setArr(java.util.List value) { + validate(fields()[0], value); + this.arr = value; + fieldSetFlags()[0] = true; + return this; + } + + /** + * Checks whether the 'arr' field has been set. + * + * @return True if the 'arr' field has been set, false otherwise. + */ + public boolean hasArr() { + return fieldSetFlags()[0]; + } + + /** + * Clears the value of the 'arr' field. + * + * @return This builder. + */ + public Builder clearArr() { + arr = null; + fieldSetFlags()[0] = false; + return this; + } + + /** + * Gets the value of the 'map' field. + * + * @return The value. + */ + public java.util.Map getMap() { + return map; + } + + /** + * Sets the value of the 'map' field. + * + * @param value The value of 'map'. + * @return This builder. + */ + public Builder setMap(java.util.Map value) { + validate(fields()[1], value); + this.map = value; + fieldSetFlags()[1] = true; + return this; + } + + /** + * Checks whether the 'map' field has been set. + * + * @return True if the 'map' field has been set, false otherwise. 
+ */ + public boolean hasMap() { + return fieldSetFlags()[1]; + } + + /** + * Clears the value of the 'map' field. + * + * @return This builder. + */ + public Builder clearMap() { + map = null; + fieldSetFlags()[1] = false; + return this; + } + + /** + * Gets the value of the 'nested_arr' field. + * + * @return The value. + */ + public java.util.List> getNestedArr() { + return nested_arr; + } + + /** + * Sets the value of the 'nested_arr' field. + * + * @param value The value of 'nested_arr'. + * @return This builder. + */ + public Builder setNestedArr(java.util.List> value) { + validate(fields()[2], value); + this.nested_arr = value; + fieldSetFlags()[2] = true; + return this; + } + + /** + * Checks whether the 'nested_arr' field has been set. + * + * @return True if the 'nested_arr' field has been set, false otherwise. + */ + public boolean hasNestedArr() { + return fieldSetFlags()[2]; + } + + /** + * Clears the value of the 'nested_arr' field. + * + * @return This builder. + */ + public Builder clearNestedArr() { + nested_arr = null; + fieldSetFlags()[2] = false; + return this; + } + + /** + * Gets the value of the 'nested_map' field. + * + * @return The value. + */ + public java.util.Map> getNestedMap() { + return nested_map; + } + + /** + * Sets the value of the 'nested_map' field. + * + * @param value The value of 'nested_map'. + * @return This builder. + */ + public Builder setNestedMap(java.util.Map> value) { + validate(fields()[3], value); + this.nested_map = value; + fieldSetFlags()[3] = true; + return this; + } + + /** + * Checks whether the 'nested_map' field has been set. + * + * @return True if the 'nested_map' field has been set, false otherwise. + */ + public boolean hasNestedMap() { + return fieldSetFlags()[3]; + } + + /** + * Clears the value of the 'nested_map' field. + * + * @return This builder. 
+ */ + public Builder clearNestedMap() { + nested_map = null; + fieldSetFlags()[3] = false; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public TestRecordWithMapsAndArrays build() { + try { + TestRecordWithMapsAndArrays record = new TestRecordWithMapsAndArrays(); + record.arr = fieldSetFlags()[0] ? this.arr : (java.util.List) defaultValue(fields()[0]); + record.map = fieldSetFlags()[1] ? this.map : (java.util.Map) defaultValue(fields()[1]); + record.nested_arr = fieldSetFlags()[2] ? this.nested_arr + : (java.util.List>) defaultValue(fields()[2]); + record.nested_map = fieldSetFlags()[3] ? this.nested_map + : (java.util.Map>) defaultValue(fields()[3]); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + + @Override + protected boolean hasCustomCoders() { + return true; + } + + @Override + public void customEncode(org.apache.avro.io.Encoder out) throws java.io.IOException { + long size0 = this.arr.size(); + out.writeArrayStart(); + out.setItemCount(size0); + long actualSize0 = 0; + for (String e0 : this.arr) { + actualSize0++; + out.startItem(); + out.writeString(e0); + } + out.writeArrayEnd(); + if (actualSize0 != size0) + throw new java.util.ConcurrentModificationException( + "Array-size 
written was " + size0 + ", but element count was " + actualSize0 + "."); + + long size1 = this.map.size(); + out.writeMapStart(); + out.setItemCount(size1); + long actualSize1 = 0; + for (java.util.Map.Entry e1 : this.map.entrySet()) { + actualSize1++; + out.startItem(); + out.writeString(e1.getKey()); + Long v1 = e1.getValue(); + out.writeLong(v1); + } + out.writeMapEnd(); + if (actualSize1 != size1) + throw new java.util.ConcurrentModificationException( + "Map-size written was " + size1 + ", but element count was " + actualSize1 + "."); + + long size2 = this.nested_arr.size(); + out.writeArrayStart(); + out.setItemCount(size2); + long actualSize2 = 0; + for (java.util.List e2 : this.nested_arr) { + actualSize2++; + out.startItem(); + long size3 = e2.size(); + out.writeArrayStart(); + out.setItemCount(size3); + long actualSize3 = 0; + for (String e3 : e2) { + actualSize3++; + out.startItem(); + out.writeString(e3); + } + out.writeArrayEnd(); + if (actualSize3 != size3) + throw new java.util.ConcurrentModificationException( + "Array-size written was " + size3 + ", but element count was " + actualSize3 + "."); + } + out.writeArrayEnd(); + if (actualSize2 != size2) + throw new java.util.ConcurrentModificationException( + "Array-size written was " + size2 + ", but element count was " + actualSize2 + "."); + + long size4 = this.nested_map.size(); + out.writeMapStart(); + out.setItemCount(size4); + long actualSize4 = 0; + for (java.util.Map.Entry> e4 : this.nested_map.entrySet()) { + actualSize4++; + out.startItem(); + out.writeString(e4.getKey()); + java.util.Map v4 = e4.getValue(); + long size5 = v4.size(); + out.writeMapStart(); + out.setItemCount(size5); + long actualSize5 = 0; + for (java.util.Map.Entry e5 : v4.entrySet()) { + actualSize5++; + out.startItem(); + out.writeString(e5.getKey()); + Long v5 = e5.getValue(); + out.writeLong(v5); + } + out.writeMapEnd(); + if (actualSize5 != size5) + throw new java.util.ConcurrentModificationException( + "Map-size written 
was " + size5 + ", but element count was " + actualSize5 + "."); + } + out.writeMapEnd(); + if (actualSize4 != size4) + throw new java.util.ConcurrentModificationException( + "Map-size written was " + size4 + ", but element count was " + actualSize4 + "."); + + } + + @Override + public void customDecode(org.apache.avro.io.ResolvingDecoder in) throws java.io.IOException { + org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); + if (fieldOrder == null) { + long size0 = in.readArrayStart(); + java.util.List a0 = this.arr; + if (a0 == null) { + a0 = new SpecificData.Array((int) size0, SCHEMA$.getField("arr").schema()); + this.arr = a0; + } else + a0.clear(); + SpecificData.Array ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array) a0 : null); + for (; 0 < size0; size0 = in.arrayNext()) { + for (; size0 != 0; size0--) { + String e0 = (ga0 != null ? ga0.peek() : null); + e0 = in.readString(); + a0.add(e0); + } + } + + long size1 = in.readMapStart(); + java.util.Map m1 = this.map; // Need fresh name due to limitation of macro system + if (m1 == null) { + m1 = new java.util.HashMap((int) size1); + this.map = m1; + } else + m1.clear(); + for (; 0 < size1; size1 = in.mapNext()) { + for (; size1 != 0; size1--) { + String k1 = null; + k1 = in.readString(); + Long v1 = null; + v1 = in.readLong(); + m1.put(k1, v1); + } + } + + long size2 = in.readArrayStart(); + java.util.List> a2 = this.nested_arr; + if (a2 == null) { + a2 = new SpecificData.Array>((int) size2, SCHEMA$.getField("nested_arr").schema()); + this.nested_arr = a2; + } else + a2.clear(); + SpecificData.Array> ga2 = (a2 instanceof SpecificData.Array + ? (SpecificData.Array>) a2 + : null); + for (; 0 < size2; size2 = in.arrayNext()) { + for (; size2 != 0; size2--) { + java.util.List e2 = (ga2 != null ? 
ga2.peek() : null); + long size3 = in.readArrayStart(); + java.util.List a3 = e2; + if (a3 == null) { + a3 = new SpecificData.Array((int) size3, SCHEMA$.getField("nested_arr").schema().getElementType()); + e2 = a3; + } else + a3.clear(); + SpecificData.Array ga3 = (a3 instanceof SpecificData.Array ? (SpecificData.Array) a3 : null); + for (; 0 < size3; size3 = in.arrayNext()) { + for (; size3 != 0; size3--) { + String e3 = (ga3 != null ? ga3.peek() : null); + e3 = in.readString(); + a3.add(e3); + } + } + a2.add(e2); + } + } + + long size4 = in.readMapStart(); + java.util.Map> m4 = this.nested_map; // Need fresh name due to limitation of + // macro system + if (m4 == null) { + m4 = new java.util.HashMap>((int) size4); + this.nested_map = m4; + } else + m4.clear(); + for (; 0 < size4; size4 = in.mapNext()) { + for (; size4 != 0; size4--) { + String k4 = null; + k4 = in.readString(); + java.util.Map v4 = null; + long size5 = in.readMapStart(); + java.util.Map m5 = v4; // Need fresh name due to limitation of macro system + if (m5 == null) { + m5 = new java.util.HashMap((int) size5); + v4 = m5; + } else + m5.clear(); + for (; 0 < size5; size5 = in.mapNext()) { + for (; size5 != 0; size5--) { + String k5 = null; + k5 = in.readString(); + Long v5 = null; + v5 = in.readLong(); + m5.put(k5, v5); + } + } + m4.put(k4, v4); + } + } + + } else { + for (int i = 0; i < 4; i++) { + switch (fieldOrder[i].pos()) { + case 0: + long size0 = in.readArrayStart(); + java.util.List a0 = this.arr; + if (a0 == null) { + a0 = new SpecificData.Array((int) size0, SCHEMA$.getField("arr").schema()); + this.arr = a0; + } else + a0.clear(); + SpecificData.Array ga0 = (a0 instanceof SpecificData.Array ? (SpecificData.Array) a0 : null); + for (; 0 < size0; size0 = in.arrayNext()) { + for (; size0 != 0; size0--) { + String e0 = (ga0 != null ? 
ga0.peek() : null); + e0 = in.readString(); + a0.add(e0); + } + } + break; + + case 1: + long size1 = in.readMapStart(); + java.util.Map m1 = this.map; // Need fresh name due to limitation of macro system + if (m1 == null) { + m1 = new java.util.HashMap((int) size1); + this.map = m1; + } else + m1.clear(); + for (; 0 < size1; size1 = in.mapNext()) { + for (; size1 != 0; size1--) { + String k1 = null; + k1 = in.readString(); + Long v1 = null; + v1 = in.readLong(); + m1.put(k1, v1); + } + } + break; + + case 2: + long size2 = in.readArrayStart(); + java.util.List> a2 = this.nested_arr; + if (a2 == null) { + a2 = new SpecificData.Array>((int) size2, SCHEMA$.getField("nested_arr").schema()); + this.nested_arr = a2; + } else + a2.clear(); + SpecificData.Array> ga2 = (a2 instanceof SpecificData.Array + ? (SpecificData.Array>) a2 + : null); + for (; 0 < size2; size2 = in.arrayNext()) { + for (; size2 != 0; size2--) { + java.util.List e2 = (ga2 != null ? ga2.peek() : null); + long size3 = in.readArrayStart(); + java.util.List a3 = e2; + if (a3 == null) { + a3 = new SpecificData.Array((int) size3, + SCHEMA$.getField("nested_arr").schema().getElementType()); + e2 = a3; + } else + a3.clear(); + SpecificData.Array ga3 = (a3 instanceof SpecificData.Array ? (SpecificData.Array) a3 + : null); + for (; 0 < size3; size3 = in.arrayNext()) { + for (; size3 != 0; size3--) { + String e3 = (ga3 != null ? 
ga3.peek() : null); + e3 = in.readString(); + a3.add(e3); + } + } + a2.add(e2); + } + } + break; + + case 3: + long size4 = in.readMapStart(); + java.util.Map> m4 = this.nested_map; // Need fresh name due to limitation + // of macro system + if (m4 == null) { + m4 = new java.util.HashMap>((int) size4); + this.nested_map = m4; + } else + m4.clear(); + for (; 0 < size4; size4 = in.mapNext()) { + for (; size4 != 0; size4--) { + String k4 = null; + k4 = in.readString(); + java.util.Map v4 = null; + long size5 = in.readMapStart(); + java.util.Map m5 = v4; // Need fresh name due to limitation of macro system + if (m5 == null) { + m5 = new java.util.HashMap((int) size5); + v4 = m5; + } else + m5.clear(); + for (; 0 < size5; size5 = in.mapNext()) { + for (; size5 != 0; size5--) { + String k5 = null; + k5 = in.readString(); + Long v5 = null; + v5 = in.readLong(); + m5.put(k5, v5); + } + } + m4.put(k4, v4); + } + } + break; + + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); + } + } + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java index 12d8ddbcc83..5c8cad85331 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java @@ -18,7 +18,7 @@ package org.apache.avro.specific; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -35,8 +35,8 @@ import org.apache.avro.io.DatumWriter; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /* * If integerClass is primitive, reflection to find method will @@ -47,7 +47,7 @@ public class TestSpecificData { private Class 
intClass; private Class integerClass; - @Before + @BeforeEach public void setUp() { Schema intSchema = Schema.create(Type.INT); intClass = SpecificData.get().getClass(intSchema); @@ -57,29 +57,33 @@ public void setUp() { } @Test - public void testClassTypes() { + void classTypes() { assertTrue(intClass.isPrimitive()); assertFalse(integerClass.isPrimitive()); } @Test - public void testPrimitiveParam() throws Exception { + void primitiveParam() throws Exception { assertNotNull(Reflection.class.getMethod("primitive", intClass)); } - @Test(expected = NoSuchMethodException.class) - public void testPrimitiveParamError() throws Exception { - Reflection.class.getMethod("primitiveWrapper", intClass); + @Test + void primitiveParamError() throws Exception { + assertThrows(NoSuchMethodException.class, () -> { + Reflection.class.getMethod("primitiveWrapper", intClass); + }); } @Test - public void testPrimitiveWrapperParam() throws Exception { + void primitiveWrapperParam() throws Exception { assertNotNull(Reflection.class.getMethod("primitiveWrapper", integerClass)); } - @Test(expected = NoSuchMethodException.class) - public void testPrimitiveWrapperParamError() throws Exception { - Reflection.class.getMethod("primitive", integerClass); + @Test + void primitiveWrapperParamError() throws Exception { + assertThrows(NoSuchMethodException.class, () -> { + Reflection.class.getMethod("primitive", integerClass); + }); } static class Reflection { @@ -136,7 +140,7 @@ public Schema getSchema() { } @Test - public void testSpecificRecordBase() { + void specificRecordBase() { final TestRecord record = new TestRecord(); record.put("x", 1); record.put("y", "str"); @@ -145,7 +149,7 @@ public void testSpecificRecordBase() { } @Test - public void testExternalizeable() throws Exception { + void externalizeable() throws Exception { final TestRecord before = new TestRecord(); before.put("x", 1); before.put("y", "str"); @@ -162,7 +166,7 @@ public void testExternalizeable() throws Exception { /** 
Tests that non Stringable datum are rejected by specific writers. */ @Test - public void testNonStringable() throws Exception { + void nonStringable() throws Exception { final Schema string = Schema.create(Type.STRING); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final Encoder encoder = EncoderFactory.get().directBinaryEncoder(baos, null); @@ -174,4 +178,16 @@ public void testNonStringable() throws Exception { // Expected error } } + + @Test + void classNameContainingReservedWords() { + final Schema schema = Schema.createRecord("AnyName", null, "db.public.table", false); + + assertEquals("db.public$.table.AnyName", SpecificData.getClassName(schema)); + } + + @Test + void testCanGetClassOfMangledType() { + assertEquals("org.apache.avro.specific.int$", SpecificData.getClassName(int$.getClassSchema())); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java new file mode 100644 index 00000000000..3c10b74cde1 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificDatumReader.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.specific; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.Utf8; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestSpecificDatumReader { + + @Test + void readMyData() throws IOException { + // Check that method newInstanceFromString from SpecificDatumReader extension is + // called. + final EncoderFactory e_factory = new EncoderFactory().configureBufferSize(30); + final DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(30); + + final MyReader reader = new MyReader(); + reader.setExpected(Schema.create(Schema.Type.STRING)); + reader.setSchema(Schema.create(Schema.Type.STRING)); + + final ByteArrayOutputStream out = new ByteArrayOutputStream(30); + final BinaryEncoder encoder = e_factory.binaryEncoder(out, null); + encoder.writeString(new Utf8("Hello")); + encoder.flush(); + + final BinaryDecoder decoder = factory.binaryDecoder(out.toByteArray(), null); + reader.getData().setFastReaderEnabled(false); + final MyData read = reader.read(null, decoder); + Assertions.assertNotNull(read, "MyReader.newInstanceFromString was not called"); + Assertions.assertEquals("Hello", read.getContent()); + } + + public static class MyData { + private final String content; + + public MyData(String content) { + this.content = content; + } + + public String getContent() { + return content; + } + } + + public static class MyReader extends SpecificDatumReader { + + @Override + protected Class findStringClass(Schema schema) { + return MyData.class; + } + + @Override + protected Object newInstanceFromString(Class c, String s) { + return new 
MyData(s); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java new file mode 100644 index 00000000000..e64b3f4c220 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.specific; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; + +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.Decoder; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; + +import static org.junit.Assert.assertEquals; + +public class TestSpecificRecordWithUnion { + @Test + void unionLogicalDecimalConversion() throws IOException { + final TestUnionRecord record = TestUnionRecord.newBuilder().setAmount(BigDecimal.ZERO).build(); + final Schema schema = SchemaBuilder.unionOf().nullType().and().type(record.getSchema()).endUnion(); + + byte[] recordBytes = serializeRecord( + "{ \"org.apache.avro.specific.TestUnionRecord\": { \"amount\": { \"bytes\": \"\\u0000\" } } }", schema); + + SpecificDatumReader specificDatumReader = new SpecificDatumReader<>(schema); + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(recordBytes); + Decoder decoder = DecoderFactory.get().binaryDecoder(byteArrayInputStream, null); + final SpecificRecord deserialized = specificDatumReader.read(null, decoder); + assertEquals(record, deserialized); + } + + public static byte[] serializeRecord(String value, Schema schema) throws IOException { + DatumReader reader = new GenericDatumReader<>(schema); + Object object = reader.read(null, DecoderFactory.get().jsonDecoder(schema, value)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(out, null); + DatumWriter writer = new GenericDatumWriter<>(schema); + writer.write(object, encoder); + 
encoder.flush(); + byte[] bytes = out.toByteArray(); + out.close(); + return bytes; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java index 9bf40059bf4..f81dde37407 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificToFromByteArray.java @@ -20,9 +20,12 @@ import org.apache.avro.Conversions; import org.apache.avro.LogicalTypes; import org.apache.avro.message.MissingSchemaException; -import org.junit.Test; - +import org.junit.jupiter.api.Test; import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + import java.math.BigDecimal; import java.nio.ByteBuffer; import java.time.Instant; @@ -30,12 +33,10 @@ import java.time.LocalTime; import java.time.temporal.ChronoUnit; -import static org.junit.Assert.assertEquals; - public class TestSpecificToFromByteArray { @Test - public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { + void specificToFromByteBufferWithLogicalTypes() throws IOException { // Java 9+ comes with NANO precision and since we encode it using millis // precision // Therefore we won't want to have NANOs in the input @@ -43,7 +44,7 @@ public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { Instant instant = Instant.now().truncatedTo(ChronoUnit.MILLIS); final TestRecordWithLogicalTypes record = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, null, - LocalDate.now(), t, instant, new BigDecimal("123.45")); + LocalDate.now(), t, instant, new BigDecimal("123.45"), new BigDecimal(-23.456562323)); final ByteBuffer b = record.toByteBuffer(); final TestRecordWithLogicalTypes copy = TestRecordWithLogicalTypes.fromByteBuffer(b); @@ -52,7 +53,7 @@ 
public void testSpecificToFromByteBufferWithLogicalTypes() throws IOException { } @Test - public void testSpecificToFromByteBufferWithoutLogicalTypes() throws IOException { + void specificToFromByteBufferWithoutLogicalTypes() throws IOException { final TestRecordWithoutLogicalTypes record = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, System.currentTimeMillis(), new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); @@ -63,23 +64,28 @@ public void testSpecificToFromByteBufferWithoutLogicalTypes() throws IOException assertEquals(record, copy); } - @Test(expected = MissingSchemaException.class) - public void testSpecificByteArrayIncompatibleWithLogicalTypes() throws IOException { - final TestRecordWithoutLogicalTypes withoutLogicalTypes = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, - 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, - System.currentTimeMillis(), - new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); + @Test + void specificByteArrayIncompatibleWithLogicalTypes() throws IOException { + assertThrows(MissingSchemaException.class, () -> { + final TestRecordWithoutLogicalTypes withoutLogicalTypes = new TestRecordWithoutLogicalTypes(true, 34, 35L, 3.14F, + 3019.34, null, (int) System.currentTimeMillis() / 1000, (int) System.currentTimeMillis() / 1000, + System.currentTimeMillis(), + new Conversions.DecimalConversion().toBytes(new BigDecimal("123.45"), null, LogicalTypes.decimal(9, 2))); - final ByteBuffer b = withoutLogicalTypes.toByteBuffer(); - TestRecordWithLogicalTypes.fromByteBuffer(b); + final ByteBuffer b = withoutLogicalTypes.toByteBuffer(); + TestRecordWithLogicalTypes.fromByteBuffer(b); + }); } - @Test(expected = MissingSchemaException.class) - public void 
testSpecificByteArrayIncompatibleWithoutLogicalTypes() throws IOException { - final TestRecordWithLogicalTypes withLogicalTypes = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, - null, LocalDate.now(), LocalTime.now(), Instant.now(), new BigDecimal("123.45")); + @Test + void specificByteArrayIncompatibleWithoutLogicalTypes() throws IOException { + assertThrows(MissingSchemaException.class, () -> { + final TestRecordWithLogicalTypes withLogicalTypes = new TestRecordWithLogicalTypes(true, 34, 35L, 3.14F, 3019.34, + null, LocalDate.now(), LocalTime.now(), Instant.now(), new BigDecimal("123.45"), + new BigDecimal(-23.456562323)); - final ByteBuffer b = withLogicalTypes.toByteBuffer(); - TestRecordWithoutLogicalTypes.fromByteBuffer(b); + final ByteBuffer b = withLogicalTypes.toByteBuffer(); + TestRecordWithoutLogicalTypes.fromByteBuffer(b); + }); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java new file mode 100644 index 00000000000..57c86a96ffc --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestUnionRecord.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.specific; + +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.SchemaStore; + +@SuppressWarnings("all") +@AvroGenerated +public class TestUnionRecord extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = -3829374192747523457L; + + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse( + "{\"type\":\"record\",\"name\":\"TestUnionRecord\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[{\"name\":\"amount\",\"type\":[\"null\",{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":31,\"scale\":8}],\"default\":null}]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + static { + MODEL$.addLogicalTypeConversion(new org.apache.avro.Conversions.DecimalConversion()); + } + + private static final BinaryMessageEncoder ENCODER = new BinaryMessageEncoder(MODEL$, + SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder(MODEL$, + SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. 
+ * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this TestUnionRecord to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a TestUnionRecord from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a TestUnionRecord instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static TestUnionRecord fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + private java.math.BigDecimal amount; + + /** + * Default constructor. Note that this does not initialize fields to their + * default values from the schema. If that is desired then one should use + * newBuilder(). + */ + public TestUnionRecord() { + } + + /** + * All-args constructor. + * + * @param amount The new value for amount + */ + public TestUnionRecord(java.math.BigDecimal amount) { + this.amount = amount; + } + + @Override + public SpecificData getSpecificData() { + return MODEL$; + } + + @Override + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. + @Override + public Object get(int field$) { + switch (field$) { + case 0: + return amount; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. 
+ @Override + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + case 0: + amount = (java.math.BigDecimal) value$; + break; + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Gets the value of the 'amount' field. + * + * @return The value of the 'amount' field. + */ + public java.math.BigDecimal getAmount() { + return amount; + } + + /** + * Sets the value of the 'amount' field. + * + * @param value the value to set. + */ + public void setAmount(java.math.BigDecimal value) { + this.amount = value; + } + + /** + * Creates a new TestUnionRecord RecordBuilder. + * + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new TestUnionRecord RecordBuilder by copying an existing Builder. + * + * @param other The existing builder to copy. + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new TestUnionRecord RecordBuilder by copying an existing + * TestUnionRecord instance. + * + * @param other The existing instance to copy. + * @return A new TestUnionRecord RecordBuilder + */ + public static Builder newBuilder(TestUnionRecord other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for TestUnionRecord instances. + */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + private java.math.BigDecimal amount; + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. 
+ */ + private Builder(Builder other) { + super(other); + if (isValidValue(fields()[0], other.amount)) { + this.amount = data().deepCopy(fields()[0].schema(), other.amount); + fieldSetFlags()[0] = other.fieldSetFlags()[0]; + } + } + + /** + * Creates a Builder by copying an existing TestUnionRecord instance + * + * @param other The existing instance to copy. + */ + private Builder(TestUnionRecord other) { + super(SCHEMA$, MODEL$); + if (isValidValue(fields()[0], other.amount)) { + this.amount = data().deepCopy(fields()[0].schema(), other.amount); + fieldSetFlags()[0] = true; + } + } + + /** + * Gets the value of the 'amount' field. + * + * @return The value. + */ + public java.math.BigDecimal getAmount() { + return amount; + } + + /** + * Sets the value of the 'amount' field. + * + * @param value The value of 'amount'. + * @return This builder. + */ + public Builder setAmount(java.math.BigDecimal value) { + validate(fields()[0], value); + this.amount = value; + fieldSetFlags()[0] = true; + return this; + } + + /** + * Checks whether the 'amount' field has been set. + * + * @return True if the 'amount' field has been set, false otherwise. + */ + public boolean hasAmount() { + return fieldSetFlags()[0]; + } + + /** + * Clears the value of the 'amount' field. + * + * @return This builder. + */ + public Builder clearAmount() { + amount = null; + fieldSetFlags()[0] = false; + return this; + } + + @Override + @SuppressWarnings("unchecked") + public TestUnionRecord build() { + try { + TestUnionRecord record = new TestUnionRecord(); + record.amount = fieldSetFlags()[0] ? 
this.amount : (java.math.BigDecimal) defaultValue(fields()[0]); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java b/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java new file mode 100644 index 00000000000..586d4219124 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/int$.java @@ -0,0 +1,227 @@ +/** + * Autogenerated by Avro + * + * DO NOT EDIT DIRECTLY + */ +package org.apache.avro.specific; + +import org.apache.avro.message.BinaryMessageDecoder; +import org.apache.avro.message.BinaryMessageEncoder; +import org.apache.avro.message.SchemaStore; + +@AvroGenerated +public class int$ extends SpecificRecordBase implements SpecificRecord { + private static final long serialVersionUID = 3003385205621277651L; + + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser() + .parse("{\"type\":\"record\",\"name\":\"int\",\"namespace\":\"org.apache.avro.specific\",\"fields\":[]}"); + + public static org.apache.avro.Schema getClassSchema() { + return SCHEMA$; + } + + private static final SpecificData MODEL$ = new SpecificData(); + + private static final BinaryMessageEncoder 
ENCODER = new BinaryMessageEncoder<>(MODEL$, SCHEMA$); + + private static final BinaryMessageDecoder DECODER = new BinaryMessageDecoder<>(MODEL$, SCHEMA$); + + /** + * Return the BinaryMessageEncoder instance used by this class. + * + * @return the message encoder used by this class + */ + public static BinaryMessageEncoder getEncoder() { + return ENCODER; + } + + /** + * Return the BinaryMessageDecoder instance used by this class. + * + * @return the message decoder used by this class + */ + public static BinaryMessageDecoder getDecoder() { + return DECODER; + } + + /** + * Create a new BinaryMessageDecoder instance for this class that uses the + * specified {@link SchemaStore}. + * + * @param resolver a {@link SchemaStore} used to find schemas by fingerprint + * @return a BinaryMessageDecoder instance for this class backed by the given + * SchemaStore + */ + public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { + return new BinaryMessageDecoder<>(MODEL$, SCHEMA$, resolver); + } + + /** + * Serializes this int to a ByteBuffer. + * + * @return a buffer holding the serialized data for this instance + * @throws java.io.IOException if this instance could not be serialized + */ + public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { + return ENCODER.encode(this); + } + + /** + * Deserializes a int from a ByteBuffer. + * + * @param b a byte buffer holding serialized data for an instance of this class + * @return a int instance decoded from the given buffer + * @throws java.io.IOException if the given bytes could not be deserialized into + * an instance of this class + */ + public static int$ fromByteBuffer(java.nio.ByteBuffer b) throws java.io.IOException { + return DECODER.decode(b); + } + + public SpecificData getSpecificData() { + return MODEL$; + } + + public org.apache.avro.Schema getSchema() { + return SCHEMA$; + } + + // Used by DatumWriter. Applications should not call. 
+ public Object get(int field$) { + switch (field$) { + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + // Used by DatumReader. Applications should not call. + @SuppressWarnings(value = "unchecked") + public void put(int field$, Object value$) { + switch (field$) { + default: + throw new IndexOutOfBoundsException("Invalid index: " + field$); + } + } + + /** + * Creates a new int$ RecordBuilder. + * + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder() { + return new Builder(); + } + + /** + * Creates a new int$ RecordBuilder by copying an existing Builder. + * + * @param other The existing builder to copy. + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder(Builder other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * Creates a new int$ RecordBuilder by copying an existing int$ instance. + * + * @param other The existing instance to copy. + * @return A new int$ RecordBuilder + */ + public static Builder newBuilder(int$ other) { + if (other == null) { + return new Builder(); + } else { + return new Builder(other); + } + } + + /** + * RecordBuilder for int$ instances. + */ + @AvroGenerated + public static class Builder extends SpecificRecordBuilderBase + implements org.apache.avro.data.RecordBuilder { + + /** Creates a new Builder */ + private Builder() { + super(SCHEMA$, MODEL$); + } + + /** + * Creates a Builder by copying an existing Builder. + * + * @param other The existing Builder to copy. + */ + private Builder(Builder other) { + super(other); + } + + /** + * Creates a Builder by copying an existing int$ instance + * + * @param other The existing instance to copy. 
+ */ + private Builder(int$ other) { + super(SCHEMA$, MODEL$); + } + + @Override + @SuppressWarnings("unchecked") + public int$ build() { + try { + int$ record = new int$(); + return record; + } catch (org.apache.avro.AvroMissingFieldException e) { + throw e; + } catch (Exception e) { + throw new org.apache.avro.AvroRuntimeException(e); + } + } + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumWriter WRITER$ = (org.apache.avro.io.DatumWriter) MODEL$ + .createDatumWriter(SCHEMA$); + + @Override + public void writeExternal(java.io.ObjectOutput out) throws java.io.IOException { + WRITER$.write(this, SpecificData.getEncoder(out)); + } + + @SuppressWarnings("unchecked") + private static final org.apache.avro.io.DatumReader READER$ = (org.apache.avro.io.DatumReader) MODEL$ + .createDatumReader(SCHEMA$); + + @Override + public void readExternal(java.io.ObjectInput in) throws java.io.IOException { + READER$.read(this, SpecificData.getDecoder(in)); + } + + @Override + protected boolean hasCustomCoders() { + return true; + } + + @Override + public void customEncode(org.apache.avro.io.Encoder out) throws java.io.IOException { + } + + @Override + public void customDecode(org.apache.avro.io.ResolvingDecoder in) throws java.io.IOException { + org.apache.avro.Schema.Field[] fieldOrder = in.readFieldOrderIfDiff(); + if (fieldOrder == null) { + } else { + for (int i = 0; i < 0; i++) { + switch (fieldOrder[i].pos()) { + default: + throw new java.io.IOException("Corrupt ResolvingDecoder."); + } + } + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java index 3c8ef0ce7d7..5cc695766d7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestCaseFinder.java @@ -17,35 +17,24 @@ */ package org.apache.avro.util; -import static org.junit.Assert.assertTrue; 
+import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.BufferedReader; import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; -import org.junit.Test; -import org.junit.experimental.runners.Enclosed; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Enclosed.class) public class TestCaseFinder { - @RunWith(Parameterized.class) + @Nested public static class SimpleCases { - String input, label; - List expectedOutput; - - public SimpleCases(String input, String label, Object[][] ex) { - this.input = input; - this.label = label; - this.expectedOutput = Arrays.asList(ex); - } - @Parameters public static List cases() { List result = new ArrayList<>(); result.add(new Object[] { "", "foo", new Object[][] {} }); @@ -70,37 +59,49 @@ public static List cases() { return result; } - @Test - public void testOutput() throws Exception { + @ParameterizedTest + @MethodSource("cases") + void output(String input, String label, Object[][] ex) throws Exception { List result = new ArrayList<>(); CaseFinder.find(mk(input), label, result); - assertTrue(pr(result), eq(result, expectedOutput)); + List expectedOutput = Arrays.asList(ex); + assertTrue(eq(result, expectedOutput), pr(result)); } } - public static class NonParameterized { - @Test(expected = java.lang.IllegalArgumentException.class) - public void testBadDocLabel1() throws Exception { - List result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new 
ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< result = new ArrayList<>(); - CaseFinder.find(mk("< { + List result = new ArrayList<>(); + CaseFinder.find(mk("< writer = new DataFileWriter(genericData.createDatumWriter(schema))) { + writer.create(schema, file); + for (Object datum : new RandomData(genericData, schema, this.count, seed)) { + writer.append(datum); + } + } + } + + private void checkRead(GenericData genericData, Schema schema) throws IOException { + // noinspection unchecked + try (DataFileReader reader = new DataFileReader(file, genericData.createDatumReader(schema))) { + for (Object expected : new RandomData(genericData, schema, this.count, seed)) { + assertEquals(expected, reader.next()); + } + } + } + + /* + * Test classes: they implement the same schema, but one is a SpecificRecord and + * the other uses a reflected schema. + */ + + public static final String TEST_SCHEMA_JSON = "{\"type\":\"record\",\"name\":\"Record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}"; + + public static final Schema TEST_SCHEMA = new Schema.Parser().parse(TEST_SCHEMA_JSON); + + public static class SpecificTestRecord extends SpecificRecordBase { + public static final Schema SCHEMA$ = new Schema.Parser().parse(TEST_SCHEMA_JSON.replace("\"name\":\"Record\"", + "\"name\":\"" + SpecificTestRecord.class.getCanonicalName() + "\"")); + private int x; + private String y; + + @Override + public Schema getSchema() { + return SCHEMA$; + } + + @Override + public void put(int i, Object v) { + switch (i) { + case 0: + x = (Integer) v; + break; + case 1: + y = (String) v; + break; + default: + throw new RuntimeException(); + } + } + + @Override + public Object get(int i) { + switch (i) { + case 0: + return x; + case 1: + return y; + } + throw new RuntimeException(); + } + } + + public static class ReflectTestRecord { + private int x; + private String 
y; + + public int getX() { + return x; + } + + public void setX(int x) { + this.x = x; + } + + public String getY() { + return y; + } + + public void setY(String y) { + this.y = y; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ReflectTestRecord that = (ReflectTestRecord) o; + return x == that.x && Objects.equals(y, that.y); + } + + @Override + public int hashCode() { + return Objects.hash(x, y); + } + + @Override + public String toString() { + return String.format("{\"x\": %d, \"y\": \"%s\"}", x, y); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java new file mode 100644 index 00000000000..1a340b7fa5d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.Test; + +import java.io.IOException; + +public class TestSchemaResolver { + + @Test + public void testResolving() throws IOException { + // Path testIdl = Paths.get(".", "src", "test", "idl", + // "cycle.avdl").toAbsolutePath(); + // IdlReader parser = new IdlReader(); + // IdlFile idlFile = parser.parse(testIdl); + // Protocol protocol = idlFile.getProtocol(); + // System.out.println(protocol); + // Assert.assertEquals(5, protocol.getTypes().size()); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError1() { + // No "org.apache.avro.idl.unresolved.name" property + Schema s = SchemaBuilder.record("R").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError2() { + // No "UnresolvedSchema" property + Schema s = SchemaBuilder.record("R").prop("org.apache.avro.idl.unresolved.name", "x").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError3() { + // Namespace not "org.apache.avro.compiler". 
+ Schema s = SchemaBuilder.record("UnresolvedSchema").prop("org.apache.avro.idl.unresolved.name", "x").fields() + .endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetUnresolvedSchemaNameError() { + Schema s = SchemaBuilder.fixed("a").size(10); + SchemaResolver.getUnresolvedSchemaName(s); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java new file mode 100644 index 00000000000..6609819299c --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.apache.avro.Schema; +import org.junit.Assert; +import org.junit.Test; + +public class TestSchemas { + + private static class TestVisitor implements SchemaVisitor { + StringBuilder sb = new StringBuilder(); + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + String n = nonTerminal.getName(); + sb.append(n).append('.'); + if (n.startsWith("t")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("ss")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("st")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + sb.append("!"); + String n = nonTerminal.getName(); + if (n.startsWith("ct")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("css")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("cst")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public String get() { + return sb.toString(); + } + } + + @Test + public void testVisit1() { + String s1 = "{\"type\": \"record\", \"name\": \"t1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("t1.", Schemas.visit(new Schema.Parser().parse(s1), new 
TestVisitor())); + } + + @Test + public void testVisit2() { + String s2 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("c1.\"int\"!", Schemas.visit(new Schema.Parser().parse(s2), new TestVisitor())); + + } + + @Test + public void testVisit3() { + String s3 = "{\"type\": \"record\", \"name\": \"ss1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("ss1.", Schemas.visit(new Schema.Parser().parse(s3), new TestVisitor())); + + } + + @Test + public void testVisit4() { + String s4 = "{\"type\": \"record\", \"name\": \"st1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("st1.!", Schemas.visit(new Schema.Parser().parse(s4), new TestVisitor())); + + } + + @Test + public void testVisit5() { + String s5 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\"!\"long\"!", Schemas.visit(new Schema.Parser().parse(s5), new TestVisitor())); + + } + + @Test + public void testVisit6() { + String s6 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ss2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ss2.!", Schemas.visit(new Schema.Parser().parse(s6), new TestVisitor())); + + } + + @Test + public void testVisit7() { + String s7 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"css2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.css2.\"int\"!!", 
Schemas.visit(new Schema.Parser().parse(s7), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit8() { + String s8 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"cst2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(new Schema.Parser().parse(s8), new TestVisitor()); + } + + @Test + public void testVisit9() { + String s9 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\"!", Schemas.visit(new Schema.Parser().parse(s9), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit10() { + String s10 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(new Schema.Parser().parse(s10), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + return SchemaVisitorAction.SKIP_SUBTREE; + } + }); + } + + @Test + public void testVisit11() { + String s11 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"},{\"name\": \"f12\", \"type\": \"double\"}" + "]}}," + + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\".!\"long\".!", Schemas.visit(new Schema.Parser().parse(s11), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + 
sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } + + @Test + public void testVisit12() { + String s12 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\".", Schemas.visit(new Schema.Parser().parse(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.TERMINATE; + } + })); + } + + @Test + public void testVisit13() { + String s12 = "{\"type\": \"int\"}"; + Assert.assertEquals("\"int\".", Schemas.visit(new Schema.Parser().parse(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java index 918465a725c..e0977ff9f96 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java @@ -19,9 +19,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -30,11 +28,13 @@ import java.io.ObjectOutputStream; import java.nio.charset.StandardCharsets; -import org.junit.Test; +import org.apache.avro.SystemLimitException; +import org.apache.avro.TestSystemLimitException; +import org.junit.jupiter.api.Test; public class TestUtf8 { @Test - 
public void testByteConstructor() throws Exception { + void byteConstructor() throws Exception { byte[] bs = "Foo".getBytes(StandardCharsets.UTF_8); Utf8 u = new Utf8(bs); assertEquals(bs.length, u.getByteLength()); @@ -44,7 +44,7 @@ public void testByteConstructor() throws Exception { } @Test - public void testArrayReusedWhenLargerThanRequestedSize() { + void arrayReusedWhenLargerThanRequestedSize() { byte[] bs = "55555".getBytes(StandardCharsets.UTF_8); Utf8 u = new Utf8(bs); assertEquals(5, u.getByteLength()); @@ -58,7 +58,7 @@ public void testArrayReusedWhenLargerThanRequestedSize() { } @Test - public void testHashCodeReused() { + void hashCodeReused() { assertEquals(97, new Utf8("a").hashCode()); assertEquals(3904, new Utf8("zz").hashCode()); assertEquals(122, new Utf8("z").hashCode()); @@ -99,7 +99,27 @@ public void testHashCodeReused() { } @Test - public void testSerialization() throws IOException, ClassNotFoundException { + void oversizeUtf8() { + Utf8 u = new Utf8(); + u.setByteLength(1024); + assertEquals(1024, u.getByteLength()); + assertThrows(UnsupportedOperationException.class, + () -> u.setByteLength(TestSystemLimitException.MAX_ARRAY_VM_LIMIT + 1)); + + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(1000L)); + TestSystemLimitException.resetLimits(); + + Exception ex = assertThrows(SystemLimitException.class, () -> u.setByteLength(1024)); + assertEquals("String length 1024 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + TestSystemLimitException.resetLimits(); + } + } + + @Test + void serialization() throws IOException, ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oos = new ObjectOutputStream(bos)) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java new file mode 
100644 index 00000000000..cd9809be414 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TimePeriodTest.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Test; + +import java.time.DateTimeException; +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.Period; +import java.time.chrono.IsoChronology; +import java.time.chrono.JapaneseChronology; +import java.time.temporal.ChronoUnit; +import java.time.temporal.Temporal; +import java.time.temporal.TemporalAmount; +import java.time.temporal.TemporalUnit; +import java.time.temporal.UnsupportedTemporalTypeException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static java.time.temporal.ChronoUnit.DAYS; +import static java.time.temporal.ChronoUnit.ERAS; +import static java.time.temporal.ChronoUnit.MICROS; +import static java.time.temporal.ChronoUnit.MILLIS; +import static java.time.temporal.ChronoUnit.MONTHS; +import static java.time.temporal.ChronoUnit.NANOS; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TimePeriodTest { + // This Long is too large to fit into an unsigned int. + private static final long TOO_LARGE = Integer.MAX_VALUE * 3L; + + @Test + void validateConstruction() { + TimePeriod timePeriod = TimePeriod.of(12, 34, 56); + assertSame(timePeriod, TimePeriod.from(timePeriod)); + assertComponents(12, 34, 56, timePeriod); + + assertComponents(14, 3, 0, TimePeriod.from(IsoChronology.INSTANCE.period(1, 2, 3))); + + assertComponents(36_000, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.MILLENNIA, 3))); + assertComponents(3_600, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.CENTURIES, 3))); + assertComponents(360, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.DECADES, 3))); + assertComponents(36, 0, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.YEARS, 3))); + assertComponents(3, 0, 0, TimePeriod.from(TimeAmount.of(MONTHS, 3))); + + assertComponents(0, 21, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.WEEKS, 3))); + assertComponents(0, 3, 0, TimePeriod.from(TimeAmount.of(DAYS, 3))); + assertComponents(0, 2, 0, TimePeriod.from(TimeAmount.of(ChronoUnit.HALF_DAYS, 4))); + assertComponents(0, 2, 43_200_000, TimePeriod.from(TimeAmount.of(ChronoUnit.HALF_DAYS, 5))); + + assertComponents(0, 0, 10_800_000, TimePeriod.from(TimeAmount.of(ChronoUnit.HOURS, 3))); + assertComponents(0, 0, 180_000, TimePeriod.from(TimeAmount.of(ChronoUnit.MINUTES, 3))); + assertComponents(0, 0, 3_000, TimePeriod.from(TimeAmount.of(ChronoUnit.SECONDS, 3))); + assertComponents(0, 0, 3, TimePeriod.from(TimeAmount.of(MILLIS, 3))); + assertComponents(0, 0, 3, TimePeriod.from(TimeAmount.of(MICROS, 3_000))); + assertComponents(0, 0, 3, 
TimePeriod.from(TimeAmount.of(NANOS, 3_000_000))); + + // Micros and nanos must be a multiple of milliseconds + assertThrows(DateTimeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.MICROS, 3))); + assertThrows(DateTimeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.NANOS, 3))); + // Unsupported cases (null, non-ISO chronology, unknown temporal unit, + // non-ChronoUnit) + assertThrows(NullPointerException.class, () -> TimePeriod.from(null)); + assertThrows(DateTimeException.class, () -> TimePeriod.from(JapaneseChronology.INSTANCE.period(1, 2, 3))); + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.from(TimeAmount.of(ChronoUnit.ERAS, 1))); + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.from(TimeAmount.of(DummyUnit.INSTANCE, 3))); + // Arguments are long, but must fit an unsigned int + assertThrows(ArithmeticException.class, () -> TimePeriod.of(TOO_LARGE, 0, 0)); + assertThrows(ArithmeticException.class, () -> TimePeriod.of(0, TOO_LARGE, 0)); + assertThrows(ArithmeticException.class, () -> TimePeriod.of(0, 0, TOO_LARGE)); + + // Odd one out: querying an unsupported temporal unit + // (assertComponents handles all valid cases) + assertThrows(UnsupportedTemporalTypeException.class, () -> TimePeriod.of(1, 1, 1).get(ERAS)); + } + + @Test + void checkConversionsFromJavaTime() { + assertEquals(TimePeriod.of(12, 0, 0), TimePeriod.from(Period.ofYears(1))); + assertEquals(TimePeriod.of(2, 0, 0), TimePeriod.from(Period.ofMonths(2))); + assertEquals(TimePeriod.of(0, 21, 0), TimePeriod.from(Period.ofWeeks(3))); + assertEquals(TimePeriod.of(0, 4, 0), TimePeriod.from(Period.ofDays(4))); + + assertEquals(TimePeriod.of(0, 0, 1), TimePeriod.from(Duration.ofNanos(1_000_000))); + assertEquals(TimePeriod.of(0, 0, 2), TimePeriod.from(Duration.ofMillis(2))); + assertEquals(TimePeriod.of(0, 0, 3_000), TimePeriod.from(Duration.ofSeconds(3))); + assertEquals(TimePeriod.of(0, 0, 240000),
TimePeriod.from(Duration.ofMinutes(4))); + assertEquals(TimePeriod.of(0, 0, 18000000), TimePeriod.from(Duration.ofHours(5))); + // Duration never takes into account things like daylight saving + assertEquals(TimePeriod.of(0, 0, 518400000), TimePeriod.from(Duration.ofDays(6))); + } + + @Test + void checkConversionsToJavaTime() { + TimePeriod months = TimePeriod.of(1, 0, 0); + TimePeriod days = TimePeriod.of(0, 2, 0); + TimePeriod time = TimePeriod.of(0, 0, 3); + TimePeriod all = TimePeriod.of(1, 2, 3); + + assertTrue(months.isDateBased()); + assertTrue(days.isDateBased()); + assertFalse(all.isDateBased()); + assertFalse(time.isDateBased()); + + assertEquals(Period.of(0, 1, 0), months.toPeriod()); + assertEquals(Period.of(0, 0, 2), days.toPeriod()); + assertThrows(DateTimeException.class, all::toPeriod); + assertThrows(DateTimeException.class, time::toPeriod); + + assertThrows(DateTimeException.class, () -> TimePeriod.of(0, Integer.MAX_VALUE * 2L, 0).toPeriod()); + + assertFalse(months.isTimeBased()); + assertFalse(days.isTimeBased()); + assertFalse(all.isTimeBased()); + assertTrue(time.isTimeBased()); + + assertThrows(DateTimeException.class, months::toDuration); + // Note: though Duration supports this, it uses a fixed 86400 seconds + assertEquals(Duration.ofSeconds(172800), days.toDuration()); + assertThrows(DateTimeException.class, all::toDuration); + assertEquals(Duration.ofMillis(3), time.toDuration()); + } + + @Test + void checkAddingToTemporalItems() { + TimePeriod monthAndTwoDays = TimePeriod.of(1, 2, 0); + TimePeriod threeMillis = TimePeriod.of(0, 0, 3); + TimePeriod complexTimePeriod = TimePeriod.of(1, 2, 3); + + LocalDateTime localDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 7_000_000); + LocalDate localDate = LocalDate.of(2001, 2, 3); + LocalTime localTime = LocalTime.of(4, 5, 6, 7_000_000); + + assertEquals(localDateTime.plusMonths(1).plusDays(2), localDateTime.plus(monthAndTwoDays)); + assertEquals(localDateTime.plus(3, MILLIS), 
localDateTime.plus(threeMillis)); + assertEquals(localDateTime.plusMonths(1).plusDays(2).plus(3, MILLIS), localDateTime.plus(complexTimePeriod)); + + assertEquals(localDate.plusMonths(1).plusDays(2), localDate.plus(monthAndTwoDays)); + + assertEquals(localTime.plus(3, MILLIS), localTime.plus(threeMillis)); + + assertEquals(localDateTime.minusMonths(1).minusDays(2), localDateTime.minus(monthAndTwoDays)); + assertEquals(localDateTime.minus(3, MILLIS), localDateTime.minus(threeMillis)); + assertEquals(localDateTime.minusMonths(1).minusDays(2).minus(3, MILLIS), localDateTime.minus(complexTimePeriod)); + + assertEquals(localDate.minusMonths(1).minusDays(2), localDate.minus(monthAndTwoDays)); + + assertEquals(localTime.minus(3, MILLIS), localTime.minus(threeMillis)); + } + + @Test + void checkEqualityTests() { + TimePeriod timePeriod1a = TimePeriod.of(1, 2, 3); + TimePeriod timePeriod1b = TimePeriod.of(1, 2, 3); + TimePeriod timePeriod2 = TimePeriod.of(9, 9, 9); + TimePeriod timePeriod3 = TimePeriod.of(1, 9, 9); + TimePeriod timePeriod4 = TimePeriod.of(1, 2, 9); + + // noinspection EqualsWithItself + assertEquals(timePeriod1a, timePeriod1a); + assertEquals(timePeriod1a, timePeriod1b); + assertEquals(timePeriod1a.hashCode(), timePeriod1b.hashCode()); + + assertNotEquals(timePeriod1a, null); + // noinspection AssertBetweenInconvertibleTypes + assertNotEquals(timePeriod1a, "not equal"); + assertNotEquals(timePeriod1a, timePeriod2); + assertNotEquals(timePeriod1a.hashCode(), timePeriod2.hashCode()); + assertNotEquals(timePeriod1a, timePeriod3); + assertNotEquals(timePeriod1a.hashCode(), timePeriod3.hashCode()); + assertNotEquals(timePeriod1a, timePeriod4); + assertNotEquals(timePeriod1a.hashCode(), timePeriod4.hashCode()); + } + + @Test + void checkStringRepresentation() { + assertEquals("P0", TimePeriod.of(0, 0, 0).toString()); + assertEquals("P1Y", TimePeriod.of(12, 0, 0).toString()); + assertEquals("P2M", TimePeriod.of(2, 0, 0).toString()); + assertEquals("P3", 
TimePeriod.of(0, 3, 0).toString()); + assertEquals("P1Y2M3", TimePeriod.of(14, 3, 0).toString()); + assertEquals("PT04", TimePeriod.of(0, 0, 14400000).toString()); + assertEquals("PT00:05", TimePeriod.of(0, 0, 300000).toString()); + assertEquals("PT00:00:06", TimePeriod.of(0, 0, 6000).toString()); + assertEquals("PT00:00:00.007", TimePeriod.of(0, 0, 7).toString()); + assertEquals("P1Y2M3T04:05:06.007", TimePeriod.of(14, 3, 14706007).toString()); + + // Days and millis will never overflow to months/days, to respect differences + // in months and days (daylight saving). + assertEquals("P123T1193:02:47.295", TimePeriod.of(0, 123, 4294967295L).toString()); + } + + private void assertComponents(long months, long days, long millis, TimePeriod timePeriod) { + List<TemporalUnit> expectedUnits = new ArrayList<>(Arrays.asList(MONTHS, DAYS, MILLIS)); + if (months == 0) { + expectedUnits.remove(MONTHS); + } + if (days == 0) { + expectedUnits.remove(DAYS); + } + if (millis == 0) { + expectedUnits.remove(MILLIS); + } + assertEquals(expectedUnits, timePeriod.getUnits()); + + assertEquals(months, timePeriod.getMonths()); + assertEquals(months, timePeriod.get(MONTHS)); + assertEquals(days, timePeriod.getDays()); + assertEquals(days, timePeriod.get(DAYS)); + assertEquals(millis, timePeriod.getMillis()); + assertEquals(millis, timePeriod.get(MILLIS)); + } + + private static class TimeAmount implements TemporalAmount { + private final Map<TemporalUnit, Long> amountsPerUnit = new LinkedHashMap<>(); + + static TimeAmount of(TemporalUnit unit, long amount) { + return new TimeAmount().with(unit, amount); + } + + TimeAmount with(TemporalUnit unit, long amount) { + amountsPerUnit.put(unit, amount); + return this; + } + + @Override + public long get(TemporalUnit unit) { + return amountsPerUnit.get(unit); + } + + @Override + public List<TemporalUnit> getUnits() { + return new ArrayList<>(amountsPerUnit.keySet()); + } + + @Override + public Temporal addTo(Temporal temporal) { + throw new UnsupportedOperationException(); + } + + @Override +
public Temporal subtractFrom(Temporal temporal) { + throw new UnsupportedOperationException(); + } + } + + private static class DummyUnit implements TemporalUnit { + private static final DummyUnit INSTANCE = new DummyUnit(); + + @Override + public Duration getDuration() { + return null; + } + + @Override + public boolean isDurationEstimated() { + return false; + } + + @Override + public boolean isDateBased() { + return false; + } + + @Override + public boolean isTimeBased() { + return false; + } + + @Override + public <R extends Temporal> R addTo(R temporal, long amount) { + return null; + } + + @Override + public long between(Temporal temporal1Inclusive, Temporal temporal2Exclusive) { + return 0; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java new file mode 100644 index 00000000000..6c525e6d39a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/UtfTextUtilsTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@SuppressWarnings("SpellCheckingInspection") +class UtfTextUtilsTest { + @Test + void validateCharsetDetectionWithBOM() { + assertEquals("UTF-32", testDetection("0000FEFF").name()); + assertEquals("UTF-32", testDetection("FFFE0000").name()); + assertEquals("UTF-16", testDetection("FEFF0041").name()); + assertEquals("UTF-16", testDetection("FFFE4100").name()); + assertEquals("UTF-8", testDetection("EFBBBF41").name()); + + // Invalid UCS-4 encodings: these we're certain we cannot handle. + assertThrows(IllegalArgumentException.class, () -> testDetection("0000FFFE")); + assertThrows(IllegalArgumentException.class, () -> testDetection("FEFF0000")); + } + + @Test + void validateCharsetDetectionWithoutBOM() { + assertEquals("UTF-32BE", testDetection("00000041").name()); + assertEquals("UTF-32LE", testDetection("41000000").name()); + assertEquals("UTF-16BE", testDetection("00410042").name()); + assertEquals("UTF-16LE", testDetection("41004200").name()); + assertEquals("UTF-8", testDetection("41424344").name()); + + assertEquals("UTF-8", testDetection("414243").name()); + + assertEquals("UTF-16BE", testDetection("0041").name()); + assertEquals("UTF-16LE", testDetection("4100").name()); + assertEquals("UTF-8", testDetection("4142").name()); + + assertEquals("UTF-8", testDetection("41").name()); + + assertEquals("UTF-8", testDetection("").name()); + + // Invalid UCS-4 encodings: these we're fairly certain we cannot handle. 
+ assertThrows(IllegalArgumentException.class, () -> testDetection("00004100")); + assertThrows(IllegalArgumentException.class, () -> testDetection("00410000")); + } + + private Charset testDetection(String hexBytes) { + return UtfTextUtils.detectUtfCharset(hexBytes(hexBytes)); + } + + private static byte[] hexBytes(String hexBytes) { + byte[] bytes = new byte[hexBytes.length() / 2]; + for (int i = 0; i < bytes.length; i++) { + int index = i * 2; + bytes[i] = (byte) Integer.parseUnsignedInt(hexBytes.substring(index, index + 2), 16); + } + return bytes; + } + + @Test + void validateTextConversionFromBytes() { + assertEquals("A", UtfTextUtils.asString(hexBytes("EFBBBF41"), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.asString(hexBytes("EFBBBF41"), null)); + + assertEquals("A", UtfTextUtils.asString(hexBytes("41"), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.asString(hexBytes("41"), null)); + } + + @Test + void validateTextConversionFromStreams() throws IOException { + assertEquals("A", + UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("EFBBBF41")), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("EFBBBF41")), null)); + + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("41")), StandardCharsets.UTF_8)); + assertEquals("A", UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("41")), null)); + + // Invalid UCS-4 encoding should throw an IOException instead of an + // IllegalArgumentException. 
+ assertThrows(IOException.class, + () -> UtfTextUtils.readAllBytes(new ByteArrayInputStream(hexBytes("0000FFFE")), null)); + } + + @Test + void validateSupportForUnmarkableStreams() throws IOException { + assertEquals("ABCD", + UtfTextUtils.readAllBytes(new UnmarkableInputStream(new ByteArrayInputStream(hexBytes("41424344"))), null)); + } + + private static class UnmarkableInputStream extends FilterInputStream { + public UnmarkableInputStream(InputStream input) { + super(input); + } + + @Override + public synchronized void mark(int ignored) { + } + + @Override + public synchronized void reset() throws IOException { + throw new IOException("mark/reset not supported"); + } + + @Override + public boolean markSupported() { + return false; + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java new file mode 100644 index 00000000000..8ecda312032 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stress test that exercises the WeakIdentityHashMap class from many concurrent threads. + */ +class WeakIdentityHashMapTest { + + private static final int TEST_SIZE = 4001; + + List<String> data = new ArrayList<>(TEST_SIZE); + + final WeakIdentityHashMap<String, String> map = new WeakIdentityHashMap<>(); + + List<RuntimeException> exceptions = new ArrayList<>(TEST_SIZE); + + @Test + void stressMap() { + + for (int i = 1; i <= TEST_SIZE; i++) { + data.add("Data_" + i); + } + + List<Thread> threads = new ArrayList<>(80); + for (int i = 0; i <= 80; i++) { + final int seed = (i + 1) * 100; + Runnable runnable = () -> rundata(seed); + Thread t = new Thread(runnable); + threads.add(t); + } + threads.forEach(Thread::start); + threads.forEach((Thread t) -> { + try { + t.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + Assertions.assertTrue(exceptions.isEmpty()); + } + + void rundata(int seed) { + try { + for (int i = 1; i <= TEST_SIZE; i++) { + String keyValue = data.get((i + seed) % TEST_SIZE); + map.put(keyValue, keyValue); + if (i % 200 == 0) { + sleep(); + } + String keyValueRemove = data.get(((i + seed) * 3) % TEST_SIZE); + map.remove(keyValueRemove); + } + } catch (RuntimeException ex) { + exceptions.add(ex); + } + } + + void sleep() { + try { + Thread.sleep(5); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java index d2c92177dd4..c900be9e31b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestClassValueCache.java @@ -17,7 +17,7 @@ */ package org.apache.avro.util.internal; -import
org.junit.Test; +import org.junit.jupiter.api.Test; import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.MatcherAssert.assertThat; @@ -25,7 +25,7 @@ public class TestClassValueCache { @Test - public void testBasic() { + void basic() { ClassValueCache cache = new ClassValueCache<>(Class::toString); String fromCache = cache.apply(String.class); diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java index 12ac094d99e..4a272ae5b35 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/internal/TestJacksonUtils.java @@ -19,11 +19,12 @@ import static org.apache.avro.util.internal.JacksonUtils.toJsonNode; import static org.apache.avro.util.internal.JacksonUtils.toObject; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.*; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.BigIntegerNode; +import com.fasterxml.jackson.databind.node.BinaryNode; import com.fasterxml.jackson.databind.node.BooleanNode; import com.fasterxml.jackson.databind.node.DecimalNode; import com.fasterxml.jackson.databind.node.DoubleNode; @@ -32,15 +33,24 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.LongNode; import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.TextNode; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.stream.Stream; + import org.apache.avro.JsonProperties; 
import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; -import org.junit.Test; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; public class TestJacksonUtils { @@ -49,15 +59,16 @@ enum Direction { } @Test - public void testToJsonNode() { - assertEquals(null, toJsonNode(null)); + void testToJsonNode() { + assertNull(toJsonNode(null)); assertEquals(NullNode.getInstance(), toJsonNode(JsonProperties.NULL_VALUE)); assertEquals(BooleanNode.TRUE, toJsonNode(true)); assertEquals(IntNode.valueOf(1), toJsonNode(1)); assertEquals(LongNode.valueOf(2), toJsonNode(2L)); assertEquals(FloatNode.valueOf(1.0f), toJsonNode(1.0f)); - assertEquals(DoubleNode.valueOf(2.0), toJsonNode(2.0)); - assertEquals(TextNode.valueOf("\u0001\u0002"), toJsonNode(new byte[] { 1, 2 })); + assertEquals(FloatNode.valueOf(33.33000183105469f), toJsonNode(33.33000183105469f)); + assertEquals(DoubleNode.valueOf(2.0), toJsonNode(2.0d)); + assertEquals(BinaryNode.valueOf(new byte[] { 1, 2 }), toJsonNode(new byte[] { 1, 2 })); assertEquals(TextNode.valueOf("a"), toJsonNode("a")); assertEquals(TextNode.valueOf("UP"), toJsonNode(Direction.UP)); assertEquals(BigIntegerNode.valueOf(BigInteger.ONE), toJsonNode(BigInteger.ONE)); @@ -73,15 +84,15 @@ public void testToJsonNode() { } @Test - public void testToObject() { - assertEquals(null, toObject(null)); + void testToObject() { + assertNull(toObject(null)); assertEquals(JsonProperties.NULL_VALUE, toObject(NullNode.getInstance())); assertEquals(true, toObject(BooleanNode.TRUE)); assertEquals(1, toObject(IntNode.valueOf(1))); assertEquals(2L, toObject(IntNode.valueOf(2), Schema.create(Schema.Type.LONG))); assertEquals(1.0f, toObject(DoubleNode.valueOf(1.0), Schema.create(Schema.Type.FLOAT))); assertEquals(2.0, toObject(DoubleNode.valueOf(2.0))); - 
assertEquals(TextNode.valueOf("\u0001\u0002"), toJsonNode(new byte[] { 1, 2 })); + assertEquals(BinaryNode.valueOf(new byte[] { 1, 2 }), toJsonNode(new byte[] { 1, 2 })); assertArrayEquals(new byte[] { 1, 2 }, (byte[]) toObject(TextNode.valueOf("\u0001\u0002"), Schema.create(Schema.Type.BYTES))); assertEquals("a", toObject(TextNode.valueOf("a"))); @@ -103,4 +114,22 @@ public void testToObject() { assertEquals("a", toObject(TextNode.valueOf("a"), SchemaBuilder.unionOf().stringType().and().intType().endUnion())); } + @ParameterizedTest + @MethodSource("nodes") + void cycle(JsonNode input) { + Object object = JacksonUtils.toObject(input); + JsonNode node = JacksonUtils.toJsonNode(object); + Assertions.assertEquals(input, node); + } + + public static Stream nodes() { + ObjectNode o1 = JsonNodeFactory.instance.objectNode(); + o1.put("intField", 123); + o1.put("floatField", 33.33000183105469f); + o1.put("doubleField", 33.33000183105469245d); + return Stream.of(JsonNodeFactory.instance.numberNode(33.33000183105469f), + JsonNodeFactory.instance.binaryNode("Hello".getBytes(StandardCharsets.ISO_8859_1)), + JsonNodeFactory.instance.arrayNode().add(1).add("Hello").add(o1)).map(Arguments::of); + } + } diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java new file mode 100644 index 00000000000..54c887cc167 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java @@ -0,0 +1,44 @@ +/* + * Copyright 2002-2018 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import java.util.Comparator; + +/** + * Comparator that adapts Comparables to the Comparator interface. Mainly for + * internal use in other Comparators, when supposed to work on Comparables. + * + * @author Keith Donald + * @since 1.2.2 + * @param the type of comparable objects that may be compared by this + * comparator + * @see Comparable + */ +class ComparableComparator> implements Comparator { + + /** + * A shared instance of this default comparator. see Comparators#comparable() + */ + @SuppressWarnings("rawtypes") + public static final ComparableComparator INSTANCE = new ComparableComparator(); + + @Override + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java new file mode 100644 index 00000000000..f621abfe42e --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java @@ -0,0 +1,132 @@ +/* + * Copyright 2002-2018 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.util.Comparator; + +/** + * A Comparator that will safely compare nulls to be lower or higher than other + * objects. Can decorate a given Comparator or work on Comparables. + * + * @author Keith Donald + * @author Juergen Hoeller + * @since 1.2.2 + * @param the type of objects that may be compared by this comparator + * @see Comparable + */ +class NullSafeComparator implements Comparator { + + /** + * A shared default instance of this comparator, treating nulls lower than + * non-null objects. see Comparators#nullsLow() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_LOW = new NullSafeComparator<>(true); + + /** + * A shared default instance of this comparator, treating nulls higher than + * non-null objects. see Comparators#nullsHigh() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_HIGH = new NullSafeComparator<>(false); + + private final Comparator nonNullComparator; + + private final boolean nullsLow; + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, working on Comparables. + *

    + * When comparing two non-null objects, their Comparable implementation will be + * used: this means that non-null elements (that this Comparator will be applied + * to) need to implement Comparable. + *

    + * As a convenience, you can use the default shared instances: + * {@code NullSafeComparator.NULLS_LOW} and + * {@code NullSafeComparator.NULLS_HIGH}. + * + * @param nullsLow whether to treat nulls lower or higher than non-null objects + * @see Comparable + * @see #NULLS_LOW + * @see #NULLS_HIGH + */ + @SuppressWarnings("unchecked") + private NullSafeComparator(boolean nullsLow) { + this.nonNullComparator = ComparableComparator.INSTANCE; + this.nullsLow = nullsLow; + } + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, decorating the given Comparator. + *

    + * When comparing two non-null objects, the specified Comparator will be used. + * The given underlying Comparator must be able to handle the elements that this + * Comparator will be applied to. + * + * @param comparator the comparator to use when comparing two non-null objects + * @param nullsLow whether to treat nulls lower or higher than non-null + * objects + */ + public NullSafeComparator(Comparator comparator, boolean nullsLow) { + // Assert.notNull(comparator, "Non-null Comparator is required"); + this.nonNullComparator = comparator; + this.nullsLow = nullsLow; + } + + @Override + public int compare(@Nullable T o1, @Nullable T o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null) { + return (this.nullsLow ? -1 : 1); + } + if (o2 == null) { + return (this.nullsLow ? 1 : -1); + } + return this.nonNullComparator.compare(o1, o2); + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof NullSafeComparator)) { + return false; + } + NullSafeComparator otherComp = (NullSafeComparator) other; + return (this.nonNullComparator.equals(otherComp.nonNullComparator) && this.nullsLow == otherComp.nullsLow); + } + + @Override + public int hashCode() { + return this.nonNullComparator.hashCode() * (this.nullsLow ? -1 : 1); + } + + @Override + public String toString() { + return "NullSafeComparator: non-null comparator [" + this.nonNullComparator + "]; " + + (this.nullsLow ? "nulls low" : "nulls high"); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java new file mode 100644 index 00000000000..10131fa309a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java @@ -0,0 +1,415 @@ +/* + * Copyright 2002-2021 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Simple stop watch, allowing for timing of a number of tasks, exposing total + * running time and running time for each named task. + * + *

    + * Conceals use of {@link System#nanoTime()}, improving the readability of + * application code and reducing the likelihood of calculation errors. + * + *

    + * Note that this object is not designed to be thread-safe and does not use + * synchronization. + * + *

    + * This class is normally used to verify performance during proof-of-concept + * work and in development, rather than as part of production applications. + * + *

    + * As of Spring Framework 5.2, running time is tracked and reported in + * nanoseconds. + * + * @author Rod Johnson + * @author Juergen Hoeller + * @author Sam Brannen + * @since May 2, 2001 + */ +class StopWatch { + + /** + * Identifier of this {@code StopWatch}. + *

    + * Handy when we have output from multiple stop watches and need to distinguish + * between them in log or console output. + */ + private final String id; + + private boolean keepTaskList = true; + + private final List taskList = new ArrayList<>(1); + + /** Start time of the current task. */ + private long startTimeNanos; + + /** Name of the current task. */ + @Nullable + private String currentTaskName; + + @Nullable + private TaskInfo lastTaskInfo; + + private int taskCount; + + /** Total running time. */ + private long totalTimeNanos; + + /** + * Construct a new {@code StopWatch}. + *

    + * Does not start any task. + */ + public StopWatch() { + this(""); + } + + /** + * Construct a new {@code StopWatch} with the given ID. + *

    + * The ID is handy when we have output from multiple stop watches and need to + * distinguish between them. + *

    + * Does not start any task. + * + * @param id identifier for this stop watch + */ + public StopWatch(String id) { + this.id = id; + } + + /** + * Get the ID of this {@code StopWatch}, as specified on construction. + * + * @return the ID (empty String by default) + * @since 4.2.2 + * @see #StopWatch(String) + */ + public String getId() { + return this.id; + } + + /** + * Configure whether the {@link TaskInfo} array is built over time. + *

    + * Set this to {@code false} when using a {@code StopWatch} for millions of + * intervals; otherwise, the {@code TaskInfo} structure will consume excessive + * memory. + *

    + * Default is {@code true}. + */ + public void setKeepTaskList(boolean keepTaskList) { + this.keepTaskList = keepTaskList; + } + + /** + * Start an unnamed task. + *

    + * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @see #start(String) + * @see #stop() + */ + public void start() throws IllegalStateException { + start(""); + } + + /** + * Start a named task. + *

    + * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @param taskName the name of the task to start + * @see #start() + * @see #stop() + */ + public void start(String taskName) throws IllegalStateException { + if (this.currentTaskName != null) { + throw new IllegalStateException("Can't start StopWatch: it's already running"); + } + this.currentTaskName = taskName; + this.startTimeNanos = System.nanoTime(); + } + + /** + * Stop the current task. + *

    + * The results are undefined if timing methods are called without invoking at + * least one pair of {@code start()} / {@code stop()} methods. + * + * @see #start() + * @see #start(String) + */ + public void stop() throws IllegalStateException { + if (this.currentTaskName == null) { + throw new IllegalStateException("Can't stop StopWatch: it's not running"); + } + long lastTime = System.nanoTime() - this.startTimeNanos; + this.totalTimeNanos += lastTime; + this.lastTaskInfo = new TaskInfo(this.currentTaskName, lastTime); + if (this.keepTaskList) { + this.taskList.add(this.lastTaskInfo); + } + ++this.taskCount; + this.currentTaskName = null; + } + + /** + * Determine whether this {@code StopWatch} is currently running. + * + * @see #currentTaskName() + */ + public boolean isRunning() { + return (this.currentTaskName != null); + } + + /** + * Get the name of the currently running task, if any. + * + * @since 4.2.2 + * @see #isRunning() + */ + @Nullable + public String currentTaskName() { + return this.currentTaskName; + } + + /** + * Get the time taken by the last task in nanoseconds. + * + * @since 5.2 + * @see #getLastTaskTimeMillis() + */ + public long getLastTaskTimeNanos() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeNanos(); + } + + /** + * Get the time taken by the last task in milliseconds. + * + * @see #getLastTaskTimeNanos() + */ + public long getLastTaskTimeMillis() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeMillis(); + } + + /** + * Get the name of the last task. 
+ */ + public String getLastTaskName() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task name"); + } + return this.lastTaskInfo.getTaskName(); + } + + /** + * Get the last task as a {@link TaskInfo} object. + */ + public TaskInfo getLastTaskInfo() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task info"); + } + return this.lastTaskInfo; + } + + /** + * Get the total time in nanoseconds for all tasks. + * + * @since 5.2 + * @see #getTotalTimeMillis() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeNanos() { + return this.totalTimeNanos; + } + + /** + * Get the total time in milliseconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeMillis() { + return nanosToMillis(this.totalTimeNanos); + } + + /** + * Get the total time in seconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeMillis() + */ + public double getTotalTimeSeconds() { + return nanosToSeconds(this.totalTimeNanos); + } + + /** + * Get the number of tasks timed. + */ + public int getTaskCount() { + return this.taskCount; + } + + /** + * Get an array of the data for tasks performed. + */ + public TaskInfo[] getTaskInfo() { + if (!this.keepTaskList) { + throw new UnsupportedOperationException("Task info is not being kept!"); + } + return this.taskList.toArray(new TaskInfo[0]); + } + + /** + * Get a short description of the total running time. + */ + public String shortSummary() { + return "StopWatch '" + getId() + "': running time = " + getTotalTimeNanos() + " ns"; + } + + /** + * Generate a string with a table describing all tasks performed. + *

    + * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + public String prettyPrint() { + StringBuilder sb = new StringBuilder(shortSummary()); + sb.append('\n'); + if (!this.keepTaskList) { + sb.append("No task info kept"); + } else { + sb.append("---------------------------------------------\n"); + sb.append("ns % Task name\n"); + sb.append("---------------------------------------------\n"); + NumberFormat nf = NumberFormat.getNumberInstance(); + nf.setMinimumIntegerDigits(9); + nf.setGroupingUsed(false); + NumberFormat pf = NumberFormat.getPercentInstance(); + pf.setMinimumIntegerDigits(3); + pf.setGroupingUsed(false); + for (TaskInfo task : getTaskInfo()) { + sb.append(nf.format(task.getTimeNanos())).append(" "); + sb.append(pf.format((double) task.getTimeNanos() / getTotalTimeNanos())).append(" "); + sb.append(task.getTaskName()).append('\n'); + } + } + return sb.toString(); + } + + /** + * Generate an informative string describing all tasks performed + *

    + * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(shortSummary()); + if (this.keepTaskList) { + for (TaskInfo task : getTaskInfo()) { + sb.append("; [").append(task.getTaskName()).append("] took ").append(task.getTimeNanos()).append(" ns"); + long percent = Math.round(100.0 * task.getTimeNanos() / getTotalTimeNanos()); + sb.append(" = ").append(percent).append('%'); + } + } else { + sb.append("; no task info kept"); + } + return sb.toString(); + } + + private static long nanosToMillis(long duration) { + return TimeUnit.NANOSECONDS.toMillis(duration); + } + + private static double nanosToSeconds(long duration) { + return duration / 1_000_000_000.0; + } + + /** + * Nested class to hold data about one task executed within the + * {@code StopWatch}. + */ + public static final class TaskInfo { + + private final String taskName; + + private final long timeNanos; + + TaskInfo(String taskName, long timeNanos) { + this.taskName = taskName; + this.timeNanos = timeNanos; + } + + /** + * Get the name of this task. + */ + public String getTaskName() { + return this.taskName; + } + + /** + * Get the time in nanoseconds this task took. + * + * @since 5.2 + * @see #getTimeMillis() + * @see #getTimeSeconds() + */ + public long getTimeNanos() { + return this.timeNanos; + } + + /** + * Get the time in milliseconds this task took. + * + * @see #getTimeNanos() + * @see #getTimeSeconds() + */ + public long getTimeMillis() { + return nanosToMillis(this.timeNanos); + } + + /** + * Get the time in seconds this task took. 
+ * + * @see #getTimeMillis() + * @see #getTimeNanos() + */ + public double getTimeSeconds() { + return nanosToSeconds(this.timeNanos); + } + + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..c351768864b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java @@ -0,0 +1,688 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Entry; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Reference; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Restructure; + +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ConcurrentReferenceHashMap}. 
+ * + * @author Phillip Webb + * @author Juergen Hoeller + */ +class TestConcurrentReferenceHashMap { + + private static final Comparator NULL_SAFE_STRING_SORT = new NullSafeComparator<>( + new ComparableComparator(), true); + + private TestWeakConcurrentCache map = new TestWeakConcurrentCache<>(); + + @Test + void shouldCreateWithDefaults() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(1)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacity() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacityAndLoadFactor() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32, 0.5f); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldCreateWithInitialCapacityAndConcurrentLevel() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(16, 2); + assertThat(map.getSegmentsSize(), equalTo(2)); + assertThat(map.getSegment(0).getSize(), equalTo(8)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateFullyCustom() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(5, 0.5f, 3); + // concurrencyLevel of 3 ends up as 4 (nearest power of 2) + assertThat(map.getSegmentsSize(), equalTo(4)); + // initialCapacity is 5/4 (rounded up, to nearest power of 2) + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldNeedNonNegativeInitialCapacity() { + new ConcurrentReferenceHashMap(0, 1); + 
IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(-1, 1)); + assertTrue(e.getMessage().contains("Initial capacity must not be negative")); + } + + @Test + void shouldNeedPositiveLoadFactor() { + new ConcurrentReferenceHashMap(0, 0.1f, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(0, 0.0f, 1)); + assertTrue(e.getMessage().contains("Load factor must be positive")); + } + + @Test + void shouldNeedPositiveConcurrencyLevel() { + new ConcurrentReferenceHashMap(1, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(1, 0)); + assertTrue(e.getMessage().contains("Concurrency level must be positive")); + } + + @Test + void shouldPutAndGet() { + // NOTE we are using mock references so we don't need to worry about GC + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + assertEquals(1, this.map.size()); + this.map.put(123, "123b"); + assertEquals(1, this.map.size()); + this.map.put(123, null); + assertEquals(1, this.map.size()); + } + + @Test + void shouldReplaceOnDoublePut() { + this.map.put(123, "321"); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutNullKey() { + assertNull(this.map.get(null)); + assertThat(this.map.getOrDefault(null, "456"), equalTo("456")); + this.map.put(null, "123"); + assertThat(this.map.get(null), equalTo("123")); + assertThat(this.map.getOrDefault(null, "456"), equalTo("123")); + } + + @Test + void shouldPutNullValue() { + assertNull(this.map.get(123)); + assertThat(this.map.getOrDefault(123, "456"), equalTo("456")); + this.map.put(123, "321"); + assertThat(this.map.get(123), equalTo("321")); + assertThat(this.map.getOrDefault(123, "456"), equalTo("321")); + this.map.put(123, null); + assertNull(this.map.get(123)); + 
assertNull(this.map.getOrDefault(123, "456")); + } + + @Test + void shouldGetWithNoItems() { + assertNull(this.map.get(123)); + } + + @Test + void shouldApplySupplementalHash() { + Integer key = 123; + this.map.put(key, "123"); + assertNotEquals(this.map.getSupplementalHash(), key.hashCode()); + assertNotEquals(this.map.getSupplementalHash() >> 30 & 0xFF, 0); + } + + @Test + void shouldGetFollowingNexts() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + assertNull(this.map.get(4)); + } + + @Test + void shouldResize() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + this.map.put(1, "1"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + + this.map.put(2, "2"); + assertThat(this.map.getSegment(0).getSize(), equalTo(2)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(4)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + + this.map.put(4, "4"); + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(4), equalTo("4")); + + // Putting again should not increase the count + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnGet() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, 
Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + assertNull(this.map.getReference(1, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnPut() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + this.map.put(1, "1"); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPutIfAbsent() { + assertNull(this.map.putIfAbsent(123, "123")); + assertThat(this.map.putIfAbsent(123, "123b"), equalTo("123")); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutIfAbsentWithNullValue() { + assertNull(this.map.putIfAbsent(123, null)); + assertNull(this.map.putIfAbsent(123, "123")); + assertNull(this.map.get(123)); + } + + @Test + void shouldPutIfAbsentWithNullKey() { + assertNull(this.map.putIfAbsent(null, "123")); + assertThat(this.map.putIfAbsent(null, "123b"), equalTo("123")); + assertThat(this.map.get(null), equalTo("123")); + } + + @Test + void shouldRemoveKeyAndValue() { + this.map.put(123, "123"); + assertFalse(this.map.remove(123, "456")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.remove(123, "123")); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveKeyAndValueWithExistingNull() { + this.map.put(123, null); + assertFalse(this.map.remove(123, "456")); + 
assertNull(this.map.get(123)); + assertTrue(this.map.remove(123, null)); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldReplaceOldValueWithNewValue() { + this.map.put(123, "123"); + assertFalse(this.map.replace(123, "456", "789")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.replace(123, "123", "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceOldNullValueWithNewValue() { + this.map.put(123, null); + assertFalse(this.map.replace(123, "456", "789")); + assertNull(this.map.get(123)); + assertTrue(this.map.replace(123, null, "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceValue() { + this.map.put(123, "123"); + assertThat(this.map.replace(123, "456"), equalTo("123")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldReplaceNullValue() { + this.map.put(123, null); + assertNull(this.map.replace(123, "456")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldGetSize() { + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, "456"); + assertEquals(2, this.map.size()); + } + + @Test + void shouldSupportIsEmpty() { + assertTrue(this.map.isEmpty()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, "456"); + assertFalse(this.map.isEmpty()); + } + + @Test + void shouldContainKey() { + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsKey(123)); + assertTrue(this.map.containsKey(456)); + } + + @Test + void shouldContainValue() { + assertFalse(this.map.containsValue("123")); + assertFalse(this.map.containsValue(null)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsValue("123")); + assertTrue(this.map.containsValue(null)); + } + 
+ @Test + void shouldRemoveWhenKeyIsInMap() { + this.map.put(123, null); + this.map.put(456, "456"); + this.map.put(null, "789"); + assertNull(this.map.remove(123)); + assertThat(this.map.remove(456), equalTo("456")); + assertThat(this.map.remove(null), equalTo("789")); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveWhenKeyIsNotInMap() { + assertNull(this.map.remove(123)); + assertNull(this.map.remove(null)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldPutAll() { + Map m = new HashMap<>(); + m.put(123, "123"); + m.put(456, null); + m.put(null, "789"); + this.map.putAll(m); + assertEquals(3, this.map.size()); + assertThat(this.map.get(123), equalTo("123")); + assertNull(this.map.get(456)); + assertThat(this.map.get(null), equalTo("789")); + } + + @Test + void shouldClear() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + this.map.clear(); + assertEquals(0, this.map.size()); + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + assertFalse(this.map.containsKey(null)); + } + + @Test + void shouldGetKeySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + Set expected = new HashSet<>(); + expected.add(123); + expected.add(456); + expected.add(null); + assertThat(this.map.keySet(), equalTo(expected)); + } + + @Test + void shouldGetValues() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + List actual = new ArrayList<>(this.map.values()); + List expected = new ArrayList<>(); + expected.add("123"); + expected.add(null); + expected.add("789"); + actual.sort(NULL_SAFE_STRING_SORT); + expected.sort(NULL_SAFE_STRING_SORT); + assertThat(actual, equalTo(expected)); + } + + @Test + void shouldGetEntrySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + HashMap expected = new HashMap<>(); + expected.put(123, "123"); + expected.put(456, null); + 
expected.put(null, "789"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldGetEntrySetFollowingNext() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + HashMap expected = new HashMap<>(); + expected.put(1, "1"); + expected.put(2, "2"); + expected.put(3, "3"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldRemoveViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next(); + iterator.remove(); + assertThrows(IllegalStateException.class, iterator::remove); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(2, this.map.size()); + assertFalse(this.map.containsKey(2)); + } + + @Test + void shouldSetViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next().setValue("2b"); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(3, this.map.size()); + assertThat(this.map.get(2), equalTo("2b")); + } + + @Test + void containsViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Set> entrySet = this.map.entrySet(); + Set> copy = new HashMap<>(this.map).entrySet(); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + this.map.put(1, "A"); + this.map.put(2, "B"); + this.map.put(3, "C"); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + entrySet.clear(); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + } + + @Test + @Disabled("Intended for use during development only") + void 
shouldBeFasterThanSynchronizedMap() throws InterruptedException { + Map> synchronizedMap = Collections + .synchronizedMap(new WeakHashMap>()); + StopWatch mapTime = timeMultiThreaded("SynchronizedMap", synchronizedMap, + v -> new WeakReference<>(String.valueOf(v))); + System.out.println(mapTime.prettyPrint()); + + this.map.setDisableTestHooks(true); + StopWatch cacheTime = timeMultiThreaded("WeakConcurrentCache", this.map, String::valueOf); + System.out.println(cacheTime.prettyPrint()); + + // We should be at least 4 time faster + assertTrue(cacheTime.getTotalTimeSeconds() < (mapTime.getTotalTimeSeconds() / 4.0)); + } + + @Test + void shouldSupportNullReference() { + // GC could happen during restructure so we must be able to create a reference + // for a null entry + map.createReferenceManager().createReference(null, 1234, null); + } + + /** + * Time a multi-threaded access to a cache. + * + * @return the timing stopwatch + */ + private StopWatch timeMultiThreaded(String id, final Map map, ValueFactory factory) + throws InterruptedException { + + StopWatch stopWatch = new StopWatch(id); + for (int i = 0; i < 500; i++) { + map.put(i, factory.newValue(i)); + } + Thread[] threads = new Thread[30]; + stopWatch.start("Running threads"); + for (int threadIndex = 0; threadIndex < threads.length; threadIndex++) { + threads[threadIndex] = new Thread("Cache access thread " + threadIndex) { + @Override + public void run() { + for (int j = 0; j < 1000; j++) { + for (int i = 0; i < 1000; i++) { + map.get(i); + } + } + } + }; + } + for (Thread thread : threads) { + thread.start(); + } + + for (Thread thread : threads) { + if (thread.isAlive()) { + thread.join(2000); + } + } + stopWatch.stop(); + return stopWatch; + } + + private interface ValueFactory { + + V newValue(int k); + } + + private static class TestWeakConcurrentCache extends ConcurrentReferenceHashMap { + + private int supplementalHash; + + private final LinkedList> queue = new LinkedList<>(); + + private boolean 
disableTestHooks; + + public TestWeakConcurrentCache() { + super(); + } + + public void setDisableTestHooks(boolean disableTestHooks) { + this.disableTestHooks = disableTestHooks; + } + + public TestWeakConcurrentCache(int initialCapacity, float loadFactor, int concurrencyLevel) { + super(initialCapacity, loadFactor, concurrencyLevel); + } + + public TestWeakConcurrentCache(int initialCapacity, int concurrencyLevel) { + super(initialCapacity, concurrencyLevel); + } + + @Override + protected int getHash(@Nullable Object o) { + if (this.disableTestHooks) { + return super.getHash(o); + } + // For testing we want more control of the hash + this.supplementalHash = super.getHash(o); + return (o != null ? o.hashCode() : 0); + } + + public int getSupplementalHash() { + return this.supplementalHash; + } + + @Override + protected ReferenceManager createReferenceManager() { + return new ReferenceManager() { + @Override + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.createReference(entry, hash, next); + } + return new MockReference<>(entry, hash, next, TestWeakConcurrentCache.this.queue); + } + + @Override + public Reference pollForPurge() { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.pollForPurge(); + } + return TestWeakConcurrentCache.this.queue.isEmpty() ? 
null : TestWeakConcurrentCache.this.queue.removeFirst(); + } + }; + } + + public MockReference getMockReference(K key, Restructure restructure) { + return (MockReference) super.getReference(key, restructure); + } + } + + private static class MockReference implements Reference { + + private final int hash; + + private Entry entry; + + private final Reference next; + + private final LinkedList> queue; + + public MockReference(Entry entry, int hash, Reference next, LinkedList> queue) { + this.hash = hash; + this.entry = entry; + this.next = next; + this.queue = queue; + } + + @Override + public Entry get() { + return this.entry; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + public Reference getNext() { + return this.next; + } + + @Override + public void release() { + this.queue.add(this); + this.entry = null; + } + + public void queueForPurge() { + this.queue.add(this); + } + } + +} diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion new file mode 100644 index 00000000000..890ba764260 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.avro.CustomTypeConverter diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser new file mode 100644 index 00000000000..b2db6ddb269 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.FormattedSchemaParser @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.avro.DummySchemaParser diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory new file mode 100644 index 00000000000..b55c233ae46 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.avro.CustomTypeLogicalTypeFactory diff --git a/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc b/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc index f5d212917f4..5f5e870f9c7 100644 --- a/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc +++ b/lang/java/avro/src/test/resources/TestRecordWithLogicalTypes.avsc @@ -40,6 +40,12 @@ "type" : "long", "logicalType" : "timestamp-millis" } - } ] + }, { + "name" : "bd", + "type" : { + "type" : "bytes", + "logicalType" : "big-decimal" + } + } ] } diff --git a/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc b/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc new file mode 100644 index 00000000000..d19c0d8dfb9 --- /dev/null +++ b/lang/java/avro/src/test/resources/TestRecordWithMapsAndArrays.avsc @@ -0,0 +1,47 @@ +{ + "type": "record", + "name": "TestRecordWithMapsAndArrays", + "namespace": "org.apache.avro.specific", + "fields": [ + { + "name": "arr", + "type": { + "type": "array", + "items": "string", + "default": [] + } + }, + { + "name": "map", + "type": { + "type": "map", + "values": "long", + "default": {} + } + }, + { + "name": "nested_arr", + "type": { + "type": "array", + "items": { + "type": "array", + "items": "string", + "default": [] + }, + "default": [] + } + }, + { + "name": "nested_map", + "type": { + "type": "map", + "values": { + "type": "map", 
+ "values": "long", + "default": {} + }, + "default": {} + } + } + ] +} diff --git a/lang/java/avro/src/test/resources/TestUnionRecord.avsc b/lang/java/avro/src/test/resources/TestUnionRecord.avsc new file mode 100644 index 00000000000..36241c8b601 --- /dev/null +++ b/lang/java/avro/src/test/resources/TestUnionRecord.avsc @@ -0,0 +1,23 @@ +[ + "null", + { + "namespace": "org.apache.avro.specific", + "type": "record", + "name": "TestUnionRecord", + "fields": [ + { + "name": "amount", + "type": [ + "null", + { + "type": "bytes", + "logicalType": "decimal", + "precision": 31, + "scale": 8 + } + ], + "default": null + } + ] + } +] diff --git a/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc new file mode 100644 index 00000000000..6902084350f --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc @@ -0,0 +1,28 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "ApplicationEvent", + "fields": [ + { + "name": "applicationId", + "type": "string", + "doc": "Application ID" + }, + { + "name": "status", + "type": "string", + "doc": "Application Status" + }, + { + "name": "documents", + "type": ["null", { + "type": "array", + "items": "model.DocumentInfo" + }], + "doc": "", + "default": null + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc new file mode 100644 index 00000000000..95dd4243ea6 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc @@ -0,0 +1,19 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "DocumentInfo", + "fields": [ + { + "name": "documentId", + "type": "string", + "doc": "Document ID" + }, + { + "name": "filePath", + "type": "string", + "doc": "Document Path" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc 
b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc new file mode 100644 index 00000000000..ac6d08291d9 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc @@ -0,0 +1,14 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "MyResponse", + "fields": [ + { + "name": "isSuccessful", + "type": "boolean", + "doc": "Indicator for successful or unsuccessful call" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/README.md b/lang/java/avro/src/test/resources/multipleFile/README.md new file mode 100644 index 00000000000..fe3541b660e --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/README.md @@ -0,0 +1,8 @@ +## test for parsing multiple files. +This folder aims to test `public List Schema.parse(Iterable sources) throws IOException` method. + +The objective is to check that a record schema define in a file can be use in another record schema as a field type. +Here, ApplicationEvent.avsc file contains a field of type DocumentInfo, defined in file DocumentInfo.avsc. + +The is written at TestSchema.testParseMultipleFile. + diff --git a/lang/java/build.sh b/lang/java/build.sh index 96fdb3489cb..8d6a8bdba10 100755 --- a/lang/java/build.sh +++ b/lang/java/build.sh @@ -16,6 +16,7 @@ # limitations under the License. 
set -e +set -x usage() { echo "Usage: $0 {lint|test|dist|clean}" @@ -31,9 +32,9 @@ main() { mvn -B spotless:apply ;; test) - mvn -B test + mvn -B verify # Test the modules that depend on hadoop using Hadoop 2 - mvn -B test -Phadoop2 + mvn -Dmaven.build.cache.enabled=false -B test -Phadoop2 ;; dist) mvn -P dist package -DskipTests javadoc:aggregate diff --git a/lang/java/compiler/pom.xml b/lang/java/compiler/pom.xml index 863af78181b..f9b8d2eee63 100644 --- a/lang/java/compiler/pom.xml +++ b/lang/java/compiler/pom.xml @@ -1,11 +1,11 @@ - org.codehaus.mojo + org.javacc.plugin javacc-maven-plugin @@ -133,11 +133,12 @@ test -classpath - + org.apache.avro.compiler.specific.SchemaTask ${project.basedir}/src/test/resources/full_record_v1.avsc ${project.basedir}/src/test/resources/full_record_v2.avsc - ${project.basedir}/target/generated-test-sources + ${project.basedir}/src/test/resources/regression_error_field_in_record.avsc + ${project.basedir}/target/generated-test-sources/javacc @@ -149,10 +150,8 @@ add-test-source generate-test-sources @@ -161,16 +160,14 @@ - ${project.basedir}/target/generated-test-sources + ${project.basedir}/target/generated-test-sources/javacc add-source generate-sources @@ -186,37 +183,8 @@ - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.codehaus.mojo - exec-maven-plugin - [1.0,) - - exec - - - - - - - - - - - - - ${project.groupId} @@ -225,8 +193,8 @@ org.apache.commons - commons-lang3 - ${commons-lang.version} + commons-text + ${commons-text.version} org.apache.velocity @@ -245,4 +213,42 @@ + + + m2e + + m2e.version + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.codehaus.mojo + exec-maven-plugin + [1.0,) + + exec + + + + + + + + + + + + + + + diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java new file mode 100644 index 00000000000..5d0ec5218dd --- /dev/null +++ 
b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/DocCommentHelper.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.compiler.idl; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utility class with {@code ThreadLocal} fields that allow the generated + * classes {@link Idl} and {@link IdlTokenManager} to exchange documentation + * comments without forcing explicit parsing of documentation comments. + * + * The reason this works is that all calls to this class happen within a call to + * the method {@link Idl#CompilationUnit()} (either directly or indirectly). + */ +public class DocCommentHelper { + /** + * Pattern to match the common whitespace indents in a multi-line String. + * Doesn't match a single-line String, fully matches any multi-line String. + * + * To use: match on a {@link String#trim() trimmed} String, and then replace all + * newlines followed by the group "indent" with a newline. 
+ */ + private static final Pattern WS_INDENT = Pattern.compile("(?U).*\\R(?\\h*).*(?:\\R\\k.*)*"); + /** + * Pattern to match the whitespace indents plus common stars (1 or 2) in a + * multi-line String. If a String fully matches, replace all occurrences of a + * newline followed by whitespace and then the group "stars" with a newline. + * + * Note: partial matches are invalid. + */ + private static final Pattern STAR_INDENT = Pattern.compile("(?U)(?\\*{1,2}).*(?:\\R\\h*\\k.*)*"); + + private static final ThreadLocal DOC = new ThreadLocal<>(); + private static final ThreadLocal> WARNINGS = ThreadLocal.withInitial(ArrayList::new); + + /** + * Return all warnings that were encountered while parsing, once. Subsequent + * calls before parsing again will return an empty list. + */ + static List getAndClearWarnings() { + List warnings = WARNINGS.get(); + WARNINGS.remove(); + return warnings; + } + + static void setDoc(Token token) { + DocComment newDocComment = new DocComment(token); + DocComment oldDocComment = DOC.get(); + if (oldDocComment != null) { + WARNINGS.get() + .add(String.format( + "Found documentation comment at line %d, column %d. Ignoring previous one at line %d, column %d: \"%s\"\n" + + "Did you mean to use a multiline comment ( /* ... */ ) instead?", + newDocComment.line, newDocComment.column, oldDocComment.line, oldDocComment.column, oldDocComment.text)); + } + DOC.set(newDocComment); + } + + /** + * Clear any documentation (and generate a warning if there was). + * + * This method should NOT be used after an optional component in a grammar + * (i.e., after a @code{[â€Ļ]} or @code{â€Ļ*} construct), because the optional + * grammar part may have already caused parsing a doc comment special token + * placed after the code block. 
+ */ + static void clearDoc() { + DocComment oldDocComment = DOC.get(); + if (oldDocComment != null) { + WARNINGS.get() + .add(String.format( + "Ignoring out-of-place documentation comment at line %d, column %d: \"%s\"\n" + + "Did you mean to use a multiline comment ( /* ... */ ) instead?", + oldDocComment.line, oldDocComment.column, oldDocComment.text)); + } + DOC.remove(); + } + + static String getDoc() { + DocComment docComment = DOC.get(); + DOC.remove(); + return docComment == null ? null : docComment.text; + } + + /* Package private to facilitate testing */ + static String stripIndents(String doc) { + Matcher starMatcher = STAR_INDENT.matcher(doc); + if (starMatcher.matches()) { + return doc.replaceAll("(?U)(?:^|(\\R)\\h*)\\Q" + starMatcher.group("stars") + "\\E\\h?", "$1"); + } + + Matcher whitespaceMatcher = WS_INDENT.matcher(doc); + if (whitespaceMatcher.matches()) { + return doc.replaceAll("(?U)(\\R)" + whitespaceMatcher.group("indent"), "$1"); + } + + return doc; + } + + private static class DocComment { + private final String text; + private final int line; + private final int column; + + DocComment(Token token) { + // The token is everything after the initial '/**', including all + // whitespace and the ending '*/' + int tokenLength = token.image.length(); + this.text = stripIndents(token.image.substring(0, tokenLength - 2).trim()); + this.line = token.beginLine; + // The preceding token was "/**", and the current token includes + // everything since (also all whitespace). Thus, we can safely subtract 3 + // from the token column to get the start of the doc comment. 
+ this.column = token.beginColumn - 3; + } + } +} diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java new file mode 100644 index 00000000000..6006ad5f82f --- /dev/null +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/IsResolvedSchemaVisitor.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.compiler.idl; + +import org.apache.avro.Schema; +import org.apache.avro.compiler.schema.SchemaVisitor; +import org.apache.avro.compiler.schema.SchemaVisitorAction; + +/** + * This visitor checks if the current schema is fully resolved. + */ +public final class IsResolvedSchemaVisitor implements SchemaVisitor { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(terminal); + return hasUnresolvedParts ? 
SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type... + return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } +} diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java index c00252ea7ca..1c7175461cc 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/ResolvingVisitor.java @@ -139,10 +139,7 @@ public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { List fields = nt.getFields(); List newFields = new ArrayList<>(fields.size()); for (Schema.Field field : fields) { - Schema.Field newField = new Schema.Field(field.name(), replace.get(field.schema()), field.doc(), - field.defaultVal(), field.order()); - copyAllProperties(field, newField); - newFields.add(newField); + newFields.add(new Field(field, replace.get(field.schema()))); } newSchema.setFields(newFields); } diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java index 2da4944640d..e3e1a2ddb76 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java +++ 
b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java @@ -17,18 +17,19 @@ */ package org.apache.avro.compiler.idl; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; +import org.apache.avro.compiler.schema.Schemas; + import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; -import org.apache.avro.Protocol; -import org.apache.avro.Schema; -import org.apache.avro.compiler.schema.Schemas; - /** * Utility class to resolve schemas that are unavailable at the time they are * referenced in the IDL. @@ -44,6 +45,8 @@ private SchemaResolver() { private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + private static final AtomicInteger COUNTER = new AtomicInteger(); + /** * Create a schema to represent a "unresolved" schema. (used to represent a * schema where the definition is not known at the time) This concept might be @@ -53,8 +56,8 @@ private SchemaResolver() { * @return */ static Schema unresolvedSchema(final String name) { - Schema schema = Schema.createRecord(UR_SCHEMA_NAME, "unresolved schema", UR_SCHEMA_NS, false, - Collections.EMPTY_LIST); + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.EMPTY_LIST); schema.addProp(UR_SCHEMA_ATTR, name); return schema; } @@ -66,8 +69,8 @@ static Schema unresolvedSchema(final String name) { * @return */ static boolean isUnresolvedSchema(final Schema schema) { - return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null - && UR_SCHEMA_NAME.equals(schema.getName()) && UR_SCHEMA_NS.equals(schema.getNamespace())); + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && 
schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); } /** @@ -84,14 +87,28 @@ static String getUnresolvedSchemaName(final Schema schema) { } /** - * Will clone the provided protocol while resolving all unreferenced schemas + * Is this a unresolved schema. * - * @param protocol + * @param schema * @return */ + static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * Will clone the provided protocol while resolving all unreferenced schemas + * + * @param protocol a protocol with possibly unresolved schema references + * @return a protocol without unresolved schema references + */ static Protocol resolve(final Protocol protocol) { Protocol result = new Protocol(protocol.getName(), protocol.getDoc(), protocol.getNamespace()); - final Collection types = protocol.getTypes(); + final Collection types = protocol.getUnresolvedTypes(); // replace unresolved schemas. 
List newSchemas = new ArrayList<>(types.size()); IdentityHashMap replacements = new IdentityHashMap<>(); diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java index 91232f0f5ac..0c0e5ab6725 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java @@ -21,8 +21,6 @@ import java.util.Collections; import java.util.Deque; import java.util.IdentityHashMap; -import java.util.Iterator; -import java.util.Map; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -31,7 +29,7 @@ import org.apache.avro.LogicalType; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; -import org.apache.avro.compiler.specific.SpecificCompiler; +import org.apache.avro.specific.SpecificData; /** * Avro Schema utilities, to traverse... @@ -68,10 +66,7 @@ public static void copyLogicalTypes(final Schema from, final Schema to) { } public static void copyProperties(final JsonProperties from, final JsonProperties to) { - Map objectProps = from.getObjectProps(); - for (Map.Entry entry : objectProps.entrySet()) { - to.addProp(entry.getKey(), entry.getValue()); - } + from.forEachProperty(to::addProp); } public static boolean hasGeneratedJavaClass(final Schema schema) { @@ -89,9 +84,9 @@ public static boolean hasGeneratedJavaClass(final Schema schema) { public static String getJavaClassName(final Schema schema) { String namespace = schema.getNamespace(); if (namespace == null) { - return SpecificCompiler.mangle(schema.getName()); + return SpecificData.mangle(schema.getName()); } else { - return namespace + '.' + SpecificCompiler.mangle(schema.getName()); + return namespace + '.' 
+ SpecificData.mangle(schema.getName()); } } @@ -141,9 +136,8 @@ public static T visit(final Schema start, final SchemaVisitor visitor) { visited.put(schema, schema); break; case RECORD: - Iterator reverseSchemas = schema.getFields().stream().map(Field::schema) - .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator(); - terminate = visitNonTerminal(visitor, schema, dq, () -> reverseSchemas); + terminate = visitNonTerminal(visitor, schema, dq, () -> schema.getFields().stream().map(Field::schema) + .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator()); visited.put(schema, schema); break; case UNION: diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index f9eb43a1386..53675f4a01b 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -26,9 +26,9 @@ import java.lang.reflect.InvocationTargetException; import java.nio.file.Files; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -37,6 +37,7 @@ import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.avro.Conversion; import org.apache.avro.Conversions; @@ -48,11 +49,11 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.SchemaNormalization; +import org.apache.avro.SchemaParser; import org.apache.avro.data.TimeConversions; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.StringType; import org.apache.avro.specific.SpecificData; -import org.apache.commons.lang3.StringUtils; 
import org.apache.velocity.Template; import org.apache.velocity.VelocityContext; import org.apache.velocity.app.VelocityEngine; @@ -60,7 +61,6 @@ import org.slf4j.LoggerFactory; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.avro.specific.SpecificData.RESERVED_WORDS; /** * Generate specific Java interfaces and classes for protocols and schemas. @@ -105,8 +105,11 @@ void addLogicalTypeConversions(SpecificData specificData) { specificData.addLogicalTypeConversion(new TimeConversions.TimeMicrosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.TimestampMillisConversion()); specificData.addLogicalTypeConversion(new TimeConversions.TimestampMicrosConversion()); + specificData.addLogicalTypeConversion(new TimeConversions.TimestampNanosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampMicrosConversion()); specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampMillisConversion()); + specificData.addLogicalTypeConversion(new TimeConversions.LocalTimestampNanosConversion()); + specificData.addLogicalTypeConversion(new Conversions.UUIDConversion()); } private final SpecificData specificData = new SpecificData(); @@ -120,12 +123,17 @@ void addLogicalTypeConversions(SpecificData specificData) { private boolean gettersReturnOptional = false; private boolean optionalGettersForNullableFieldsOnly = false; private boolean createSetters = true; + private boolean createNullSafeAnnotations = false; private boolean createAllArgsConstructor = true; private String outputCharacterEncoding; private boolean enableDecimalLogicalType = false; private String suffix = ".java"; private List additionalVelocityTools = Collections.emptyList(); + private String recordSpecificClass = "org.apache.avro.specific.SpecificRecordBase"; + + private String errorSpecificClass = "org.apache.avro.specific.SpecificExceptionBase"; + /* * Used in the record.vm template. 
*/ @@ -133,23 +141,6 @@ public boolean isCreateAllArgsConstructor() { return createAllArgsConstructor; } - /* Reserved words for accessor/mutator methods */ - private static final Set ACCESSOR_MUTATOR_RESERVED_WORDS = new HashSet<>( - Arrays.asList("class", "schema", "classSchema")); - - static { - // Add reserved words to accessor/mutator reserved words - ACCESSOR_MUTATOR_RESERVED_WORDS.addAll(RESERVED_WORDS); - } - - /* Reserved words for error types */ - private static final Set ERROR_RESERVED_WORDS = new HashSet<>(Arrays.asList("message", "cause")); - - static { - // Add accessor/mutator reserved words to error reserved words - ERROR_RESERVED_WORDS.addAll(ACCESSOR_MUTATOR_RESERVED_WORDS); - } - private static final String FILE_HEADER = "/**\n" + " * Autogenerated by Avro\n" + " *\n" + " * DO NOT EDIT DIRECTLY\n" + " */\n"; @@ -163,8 +154,20 @@ public SpecificCompiler(Protocol protocol) { } public SpecificCompiler(Schema schema) { + this(Collections.singleton(schema)); + } + + public SpecificCompiler(Collection schemas) { + this(); + for (Schema schema : schemas) { + enqueue(schema); + } + this.protocol = null; + } + + public SpecificCompiler(Iterable schemas) { this(); - enqueue(schema); + schemas.forEach(this::enqueue); this.protocol = null; } @@ -235,6 +238,17 @@ public void setCreateSetters(boolean createSetters) { this.createSetters = createSetters; } + public boolean isCreateNullSafeAnnotations() { + return this.createNullSafeAnnotations; + } + + /** + * Set to true to add jetbrains @Nullable and @NotNull annotations + */ + public void setCreateNullSafeAnnotations(boolean createNullSafeAnnotations) { + this.createNullSafeAnnotations = createNullSafeAnnotations; + } + public boolean isCreateOptionalGetters() { return this.createOptionalGetters; } @@ -371,7 +385,7 @@ private void initializeVelocity() { "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader"); velocityEngine.addProperty("resource.loader.file.class", 
"org.apache.velocity.runtime.resource.loader.FileResourceLoader"); - velocityEngine.addProperty("resource.loader.file.path", "/, ."); + velocityEngine.addProperty("resource.loader.file.path", "/, ., "); velocityEngine.setProperty("runtime.strict_mode.enable", true); // Set whitespace gobbling to Backward Compatible (BC) @@ -457,12 +471,16 @@ public static void compileSchema(File src, File dest) throws IOException { * Generates Java classes for a number of schema files. */ public static void compileSchema(File[] srcFiles, File dest) throws IOException { - Schema.Parser parser = new Schema.Parser(); + SchemaParser parser = new SchemaParser(); for (File src : srcFiles) { - Schema schema = parser.parse(src); + parser.parse(src); + } + // FIXME: use lastModified() without causing a NoSuchMethodError in the build + File lastModifiedSourceFile = Stream.of(srcFiles).max(Comparator.comparing(File::lastModified)).orElse(null); + for (Schema schema : parser.getParsedNamedSchemas()) { SpecificCompiler compiler = new SpecificCompiler(schema); - compiler.compileToDestination(src, dest); + compiler.compileToDestination(lastModifiedSourceFile, dest); } } @@ -557,7 +575,7 @@ OutputFile compileInterface(Protocol protocol) { String out = renderTemplate(templateDir + "protocol.vm", context); OutputFile outputFile = new OutputFile(); - String mangledName = mangle(protocol.getName()); + String mangledName = mangleTypeIdentifier(protocol.getName()); outputFile.path = makePath(mangledName, mangle(protocol.getNamespace())); outputFile.contents = out; outputFile.outputCharacterEncoding = outputCharacterEncoding; @@ -629,7 +647,7 @@ OutputFile compile(Schema schema) { } OutputFile outputFile = new OutputFile(); - String name = mangle(schema.getName()); + String name = mangleTypeIdentifier(schema.getName()); outputFile.path = makePath(name, mangle(schema.getNamespace())); outputFile.contents = output; outputFile.outputCharacterEncoding = outputCharacterEncoding; @@ -653,9 +671,7 @@ private 
Protocol addStringType(Protocol p) { Protocol newP = new Protocol(p.getName(), p.getDoc(), p.getNamespace()); Map types = new LinkedHashMap<>(); - for (Map.Entry a : p.getObjectProps().entrySet()) { - newP.addProp(a.getKey(), a.getValue()); - } + p.forEachProperty(newP::addProp); // annotate types Collection namedTypes = new LinkedHashSet<>(); @@ -795,7 +811,7 @@ private String javaType(Schema schema, boolean checkConvertedLogicalType) { case RECORD: case ENUM: case FIXED: - return mangle(schema.getFullName()); + return SpecificData.mangleFullyQualified(schema.getFullName()); case ARRAY: return "java.util.List<" + javaType(schema.getElementType()) + ">"; case MAP: @@ -856,7 +872,7 @@ public String generateSetterCode(Schema schema, String name, String pname) { /** * Utility for template use. Returns the unboxed java type for a Schema. * - * @deprecated use javaUnbox(Schema, boolean), kept for backward compatibiliby + * @deprecated use javaUnbox(Schema, boolean), kept for backward compatibility * of custom templates */ @Deprecated @@ -920,19 +936,21 @@ public int getNonNullIndex(Schema s) { * record.vm can handle the schema being presented. */ public boolean isCustomCodable(Schema schema) { - if (schema.isError()) - return false; return isCustomCodable(schema, new HashSet<>()); } private boolean isCustomCodable(Schema schema, Set seen) { if (!seen.add(schema)) + // Recursive call: assume custom codable until a caller on the call stack proves + // otherwise. return true; if (schema.getLogicalType() != null) return false; boolean result = true; switch (schema.getType()) { case RECORD: + if (schema.isError()) + return false; for (Schema.Field f : schema.getFields()) result &= isCustomCodable(f.schema(), seen); break; @@ -1035,7 +1053,7 @@ public static String javaEscape(String o) { * Utility for template use. Escapes comment end with HTML entities. 
*/ public static String escapeForJavadoc(String s) { - return s.replace("*/", "*/"); + return s.replace("*/", "*/").replace("<", "<").replace(">", ">"); } /** @@ -1049,47 +1067,44 @@ public static String nullToEmpty(String x) { * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word) { - return mangle(word, false); + return SpecificData.mangle(word, false); } /** * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word, boolean isError) { - return mangle(word, isError ? ERROR_RESERVED_WORDS : RESERVED_WORDS); + return SpecificData.mangle(word, isError); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word) { + return SpecificData.mangleTypeIdentifier(word, false); + } + + /** + * Utility for template use. Adds a dollar sign to reserved words in type + * identifiers. + */ + public static String mangleTypeIdentifier(String word, boolean isError) { + return SpecificData.mangle(word, isError); } /** * Utility for template use. Adds a dollar sign to reserved words. */ public static String mangle(String word, Set reservedWords) { - return mangle(word, reservedWords, false); + return SpecificData.mangle(word, reservedWords, false); } /** * Utility for template use. Adds a dollar sign to reserved words. 
*/ public static String mangle(String word, Set reservedWords, boolean isMethod) { - if (StringUtils.isBlank(word)) { - return word; - } - if (word.contains(".")) { - // If the 'word' is really a full path of a class we must mangle just the - String[] packageWords = word.split("\\."); - String[] newPackageWords = new String[packageWords.length]; - - for (int i = 0; i < packageWords.length; i++) { - String oldName = packageWords[i]; - newPackageWords[i] = mangle(oldName, reservedWords, false); - } - - return String.join(".", newPackageWords); - } - if (reservedWords.contains(word) || (isMethod && reservedWords - .contains(Character.toLowerCase(word.charAt(0)) + ((word.length() > 1) ? word.substring(1) : "")))) { - return word + "$"; - } - return word; + return SpecificData.mangle(word, reservedWords, isMethod); } /** @@ -1220,14 +1235,10 @@ private static String generateMethodName(Schema schema, Field field, String pref // Check for the special case in which the schema defines two fields whose // names are identical except for the case of the first character: - char firstChar = field.name().charAt(0); - String conflictingFieldName = (Character.isLowerCase(firstChar) ? Character.toUpperCase(firstChar) - : Character.toLowerCase(firstChar)) + (field.name().length() > 1 ? field.name().substring(1) : ""); - boolean fieldNameConflict = schema.getField(conflictingFieldName) != null; + int indexNameConflict = calcNameIndex(field.name(), schema); StringBuilder methodBuilder = new StringBuilder(prefix); - String fieldName = mangle(field.name(), schema.isError() ? 
ERROR_RESERVED_WORDS : ACCESSOR_MUTATOR_RESERVED_WORDS, - true); + String fieldName = SpecificData.mangleMethod(field.name(), schema.isError()); boolean nextCharToUpper = true; for (int ii = 0; ii < fieldName.length(); ii++) { @@ -1243,16 +1254,75 @@ private static String generateMethodName(Schema schema, Field field, String pref methodBuilder.append(postfix); // If there is a field name conflict append $0 or $1 - if (fieldNameConflict) { + if (indexNameConflict >= 0) { if (methodBuilder.charAt(methodBuilder.length() - 1) != '$') { methodBuilder.append('$'); } - methodBuilder.append(Character.isLowerCase(firstChar) ? '0' : '1'); + methodBuilder.append(indexNameConflict); } return methodBuilder.toString(); } + /** + * Calc name index for getter / setter field in case of conflict as example, + * having a schema with fields __X, _X, _x, X, x should result with indexes __X: + * 3, _X: 2, _x: 1, X: 0 x: None (-1) + * + * @param fieldName : field name. + * @param schema : schema. + * @return index for field. + */ + private static int calcNameIndex(String fieldName, Schema schema) { + // get name without underscore at start + // and calc number of other similar fields with same subname. + int countSimilar = 0; + String pureFieldName = fieldName; + while (!pureFieldName.isEmpty() && pureFieldName.charAt(0) == '_') { + pureFieldName = pureFieldName.substring(1); + if (schema.getField(pureFieldName) != null) { + countSimilar++; + } + String reversed = reverseFirstLetter(pureFieldName); + if (schema.getField(reversed) != null) { + countSimilar++; + } + } + // field name start with upper have +1 + String reversed = reverseFirstLetter(fieldName); + if (!pureFieldName.isEmpty() && Character.isUpperCase(pureFieldName.charAt(0)) + && schema.getField(reversed) != null) { + countSimilar++; + } + + int ret = -1; // if no similar name, no index. 
+ if (countSimilar > 0) { + ret = countSimilar - 1; // index is count similar -1 (start with $0) + } + + return ret; + } + + /** + * Reverse first letter upper <=> lower. __Name <=> __name + * + * @param name : input name. + * @return name with change case of first letter. + */ + private static String reverseFirstLetter(String name) { + StringBuilder builder = new StringBuilder(name); + int index = 0; + while (builder.length() > index && builder.charAt(index) == '_') { + index++; + } + if (builder.length() > index) { + char c = builder.charAt(index); + char inverseC = Character.isLowerCase(c) ? Character.toUpperCase(c) : Character.toLowerCase(c); + builder.setCharAt(index, inverseC); + } + return builder.toString(); + } + /** * Tests whether an unboxed Java type can be set to null */ @@ -1284,4 +1354,20 @@ public static void main(String[] args) throws Exception { public void setOutputCharacterEncoding(String outputCharacterEncoding) { this.outputCharacterEncoding = outputCharacterEncoding; } + + public String getSchemaParentClass(boolean isError) { + if (isError) { + return this.errorSpecificClass; + } else { + return this.recordSpecificClass; + } + } + + public void setRecordSpecificClass(final String recordSpecificClass) { + this.recordSpecificClass = recordSpecificClass; + } + + public void setErrorSpecificClass(final String errorSpecificClass) { + this.errorSpecificClass = errorSpecificClass; + } } diff --git a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj index 0a2b69667cd..2d312794c3e 100644 --- a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj +++ b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj @@ -63,6 +63,7 @@ package org.apache.avro.compiler.idl; import java.io.*; import java.net.*; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import 
java.util.List; @@ -70,6 +71,7 @@ import java.util.Map; import java.net.URL; import org.apache.avro.Schema; +import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema.*; import org.apache.avro.Protocol; @@ -79,28 +81,32 @@ import org.apache.avro.util.internal.Accessor; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.*; -import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; /** * Grammar to parse a higher-level language into an Avro Schema. * * Note: each instance is not thread-safe, but multiple separate * instances are safely independent. + * + * @deprecated Use the new org.apache.avro.idl.IdlReader from avro-idl instead. */ +@Deprecated public class Idl implements Closeable { static JsonNodeFactory FACTORY = JsonNodeFactory.instance; + private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.compiler.idl.Idl.NullableType.optional"; URI inputDir; ClassLoader resourceLoader = null; String namespace; - Map names = new LinkedHashMap(); - - private static final ThreadLocal DOC = new ThreadLocal(); - static void setDoc(String doc) { DOC.set(doc.trim()); } - static String getDoc() { - String doc = DOC.get(); - DOC.set(null); - return doc; + Map names = new LinkedHashMap<>(); + + private List parserWarnings = Collections.emptyList(); + /** + * Return all warnings that were encountered while parsing. 
+ */ + public List getWarningsAfterParsing() { + return parserWarnings; } public Idl(File inputFile) throws IOException { @@ -134,6 +140,7 @@ public class Idl implements Closeable { this.resourceLoader = parent.resourceLoader; } + @SuppressWarnings("RedundantThrows") public void close() throws IOException { jj_input_stream.inputStream.close(); } @@ -156,7 +163,7 @@ public class Idl implements Closeable { JsonNode value = props.get(key); if (!value.isArray()) throw error(key+" property must be array: "+value, token); - List values = new ArrayList(); + List values = new ArrayList<>(); for (JsonNode n : value) if (n.isTextual()) values.add(n.textValue()); @@ -170,7 +177,7 @@ public class Idl implements Closeable { File file = "file".equals(uri.getScheme()) ? new File(uri.getPath()) : null; URL result = null; if (file != null && file.exists()) - result = file.toURI().toURL(); + result = new URL("file:" + file.getPath()); else if (this.resourceLoader != null) if ("classpath".equals(uri.getScheme())) result = this.resourceLoader.getResource(uri.getPath().substring(1)); @@ -181,6 +188,31 @@ public class Idl implements Closeable { return result; } + /** + * For "optional schemas" (recognized by the marker property the NullableType + * production adds), ensure the null schema is in the right place. + * + * @param schema a schema + * @param defaultValue the intended default value + * @return the schema, or an optional schema with null in the right place + */ + private static Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) { + final Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY); + if (optionalType != null) { + // The schema is a union schema with 2 types: "null" and a non-"null" schema + Schema nullSchema = schema.getTypes().get(0); + Schema nonNullSchema = schema.getTypes().get(1); + boolean nonNullDefault = defaultValue != null && !defaultValue.isNull(); + + // Always return a new schema: this drops the marker property. 
+ if (nonNullDefault) { + return Schema.createUnion(nonNullSchema, nullSchema); + } else { + return Schema.createUnion(nullSchema, nonNullSchema); + } + } + return schema; + } } PARSER_END(Idl) @@ -219,13 +251,13 @@ MORE : SPECIAL_TOKEN : { - <"*/" > {Idl.setDoc(image.substring(0, image.length()-2));} : DEFAULT + "*/" {DocCommentHelper.setDoc(matchedToken);} : DEFAULT } SKIP : { - <"*/" > : DEFAULT + "*/" : DEFAULT } /* RESERVED WORDS AND LITERALS */ @@ -260,6 +292,7 @@ TOKEN : | < TIME: "time_ms" > | < TIMESTAMP: "timestamp_ms" > | < DECIMAL: "decimal" > +| < BIG_DECIMAL: "big_decimal" > | < LOCAL_TIMESTAMP: "local_timestamp_ms" > | < UUID: "uuid" > } @@ -998,6 +1031,7 @@ TOKEN : | < EQUALS: "=" > | < DOT: "." > | < DASH: "-" > +| < QUESTION_MARK: "?" > } TOKEN : @@ -1019,19 +1053,23 @@ TOKEN : Protocol CompilationUnit(): { Protocol p; + DocCommentHelper.getAndClearWarnings(); // Throw away previous results. } { p = ProtocolDeclaration() - ( < "\u001a" > )? + ( "\u001a" )? ( )? - { return SchemaResolver.resolve(p); } + { + parserWarnings = DocCommentHelper.getAndClearWarnings(); + return SchemaResolver.resolve(p); + } } /* * Declaration syntax follows. 
*/ -private Schema NamedSchemaDeclaration(Map props): +private Schema NamedSchemaDeclaration(String doc, Map props): { Schema s; String savedSpace = this.namespace; @@ -1042,9 +1080,9 @@ private Schema NamedSchemaDeclaration(Map props): this.namespace = getTextProp("namespace", props, token); } ( - s = FixedDeclaration() - | s = EnumDeclaration() - | s = RecordDeclaration() + s = FixedDeclaration(doc) + | s = EnumDeclaration(doc) + | s = RecordDeclaration(doc) ) { this.namespace = savedSpace; @@ -1054,9 +1092,12 @@ private Schema NamedSchemaDeclaration(Map props): } else if ("aliases".equals(key)) { // aliases for (String alias : getTextProps("aliases", props, token)) s.addAlias(alias); - } else { // add all other props + } else { // add all other properties Accessor.addProp(s, key, props.get(key)); } + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); return s; } @@ -1065,11 +1106,10 @@ private Schema NamedSchemaDeclaration(Map props): Schema UnionDefinition(): { Schema s; - List schemata = new ArrayList(); + List schemata = new ArrayList<>(); } { - // TODO should probably disallow other unions here in the parser? - + // Don't disallow unions here: its constructor disallows nested unions and throws a descriptive exception. 
"union" "{" s = Type() @@ -1089,11 +1129,12 @@ Schema UnionDefinition(): Protocol ProtocolDeclaration(): { - String name; + String doc, name; Protocol p; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { + doc = Documentation() ( SchemaProperty(props) )* { if (props.containsKey("namespace")) @@ -1102,10 +1143,10 @@ Protocol ProtocolDeclaration(): "protocol" name = Identifier() { - p = new Protocol(name, getDoc(), namespace); + p = new Protocol(name, doc, namespace); for (String key : props.keySet()) if ("namespace".equals(key)) { // already handled: ignore - } else { // add all other props + } else { // add all other properties Accessor.addProp(p, key, props.get(key)); } } @@ -1116,20 +1157,32 @@ Protocol ProtocolDeclaration(): } -Schema EnumDeclaration(): +String Documentation(): +{ + //noinspection ResultOfMethodCallIgnored + getToken(1); // Parse, but don't consume, at least one token; this triggers parsing special tokens like doc comments. +} +{ + // Don't parse anything, just return the doc string + { + return DocCommentHelper.getDoc(); + } +} + + +Schema EnumDeclaration(String doc): { String name; List symbols; String defaultSymbol = null; } { - "enum" { String doc = getDoc(); } + "enum" name = Identifier() symbols = EnumBody() - [ defaultSymbol=Identifier() ] + [ defaultSymbol=Identifier() { DocCommentHelper.clearDoc(); } ] { - Schema s = Schema.createEnum(name, doc, this.namespace, symbols, - defaultSymbol); + Schema s = Schema.createEnum(name, doc, namespace, symbols, defaultSymbol); names.put(s.getFullName(), s); return s; } @@ -1137,13 +1190,14 @@ Schema EnumDeclaration(): List EnumBody(): { - List symbols = new ArrayList(); + List symbols = new ArrayList<>(); } { - "{" - [ EnumConstant(symbols) ( LOOKAHEAD(2) "," EnumConstant(symbols) )* ] + "{" { DocCommentHelper.clearDoc(); } + [ EnumConstant(symbols) ( "," EnumConstant(symbols) )* ] "}" { + DocCommentHelper.clearDoc(); return symbols; } } @@ -1158,13 +1212,14 @@ void 
EnumConstant(List symbols): void ProtocolBody(Protocol p): { + String doc; Schema schema; Message message; Protocol importProtocol; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - "{" + "{" { DocCommentHelper.clearDoc(); } ( ((( importProtocol = ImportIdl() | importProtocol = ImportProtocol()) { @@ -1173,21 +1228,26 @@ void ProtocolBody(Protocol p): p.getMessages().putAll(importProtocol.getMessages()); }) | schema = ImportSchema() - ) + ) { + DocCommentHelper.clearDoc(); + } | + doc = Documentation() ( SchemaProperty(props) )* ( - schema = NamedSchemaDeclaration(props) + schema = NamedSchemaDeclaration(doc, props) | - message = MessageDeclaration(p, props) { + message = MessageDeclaration(p, doc, props) { p.getMessages().put(message.getName(), message); } - ) { props.clear(); } + ) { + props.clear(); + } ) * "}" - { p.setTypes(names.values()); + DocCommentHelper.clearDoc(); } } @@ -1198,13 +1258,8 @@ Protocol ImportIdl() : { { importFile = JsonString() ";" { - try { - Idl idl = new Idl(findFile(importFile), this); - try { - return idl.CompilationUnit(); - } finally { - idl.close(); - } + try (Idl idl=new Idl(findFile(importFile), this)){ + return idl.CompilationUnit(); } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } @@ -1217,14 +1272,8 @@ Protocol ImportProtocol() : { { importFile = JsonString() ";" { - - try { - InputStream stream = findFile(importFile).openStream(); - try { - return Protocol.parse(stream); - } finally { - stream.close(); - } + try (InputStream stream=findFile(importFile).openStream()) { + return Protocol.parse(stream); } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } @@ -1237,24 +1286,21 @@ Schema ImportSchema() : { { importFile = JsonString() ";" { - try { + try (InputStream stream=findFile(importFile).openStream()){ + // This usage of Schema.Parser should not be changed. + // Remove this whole (old) IDL parser instead. 
Parser parser = new Schema.Parser(); - parser.addTypes(names); // inherit names - InputStream stream = findFile(importFile).openStream(); - try { - Schema value = parser.parse(stream); - names = parser.getTypes(); // update names - return value; - } finally { - stream.close(); - } + parser.addTypes(names.values()); // inherit names + Schema value = parser.parse(stream); + names = parser.getTypes(); // update names + return value; } catch (IOException e) { throw error("Error importing "+importFile+": "+e, token); } } } -Schema FixedDeclaration(): +Schema FixedDeclaration(String doc): { String name; Token sizeTok; @@ -1263,17 +1309,17 @@ Schema FixedDeclaration(): "fixed" name = Identifier() "(" sizeTok = ")" ";" { - Schema s = Schema.createFixed(name, getDoc(), this.namespace, - Integer.parseInt(sizeTok.image)); + DocCommentHelper.clearDoc(); + Schema s = Schema.createFixed(name, doc, this.namespace, Integer.parseInt(sizeTok.image)); names.put(s.getFullName(), s); return s; } } -Schema RecordDeclaration(): +Schema RecordDeclaration(String doc): { String name; - List fields = new ArrayList(); + List fields = new ArrayList<>(); boolean isError; } { @@ -1283,14 +1329,14 @@ Schema RecordDeclaration(): ) name = Identifier() { - Schema result = Schema.createRecord( - name, getDoc(), this.namespace, isError); + Schema result = Schema.createRecord(name, doc, this.namespace, isError); names.put(result.getFullName(), result); } - "{" + "{" { DocCommentHelper.clearDoc(); } ( FieldDeclaration(fields) )* "}" { + DocCommentHelper.clearDoc(); result.setFields(fields); return result; } @@ -1313,84 +1359,66 @@ private void SchemaProperty(Map properties): void FieldDeclaration(List fields): { + String defaultDoc; Schema type; - Map props = new LinkedHashMap(); } { - // TODO should we be able to specify properties on any Type? 
- // or just on field declarations as done here - - ( SchemaProperty(props) )* + defaultDoc = Documentation() type = Type() - VariableDeclarator(type, fields) ( "," VariableDeclarator(type, fields) )* - ";" - { - for (String key : props.keySet()) - Accessor.addProp(type, key, props.get(key)); - } + VariableDeclarator(type, defaultDoc, fields) ( "," VariableDeclarator(type, defaultDoc, fields) )* + ";" { DocCommentHelper.clearDoc(); } } -void VariableDeclarator(Schema type, List fields): +void VariableDeclarator(Schema type, String defaultDoc, List fields): { - String name; + String doc, name; JsonNode defaultValue = null; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - ( SchemaProperty(props) )* - + doc = Documentation() + ( SchemaProperty(props) )* name = Identifier() - - [ defaultValue=Json() ] - + [ defaultValue=Json() ] { Field.Order order = Field.Order.ASCENDING; for (String key : props.keySet()) if ("order".equals(key)) order = Field.Order.valueOf(getTextProp(key,props,token).toUpperCase()); - boolean validate = !SchemaResolver.isUnresolvedSchema(type); - Field field = Accessor.createField(name, type, getDoc(), defaultValue, validate, order); + boolean validate = SchemaResolver.isFullyResolvedSchema(type); + Schema fieldType = fixOptionalSchema(type, defaultValue); + Field field = Accessor.createField(name, fieldType, doc == null ? 
defaultDoc : doc, defaultValue, validate, order); for (String key : props.keySet()) if ("order".equals(key)) { // already handled: ignore } else if ("aliases".equals(key)) { // aliases for (String alias : getTextProps("aliases", props, token)) field.addAlias(alias); - } else { // add all other props + } else { // add all other properties Accessor.addProp(field, key, props.get(key)); } fields.add(field); + DocCommentHelper.clearDoc(); } } -String MessageDocumentation(): -{} +private Message MessageDeclaration(Protocol p, String msgDoc, Map props): { - // Don't parse anything, just return the doc string - { - return getDoc(); - } -} - -private Message MessageDeclaration(Protocol p, Map props): -{ - String msgDoc; String name; Schema request; Schema response; boolean oneWay = false; - List errorSchemata = new ArrayList(); + List errorSchemata = new ArrayList<>(); errorSchemata.add(Protocol.SYSTEM_ERROR); } { - msgDoc = MessageDocumentation() - response = ResultType() - name = Identifier() + response = ResultType() name = Identifier() request = FormalParameters() [ "oneway" {oneWay = true; } | "throws" ErrorList(errorSchemata) ] ";" { + DocCommentHelper.clearDoc(); Schema errors = Schema.createUnion(errorSchemata); if (oneWay && response.getType() != Type.NULL) throw error("One-way message'"+name+"' must return void", token); @@ -1412,44 +1440,89 @@ void ErrorList(List errors): Schema FormalParameters(): { - List fields = new ArrayList(); + List fields = new ArrayList<>(); } { - ( - "(" [ FormalParameter(fields) ( "," FormalParameter(fields) )* ] ")" - ) + "(" { DocCommentHelper.clearDoc(); } + [ FormalParameter(fields) ( "," FormalParameter(fields) )* ] ")" { - return Schema.createRecord(fields); + DocCommentHelper.clearDoc(); + return Schema.createRecord(null, null, null, false, fields); } } void FormalParameter(List fields): { + String doc; Schema type; } { + doc = Documentation() type = Type() - VariableDeclarator(type, fields) + VariableDeclarator(type, doc, 
fields) } Schema Type(): { Schema s; - Map props = new LinkedHashMap(); + Map props = new LinkedHashMap<>(); } { - ( SchemaProperty(props) )* + s = UnannotatedType(props) + { + return s; + } +} + +Schema UnannotatedType(Map props): +{ + Schema s; +} +{ ( - LOOKAHEAD(2) s = ReferenceType() - | s = PrimitiveType() - | s = UnionDefinition() - | s = ArrayType() - | s = MapType() + s = NullableType(props) + | ( + s = UnionDefinition() + | s = ArrayType() + | s = MapType() + ) + { + // NullableType also applies properties, inside any union with null it may create. + for (String key : props.keySet()) + Accessor.addProp(s, key, props.get(key)); + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); + } ) { + return s; + } +} + +Schema NullableType(Map props): +{ + Schema s; + boolean optional = false; +} +{ + ( + s = ReferenceType() { if (!props.isEmpty()) { throw error("Type references may not be annotated", token); } } + | s = PrimitiveType() + ) [ { optional = true; } ] + { + // By applying the properties here (before creating the union), type annotations modify the optional type instead of the union. for (String key : props.keySet()) Accessor.addProp(s, key, props.get(key)); + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s); + if (logicalType != null) + logicalType.addToSchema(s); + if (optional) { + s = Schema.createUnion(Schema.create(Schema.Type.NULL), s); + // Add a marker property to the union (it will be removed when creating fields) + Accessor.addProp(s, OPTIONAL_NULLABLE_TYPE_PROPERTY, BooleanNode.TRUE); + } return s; } } @@ -1486,10 +1559,8 @@ Schema ReferenceType(): StringBuilder sb = new StringBuilder(); } { - ( - part = Identifier() { sb.append(part); } - ("." tok = AnyIdentifier() { sb.append(".").append(tok.image); })* - ) + part = Identifier() { sb.append(part); } + ("." 
tok = AnyIdentifier() { sb.append(".").append(tok.image); })* { String name = sb.toString(); if ((name.indexOf('.') == -1) && namespace != null) @@ -1504,7 +1575,7 @@ Schema ReferenceType(): } Schema PrimitiveType(): -{} +{ Schema s; } { "boolean" { return Schema.create(Type.BOOLEAN); } | "bytes" { return Schema.create(Type.BYTES); } @@ -1518,7 +1589,8 @@ Schema PrimitiveType(): | "time_ms" { return LogicalTypes.timeMillis().addToSchema(Schema.create(Type.INT)); } | "timestamp_ms" { return LogicalTypes.timestampMillis().addToSchema(Schema.create(Type.LONG)); } | "local_timestamp_ms" { return LogicalTypes.localTimestampMillis().addToSchema(Schema.create(Type.LONG)); } -| "decimal" { return DecimalTypeProperties(); } +| "decimal" s = DecimalTypeProperties() { return s; } +| "big_decimal" { return LogicalTypes.bigDecimal().addToSchema(Schema.create(Type.BYTES)); } | "uuid" {return LogicalTypes.uuid().addToSchema(Schema.create(Type.STRING));} } @@ -1543,22 +1615,22 @@ Schema ResultType(): Schema schema; } { - LOOKAHEAD(2) "void" { return Schema.create(Type.NULL); } - | schema = Type() { return schema; } + | schema = UnannotatedType(Collections.emptyMap()) { return schema; } } String PropertyName(): { Token t; + String s; StringBuilder name = new StringBuilder(); } { - t = { name.append(t.image); } + s = Identifier() { name.append(s); } ( t = { name.append(t.image); } - t = { name.append(t.image); } | + s = Identifier() { name.append(s); } | t = { name.append(t.image); } - t = { name.append(t.image); } + s = Identifier() { name.append(s); } ) * { return name.toString(); } } @@ -1587,23 +1659,29 @@ Token AnyIdentifier(): t = | t = | t = | + t = | + t = | t = | t = | t = | + t = | t = | t = | + t = | t = | t = | t = | t = | t = | t = | + t = | t = | t = | t =