diff --git a/.asf.yaml b/.asf.yaml
index 08837a974e5..ebc42cac609 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -33,12 +33,16 @@ github:
- php
- python
- ruby
+ - rust
enabled_merge_buttons:
merge: false
rebase: true
squash: true
+ collaborators:
+ - jbonofre
+
notifications:
commits: commits@avro.apache.org
issues: issues@avro.apache.org
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000000..bb261cfd8c1
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,36 @@
+{
+ "name": "Avro Development",
+ "build": {
+ "dockerfile": "../share/docker/Dockerfile",
+ "context": ".."
+ },
+ "customizations": {
+ "vscode": {
+ "settings": {
+ },
+ "extensions": [
+ // Python
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ // C/C++
+ "ms-vscode.cpptools",
+ // C#
+ "ms-dotnettools.csharp",
+ // Rust
+ "vadimcn.vscode-lldb",
+ "mutantdino.resourcemonitor",
+ "matklad.rust-analyzer",
+ "tamasfe.even-better-toml",
+ "serayuzgur.crates",
+ // Java
+ "vscjava.vscode-java-pack",
+ // Shell script
+ "timonwong.shellcheck",
+ // YAML
+ "redhat.vscode-yaml",
+ // Git
+ "eamodio.gitlens"
+ ]
+ }
+ }
+}
\ No newline at end of file
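
The devcontainer builds from the shared share/docker/Dockerfile, so the same environment can be used outside VS Code as well. A minimal sketch with the Dev Containers CLI (an assumption: the CLI is not part of this change, and VS Code performs these steps automatically):

```sh
# Build and start the container described by .devcontainer/devcontainer.json,
# then run the repository's test entry point inside it.
npm install -g @devcontainers/cli
devcontainer up --workspace-folder .
devcontainer exec --workspace-folder . ./build.sh test
```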
diff --git a/.editorconfig b/.editorconfig
index b2d8a7c5fc9..b96e2b9c6e8 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -19,13 +19,30 @@ root = true
charset = utf-8
end_of_line = lf
insert_final_newline = true
+ij_any_block_comment_at_first_column = false
+ij_any_line_comment_at_first_column = false
[*.{java,xml,sh}]
indent_style = space
indent_size = 2
trim_trailing_whitespace=true
-[*.{cs,ps1}]
+ij_continuation_indent_size = 4
+ij_java_wrap_comments = true
+ij_any_indent_case_from_switch = false
+
+[*.{avsc,avpr,avdl}]
+indent_style = space
+indent_size = 2
+trim_trailing_whitespace=true
+
+ij_continuation_indent_size = 4
+ij_json_space_after_colon = true
+ij_json_space_before_colon = true
+ij_json_spaces_within_brackets = true
+ij_any_array_initializer_wrap = off
+
+[*.{ps1}]
indent_style = space
indent_size = 4
trim_trailing_whitespace=true
@@ -37,3 +54,174 @@ trim_trailing_whitespace=true
[*.py]
indent_style = space
indent_size = 4
+
+# Generated code
+[*{_AssemblyInfo.cs,.notsupported.cs,AsmOffsets.cs}]
+generated_code = true
+
+# C# files
+[*.cs]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace=true
+
+# New line preferences
+csharp_new_line_before_open_brace = all
+csharp_new_line_before_else = true
+csharp_new_line_before_catch = true
+csharp_new_line_before_finally = true
+csharp_new_line_before_members_in_object_initializers = true
+csharp_new_line_before_members_in_anonymous_types = true
+csharp_new_line_between_query_expression_clauses = true
+
+# Indentation preferences
+csharp_indent_block_contents = true
+csharp_indent_braces = false
+csharp_indent_case_contents = true
+csharp_indent_case_contents_when_block = true
+csharp_indent_switch_labels = true
+csharp_indent_labels = one_less_than_current
+
+# Modifier preferences
+csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion
+
+# avoid this. unless absolutely necessary
+dotnet_style_qualification_for_field = false:suggestion
+dotnet_style_qualification_for_property = false:suggestion
+dotnet_style_qualification_for_method = false:suggestion
+dotnet_style_qualification_for_event = false:suggestion
+
+# Types: use keywords instead of BCL types, and permit var only when the type is clear
+csharp_style_var_for_built_in_types = false:suggestion
+csharp_style_var_when_type_is_apparent = false:none
+csharp_style_var_elsewhere = false:suggestion
+dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
+dotnet_style_predefined_type_for_member_access = true:suggestion
+
+# Non-private static fields are PascalCase
+dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.symbols = non_private_static_fields
+dotnet_naming_rule.non_private_static_fields_should_be_pascal_case.style = non_private_static_field_style
+dotnet_naming_symbols.non_private_static_fields.applicable_kinds = field
+dotnet_naming_symbols.non_private_static_fields.applicable_accessibilities = public, protected, internal, protected_internal, private_protected
+dotnet_naming_symbols.non_private_static_fields.required_modifiers = static
+dotnet_naming_style.non_private_static_field_style.capitalization = pascal_case
+
+# Constants are PascalCase
+dotnet_naming_rule.constants_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.constants_should_be_pascal_case.symbols = constants
+dotnet_naming_rule.constants_should_be_pascal_case.style = constant_style
+dotnet_naming_symbols.constants.applicable_kinds = field, local
+dotnet_naming_symbols.constants.required_modifiers = const
+dotnet_naming_style.constant_style.capitalization = pascal_case
+
+# Static fields are camelCase and start with s_
+dotnet_naming_rule.static_fields_should_be_camel_case.severity = suggestion
+dotnet_naming_rule.static_fields_should_be_camel_case.symbols = static_fields
+dotnet_naming_rule.static_fields_should_be_camel_case.style = static_field_style
+dotnet_naming_symbols.static_fields.applicable_kinds = field
+dotnet_naming_symbols.static_fields.required_modifiers = static
+dotnet_naming_style.static_field_style.capitalization = camel_case
+dotnet_naming_style.static_field_style.required_prefix = s_
+
+# Instance fields are camelCase and start with _
+dotnet_naming_rule.instance_fields_should_be_camel_case.severity = suggestion
+dotnet_naming_rule.instance_fields_should_be_camel_case.symbols = instance_fields
+dotnet_naming_rule.instance_fields_should_be_camel_case.style = instance_field_style
+dotnet_naming_symbols.instance_fields.applicable_kinds = field
+dotnet_naming_style.instance_field_style.capitalization = camel_case
+dotnet_naming_style.instance_field_style.required_prefix = _
+
+# Locals and parameters are camelCase
+dotnet_naming_rule.locals_should_be_camel_case.severity = suggestion
+dotnet_naming_rule.locals_should_be_camel_case.symbols = locals_and_parameters
+dotnet_naming_rule.locals_should_be_camel_case.style = camel_case_style
+dotnet_naming_symbols.locals_and_parameters.applicable_kinds = parameter, local
+dotnet_naming_style.camel_case_style.capitalization = camel_case
+
+# Local functions are PascalCase
+dotnet_naming_rule.local_functions_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.local_functions_should_be_pascal_case.symbols = local_functions
+dotnet_naming_rule.local_functions_should_be_pascal_case.style = local_function_style
+dotnet_naming_symbols.local_functions.applicable_kinds = local_function
+dotnet_naming_style.local_function_style.capitalization = pascal_case
+
+# By default, name items with PascalCase
+dotnet_naming_rule.members_should_be_pascal_case.severity = suggestion
+dotnet_naming_rule.members_should_be_pascal_case.symbols = all_members
+dotnet_naming_rule.members_should_be_pascal_case.style = pascal_case_style
+dotnet_naming_symbols.all_members.applicable_kinds = *
+dotnet_naming_style.pascal_case_style.capitalization = pascal_case
+
+# Code style defaults
+csharp_using_directive_placement = outside_namespace:suggestion
+dotnet_sort_system_directives_first = true
+csharp_prefer_braces = true:silent
+csharp_preserve_single_line_blocks = true:none
+csharp_preserve_single_line_statements = false:none
+csharp_prefer_static_local_function = true:suggestion
+csharp_prefer_simple_using_statement = false:none
+csharp_style_prefer_switch_expression = false:none
+dotnet_style_readonly_field = true:suggestion
+
+# Expression-level preferences
+dotnet_style_object_initializer = true:suggestion
+dotnet_style_collection_initializer = true:suggestion
+dotnet_style_explicit_tuple_names = true:suggestion
+dotnet_style_coalesce_expression = true:suggestion
+dotnet_style_null_propagation = true:suggestion
+dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
+dotnet_style_prefer_inferred_tuple_names = true:suggestion
+dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
+dotnet_style_prefer_auto_properties = true:suggestion
+dotnet_style_prefer_conditional_expression_over_assignment = true:silent
+dotnet_style_prefer_conditional_expression_over_return = true:silent
+csharp_prefer_simple_default_expression = true:suggestion
+
+# Expression-bodied members
+csharp_style_expression_bodied_methods = true:silent
+csharp_style_expression_bodied_constructors = true:silent
+csharp_style_expression_bodied_operators = true:silent
+csharp_style_expression_bodied_properties = true:silent
+csharp_style_expression_bodied_indexers = true:silent
+csharp_style_expression_bodied_accessors = true:silent
+csharp_style_expression_bodied_lambdas = true:silent
+csharp_style_expression_bodied_local_functions = true:silent
+
+# Pattern matching
+csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
+csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
+csharp_style_inlined_variable_declaration = true:suggestion
+
+# Null checking preferences
+csharp_style_throw_expression = true:suggestion
+csharp_style_conditional_delegate_call = true:suggestion
+
+# Other features
+csharp_style_prefer_index_operator = false:none
+csharp_style_prefer_range_operator = false:none
+csharp_style_pattern_local_over_anonymous_function = false:none
+
+# Space preferences
+csharp_space_after_cast = false
+csharp_space_after_colon_in_inheritance_clause = true
+csharp_space_after_comma = true
+csharp_space_after_dot = false
+csharp_space_after_keywords_in_control_flow_statements = true
+csharp_space_after_semicolon_in_for_statement = true
+csharp_space_around_binary_operators = before_and_after
+csharp_space_around_declaration_statements = false
+csharp_space_before_colon_in_inheritance_clause = true
+csharp_space_before_comma = false
+csharp_space_before_dot = false
+csharp_space_before_open_square_brackets = false
+csharp_space_before_semicolon_in_for_statement = false
+csharp_space_between_empty_square_brackets = false
+csharp_space_between_method_call_empty_parameter_list_parentheses = false
+csharp_space_between_method_call_name_and_opening_parenthesis = false
+csharp_space_between_method_call_parameter_list_parentheses = false
+csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
+csharp_space_between_method_declaration_name_and_open_parenthesis = false
+csharp_space_between_method_declaration_parameter_list_parentheses = false
+csharp_space_between_parentheses = false
+csharp_space_between_square_brackets = false
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000000..b12292b62e4
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,11 @@
+# Set default behavior to automatically normalize line endings.
+* text=auto
+
+# Force bash scripts to always use lf line endings so that if a repo is accessed
+# in Unix via a file share from Windows, the scripts will work.
+*.sh text eol=lf
+
+# Force batch scripts to always use crlf line endings so that if a repo is accessed
+# in Unix via a file share from Windows, the scripts will work.
+*.cmd text eol=crlf
+*.bat text eol=crlf
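
To sanity-check the new rules locally (a sketch, assuming a checkout with this .gitattributes in place; install.bat is a placeholder path):

```sh
# Show the attributes that apply: build.sh should resolve to eol=lf,
# a batch file to eol=crlf.
git check-attr text eol -- build.sh install.bat
# Re-apply normalization to files committed before these rules existed.
git add --renormalize .
```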
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index f8efdbd7f8e..2823e406003 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,26 +1,60 @@
-Make sure you have checked _all_ steps below.
+
+
+## What is the purpose of the change
+
+*(For example: This pull request improves file read performance by buffering data, fixing AVRO-XXXX.)*
+
+
+## Verifying this change
+
+*(Please pick one of the following options)*
+
+This change is a trivial rework / code cleanup without any test coverage.
+
+*(or)*
+
+This change is already covered by existing tests, such as *(please describe tests)*.
+
+*(or)*
+
+This change added tests and can be verified as follows:
+
+*(example:)*
+- *Extended interop tests to verify consistent valid schema names between SDKs*
+- *Added test that validates that Java throws an AvroRuntimeException on invalid binary data*
+- *Manually verified the change by building the website and checking the new redirect*
+
+
+## Documentation
+
+- Does this pull request introduce a new feature? (yes / no)
+- If yes, how is the feature documented? (not applicable / docs / JavaDocs / not documented)
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 439a4dfcb33..14d2f29b768 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -51,6 +51,13 @@ updates:
day: "sunday"
open-pull-requests-limit: 20
+ - package-ecosystem: "npm"
+ directory: "/doc"
+ schedule:
+ interval: "weekly"
+ day: "sunday"
+ open-pull-requests-limit: 10
+
- package-ecosystem: "pip"
directory: "/lang/py/"
schedule:
@@ -65,10 +72,21 @@ updates:
day: "sunday"
open-pull-requests-limit: 20
- - package-ecosystem: "cargo"
- directory: "/lang/rust/"
+ - package-ecosystem: "bundler"
+ directory: "/doc/"
schedule:
interval: "weekly"
day: "sunday"
open-pull-requests-limit: 20
+ - package-ecosystem: "cargo"
+ directory: "/lang/rust/"
+ schedule:
+ interval: "daily"
+ open-pull-requests-limit: 20
+
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ day: "sunday"
diff --git a/.github/labeler.yml b/.github/labeler.yml
index ae59a356e43..d694c7e6d65 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -18,15 +18,39 @@
#
# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler
-C: ["lang/c/**/*"]
-C++: ["lang/c++/**/*"]
-C#: ["lang/csharp/**/*"]
-Java: ["lang/java/**/*"]
-Js: ["lang/js/**/*"]
-Perl: ["lang/perl/**/*"]
-Php: ["lang/php/**/*"]
-Python: ["lang/py/**/*"]
-Ruby: ["lang/ruby/**/*"]
-Rust: ["lang/rust/**/*"]
-build: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"]
-website: ["doc/**/*"]
+C:
+ - changed-files:
+ - any-glob-to-any-file: "lang/c/**/*"
+C++:
+ - changed-files:
+ - any-glob-to-any-file: "lang/c++/**/*"
+C#:
+ - changed-files:
+ - any-glob-to-any-file: "lang/csharp/**/*"
+Java:
+ - changed-files:
+ - any-glob-to-any-file: "lang/java/**/*"
+Js:
+ - changed-files:
+ - any-glob-to-any-file: "lang/js/**/*"
+Perl:
+ - changed-files:
+ - any-glob-to-any-file: "lang/perl/**/*"
+Php:
+ - changed-files:
+ - any-glob-to-any-file: "lang/php/**/*"
+Python:
+ - changed-files:
+ - any-glob-to-any-file: "lang/py/**/*"
+Ruby:
+ - changed-files:
+ - any-glob-to-any-file: "lang/ruby/**/*"
+Rust:
+ - changed-files:
+ - any-glob-to-any-file: "lang/rust/**/*"
+build:
+ - changed-files:
+ - any-glob-to-any-file: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"]
+website:
+ - changed-files:
+ - any-glob-to-any-file: "doc/**/*"
diff --git a/.github/workflows/codeql-csharp-analysis.yml b/.github/workflows/codeql-csharp-analysis.yml
index 3cbb0fdd245..b6153490740 100644
--- a/.github/workflows/codeql-csharp-analysis.yml
+++ b/.github/workflows/codeql-csharp-analysis.yml
@@ -23,15 +23,19 @@ name: "CodeQL C#"
on:
push:
branches:
- - master
+ - main
pull_request:
# The branches below must be a subset of the branches above
branches:
- - master
+ - main
paths:
- .github/workflows/codeql-csharp-analysis.yml
- lang/csharp/**
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
analyze:
name: Analyze
@@ -49,15 +53,26 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
fetch-depth: 2
+ # Install .NET SDKs
+ - name: Install .NET SDKs
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
+
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@v1
+ uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +84,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@v1
+ uses: github/codeql-action/autobuild@v3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -79,4 +94,4 @@ jobs:
# uses a compiled language
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v1
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/codeql-java-analysis.yml b/.github/workflows/codeql-java-analysis.yml
index 1b4933fcf60..7e42f8120fb 100644
--- a/.github/workflows/codeql-java-analysis.yml
+++ b/.github/workflows/codeql-java-analysis.yml
@@ -23,15 +23,19 @@ on:
workflow_dispatch:
push:
branches:
- - master
+ - main
pull_request:
branches:
- - master
+ - main
paths:
- .github/workflows/codeql-java-analysis.yml
- lang/java/**
- pom.xml
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
analyze:
name: Analyze
@@ -49,7 +53,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
@@ -57,7 +61,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@v1
+ uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -66,10 +70,25 @@ jobs:
# queries: ./path/to/local/query, your-org/your-repo/queries@main
queries: +security-and-quality
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- - name: Autobuild
- uses: github/codeql-action/autobuild@v1
+# - name: Autobuild
+# uses: github/codeql-action/autobuild@v3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -77,6 +96,8 @@ jobs:
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
+ - name: 'Java Test'
+ run: mvn clean test
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v1
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/codeql-js-analysis.yml b/.github/workflows/codeql-js-analysis.yml
index 58d2a0a6296..68b9aa85fe7 100644
--- a/.github/workflows/codeql-js-analysis.yml
+++ b/.github/workflows/codeql-js-analysis.yml
@@ -23,15 +23,19 @@ name: "CodeQL JavaScript"
on:
push:
branches:
- - master
+ - main
pull_request:
# The branches below must be a subset of the branches above
branches:
- - master
+ - main
paths:
- .github/workflows/codeql-js-analysis.yml
- lang/js/**
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
analyze:
name: Analyze
@@ -49,7 +53,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
@@ -57,7 +61,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@v1
+ uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +73,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@v1
+ uses: github/codeql-action/autobuild@v3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -79,4 +83,4 @@ jobs:
# uses a compiled language
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v1
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/codeql-py-analysis.yml b/.github/workflows/codeql-py-analysis.yml
index 048b2ed1a9b..60a47472fec 100644
--- a/.github/workflows/codeql-py-analysis.yml
+++ b/.github/workflows/codeql-py-analysis.yml
@@ -23,15 +23,19 @@ name: "CodeQL Python"
on:
push:
branches:
- - master
+ - main
pull_request:
# The branches below must be a subset of the branches above
branches:
- - master
+ - main
paths:
- .github/workflows/codeql-py-analysis.yml
- lang/py/**
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
analyze:
name: Analyze
@@ -49,7 +53,7 @@ jobs:
steps:
- name: Checkout repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
@@ -57,7 +61,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
- uses: github/codeql-action/init@v1
+ uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -69,7 +73,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
- uses: github/codeql-action/autobuild@v1
+ uses: github/codeql-action/autobuild@v3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -79,4 +83,4 @@ jobs:
# uses a compiled language
- name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v1
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/java-publish-snapshot.yml b/.github/workflows/java-publish-snapshot.yml
new file mode 100644
index 00000000000..89d8759a61b
--- /dev/null
+++ b/.github/workflows/java-publish-snapshot.yml
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+
+name: "Publish Snapshot to Maven"
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ paths:
+ - .github/workflows/java-publish-snapshot.yml
+ - lang/java/**
+ - pom.xml
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+defaults:
+ run:
+ working-directory: lang/java
+
+jobs:
+ publish-snapshot:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Cache Local Maven Repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
+ - name: 'Deploy Maven snapshots'
+ env:
+ ASF_USERNAME: ${{ secrets.NEXUS_USER }}
+ ASF_PASSWORD: ${{ secrets.NEXUS_PW }}
+ run: |
+ echo "<settings><servers><server><id>apache.snapshots.https</id><username>$ASF_USERNAME</username><password>$ASF_PASSWORD</password></server></servers></settings>" > settings.xml
+ mvn --settings settings.xml -U -B -e -fae -ntp -PskipQuality deploy
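
The settings.xml written above only needs to map the apache.snapshots.https server id to the Nexus credentials; the repository URLs come from the POM. A local sanity check, sketched with dummy values because the real credentials live in the repository secrets:

```sh
export ASF_USERNAME=dummy ASF_PASSWORD=dummy
echo "<settings><servers><server><id>apache.snapshots.https</id><username>$ASF_USERNAME</username><password>$ASF_PASSWORD</password></server></servers></settings>" > settings.xml
# Confirm the file parses and the server entry is picked up.
mvn help:effective-settings --settings settings.xml | grep -A2 apache.snapshots.https
```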
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index c786eb6440e..815d5d02a5c 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -23,7 +23,7 @@ jobs:
triage:
runs-on: ubuntu-latest
steps:
- - uses: actions/labeler@v2
+ - uses: actions/labeler@v5
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
sync-labels: true
diff --git a/.github/workflows/maven4.yml b/.github/workflows/maven4.yml
new file mode 100644
index 00000000000..5376488b3e3
--- /dev/null
+++ b/.github/workflows/maven4.yml
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Maven 4'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/maven4.yml
+ - lang/java/**
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ maven4:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Cache Local Maven Repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: Cache Maven 4 Build Cache
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/build-cache
+ key: ${{ runner.os }}-maven-build-cache-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-build-cache
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: Setup Maven 4
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 4.0.0-alpha-10
+
+ - name: Test
+ run: mvn clean verify
diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml
index d3fa1868a46..c38d808f8c3 100644
--- a/.github/workflows/rat.yml
+++ b/.github/workflows/rat.yml
@@ -17,29 +17,42 @@ name: 'Rat'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
jobs:
rat:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- - name: Setup Java
- uses: actions/setup-java@v2
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
with:
- distribution: 'adopt'
- java-version: '11'
+ maven-version: 3.9.6
- name: Run Rat
- run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -P rat -pl :avro-toplevel
+ run: mvn test -Dmaven.main.skip=true -Dmaven.test.skip=true -DskipTests=true -Dinvoker.skip=true -P rat -pl :avro-toplevel
diff --git a/.github/workflows/spotless.yml b/.github/workflows/spotless.yml
index 45c7e9de4d7..f69108badab 100644
--- a/.github/workflows/spotless.yml
+++ b/.github/workflows/spotless.yml
@@ -17,32 +17,45 @@ name: 'Spotless'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/spotless.yml
- lang/java/**
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
spotless:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- - name: Setup Java
- uses: actions/setup-java@v2
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
with:
- distribution: 'adopt'
- java-version: '11'
+ maven-version: 3.9.6
- name: Run Spotless Check
run: mvn spotless:check
diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml
new file mode 100644
index 00000000000..5c9b6aa3d3a
--- /dev/null
+++ b/.github/workflows/test-docker.yml
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Docker tests'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - 'share/docker/*'
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Run Docker tests
+ shell: bash
+ run: ./build.sh docker-test
diff --git a/.github/workflows/test-lang-c++-ARM.yml b/.github/workflows/test-lang-c++-ARM.yml
new file mode 100644
index 00000000000..f101eaeb2b5
--- /dev/null
+++ b/.github/workflows/test-lang-c++-ARM.yml
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test C++ on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - '.github/workflows/test-lang-c\+\+.yml'
+ - 'lang/c\+\+/**'
+
+defaults:
+ run:
+ working-directory: lang/c++
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: C++ on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y gcc g++ libboost-all-dev libfmt-dev cmake
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-c++.yml b/.github/workflows/test-lang-c++.yml
index c7db3804fec..61afa7ff61c 100644
--- a/.github/workflows/test-lang-c++.yml
+++ b/.github/workflows/test-lang-c++.yml
@@ -17,9 +17,9 @@ name: Test C++
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- '.github/workflows/test-lang-c\+\+.yml'
- 'lang/c\+\+/**'
@@ -28,14 +28,24 @@ defaults:
run:
working-directory: lang/c++
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Install Dependencies
- run: sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev cmake
+ run: sudo apt update && sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev libfmt-dev cmake
+
+ - name: Print Versions
+ run: |
+ gcc --version
+ cmake --version
+ cppcheck --version
- name: Clean
run: ./build.sh clean
@@ -45,3 +55,9 @@ jobs:
- name: Test
run: ./build.sh test
+
+ - name: Release build
+ run: |
+ mkdir -p build
+ cd build
+ cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..
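
Note that the new Release build step only runs the CMake configure. A local equivalent that also compiles (an assumption: the CI step may deliberately stop after configuration):

```sh
cd lang/c++
mkdir -p build && cd build
cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..
# Not part of the CI step: compile the Release configuration as well.
cmake --build . -- -j"$(nproc)"
```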
diff --git a/.github/workflows/test-lang-c-ARM.yml b/.github/workflows/test-lang-c-ARM.yml
new file mode 100644
index 00000000000..ffb31be2b39
--- /dev/null
+++ b/.github/workflows/test-lang-c-ARM.yml
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test C on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-c.yml
+ - lang/c/**
+
+defaults:
+ run:
+ working-directory: lang/c
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: C on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y cmake liblzma-dev libsnappy-dev libjansson-dev zlib1g-dev pkg-config
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-c.yml b/.github/workflows/test-lang-c.yml
index 764a29364d6..9f31078bf1c 100644
--- a/.github/workflows/test-lang-c.yml
+++ b/.github/workflows/test-lang-c.yml
@@ -17,9 +17,9 @@ name: Test C
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-c.yml
- lang/c/**
@@ -28,11 +28,15 @@ defaults:
run:
working-directory: lang/c
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Install Dependencies
run: sudo apt-get install -qqy libjansson-dev libsnappy-dev
@@ -43,17 +47,40 @@ jobs:
- name: Test
run: ./build.sh test
+ - name: Check pkg-config
+ run: |
+ mkdir -p build
+ cd build
+ cmake ..
+ export PKG_CONFIG_PATH=./src
+ pkg-config --libs avro-c
+
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
@@ -72,7 +99,7 @@ jobs:
interop:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Install Dependencies
run: |
@@ -83,16 +110,31 @@ jobs:
libzstd-dev
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
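
The Check pkg-config step above only proves that avro-c.pc resolves from the build tree. A downstream consumer would compile against the installed library with the same metadata, roughly as follows (a sketch; myprog.c is a placeholder, and avro-c must be installed where pkg-config can find it):

```sh
# Compile and link a hypothetical consumer using the flags from avro-c.pc.
cc myprog.c $(pkg-config --cflags --libs avro-c) -o myprog
```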
diff --git a/.github/workflows/test-lang-csharp-ARM.yml b/.github/workflows/test-lang-csharp-ARM.yml
new file mode 100644
index 00000000000..4c3eacb0521
--- /dev/null
+++ b/.github/workflows/test-lang-csharp-ARM.yml
@@ -0,0 +1,160 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test C# on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-csharp.yml
+ - lang/csharp/**
+
+defaults:
+ run:
+ working-directory: lang/csharp
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Add libzstd
+ shell: bash
+ run: sudo apt-get install -y libzstd-dev
+
+ - name: Install .NET SDKs
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
+
+ - uses: actions/cache@v4
+ with:
+ path: ~/.nuget/packages
+ key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }}
+ restore-keys: |
+ ${{ runner.os }}-nuget-
+
+ - name: Lint
+ run: ./build.sh lint
+
+ - name: Test
+ run: ./build.sh test
+
+ interop:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Add libzstd
+ shell: bash
+ run: sudo apt-get install -y libzstd-dev
+
+ - name: Install .NET SDKs
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
+
+ - name: Cache Local Maven Repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
+ - name: Install Java Avro for Interop Test
+ working-directory: .
+ run: mvn -B install -PskipQuality
+
+ - name: Create Interop Data Directory
+ working-directory: .
+ run: mkdir -p build/interop/data
+
+ - name: Generate Interop Resources
+ working-directory: lang/java/avro
+ run: mvn -B -P interop-data-generate generate-resources
+
+ - name: Generate Interop Data
+ run: ./build.sh interop-data-generate
+
+ - name: Run Interop Tests
+ run: ./build.sh interop-data-test
+
+ arm64:
+ name: C# on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Cache Nuget
+ uses: actions/cache@v4
+ with:
+ path: ~/.nuget/packages
+ key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }}
+ restore-keys: |
+ ${{ runner.os }}-nuget-
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y wget libzstd-dev libicu-dev
+ wget https://dot.net/v1/dotnet-install.sh
+ bash ./dotnet-install.sh --channel "3.1" --install-dir "$HOME/.dotnet" # 3.1
+ bash ./dotnet-install.sh --channel "5.0" --install-dir "$HOME/.dotnet" # 5.0
+ bash ./dotnet-install.sh --channel "6.0" --install-dir "$HOME/.dotnet" # 6.0
+ bash ./dotnet-install.sh --channel "7.0" --install-dir "$HOME/.dotnet" # 7.0
+ bash ./dotnet-install.sh --channel "8.0" --install-dir "$HOME/.dotnet" # 8.0
+
+ - name: Build
+ run: |
+ set -x
+ export PATH=$HOME/.dotnet:$PATH
+ dotnet --list-sdks
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-csharp.yml b/.github/workflows/test-lang-csharp.yml
index b1959009e64..c81628213c0 100644
--- a/.github/workflows/test-lang-csharp.yml
+++ b/.github/workflows/test-lang-csharp.yml
@@ -17,9 +17,9 @@ name: 'Test C#'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-csharp.yml
- lang/csharp/**
@@ -28,13 +28,31 @@ defaults:
run:
working-directory: lang/csharp
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
+
+ - name: Add libzstd
+ shell: bash
+ run: sudo apt-get install -y libzstd-dev
+
+ - name: Install .NET SDKs
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: ~/.nuget/packages
key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }}
@@ -50,19 +68,48 @@ jobs:
interop:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
+
+ - name: Add libzstd
+ shell: bash
+ run: sudo apt-get install -y libzstd-dev
+
+ - name: Install .NET SDKs
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
diff --git a/.github/workflows/test-lang-java-ARM.yml b/.github/workflows/test-lang-java-ARM.yml
new file mode 100644
index 00000000000..ba48cf194f5
--- /dev/null
+++ b/.github/workflows/test-lang-java-ARM.yml
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test Java on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-java.yml
+ - lang/java/**
+ - pom.xml
+
+defaults:
+ run:
+ working-directory: lang/java
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: Java on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: 'Checkout sourcecode'
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Cache Local Maven Repository'
+ uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
+ - name: Build
+ run: ./build.sh clean test
+# set -x
+# export MAVEN_VERSION="3.9.6"
+# wget https://archive.apache.org/dist/maven/maven-3/$MAVEN_VERSION/binaries/apache-maven-$MAVEN_VERSION-bin.tar.gz
+# tar zxvf apache-maven-$MAVEN_VERSION-bin.tar.gz
+# export M2_HOME=$PWD/apache-maven-$MAVEN_VERSION
+# export PATH="$M2_HOME/bin:$PATH"
+# java -version
+# mvn -version
+# #MAVEN_OPTS="-Dsurefire.excludes=*TestCustomCodec*,*TestAllCodecs*,*TestNettyServer*" ./build.sh clean test
+# ./build.sh clean test
diff --git a/.github/workflows/test-lang-java.yml b/.github/workflows/test-lang-java.yml
index b76b9c3a5b8..bca1f0b7ae8 100644
--- a/.github/workflows/test-lang-java.yml
+++ b/.github/workflows/test-lang-java.yml
@@ -17,9 +17,9 @@ name: 'Test Java'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-java.yml
- lang/java/**
@@ -29,94 +29,128 @@ defaults:
run:
working-directory: lang/java
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
- name: Java ${{ matrix.java }} Test
+ name: 'Java Test'
runs-on: ubuntu-latest
- strategy:
- matrix:
- java:
- - '8'
- - '11'
steps:
- - uses: actions/checkout@v2
+ - name: 'Checkout sourcecode'
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- - name: Cache Local Maven Repository
- uses: actions/cache@v2
+ - name: 'Cache Local Maven Repository'
+ uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- - name: Setup Java
- uses: actions/setup-java@v2
+ - name: 'Setup Temurin JDK 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
with:
- distribution: 'adopt'
- java-version: ${{ matrix.java }}
+ maven-version: 3.9.6
- - name: Lint
+ - name: 'Java Lint'
run: ./build.sh lint
- - name: Test
+ - name: 'Java Test'
run: ./build.sh test
+ - name: 'Install Java Avro for reproducibility test'
+ working-directory: .
+ run: mvn -B clean install -PskipQuality
+
+ - name: 'Test Reproducible Build'
+ working-directory: .
+ run: mvn clean verify -PskipQuality artifact:compare
+
interop:
- name: Java ${{ matrix.java }} Interop
+ name: 'Java Interop'
runs-on: ubuntu-latest
- strategy:
- matrix:
- java:
- - '8'
- - '11'
+
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- - name: Cache Local Maven Repository
- uses: actions/cache@v2
+ - name: 'Cache Local Maven Repository'
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
- - name: Setup Java
- uses: actions/setup-java@v2
+ - name: 'Setup Temurin JDK 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
with:
- distribution: 'adopt'
- java-version: ${{ matrix.java }}
+ distribution: 'temurin'
+ java-version: |
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
- - name: Setup Python for Generating Input Data
- uses: actions/setup-python@v2
+ - name: 'Setup Python for Generating Input Data'
+ uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
- - name: Apt Install Compression Libs Required by Python
+ - name: 'Apt Install Compression Libs Required by Python'
run: |
sudo apt-get install -qqy --no-install-recommends libbz2-dev \
liblzma-dev \
libsnappy-dev \
libzstd-dev
- - name: Install Python Dependencies
+ - name: 'Install Python Dependencies'
run: |
- python3 -m pip install --upgrade pip setuptools tox-wheel
+ python3 -m pip install --upgrade pip setuptools tox
python3 -m pip install python-snappy zstandard
- - name: Install Java Avro for Interop Test
- working-directory: .
- run: mvn -B install -DskipTests
-
- - name: Create Interop Data Directory
+ - name: 'Setup C# for Generating Interop Data'
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 3.1.x
+ 5.0.x
+ 6.0.x
+ 7.0.x
+ 8.0.x
+
+ - name: 'Create Interop Data Directory'
working-directory: .
run: mkdir -p build/interop/data
- - name: Generate Interop Resources
- working-directory: lang/java/avro
- run: mvn -B -P interop-data-generate generate-resources
-
- - name: Generate Interop Data using Python
+ - name: 'Generate Interop Data using Python'
working-directory: lang/py
run: ./build.sh interop-data-generate
- - name: Run Interop Tests
- working-directory: lang/java/ipc
- run: mvn -B test -P interop-data-test
+ - name: 'Generate Interop Data using C#'
+ working-directory: lang/csharp
+ run: ./build.sh interop-data-generate
+
+ - name: 'Install Java Avro for other tests'
+ working-directory: .
+ run: mvn -B install -PskipQuality
+
+ - name: 'Generate Interop Data using Java 11, 17 & 21'
+ working-directory: lang/java/interop-data-test
+ run: mvn -B verify -Pgenerate-test-data
+
+ - name: 'Run Interop Tests using Java 11, 17 & 21'
+ working-directory: lang/java/interop-data-test
+ run: mvn -B verify -Pcheck-test-data
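
The reproducibility check added above works in two passes: the first install puts reference artifacts into the local repository, and artifact:compare then rebuilds and diffs the outputs against that reference. A local equivalent, sketched on the assumption that the Maven artifact plugin is configured by the parent POM:

```sh
# Pass 1: build reference artifacts into ~/.m2/repository.
mvn -B clean install -PskipQuality
# Pass 2: rebuild and compare against the installed reference.
mvn clean verify -PskipQuality artifact:compare
# On a mismatch, per-module .buildcompare reports list the differing files.
find . -name "*.buildcompare"
```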
diff --git a/.github/workflows/test-lang-js-ARM.yml b/.github/workflows/test-lang-js-ARM.yml
new file mode 100644
index 00000000000..54e1216eaca
--- /dev/null
+++ b/.github/workflows/test-lang-js-ARM.yml
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test JavaScript on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-js.yml
+ - lang/js/**
+
+defaults:
+ run:
+ working-directory: lang/js
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: JavaScript on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Cache Npm
+ uses: actions/cache@v4
+ with:
+ path: ~/.npm
+ key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
+ restore-keys: |
+ ${{ runner.os }}-node-
+
+ - name: Setup Node
+ uses: actions/setup-node@v4
+ with:
+ node-version: 18
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y wget tar xz-utils
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-js.yml b/.github/workflows/test-lang-js.yml
index 1f5bebce252..20f2cc09f4c 100644
--- a/.github/workflows/test-lang-js.yml
+++ b/.github/workflows/test-lang-js.yml
@@ -17,9 +17,9 @@ name: 'Test JavaScript'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-js.yml
- lang/js/**
@@ -28,6 +28,10 @@ defaults:
run:
working-directory: lang/js
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
name: Node ${{ matrix.node }}
@@ -37,14 +41,15 @@ jobs:
node:
- 12
- 14
+ - 16
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup Node
- uses: actions/setup-node@v2
+ uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -65,39 +70,42 @@ jobs:
node:
- 12
- 14
+ - 16
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup Node
- uses: actions/setup-node@v2
+ uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node }}
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- - name: Cache Local Maven Repository
- uses: actions/cache@v2
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-maven-
+ - name: Setup Python for Generating Input Data
+ uses: actions/setup-python@v5
- - name: Install Java Avro for Interop Test
- working-directory: .
- run: mvn -B install -DskipTests
+ - name: Apt Install Compression Libs Required by Python
+ run: |
+ sudo apt-get install -qqy --no-install-recommends libbz2-dev \
+ liblzma-dev \
+ libsnappy-dev \
+ libzstd-dev
+ - name: Install Python Dependencies
+ run: |
+ python3 -m pip install --upgrade pip setuptools tox
+ python3 -m pip install python-snappy zstandard
- name: Create Interop Data Directory
working-directory: .
run: mkdir -p build/interop/data
- - name: Generate Interop Resources
- working-directory: lang/java/avro
- run: mvn -B -P interop-data-generate generate-resources
+ - name: Generate Interop Data using Python
+ working-directory: lang/py
+ run: ./build.sh interop-data-generate
- name: Generate Interop Data
run: ./build.sh interop-data-generate
diff --git a/.github/workflows/test-lang-perl-ARM.yml b/.github/workflows/test-lang-perl-ARM.yml
new file mode 100644
index 00000000000..a7d3576cbdf
--- /dev/null
+++ b/.github/workflows/test-lang-perl-ARM.yml
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test Perl on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-perl.yml
+ - lang/perl/**
+
+defaults:
+ run:
+ working-directory: lang/perl
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ name: Perl ${{ matrix.perl }} Tests
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ perl:
+ - '5.32'
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: shogo82148/actions-setup-perl@v1
+ with:
+ perl-version: ${{ matrix.perl }}
+
+ - name: Install Dependencies
+ run: |
+ cpanm --mirror https://www.cpan.org/ install Compress::Zstd \
+ Encode \
+ Error::Simple \
+ JSON::MaybeXS \
+ Module::Install \
+ Module::Install::ReadmeFromPod \
+ Object::Tiny \
+ Perl::Critic \
+ Regexp::Common \
+ Test::Exception \
+ Test::More \
+ Test::Pod \
+ Try::Tiny
+
+ - name: Lint
+ run: ./build.sh lint
+
+ - name: Test
+ run: ./build.sh test
+
+ interop:
+ name: Perl ${{ matrix.perl }} Interop
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ perl:
+ - '5.32'
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: shogo82148/actions-setup-perl@v1
+ with:
+ perl-version: ${{ matrix.perl }}
+
+ - name: Install Dependencies
+ run: |
+ sudo apt-get -qqy install --no-install-recommends libjansson-dev \
+ libsnappy-dev
+ cpanm --mirror https://www.cpan.org/ install CPAN::Uploader \
+ Compress::Zstd \
+ Encode \
+ Error::Simple \
+ JSON::MaybeXS \
+ Module::Install \
+ Module::Install::ReadmeFromPod \
+ Object::Tiny \
+ Perl::Critic \
+ Regexp::Common \
+ Test::Exception \
+ Test::More \
+ Test::Pod \
+ Try::Tiny
+
+ - name: Cache Local Maven Repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
+ - name: Install Java Avro for Interop Test
+ working-directory: .
+ run: mvn -B install -PskipQuality
+
+ - name: Create Interop Data Directory
+ working-directory: .
+ run: mkdir -p build/interop/data
+
+ - name: Generate Interop Resources
+ working-directory: lang/java/avro
+ run: mvn -B -P interop-data-generate generate-resources
+
+ - name: Generate Interop Data
+ run: ./build.sh interop-data-generate
+
+ - name: Run Interop Tests
+ run: ./build.sh interop-data-test
diff --git a/.github/workflows/test-lang-perl.yml b/.github/workflows/test-lang-perl.yml
index bed6c367023..44e4105a2a5 100644
--- a/.github/workflows/test-lang-perl.yml
+++ b/.github/workflows/test-lang-perl.yml
@@ -17,9 +17,9 @@ name: 'Test Perl'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-perl.yml
- lang/perl/**
@@ -28,6 +28,10 @@ defaults:
run:
working-directory: lang/perl
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
name: Perl ${{ matrix.perl }} Tests
@@ -37,7 +41,7 @@ jobs:
perl:
- '5.32'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: shogo82148/actions-setup-perl@v1
with:
@@ -45,25 +49,19 @@ jobs:
- name: Install Dependencies
run: |
- sudo apt-get -qqy install --no-install-recommends libjansson-dev \
- libcompress-raw-zlib-perl \
- libcpan-uploader-perl \
- libencode-perl \
- libio-string-perl \
- libjson-xs-perl \
- libmodule-install-perl \
- libmodule-install-readmefrompod-perl \
- libobject-tiny-perl \
- libperl-critic-perl \
- libsnappy-dev \
- libtest-exception-perl \
- libtest-pod-perl
cpanm --mirror https://www.cpan.org/ install Compress::Zstd \
+ Encode \
Error::Simple \
- Module::Install::Repository \
+ JSON::MaybeXS \
+ Module::Install \
+ Module::Install::ReadmeFromPod \
+ Object::Tiny \
+ Perl::Critic \
Regexp::Common \
- Try::Tiny \
- inc::Module::Install
+ Test::Exception \
+ Test::More \
+ Test::Pod \
+ Try::Tiny
- name: Lint
run: ./build.sh lint
@@ -79,7 +77,7 @@ jobs:
perl:
- '5.32'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: shogo82148/actions-setup-perl@v1
with:
@@ -87,37 +85,48 @@ jobs:
- name: Install Dependencies
run: |
- sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \
- libcpan-uploader-perl \
- libencode-perl \
- libio-string-perl \
- libjansson-dev \
- libjson-xs-perl \
- libmodule-install-perl \
- libmodule-install-readmefrompod-perl \
- libobject-tiny-perl \
- libsnappy-dev \
- libtest-exception-perl \
- libtest-pod-perl
- cpanm --mirror https://www.cpan.org/ install Compress::Zstd \
+ sudo apt-get -qqy install --no-install-recommends libjansson-dev \
+ libsnappy-dev
+ cpanm --mirror https://www.cpan.org/ install CPAN::Uploader \
+ Compress::Zstd \
+ Encode \
Error::Simple \
- Module::Install::Repository \
+ JSON::MaybeXS \
+ Module::Install \
+ Module::Install::ReadmeFromPod \
Object::Tiny \
Regexp::Common \
- Try::Tiny \
- inc::Module::Install
+ Test::Exception \
+ Test::More \
+ Test::Pod \
+ Try::Tiny
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
diff --git a/.github/workflows/test-lang-php-ARM.yml b/.github/workflows/test-lang-php-ARM.yml
new file mode 100644
index 00000000000..85c2aa349bf
--- /dev/null
+++ b/.github/workflows/test-lang-php-ARM.yml
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test PHP on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-php.yml
+ - lang/php/**
+
+defaults:
+ run:
+ working-directory: lang/php
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: PHP on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y wget php php-xml php-mbstring php-curl php-gmp php-bz2 unzip libtidy-dev libpq5
+ php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');"
+ php -r "if (hash_file('sha384', 'composer-setup.php') === file_get_contents('https://composer.github.io/installer.sig')) { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;"
+ php composer-setup.php --version=2.2.5
+ php -r "unlink('composer-setup.php');"
+ sudo mv composer.phar /usr/local/bin/composer
+
+ - name: Build
+ run: |
+ set -x
+ composer --version
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-php.yml b/.github/workflows/test-lang-php.yml
index 1fc227f7f6c..b6329f04d06 100644
--- a/.github/workflows/test-lang-php.yml
+++ b/.github/workflows/test-lang-php.yml
@@ -17,9 +17,9 @@ name: 'Test PHP'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-php.yml
- lang/php/**
@@ -28,6 +28,10 @@ defaults:
run:
working-directory: lang/php
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
name: PHP ${{ matrix.php }} Test
@@ -40,18 +44,19 @@ jobs:
- '8.0'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
+ tools: composer:2.2.5
- name: Get Composer Cache Directory
id: composer-cache
- run: echo "::set-output name=dir::$(composer config cache-files-dir)"
+ run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT
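+        # Step outputs are written to the $GITHUB_OUTPUT file; the older ::set-output command is deprecated.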
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: ${{ steps.composer-cache.outputs.dir }}
key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.lock') }}
@@ -75,24 +80,40 @@ jobs:
- '8.0'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
+ tools: composer:2.2.5
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
@@ -102,7 +123,7 @@ jobs:
working-directory: lang/java/avro
run: mvn -B -P interop-data-generate generate-resources
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
repository: kjdev/php-ext-zstd
path: lang/php/php-ext-zstd
@@ -118,7 +139,7 @@ jobs:
echo "extension=zstd.so" | sudo tee -a /etc/php/${{ matrix.php }}/cli/conf.d/10-zstd.ini
php -m
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
with:
repository: kjdev/php-ext-snappy
path: lang/php/php-ext-snappy
diff --git a/.github/workflows/test-lang-py-ARM.yml b/.github/workflows/test-lang-py-ARM.yml
new file mode 100644
index 00000000000..874c7c35f9b
--- /dev/null
+++ b/.github/workflows/test-lang-py-ARM.yml
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test Python on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-py.yml
+ - lang/py/**
+
+defaults:
+ run:
+ working-directory: lang/py
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: Python on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y python3 python3-dev python3-pip git libbz2-dev libjansson-dev liblzma-dev libsnappy-dev libzstd-dev
+ python3 -m pip install --upgrade pip setuptools tox
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml
index 19522c01b7e..83bd1f83f15 100644
--- a/.github/workflows/test-lang-py.yml
+++ b/.github/workflows/test-lang-py.yml
@@ -17,9 +17,9 @@ name: 'Test Python'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-py.yml
- lang/py/**
@@ -28,26 +28,32 @@ defaults:
run:
working-directory: lang/py
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
name: Python ${{ matrix.python }} Tests
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python:
+ - '3.12'
+ - '3.11'
+ - '3.10'
- '3.9'
- '3.8'
- '3.7'
- - '3.6'
- - 'pypy-3.7'
- - 'pypy-3.6'
+ - 'pypy-3.9'
+ - 'pypy-3.10'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
@@ -61,14 +67,14 @@ jobs:
- name: Install Dependencies
run: |
- python3 -m pip install --upgrade pip setuptools tox-wheel
+ python3 -m pip install --upgrade pip setuptools tox
- name: Lint
- if: ${{ matrix.python == '3.9' }}
+ if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e lint
- name: Typechecks
- if: ${{ matrix.python == '3.9' }}
+ if: ${{ matrix.python == '3.10' }}
run: python3 -m tox -e typechecks
- name: Test
@@ -76,23 +82,24 @@ jobs:
interop:
name: Python ${{ matrix.python }} Interop
- runs-on: ubuntu-latest
+ runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
python:
+ - '3.11'
+ - '3.10'
- '3.9'
- '3.8'
- '3.7'
- - '3.6'
- - 'pypy-3.7'
- - 'pypy-3.6'
+ - 'pypy-3.9'
+ - 'pypy-3.10'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- name: Setup Python
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
@@ -106,20 +113,35 @@ jobs:
- name: Install Dependencies
run: |
- python3 -m pip install --upgrade pip setuptools tox-wheel
+ python3 -m pip install --upgrade pip setuptools tox
python3 -m pip install python-snappy zstandard
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
diff --git a/.github/workflows/test-lang-ruby-ARM.yml b/.github/workflows/test-lang-ruby-ARM.yml
new file mode 100644
index 00000000000..c1ce73878bf
--- /dev/null
+++ b/.github/workflows/test-lang-ruby-ARM.yml
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: 'Test Ruby on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-ruby.yml
+ - lang/ruby/**
+
+defaults:
+ run:
+ working-directory: lang/ruby
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: Ruby on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Cache gems
+ uses: actions/cache@v4
+ with:
+ path: .gem
+ key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
+ restore-keys: |
+ ${{ runner.os }}-gems-
+
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -q
+ sudo apt-get install -q -y ruby-dev bundler libsnappy-dev libyaml-dev
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh clean test
diff --git a/.github/workflows/test-lang-ruby.yml b/.github/workflows/test-lang-ruby.yml
index 8f4f5076b89..ac85605f8e1 100644
--- a/.github/workflows/test-lang-ruby.yml
+++ b/.github/workflows/test-lang-ruby.yml
@@ -17,9 +17,9 @@ name: 'Test Ruby'
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-ruby.yml
- lang/ruby/**
@@ -28,6 +28,10 @@ defaults:
run:
working-directory: lang/ruby
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
test:
name: Ruby ${{ matrix.ruby }} Tests
@@ -35,20 +39,22 @@ jobs:
strategy:
matrix:
ruby:
- - '2.6'
- '2.7'
- '3.0'
+ - '3.1'
+ - '3.2'
+ - '3.3'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
- name: Install Dependencies
- run: sudo apt-get install -qqy bundler libsnappy-dev
+ run: sudo apt-get install -qqy libsnappy-dev
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: .gem
key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
@@ -74,20 +80,22 @@ jobs:
strategy:
matrix:
ruby:
- - '2.6'
- '2.7'
- '3.0'
+ - '3.1'
+ - '3.2'
+ - '3.3'
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
- name: Install Dependencies
- run: sudo apt-get install -qqy bundler libsnappy-dev
+ run: sudo apt-get install -qqy libsnappy-dev
- - uses: actions/cache@v2
+ - uses: actions/cache@v4
with:
path: .gem
key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
@@ -98,16 +106,31 @@ jobs:
run: bundle config path .gem
- name: Cache Local Maven Repository
- uses: actions/cache@v2
+ uses: actions/cache@v4
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
+ with:
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
- name: Install Java Avro for Interop Test
working-directory: .
- run: mvn -B install -DskipTests
+ run: mvn -B install -PskipQuality
- name: Create Interop Data Directory
working-directory: .
diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml
index efb3f1eaff7..d4bbd4b6931 100644
--- a/.github/workflows/test-lang-rust-audit.yml
+++ b/.github/workflows/test-lang-rust-audit.yml
@@ -19,31 +19,55 @@ name: Rust Security Audit
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-rust-audit.yml
+ - lang/rust/**/Cargo.toml
+ - lang/rust/Cargo.lock
+ - lang/rust/deny.toml
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-rust-audit.yml
- - lang/rust/Cargo.toml
+ - lang/rust/**/Cargo.toml
- lang/rust/Cargo.lock
+ - lang/rust/deny.toml
+
+permissions:
+ contents: read
+
+env:
+ RUSTFLAGS: -Dwarnings
defaults:
run:
working-directory: lang/rust
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
audit:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: actions/checkout@v2
- # Currently does not work. See https://github.com/actions-rs/audit-check/issues/194
- #- name: Rust Audit
- # uses: actions-rs/audit-check@v1
- # with:
- # token: ${{ secrets.GITHUB_TOKEN }}
- # Install it manually
+ uses: actions/checkout@v4
+ - name: Dependency Review
+ if: github.event_name == 'pull_request'
+ uses: actions/dependency-review-action@v4
- name: Install Cargo Audit
run: cargo install cargo-audit
- name: Audit
run: cargo audit
+
+ deny:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Install Cargo Deny
+ run: cargo install cargo-deny
+ - name: Check
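+        # cargo-deny checks advisories, licenses, bans and sources as configured in deny.toml.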
+ run: cargo deny check
+
\ No newline at end of file
diff --git a/.github/workflows/test-lang-rust-ci-ARM.yml b/.github/workflows/test-lang-rust-ci-ARM.yml
new file mode 100644
index 00000000000..3ac8e0bfdd8
--- /dev/null
+++ b/.github/workflows/test-lang-rust-ci-ARM.yml
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: 'Rust Continuous Integration on ARM'
+on:
+ workflow_dispatch:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ paths:
+ - .github/workflows/test-lang-rust-ci.yml
+ - lang/rust/**
+
+permissions:
+ contents: read
+
+env:
+ RUSTFLAGS: -Dwarnings
+
+defaults:
+ run:
+ working-directory: lang/rust
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ arm64:
+ name: Rust on Linux ARM64
+ runs-on: ["self-hosted", "asf-arm"]
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Cache Cargo
+ uses: actions/cache@v4
+ with:
+ # these represent dependencies downloaded by cargo
+          # and thus do not depend on the OS, arch, or Rust version.
+ path: ~/.cargo
+ key: ${{ runner.os }}-target-arm64-${{ hashFiles('**/Cargo.lock') }}
+
+ - name: Cache Rust dependencies
+ uses: actions/cache@v4
+ with:
+ # these represent compiled steps of both dependencies and avro
+          # and thus are specific to a particular OS, arch, and Rust version.
+ path: lang/rust/target
+ key: ${{ runner.os }}-target-cache1-stable-
+
+ - name: Rust Toolchain
+ uses: dtolnay/rust-toolchain@nightly
+ with:
+ toolchain: stable
+ components: rustfmt
+ targets: x86_64-unknown-linux-gnu
+
+ - name: Build
+ run: |
+ set -x
+ ./build.sh test
diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml
index 977ea110731..025dca1a836 100644
--- a/.github/workflows/test-lang-rust-ci.yml
+++ b/.github/workflows/test-lang-rust-ci.yml
@@ -19,61 +19,236 @@ name: Rust Continuous Integration
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-rust-ci.yml
- lang/rust/**
+permissions:
+ contents: read
+
+env:
+ RUSTFLAGS: -Dwarnings
+
defaults:
run:
working-directory: lang/rust
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
ci:
runs-on: ubuntu-latest
strategy:
matrix:
rust:
- - stable
- - beta
- - nightly
- - 1.48.0 # MSRV
+ - 'stable'
+ - 'beta'
+ - 'nightly'
+ - '1.73.0' # MSRV
+ target:
+ - x86_64-unknown-linux-gnu
+ - wasm32-unknown-unknown
steps:
- name: Checkout
- uses: actions/checkout@v2
+ uses: actions/checkout@v4
+
+ - name: Cache Cargo
+ uses: actions/cache@v4
+ with:
+ # these represent dependencies downloaded by cargo
+          # and thus do not depend on the OS, arch, or Rust version.
+ path: ~/.cargo
+ key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }}
+ - name: Cache Rust dependencies
+ uses: actions/cache@v4
+ with:
+ # these represent compiled steps of both dependencies and avro
+          # and thus are specific to a particular OS, arch, and Rust version.
+ path: lang/rust/target
+ key: ${{ runner.os }}-target-cache1-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }}
- name: Rust Toolchain
- uses: actions-rs/toolchain@v1
+ uses: dtolnay/rust-toolchain@nightly
with:
- profile: minimal
toolchain: ${{ matrix.rust }}
- override: true
components: rustfmt
+ targets: ${{ matrix.target }}
- - name: Rust Format
- uses: actions-rs/cargo@v1
+ - name: Cache cargo-rdme
+ if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu'
+ uses: actions/cache@v4
with:
- command: fmt
- args: --manifest-path lang/rust/Cargo.toml --all -- --check
+ path: ~/.cargo-${{ matrix.rust }}/cargo-rdme
+ key: cargo-rdme-
+
+      # Check if the doc comment in avro/src/lib.rs and avro/README.md are in sync.
+ - name: Run cargo-rdme
+ # The result is environment independent so one test pattern is enough.
+ if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu'
+ run: |
+ cargo install --root ~/.cargo-${{ matrix.rust }}/cargo-rdme --locked cargo-rdme
+ export PATH=$PATH:~/.cargo-${{ matrix.rust }}/cargo-rdme/bin
+ cargo rdme --check
+
+ - name: Rust Format
+ if: matrix.target != 'wasm32-unknown-unknown'
+ run: cargo fmt --all -- --check
- name: Rust Build
- uses: actions-rs/cargo@v1
- with:
- command: build
- args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets
+ run: cargo build --all-features --all-targets
- name: Rust Test
- uses: actions-rs/cargo@v1
- with:
- command: test
- args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets
+ if: matrix.target != 'wasm32-unknown-unknown'
+ run: cargo test --all-features --target ${{ matrix.target }}
+
+ - name: Rust Test AVRO-3549
+ if: matrix.target != 'wasm32-unknown-unknown'
+ run: cargo test --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec
# because of https://github.com/rust-lang/cargo/issues/6669
- name: Rust Test docs
- uses: actions-rs/cargo@v1
+ if: matrix.target != 'wasm32-unknown-unknown'
+ run: cargo test --doc
+
+ interop:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Rust Toolchain
+ uses: dtolnay/rust-toolchain@nightly
+ with:
+ toolchain: stable
+
+ - name: Cache Cargo
+ uses: actions/cache@v4
+ with:
+ # these represent dependencies downloaded by cargo
+          # and thus do not depend on the OS, arch, or Rust version.
+ path: ~/.cargo
+ key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }}
+ - name: Cache Rust dependencies
+ uses: actions/cache@v4
+ with:
+ # these represent compiled steps of both dependencies and avro
+          # and thus are specific to a particular OS, arch, and Rust version.
+ path: lang/rust/target
+ key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }}
+
+ - name: Cache Local Maven Repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+
+ - name: 'Setup Temurin JDK 8, 11, 17 & 21'
+ uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4.2.1
with:
- command: test
- args: --manifest-path lang/rust/Cargo.toml --doc
+ distribution: 'temurin'
+ java-version: |
+ 8
+ 11
+ 17
+ 21
+
+ - name: 'Setup Maven 3.9.6'
+ uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5
+ with:
+ maven-version: 3.9.6
+
+ - name: Install Java Avro for Interop Test
+ working-directory: .
+ run: mvn -B install -PskipQuality
+
+ - name: Create Interop Data Directory
+ working-directory: .
+ run: mkdir -p build/interop/data
+
+ - name: Generate Interop Resources
+ working-directory: lang/java/avro
+ run: mvn -B -P interop-data-generate generate-resources
+
+ - name: Generate interop data
+ run: ./build.sh interop-data-generate
+
+ - name: Rust reads interop files created by Java and Rust
+ run: ./build.sh interop-data-test
+
+ - uses: shogo82148/actions-setup-perl@v1
+ with:
+ perl-version: 5.32
+
+ - name: Install Dependencies
+ run: |
+ sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \
+ libcpan-uploader-perl \
+ libencode-perl \
+ libio-string-perl \
+ libjansson-dev \
+ libjson-xs-perl \
+ libmodule-install-perl \
+ libmodule-install-readmefrompod-perl \
+ libobject-tiny-perl \
+ libsnappy-dev \
+ libtest-exception-perl \
+ libtest-pod-perl
+ cpanm --mirror https://www.cpan.org/ install Compress::Zstd \
+ Error::Simple \
+ Module::Install::Repository \
+ Object::Tiny \
+ Regexp::Common \
+ Try::Tiny \
+ inc::Module::Install
+
+
+ - name: Perl reads interop files created by Java and Rust
+ working-directory: lang/perl
+ run: ./build.sh interop-data-test
+
+ web-assembly:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Rust Toolchain
+ uses: dtolnay/rust-toolchain@nightly
+ with:
+ toolchain: stable
+ targets: wasm32-unknown-unknown
+
+ - name: Cache Cargo
+ uses: actions/cache@v4
+ with:
+ # these represent dependencies downloaded by cargo
+          # and thus do not depend on the OS, arch, or Rust version.
+ path: ~/.cargo
+ key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }}
+
+ - name: Cache Rust dependencies
+ uses: actions/cache@v4
+ with:
+ # these represent compiled steps of both dependencies and avro
+          # and thus are specific to a particular OS, arch, and Rust version.
+ path: lang/rust/target
+ key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }}
+
+ - name: Install wasm-pack
+ run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
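+        # wasm-pack builds and tests Rust crates that target WebAssembly (used for the wasm-demo app below).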
+
+ - name: Build the Web Assembly demo app
+ run: wasm-pack build wasm-demo
+
+ - name: Test the Web Assembly demo app
+ run: RUST_BACKTRACE=1 wasm-pack test --headless --firefox wasm-demo
diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml
index cedc5f5f042..e7e0ba85448 100644
--- a/.github/workflows/test-lang-rust-clippy.yml
+++ b/.github/workflows/test-lang-rust-clippy.yml
@@ -19,28 +19,39 @@ name: Rust Clippy Check
on:
workflow_dispatch:
push:
- branches: [ master ]
+ branches: [ main ]
pull_request:
- branches: [ master ]
+ branches: [ main ]
paths:
- .github/workflows/test-lang-rust-clippy.yml
- lang/rust/**
+permissions:
+ contents: read
+
+env:
+ RUSTFLAGS: -Dwarnings
+
defaults:
run:
working-directory: lang/rust
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
jobs:
clippy_check:
runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ rust:
+ - 'stable'
+ - '1.73.0' # MSRV
steps:
- - uses: actions/checkout@v2
- - uses: actions-rs/toolchain@v1
+ - uses: actions/checkout@v4
+ - uses: dtolnay/rust-toolchain@nightly
with:
- toolchain: stable
+ toolchain: ${{ matrix.rust }}
components: clippy
- override: true
- - uses: actions-rs/clippy-check@v1
- with:
- token: ${{ secrets.GITHUB_TOKEN }}
- args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets -- -Dclippy::all -Dunused_imports
+ - run: cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports
diff --git a/.gitignore b/.gitignore
index fd46be4f46f..437a035f0b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,5 @@ test-output
vendor
composer.lock
.phpunit.result.cache
+# Maven JVM settings
+.mvn/jvm.config
+# IntelliJ IDEA run configurations
+**/*.run.xml
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000000..d96e7ce1437
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "doc/themes/docsy"]
+ path = doc/themes/docsy
+ url = https://github.com/google/docsy
diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml
new file mode 100644
index 00000000000..e2e84018d96
--- /dev/null
+++ b/.mvn/extensions.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       https://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<extensions>
+  <extension>
+    <groupId>org.apache.maven.extensions</groupId>
+    <artifactId>maven-build-cache-extension</artifactId>
+    <version>1.0.1</version>
+  </extension>
+</extensions>
diff --git a/.travis/before_install.sh b/.travis/before_install.sh
deleted file mode 100755
index db76c129165..00000000000
--- a/.travis/before_install.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-case "$TRAVIS_OS_NAME" in
-"linux")
- sudo apt-get -q update
- sudo apt-get -q install --no-install-recommends -y curl git gnupg-agent locales pinentry-curses pkg-config rsync software-properties-common
- sudo apt-get -q clean
- sudo rm -rf /var/lib/apt/lists/*
-
- # Only Yetus 0.9.0+ supports `ADD` and `COPY` commands in Dockerfile
- curl -L https://www-us.apache.org/dist/yetus/0.10.0/apache-yetus-0.10.0-bin.tar.gz | tar xvz -C /tmp/
- # A dirty workaround to disable the Yetus robot for TravisCI,
- # since it'll cancel the changes that .travis/script.sh will do,
- # even if the `--dirty-workspace` option is specified.
- rm /tmp/apache-yetus-0.10.0/lib/precommit/robots.d/travisci.sh
- ;;
-"windows")
- # Install all (latest) SDKs which are used by multi framework projects
- choco install dotnetcore-2.1-sdk # .NET Core 2.1
- choco install dotnetcore-sdk # .NET Core 3.1
- choco install dotnet-sdk # .NET 5.0
- ;;
-*)
- echo "Invalid PLATFORM"
- exit 1
- ;;
-esac
diff --git a/BUILD.md b/BUILD.md
index c09994e67fb..d3059a2e3ba 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -4,21 +4,21 @@
The following packages must be installed before Avro can be built:
- - Java: JDK 1.8, Maven 3 or better, protobuf-compile
- - PHP: php7, phpunit, php7-gmp
- - Python 3: 3.5 or greater
+ - Java: 11, 17 and 21 with the appropriate toolchain config, Maven 3.9.6 or better, protobuf-compile
+ - PHP: php8, phpunit, php8-gmp
+ - Python 3: 3.7 or greater, tox (tox will install other dependencies as needed)
- C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config
- C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev
- C#: .NET Core 2.2 SDK
- JavaScript: Node 12.x+, nodejs, npm
- - Ruby: Ruby 2.6 or greater, ruby-dev, gem, bundler, snappy
+ - Ruby: Ruby 2.7 or greater, ruby-dev, gem, bundler, snappy
- Perl: Perl 5.24.1 or greater, gmake, Module::Install,
Module::Install::ReadmeFromPod, Module::Install::Repository,
- Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode,
- IO::String, Object::Tiny, Compress::ZLib, Error::Simple,
- Test::More, Test::Exception, Test::Pod
+ Math::BigInt, JSON::MaybeXS, Try::Tiny, Regexp::Common, Encode,
+ Object::Tiny, Compress::ZLib, Error::Simple, Test::More,
+ Test::Exception, Test::Pod
+ - Rust: rustc and Cargo 1.65.0 or greater
- Apache Ant 1.7
- - Apache Forrest 0.9 (for documentation)
- md5sum, sha1sum, used by top-level dist target
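+
+On a Debian/Ubuntu host, a possible starting point for the native toolchains is
+the sketch below (the package names are assumptions; adjust them for your
+distribution and add the language runtimes you actually need):
+
+```
+sudo apt-get install -y maven protobuf-compiler \
+    cmake g++ flex bison libboost-dev \
+    nodejs npm ruby-dev bundler libsnappy-dev
+```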
## Using docker
@@ -59,6 +59,20 @@ DOCKER_IMAGE_NAME=avro-build:1.10.1-rc1 ./build.sh docker
DOCKER_RUN_ENTRYPOINT="mvn --version" ./build.sh docker
```
+## Developing inside a Container (Visual Studio Code Devcontainer)
+
+Requirements:
+ - [Visual Studio Code](https://code.visualstudio.com/)
+ - [Remote Development extension pack](https://aka.ms/vscode-remote/download/extension)
+ - Docker
+   - Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop)
+   - macOS: [Docker Desktop](https://www.docker.com/products/docker-desktop)
+   - Linux: [Docker CE/EE](https://docs.docker.com/install/#supported-platforms) and [Docker Compose](https://docs.docker.com/compose/install)
+
+Useful links:
+ - [Developing inside a Container](https://code.visualstudio.com/docs/remote/containers)
+ - [Going further with Dev Containers](https://microsoft.github.io/code-with-engineering-playbook/developer-experience/going-further/)
+
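+If you prefer the command line, the Dev Containers CLI can build and start the
+same container (a minimal sketch, assuming the `@devcontainers/cli` npm package
+and a running Docker daemon):
+
+```
+npm install -g @devcontainers/cli
+# Build the image from .devcontainer/devcontainer.json and start the container
+devcontainer up --workspace-folder .
+```
+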
## Building
Once the requirements are installed (or from the Docker container),
diff --git a/DIST_README.txt b/DIST_README.txt
index 003751c75b5..9c68790ee86 100644
--- a/DIST_README.txt
+++ b/DIST_README.txt
@@ -9,6 +9,6 @@ This distribution contains the following files:
- avro-doc-x.y.z.tar.gz contains Avro's pre-built documentation.
- - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, and ruby/
+ - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, rust/ and ruby/
subdirectories contain pre-built, language-specific binaries,
bundles, etc. as conveniences.
diff --git a/LICENSE.txt b/LICENSE.txt
index 7e159a69bc2..42761f6f813 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -295,16 +295,6 @@ Copyright (C) 2006 Toni Ronkko
| ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
| OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
-License for ivy-2.2.0.jar used in the python implementation:
-
-Apache License version 2.0 (see above)
-
-----------------------------------------------------------------------
-License for pyAntTasks-1.3.jar used in the python implementation:
-
-Apache License version 2.0 (see above)
-
----------------------------------------------------------------------
License for NUnit binary included with the C# implementation:
File: nunit.framework.dll
diff --git a/NOTICE.txt b/NOTICE.txt
index 737629b09ba..41fa8b76b62 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -37,25 +37,6 @@ The Odiago NOTICE at the time of the contribution:
| This product includes software developed by Odiago, Inc.
| (https://www.wibidata.com).
-Apache Ivy includes the following in its NOTICE file:
-
-| Apache Ivy
-| Copyright 2007-2010 The Apache Software Foundation
-|
-| This product includes software developed by
-| The Apache Software Foundation (https://www.apache.org/).
-|
-| Portions of Ivy were originally developed by
-| Jayasoft SARL (http://www.jayasoft.fr/)
-| and are licensed to the Apache Software Foundation under the
-| "Software Grant License Agreement"
-|
-| SSH and SFTP support is provided by the JCraft JSch package,
-| which is open source software, available under
-| the terms of a BSD style license.
-| The original software and related information is available
-| at http://www.jcraft.com/jsch/.
-
Apache Log4Net includes the following in its NOTICE file:
| Apache log4net
diff --git a/README.md b/README.md
index 472656a3eb4..8167ed4ca75 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
-# Apache Avro™
+Apache Avro™
+============
+### Current CI status (Github servers)
[![test c][test c img]][test c]
[![test c#][test c# img]][test c#]
[![test c++][test c++ img]][test c++]
@@ -14,6 +16,19 @@
[![rust clippy check][rust clippy check img]][rust clippy check]
[![rust security audit][rust security audit img]][rust security audit]
+### Current CI status (ARM based servers)
+[![test c ARM][test c ARM img]][test c ARM]
+[![test c# ARM][test c# ARM img]][test c# ARM]
+[![test c++ ARM][test c++ ARM img]][test c++ ARM]
+[![test java ARM][test java ARM img]][test java ARM]
+[![test javascript ARM][test javascript ARM img]][test javascript ARM]
+[![test perl ARM][test perl ARM img]][test perl ARM]
+[![test ruby ARM][test ruby ARM img]][test ruby ARM]
+[![test python ARM][test python ARM img]][test python ARM]
+[![test php ARM][test php ARM img]][test php ARM]
+[![rust continuous integration ARM][rust continuous integration ARM img]][rust continuous integration ARM]
+
+### Current CodeQL status
[![codeql c#][codeql c# img]][codeql c#]
[![codeql java][codeql java img]][codeql java]
[![codeql javascript][codeql javascript img]][codeql javascript]
@@ -43,7 +58,18 @@ To contribute to Avro, please read:
[test python]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml
[test php]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml
+[test c ARM]: https://github.com/apache/avro/actions/workflows/test-lang-c-ARM.yml
+[test c# ARM]: https://github.com/apache/avro/actions/workflows/test-lang-csharp-ARM.yml
+[test c++ ARM]: https://github.com/apache/avro/actions/workflows/test-lang-c++-ARM.yml
+[test java ARM]: https://github.com/apache/avro/actions/workflows/test-lang-java-ARM.yml
+[test javascript ARM]: https://github.com/apache/avro/actions/workflows/test-lang-js-ARM.yml
+[test perl ARM]: https://github.com/apache/avro/actions/workflows/test-lang-perl-ARM.yml
+[test ruby ARM]: https://github.com/apache/avro/actions/workflows/test-lang-ruby-ARM.yml
+[test python ARM]: https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml
+[test php ARM]: https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml
+
[rust continuous integration]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml
+[rust continuous integration ARM]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml
[rust clippy check]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml
[rust security audit]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml
@@ -62,11 +88,34 @@ To contribute to Avro, please read:
[test python img]: https://github.com/apache/avro/actions/workflows/test-lang-py.yml/badge.svg
[test php img]: https://github.com/apache/avro/actions/workflows/test-lang-php.yml/badge.svg
+[test c ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-c-ARM.yml/badge.svg
+[test c# ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-csharp-ARM.yml/badge.svg
+[test c++ ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-c++-ARM.yml/badge.svg
+[test java ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-java-ARM.yml/badge.svg
+[test javascript ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-js-ARM.yml/badge.svg
+[test perl ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-perl-ARM.yml/badge.svg
+[test ruby ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-ruby-ARM.yml/badge.svg
+[test python ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml/badge.svg
+[test php ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml/badge.svg
+
[rust continuous integration img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg
[rust clippy check img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml/badge.svg
[rust security audit img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml/badge.svg
+[rust continuous integration ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml/badge.svg
+
[codeql c# img]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml/badge.svg
[codeql java img]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml/badge.svg
[codeql javascript img]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml/badge.svg
[codeql python img]: https://github.com/apache/avro/actions/workflows/codeql-py-analysis.yml/badge.svg
+
+You can use devcontainers to develop Avro:
+
+* [Open in Dev Containers](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/apache/avro)
+* [Open in GitHub Codespaces](https://codespaces.new/apache/avro?quickstart=1&hide_repo_select=true)
+
+
+### Trademark & logos
+Apache®, Apache Avro and the Apache Avro airplane logo are trademarks of The Apache Software Foundation.
+
+The Apache Avro airplane logo on this page has been designed by [Emma Kellam](https://github.com/emmak3l) for use by this project.
diff --git a/build.sh b/build.sh
index 231a20e7e2f..2598148e024 100755
--- a/build.sh
+++ b/build.sh
@@ -39,6 +39,9 @@ change_java_version() {
# ===========================================================================
+# This might not have been sourced if the entrypoint is not bash
+[[ -f "$HOME/.cargo/env" ]] && . "$HOME/.cargo/env"
+
set -xe
cd "${0%/*}"
@@ -53,6 +56,9 @@ DOCKER_BUILD_XTRA_ARGS=${DOCKER_BUILD_XTRA_ARGS-}
# Override the docker image name used.
DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME-}
+# When building a docker container, these are the files that will be sent to the build context and made available.
+DOCKER_EXTRA_CONTEXT="lang/ruby/Gemfile lang/ruby/avro.gemspec lang/ruby/Manifest share/VERSION.txt"
+
usage() {
echo "Usage: $0 {lint|test|dist|sign|clean|veryclean|docker [--args \"docker-args\"]|rat|githooks|docker-test}"
exit 1
@@ -174,7 +180,15 @@ do
cp "lang/perl/Avro-$VERSION.tar.gz" dist/perl/
# build docs
- (cd doc; ant)
+ cp -r doc/ build/staging-web/
+ find build/staging-web/ -type f -print0 | xargs -0 sed -r -i "s#\+\+version\+\+#${VERSION,,}#g"
+ mkdir -p build/staging-web/public/docs/
+ mv build/staging-web/doc/content/en/docs/++version++ build/staging-web/public/docs/"${VERSION,,}"
+ read -n 1 -s -r -p "Build build/staging-web/ manually now. Press a key to continue..."
+ # If it was a SNAPSHOT, it was lowercased during the build.
+ cp -R build/staging-web/public/docs/"${VERSION,,}"/* "build/$DOC_DIR/"
+ cp -R "build/$DOC_DIR/api" build/staging-web/public/docs/"${VERSION,,}"/
+ ( cd build/staging-web/public/docs/; ln -s "${VERSION,,}" current )
# add LICENSE and NOTICE for docs
mkdir -p "build/$DOC_DIR"
cp doc/LICENSE "build/$DOC_DIR"
@@ -198,7 +212,13 @@ do
\! -name '*.asc' \! -name '*.txt' );
do
(cd "${f%/*}" && shasum -a 512 "${f##*/}") > "$f.sha512"
- gpg --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f"
+
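+      # Loopback pinentry lets gpg read the passphrase from the command line instead
+      # of an interactive prompt; GPG_LOCAL_USER optionally selects the signing key.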
+ if [ -z "$GPG_LOCAL_USER" ]; then
+ gpg --pinentry-mode loopback --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f"
+ else
+ gpg --pinentry-mode loopback --local-user="$GPG_LOCAL_USER" --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f"
+ fi
+
done
set -x
@@ -206,7 +226,7 @@ do
clean)
rm -rf build dist
- (cd doc; ant clean)
+ rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock
(mvn -B clean)
rm -rf lang/java/*/userlogs/
@@ -234,7 +254,7 @@ do
veryclean)
rm -rf build dist
- (cd doc; ant clean)
+ rm -rf doc/public/ doc/resources/ doc/node_modules/ doc/package-lock.json doc/.hugo_build.lock
(mvn -B clean)
rm -rf lang/java/*/userlogs/
@@ -264,7 +284,6 @@ do
rm -rf lang/perl/inc/
rm -rf lang/ruby/.gem/
rm -rf lang/ruby/Gemfile.lock
- rm -rf lang/py/lib/ivy-2.2.0.jar
rm -rf lang/csharp/src/apache/ipc.test/bin/
rm -rf lang/csharp/src/apache/ipc.test/obj
;;
@@ -286,15 +305,20 @@ do
DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"avro-build-$USER_NAME:latest"}
{
cat share/docker/Dockerfile
- grep -vF 'FROM avro-build-ci' share/docker/DockerfileLocal
echo "ENV HOME /home/$USER_NAME"
echo "RUN getent group $GROUP_ID || groupadd -g $GROUP_ID $USER_NAME"
echo "RUN getent passwd $USER_ID || useradd -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME"
+ echo "RUN mkdir -p /home/$USER_NAME/.m2/repository"
} > Dockerfile
+
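+    # Default BUILDPLATFORM to the local docker daemon's OS/architecture when it is not set explicitly.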
+ if [ -z "$BUILDPLATFORM" ]; then
+ export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}")
+ fi
+ # Include the ruby gemspec for preinstallation.
# shellcheck disable=SC2086
- tar -cf- lang/ruby/Gemfile Dockerfile | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" -
+ tar -cf- Dockerfile $DOCKER_EXTRA_CONTEXT | DOCKER_BUILDKIT=1 docker build $DOCKER_BUILD_XTRA_ARGS --build-arg="BUILDPLATFORM=${BUILDPLATFORM}" -t "$DOCKER_IMAGE_NAME" -
rm Dockerfile
- # By mapping the .m2 directory you can do an mvn install from
+ # By mapping the .m2/repository directory you can do an mvn install from
# within the container and use the result on your normal
# system. And this also is a significant speedup in subsequent
# builds because the dependencies are downloaded only once.
@@ -306,10 +330,13 @@ do
# extra second before the changes are available within the docker container.
# shellcheck disable=SC2086
docker run --rm -t -i \
- --env "JAVA=${JAVA:-8}" \
+ --env "JAVA=${JAVA:-21}" \
--user "${USER_NAME}" \
--volume "${HOME}/.gnupg:/home/${USER_NAME}/.gnupg" \
- --volume "${HOME}/.m2:/home/${USER_NAME}/.m2${DOCKER_MOUNT_FLAG}" \
+ --volume "${PWD}/share/docker/m2:/home/${USER_NAME}/.m2/" \
+ --volume "${PWD}/share/docker/m2/toolchains.xml:/home/${USER_NAME}/.m2/toolchains.xml" \
+ --volume "${HOME}/.m2/repository:/home/${USER_NAME}/.m2/repository${DOCKER_MOUNT_FLAG}" \
+ --volume "${HOME}/.m2/build-cache:/home/${USER_NAME}/.m2/build-cache${DOCKER_MOUNT_FLAG}" \
--volume "${PWD}:/home/${USER_NAME}/avro${DOCKER_MOUNT_FLAG}" \
--workdir "/home/${USER_NAME}/avro" \
${DOCKER_RUN_XTRA_ARGS} "$DOCKER_IMAGE_NAME" ${DOCKER_RUN_ENTRYPOINT}
@@ -327,9 +354,15 @@ do
;;
docker-test)
- tar -cf- share/docker/Dockerfile lang/ruby/Gemfile |
- docker build -t avro-test -f share/docker/Dockerfile -
- docker run --rm -v "${PWD}:/avro${DOCKER_MOUNT_FLAG}" --env "JAVA=${JAVA:-8}" avro-test /avro/share/docker/run-tests.sh
+ if [ -z "$BUILDPLATFORM" ]; then
+ export BUILDPLATFORM=$(docker info --format "{{.OSType}}/{{.Architecture}}")
+ fi
+ tar -cf- share/docker/Dockerfile $DOCKER_EXTRA_CONTEXT |
+ DOCKER_BUILDKIT=1 docker build -t avro-test --build-arg BUILDPLATFORM="${BUILDPLATFORM}" -f share/docker/Dockerfile -
+ docker run --rm \
+ --volume "${PWD}:/avro${DOCKER_MOUNT_FLAG}" \
+ --volume "${PWD}/share/docker/m2/:/root/.m2/" \
+ --env "JAVA=${JAVA:-11}" avro-test /avro/share/docker/run-tests.sh
;;
*)
diff --git a/composer.json b/composer.json
index e5f1313aeba..b39f209ff6a 100644
--- a/composer.json
+++ b/composer.json
@@ -9,5 +9,10 @@
"require-dev": {
"phpunit/phpunit": "^9.1",
"squizlabs/php_codesniffer": "^3.5"
+ },
+ "config": {
+ "allow-plugins": {
+ "beberlei/composer-monorepo-plugin": true
+ }
}
}
diff --git a/doc/.gitignore b/doc/.gitignore
index 567609b1234..b56c8f8a701 100644
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -1 +1,5 @@
-build/
+public/
+resources/
+node_modules/
+package-lock.json
+.hugo_build.lock
diff --git a/doc/forrest.properties b/doc/Dockerfile
similarity index 92%
rename from doc/forrest.properties
rename to doc/Dockerfile
index 32df46f523d..1a671067c65 100644
--- a/doc/forrest.properties
+++ b/doc/Dockerfile
@@ -17,6 +17,6 @@
# under the License.
#
-# Make Forrest work with Java6
-forrest.validate.sitemap=false
+FROM klakegg/hugo:ext-alpine
+RUN apk add git
diff --git a/doc/LICENSE b/doc/LICENSE
index af6b6731242..e0f8f08e158 100644
--- a/doc/LICENSE
+++ b/doc/LICENSE
@@ -306,12 +306,6 @@ Prototype JavaScript framework, version 1.4.0_pre4
For a copy of the MIT license text, see above.
-----------------------------------------------------------------------
-License for Apache Forrest (skin), included in the Avro documentation:
-
-Copyright: 2009-2015 The Apache Software Foundation
-License: https://www.apache.org/licenses/LICENSE-2.0 (see above)
-
----------------------------------------------------------------------
License for Doxygen-generated documentation for the C++ and C# implementations:
diff --git a/doc/NOTICE b/doc/NOTICE
index 8b7999217fd..7320bb0adfc 100644
--- a/doc/NOTICE
+++ b/doc/NOTICE
@@ -1,5 +1,5 @@
Apache Avro
-Copyright 2010-2015 The Apache Software Foundation
+Copyright 2010-2022 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (https://www.apache.org/).
@@ -26,16 +26,9 @@ is:
| implied. See the License for the specific language governing
| permissions and limitations under the License.
-The Odiago NOTICE at the time of the contribution:
-
-| This product includes software developed by Odiago, Inc.
-| (https://www.wibidata.com).
-
-The documentation contains the default Apache Forrest skin.
-Apache Forrest includes the following in its NOTICE file:
-
-| Apache Forrest
-| Copyright 2002-2007 The Apache Software Foundation.
+|--------------------------------------------------------------------------
+| This product includes software developed by The Docsy Authors.
+| (https://www.docsy.dev/).
|
| This product includes software developed at
| The Apache Software Foundation (https://www.apache.org/).
@@ -49,35 +42,3 @@ Apache Forrest includes the following in its NOTICE file:
| Other accompanying products do not require attribution, so are not listed.
|
| ------------------------------------------------------------------------
-| This product includes software developed by the OpenSymphony Group
-| http://www.opensymphony.com/
-|
-| This product includes software developed for project Krysalis
-| http://www.krysalis.org/
-|
-| This product includes software developed by Andy Clark.
-| https://people.apache.org/~andyc/neko/
-|
-| This product includes software developed by the ExoLab Project
-| https://www.exolab.org/
-|
-| This product includes software developed by TouchGraph LLC
-| https://www.touchgraph.com/
-|
-| This product includes software developed by Marc De Scheemaecker
-| http://nanoxml.cyberelf.be/
-|
-| This product includes software developed by the ANTLR project
-| https://www.antlr.org/
-|
-| This product includes software developed by Chaperon
-| http://chaperon.sourceforge.net/
-|
-| This product includes software developed by Sal Mangano (included in the XSLT Cookbook published by O'Reilly)
-| https://www.oreilly.com/catalog/xsltckbk/
-|
-| This product includes software developed by The Werken Company.
-| http://jaxen.werken.com/
-|
-| This product includes software developed by the jfor project
-| http://www.jfor.org/
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 00000000000..31f167d8712
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,61 @@
+# Apache Avro website
+
+This website is based on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme.
+Before building the website, you need to initialize submodules.
+
+```
+git submodule update --init --recursive
+```
+
+## Previewing the website locally
+
+```
+# From the doc directory, you will need to do this at least once for our SCSS modifications
+(cd doc && npm install)
+
+# Serve the website dynamically using extended hugo:
+hugo server --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged
+
+# You can do the same thing without installing hugo via docker.
+# From the Avro root directory:
+docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest --source doc/ server \
+ --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged
+```
+
+## Building the website in a distribution
+
+When you build an Avro distribution with the release script, a manual step is currently required.
+
+After all the binary artifacts and source have been created and copied to the `dist/` directory, the process will
+stop with **Build build/staging-web/ manually now. Press a key to continue...**
+
+At this point, from another terminal and in the Avro root directory, you can build the website:
+
+```
+# Install the necessary npm packages
+docker run --entrypoint=sh --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \
+ -c "cd build/staging-web && npm install"
+# Generate the website and the release documentation
+docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \
+ --source build/staging-web/ --gc --minify
+# Optional: docker leaves some files with unmanageable permissions
+sudo chown -R $USER:$USER build/staging-web
+```
+
+## Avro version
+
+(TODO)
+
+When a new version of Apache Avro is released:
+
+1. Change the value of `params.avroversion` in `config.toml` (see the sketch after this list)
+2. Add a new entry to the `Releases` pages in the `Blog` section, for example:
+
+```
+cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md
+```
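+
+A minimal sketch of step 1, assuming GNU `sed` is available and using 1.11.0
+purely as an example version (in `doc/config.toml`, the `avroversion` key sits
+under `[params]` and normally holds the `++version++` placeholder):
+
+```
+# Hypothetical one-liner: bump params.avroversion in doc/config.toml
+sed -i 's/^avroversion = ".*"/avroversion = "1.11.0"/' doc/config.toml
+```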
+
+## Updating the https://avro.apache.org website from a distribution
+
+(TODO)
+
diff --git a/doc/assets/icons/logo-black-text.png b/doc/assets/icons/logo-black-text.png
new file mode 100644
index 00000000000..c593f891c1d
Binary files /dev/null and b/doc/assets/icons/logo-black-text.png differ
diff --git a/doc/assets/icons/logo-text.svg b/doc/assets/icons/logo-text.svg
new file mode 100644
index 00000000000..6f842518618
--- /dev/null
+++ b/doc/assets/icons/logo-text.svg
@@ -0,0 +1,38 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/assets/icons/logo-white-text.png b/doc/assets/icons/logo-white-text.png
new file mode 100644
index 00000000000..29ad9ed4fd0
Binary files /dev/null and b/doc/assets/icons/logo-white-text.png differ
diff --git a/doc/assets/icons/logo.png b/doc/assets/icons/logo.png
new file mode 100644
index 00000000000..5651de93f58
Binary files /dev/null and b/doc/assets/icons/logo.png differ
diff --git a/doc/assets/icons/logo.svg b/doc/assets/icons/logo.svg
new file mode 100644
index 00000000000..beee014a3d5
--- /dev/null
+++ b/doc/assets/icons/logo.svg
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
diff --git a/doc/assets/scss/PTMono-Regular.ttf b/doc/assets/scss/PTMono-Regular.ttf
new file mode 100644
index 00000000000..b1983838c66
Binary files /dev/null and b/doc/assets/scss/PTMono-Regular.ttf differ
diff --git a/doc/assets/scss/_styles_project.scss b/doc/assets/scss/_styles_project.scss
new file mode 100644
index 00000000000..b69a1eb167a
--- /dev/null
+++ b/doc/assets/scss/_styles_project.scss
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+@font-face {
+ font-family: 'PT Mono';
+ font-style: normal;
+ font-weight: 400;
+ font-display: swap;
+ src: url(./PTMono-Regular.ttf) format('truetype');
+}
+
+// Disable all github editing links for now
+.td-page-meta--view { display: none !important; }
+.td-page-meta--edit { display: none !important; }
+.td-page-meta--child { display: none !important; }
+.td-page-meta--issue { display: none !important; }
+.td-page-meta--project-issue { display: none !important; }
+
+.navbar-brand {
+ font-family: "PT Mono", monospace;
+}
diff --git a/doc/assets/scss/_variables_project.scss b/doc/assets/scss/_variables_project.scss
new file mode 100644
index 00000000000..fb5495c8669
--- /dev/null
+++ b/doc/assets/scss/_variables_project.scss
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+.dropdown-menu .show {
+ overflow-y: auto;
+ max-height: 700px;
+}
diff --git a/doc/build.xml b/doc/build.xml
deleted file mode 100644
index d711608a36d..00000000000
--- a/doc/build.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- <!ENTITY AvroVersion "${version}">
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/doc/config.toml b/doc/config.toml
new file mode 100644
index 00000000000..4b58b37f02a
--- /dev/null
+++ b/doc/config.toml
@@ -0,0 +1,419 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+baseURL = "/"
+title = "Apache Avro"
+
+# Language settings
+contentDir = "content/en"
+defaultContentLanguage = "en"
+defaultContentLanguageInSubdir = false
+# Useful when translating.
+enableMissingTranslationPlaceholders = true
+
+enableRobotsTXT = true
+
+# Hugo allows theme composition (and inheritance). The precedence is from left to right.
+theme = ["docsy"]
+
+# Will give values to .Lastmod etc.
+enableGitInfo = true
+
+# Comment out to disable taxonomies in Docsy
+# disableKinds = ["taxonomy", "taxonomyTerm"]
+
+# You can add your own taxonomies
+[taxonomies]
+tag = "tags"
+category = "categories"
+
+[params.taxonomy]
+# set taxonomyCloud = [] to hide taxonomy clouds
+taxonomyCloud = ["tags", "categories"]
+
+# If used, must have same lang as taxonomyCloud
+taxonomyCloudTitle = ["Tag Cloud", "Categories"]
+
+# set taxonomyPageHeader = [] to hide taxonomies on the page headers
+taxonomyPageHeader = ["tags", "categories"]
+
+
+# Highlighting config
+pygmentsCodeFences = true
+pygmentsUseClasses = false
+# Use the new Chroma Go highlighter in Hugo.
+pygmentsUseClassic = false
+#pygmentsOptions = "linenos=table"
+# See https://help.farbox.com/pygments.html
+pygmentsStyle = "tango"
+
+# Configure how URLs look like per section.
+[permalinks]
+blog = "/:section/:year/:month/:day/:slug/"
+
+## Configuration for BlackFriday markdown parser: https://github.com/russross/blackfriday
+[blackfriday]
+plainIDAnchors = true
+hrefTargetBlank = true
+angledQuotes = false
+latexDashes = true
+
+# Image processing configuration.
+[imaging]
+resampleFilter = "CatmullRom"
+quality = 75
+anchor = "smart"
+
+[services]
+[services.googleAnalytics]
+# Comment out the next line to disable GA tracking. Also disables the feature described in [params.ui.feedback].
+# id = "UA-00000000-0"
+
+# Language configuration
+
+[languages.params]
+[languages.en.params]
+title = "Apache Avro"
+description = ""
+languageName ="English"
+# Weight used for sorting.
+weight = 1
+
+[markup]
+ [markup.goldmark]
+ [markup.goldmark.renderer]
+ unsafe = true
+ [markup.highlight]
+ # See a complete list of available styles at https://xyproto.github.io/splash/docs/all.html
+ style = "tango"
+ # Uncomment if you want your chosen highlight style used for code blocks without a specified language
+ # guessSyntax = "true"
+
+# Everything below this is Site Params
+
+# Comment out if you don't want the "print entire section" link enabled.
+[outputs]
+section = ["HTML", "print", "RSS"]
+
+[params]
+avroversion = "++version++"
+copyright = "The Apache Software Foundation"
+apache_foundation = "https://www.apache.org/"
+apache_events_logo = "https://www.apache.org/events/current-event-234x60.png"
+apache_events_url = "https://www.apache.org/events/current-event.html"
+privacy_policy = "http://www.apache.org/foundation/policies/privacy.html"
+license = "http://www.apache.org/licenses/"
+
+# First one is picked as the Twitter card image if not set on page.
+# images = ["images/project-illustration.png"]
+
+# Menu title if your navbar has a versions selector to access old versions of your site.
+# This menu appears only if you have at least one [params.versions] set.
+version_menu = "Releases"
+
+# Flag used in the "version-banner" partial to decide whether to display a
+# banner on every page indicating that this is an archived version of the docs.
+# Set this flag to "true" if you want to display the banner.
+archived_version = false
+
+# The version number for the version of the docs represented in this doc set.
+# Used in the "version-banner" partial to display a version number for the
+# current doc set.
+version = "++version++"
+
+# A link to latest version of the docs. Used in the "version-banner" partial to
+# point people to the main doc site.
+url_latest_version = "https://avro.apache.org"
+
+# Repository configuration (URLs for in-page links to opening issues and suggesting changes)
+github_repo = "https://github.com/apache/avro"
+github_subdir = "doc"
+
+# An optional link to a related project repo. For example, the sibling repository where your product code lives.
+github_project_repo = "https://github.com/apache/avro"
+
+# Specify a value here if your content directory is not in your repo's root directory
+# github_subdir = ""
+
+# Uncomment this if you have a newer GitHub repo with "main" as the default branch,
+# or specify a new value if you want to reference another branch in your GitHub links
+# github_branch= "main"
+
+# Google Custom Search Engine ID. Remove or comment out to disable search.
+# gcs_engine_id = "d72aa9b2712488cc3"
+
+# Enable Algolia DocSearch
+algolia_docsearch = false
+
+# Enable Lunr.js offline search
+offlineSearch = false
+
+# Enable syntax highlighting and copy buttons on code blocks with Prism
+prism_syntax_highlighting = true
+
+# User interface configuration
+[params.ui]
+# Set to true to disable breadcrumb navigation.
+breadcrumb_disable = false
+# Set to true to disable the About link in the site footer
+footer_about_disable = true
+# Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar
+navbar_logo = true
+# Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage.
+navbar_translucent_over_cover_disable = false
+# Enable to show the side bar menu in its compact state.
+sidebar_menu_compact = false
+# Set to true to hide the sidebar search box (the top nav search box will still be displayed if search is enabled)
+sidebar_search_disable = true
+sidebar_menu_foldable = true
+
+# Adds a H2 section titled "Feedback" to the bottom of each doc. The responses are sent to Google Analytics as events.
+# This feature depends on [services.googleAnalytics] and will be disabled if "services.googleAnalytics.id" is not set.
+# If you want this feature, but occasionally need to remove the "Feedback" section from a single page,
+# add "hide_feedback: true" to the page's front matter.
+[params.ui.feedback]
+enable = false
+# The responses that the user sees after clicking "yes" (the page was helpful) or "no" (the page was not helpful).
+yes = 'Glad to hear it! Please tell us how we can improve.'
+no = 'Sorry to hear that. Please tell us how we can improve.'
+
+# Adds a reading time to the top of each doc.
+# If you want this feature, but occasionally need to remove the Reading time from a single page,
+# add "hide_readingtime: true" to the page's front matter
+[params.ui.readingtime]
+enable = true
+
+[params.asf]
+[[params.asf.links]]
+name = "ASF Web Site"
+url = "http://www.apache.org/"
+
+[[params.asf.links]]
+name = "License"
+url = "http://www.apache.org/licenses/"
+
+[[params.asf.links]]
+name = "Donate"
+url = "http://www.apache.org/foundation/sponsorship.html"
+
+
+[[params.asf.links]]
+name = "Thanks"
+url = "http://www.apache.org/foundation/thanks.html"
+
+[[params.asf.links]]
+name = "Security"
+url = "http://www.apache.org/security/"
+
+
+[params.links]
+# End user relevant links. These will show up on left side of footer and in the community page if you have one.
+[[params.links.user]]
+ name = "User mailing list"
+ url = "https://lists.apache.org/list.html?user@avro.apache.org"
+ icon = "fa fa-envelope"
+ desc = "Discussion and help from your fellow users"
+[[params.links.user]]
+ name ="Twitter"
+ url = "https://twitter.com/ApacheAvro"
+ icon = "fab fa-twitter"
+ desc = "Follow us on Twitter to get the latest news!"
+[[params.links.user]]
+ name = "Stack Overflow"
+ url = "https://stackoverflow.com/questions/tagged/avro"
+ icon = "fab fa-stack-overflow"
+ desc = "Practical questions and curated answers"
+# Developer relevant links. These will show up on right side of footer and in the community page if you have one.
+[[params.links.developer]]
+ name = "GitHub"
+ url = "https://github.com/apache/avro"
+ icon = "fab fa-github"
+ desc = "Development takes place here!"
+[[params.links.developer]]
+ name = "Issues"
+ url = "https://issues.apache.org/jira/projects/AVRO/issues"
+ icon = "fab fa-jira"
+ desc = "Track bugs and new features"
+[[params.links.developer]]
+ name = "Chat with other project developers at Slack"
+ url = "https://the-asf.slack.com/"
+ icon = "fab fa-slack"
+ desc = "Chat with other project developers at #avro channel"
+[[params.links.developer]]
+ name = "Developer mailing list"
+ url = "https://lists.apache.org/list.html?dev@avro.apache.org"
+ icon = "fa fa-envelope"
+ desc = "Discuss development issues around the project"
+
+[[params.versions]]
+ version = "++version++ (Current)"
+ url = "/docs/++version++/"
+
+[[params.versions]]
+  version = "1.11.2"
+  url = "https://avro.apache.org/docs/1.11.2/"
+
+[[params.versions]]
+  version = "1.11.1"
+  url = "https://avro.apache.org/docs/1.11.1/"
+
+[[params.versions]]
+  version = "1.11.0"
+  url = "https://avro.apache.org/docs/1.11.0/"
+
+[[params.versions]]
+ version = "1.10.2"
+ url = "https://avro.apache.org/docs/1.10.2/"
+
+[[params.versions]]
+ version = "1.10.1"
+ url = "https://avro.apache.org/docs/1.10.1/"
+
+[[params.versions]]
+ version = "1.10.0"
+ url = "https://avro.apache.org/docs/1.10.0/"
+
+[[params.versions]]
+ version = "1.9.2"
+ url = "https://avro.apache.org/docs/1.9.2/"
+
+[[params.versions]]
+ version = "1.9.1"
+ url = "https://avro.apache.org/docs/1.9.1/"
+
+[[params.versions]]
+ version = "1.9.0"
+ url = "https://avro.apache.org/docs/1.9.0/"
+
+[[params.versions]]
+ version = "1.8.2"
+ url = "https://avro.apache.org/docs/1.8.2/"
+
+[[params.versions]]
+ version = "1.8.1"
+ url = "https://avro.apache.org/docs/1.8.1/"
+
+[[params.versions]]
+ version = "1.8.0"
+ url = "https://avro.apache.org/docs/1.8.0/"
+
+[[params.versions]]
+ version = "1.7.7"
+ url = "https://avro.apache.org/docs/1.7.7/"
+
+[[params.versions]]
+ version = "1.7.6"
+ url = "https://avro.apache.org/docs/1.7.6/"
+
+[[params.versions]]
+ version = "1.7.5"
+ url = "https://avro.apache.org/docs/1.7.5/"
+
+[[params.versions]]
+ version = "1.7.4"
+ url = "https://avro.apache.org/docs/1.7.4/"
+
+[[params.versions]]
+ version = "1.7.3"
+ url = "https://avro.apache.org/docs/1.7.3/"
+
+[[params.versions]]
+ version = "1.7.2"
+ url = "https://avro.apache.org/docs/1.7.2/"
+
+[[params.versions]]
+ version = "1.7.1"
+ url = "https://avro.apache.org/docs/1.7.1/"
+
+[[params.versions]]
+ version = "1.7.0"
+ url = "https://avro.apache.org/docs/1.7.0/"
+
+[[params.versions]]
+ version = "1.6.3"
+ url = "https://avro.apache.org/docs/1.6.3/"
+
+[[params.versions]]
+ version = "1.6.2"
+ url = "https://avro.apache.org/docs/1.6.2/"
+
+[[params.versions]]
+ version = "1.6.1"
+ url = "https://avro.apache.org/docs/1.6.1/"
+
+[[params.versions]]
+ version = "1.6.0"
+ url = "https://avro.apache.org/docs/1.6.0/"
+
+[[params.versions]]
+ version = "1.5.4"
+ url = "https://avro.apache.org/docs/1.5.4/"
+
+[[params.versions]]
+ version = "1.5.3"
+ url = "https://avro.apache.org/docs/1.5.3/"
+
+[[params.versions]]
+ version = "1.5.2"
+ url = "https://avro.apache.org/docs/1.5.2/"
+
+[[params.versions]]
+ version = "1.5.1"
+ url = "https://avro.apache.org/docs/1.5.1/"
+
+[[params.versions]]
+ version = "1.5.0"
+ url = "https://avro.apache.org/docs/1.5.0/"
+
+[[params.versions]]
+ version = "1.4.1"
+ url = "https://avro.apache.org/docs/1.4.1/"
+
+[[params.versions]]
+ version = "1.4.0"
+ url = "https://avro.apache.org/docs/1.4.0/"
+
+[[params.versions]]
+ version = "1.3.3"
+ url = "https://avro.apache.org/docs/1.3.3/"
+
+[[params.versions]]
+ version = "1.3.2"
+ url = "https://avro.apache.org/docs/1.3.2/"
+
+[[params.versions]]
+ version = "1.3.1"
+ url = "https://avro.apache.org/docs/1.3.1/"
+
+[[params.versions]]
+ version = "1.3.0"
+ url = "https://avro.apache.org/docs/1.3.0/"
+
+[[params.versions]]
+ version = "1.2.0"
+ url = "https://avro.apache.org/docs/1.2.0/"
+
+[[params.versions]]
+ version = "1.1.0"
+ url = "https://avro.apache.org/docs/1.1.0/"
+
+[[params.versions]]
+ version = "1.0.0"
+ url = "https://avro.apache.org/docs/1.0.0/"
+
diff --git a/doc/content/en/_index.html b/doc/content/en/_index.html
new file mode 100644
index 00000000000..618a99a14bf
--- /dev/null
+++ b/doc/content/en/_index.html
@@ -0,0 +1,72 @@
++++
+title = "Apache Avro"
+linkTitle = "Apache Avro"
+
++++
+
+
+
+
+
+
+
+
+
+
+Apache Avro™ - a data serialization system
+
+
+
+
+
+
+
+{{% blocks/lead color="primary" %}}
+Apache Avro™ is the leading serialization format for record data, and first choice for streaming data pipelines.
+It offers excellent schema evolution, and has implementations for the JVM (Java, Kotlin, Scala, …), Python, C/C++/C#, PHP, Ruby,
+Rust, JavaScript, and even Perl.
+{{% /blocks/lead %}}
+
+{{< blocks/section color="dark" type="features">}}
+
+
+{{% blocks/feature icon="fab fa-java" title="Getting started with Java" url="/docs/++version++/getting-started-java" %}}
+For Java / JVM users, find out everything you need to know about specifying a schema, (de)serializing Avro data and code generation.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fab fa-python" title="Getting started with Python" url="/docs/++version++/getting-started-python" %}}
+For Python users, find out everything you need to know about specifying a schema and (de)serializing Avro data.
+{{% /blocks/feature %}}
+
+{{% blocks/feature icon="fad fa-comments" title="Join Our Community!" url="/community/" %}}
+Learn from or connect with other users in our open and welcoming community. We'd love to hear from you!
+{{% /blocks/feature %}}
+
+{{< /blocks/section >}}
\ No newline at end of file
diff --git a/doc/content/en/blog/_index.md b/doc/content/en/blog/_index.md
new file mode 100644
index 00000000000..85f97bd211d
--- /dev/null
+++ b/doc/content/en/blog/_index.md
@@ -0,0 +1,33 @@
+---
+title: "Blog"
+linkTitle: "Blog"
+menu:
+ main:
+ weight: 30
+---
+
+
+
+This is the **blog** section. It has two categories: News and Releases.
+
+Files in these directories will be listed in reverse chronological order.
+
diff --git a/doc/content/en/blog/news/_index.md b/doc/content/en/blog/news/_index.md
new file mode 100644
index 00000000000..243dcf5f4a2
--- /dev/null
+++ b/doc/content/en/blog/news/_index.md
@@ -0,0 +1,26 @@
+---
+title: "News About Apache Avro"
+linkTitle: "News"
+weight: 20
+---
+
+
diff --git a/doc/content/en/blog/news/avro-joins-apache.md b/doc/content/en/blog/news/avro-joins-apache.md
new file mode 100755
index 00000000000..dbc1872644d
--- /dev/null
+++ b/doc/content/en/blog/news/avro-joins-apache.md
@@ -0,0 +1,28 @@
+---
+title: "Avro joins Apache"
+linkTitle: "Avro joins Apache"
+date: 2009-04-10
+---
+
+
+
+Avro has joined the Apache Software Foundation as a Hadoop subproject.
diff --git a/doc/content/en/blog/news/new-committer-christophe-le-saec.md b/doc/content/en/blog/news/new-committer-christophe-le-saec.md
new file mode 100755
index 00000000000..1522c1722b9
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-christophe-le-saec.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: Christophe Le Saec"
+linkTitle: "New committer: Christophe Le Saec"
+date: 2023-08-09
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro has invited Christophe
+Le Saec to become a committer and we are pleased to announce that
+he has accepted.
+
+Christophe definitely puts in the work and has an impressive breadth of
+knowledge about the languages of the Avro SDK!
+
+As an ASF project, we tend to be very conservative about making changes, and
+Christophe brings in fresh ideas and very quickly proposes concrete
+implementations to prove them. He has a good understanding of Avro, the
+motivation to move things forward, and the expertise to make changes! At the
+same time, he's easy to talk to and flexible in coming to a consensus.
+
+Thanks for all your hard work!
diff --git a/doc/content/en/blog/news/new-committer-david-mollitor.md b/doc/content/en/blog/news/new-committer-david-mollitor.md
new file mode 100755
index 00000000000..eb793009466
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-david-mollitor.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: David Mollitor"
+linkTitle: "New committer: David Mollitor"
+date: 2021-10-05
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro
+has invited David Mollitor to become a committer and we are pleased
+to announce that he has accepted.
+
+Since 2017, David has raised and fixed many issues in the
+Java SDK. Recently he's been finding and providing fixes for subtle
+performance issues. His work is always high-quality and he is
+reactive and pleasant to talk with on code reviews and JIRA.
+
+Being a committer enables easier contribution to the
+project since there is no need to go via the patch
+submission process. This should enable better productivity.
+
+It's great to have you as part of the team, David!
diff --git a/doc/content/en/blog/news/new-committer-martin-grigorov.md b/doc/content/en/blog/news/new-committer-martin-grigorov.md
new file mode 100755
index 00000000000..78cc3b61648
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-martin-grigorov.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: Martin Grigorov"
+linkTitle: "New committer: Martin Grigorov"
+date: 2022-01-04
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro
+has invited Martin Grigorov to become a committer and we are pleased
+to announce that he has accepted.
+
+Over the last few months, he has been active, reliable and easy to
+work with on PRs and on the mailing list. His work is of high
+quality, and he has a breadth of experience in many of the SDK languages.
+I'm especially keen to point out the work he's been doing on the website!
+
+Being a committer enables easier contribution to the
+project since there is no need to go via the patch
+submission process. This should enable better productivity.
+
+It's great to have you as part of the team, Martin!
diff --git a/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md
new file mode 100755
index 00000000000..535a2d88185
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: Oscar Westra van Holthe - Kind"
+linkTitle: "New committer: Oscar Westra van Holthe - Kind"
+date: 2023-08-09
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro has invited Oscar
+Westra van Holthe - Kind to become a committer and we are pleased to announce that
+he has accepted.
+
+Oscar has done some really solid work on the IDL and JavaCC parts of the Java
+SDK. We trust his work and think it's exceptionally high quality. From the
+start, he has already been doing much of the work of a committer, demonstrated
+by his continuous presence commenting on JIRA, reviewing PRs, and offering
+encouraging and insightful words on the mailing list.
+
+As a bonus, in his spare time, Oscar also maintains the IntelliJ plugin for
+[IDL support](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)!
+
+Thanks for all your hard work, and welcome!
diff --git a/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md
new file mode 100755
index 00000000000..42834c551c5
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-zoltan-csizmadia.md
@@ -0,0 +1,47 @@
+---
+title: "New committer: Zoltan Csizmadia"
+linkTitle: "New committer: Zoltan Csizmadia"
+date: 2022-03-29
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro has invited
+Zoltan Csizmadia to become a committer and we are pleased to announce
+that he has accepted.
+
+Zoltan has been present in the C# SDK for over two years and has
+really increased his activity in maintaining this language in the last
+few months. He knows the technology, but more importantly, he is
+patient and works well with those of us who rely on the expertise of
+others. Recently, he has been engaging with other contributors to
+increase the maintainability and quality of the dotnet code, and we
+have confidence in his decisions to balance stability of the
+established code and the expectations of modern C# developers.
+
+Being a committer enables easier contribution to the project since
+there is no need to go via the patch submission process. This should
+enable better productivity.
+
+Please join me in congratulating Zoltan on the recognition of his great
+work thus far in our community.
+
diff --git a/doc/content/en/blog/news/new-pmc-martin-grigorov.md b/doc/content/en/blog/news/new-pmc-martin-grigorov.md
new file mode 100755
index 00000000000..659bdb30d42
--- /dev/null
+++ b/doc/content/en/blog/news/new-pmc-martin-grigorov.md
@@ -0,0 +1,30 @@
+---
+title: "New PMC member: Martin Grigorov"
+linkTitle: "New PMC member: Martin Grigorov"
+date: 2022-09-13
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro is pleased to announce that Martin Grigorov has accepted our invitation to become a PMC member. He has been active, reliable and responsive to the community and a solid contributor to various SDKs, bringing well-thought-out reviews and comments to both old and new PRs and JIRA. He definitely stepped up for the website refactoring and preparing for the 1.11.1 release!
+
+Please join me in welcoming Martin to the Avro PMC!
diff --git a/doc/content/en/blog/news/new-pmc-michael-a-smith.md b/doc/content/en/blog/news/new-pmc-michael-a-smith.md
new file mode 100755
index 00000000000..2d203128eca
--- /dev/null
+++ b/doc/content/en/blog/news/new-pmc-michael-a-smith.md
@@ -0,0 +1,34 @@
+---
+title: "New PMC member: Michael A. Smith"
+linkTitle: "New PMC member: Michael A. Smith"
+date: 2023-08-09
+---
+
+
+
+The Project Management Committee (PMC) for Apache Avro has invited Michael A.
+Smith to the PMC and we are pleased to announce that he has accepted.
+
+Notably, Michael has taken a leadership role in ensuring the quality of the
+Python SDK, lending his expertise to ensure that Avro has a place in the
+Python community, while keeping our implementation up-to-date with standards
+and modern versions. It's not an easy task, and we appreciate all he does!
diff --git a/doc/content/en/blog/news/new-project-logo.md b/doc/content/en/blog/news/new-project-logo.md
new file mode 100644
index 00000000000..24f886912da
--- /dev/null
+++ b/doc/content/en/blog/news/new-project-logo.md
@@ -0,0 +1,50 @@
+---
+title: "New Project Logo"
+linkTitle: "New Project Logo"
+date: 2023-11-21
+---
+
+
+
+The Apache Avro project has a new project logo!
+
+The old logo was derived from the logo of a (now defunct) aircraft manufacturer
+in Great Britain. This posed a risk, as the Apache Software Foundation would
+not contest legal action over it (even if such action is extremely unlikely).
+
+But thanks to Emma Kellam, we now have a new logo! She has made several logo
+designs, and after some debate and several votes (it was a close call!), we can
+announce the new logo:
+
+[//]: # (the logo scales to 100% high or all available width, so limit it)
+
+{{< project_logo >}}
+
+
+The new logo is an homage to the previous logo, which is also triangular and
+uses blue colours. The paper airplane embodies keywords like 'fast', 'small'
+and 'efficient'. The blobby tail left behind by the airplane makes the icon
+unique and embodies 'flow' and 'transformation'.
+
+All in all a very nice logo for Apache Avro, which embodies the same keywords.
+
+Thanks for all your hard work Emma, and welcome!
diff --git a/doc/content/en/blog/releases/_index.md b/doc/content/en/blog/releases/_index.md
new file mode 100644
index 00000000000..55875726bd8
--- /dev/null
+++ b/doc/content/en/blog/releases/_index.md
@@ -0,0 +1,28 @@
+---
+title: "Releases"
+linkTitle: "Releases"
+weight: 20
+aliases:
+- /releases.html
+---
+
+
diff --git a/doc/content/en/blog/releases/avro-1.0.0-released.md b/doc/content/en/blog/releases/avro-1.0.0-released.md
new file mode 100755
index 00000000000..1bf74fbbb0f
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.0.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.0.0"
+linkTitle: "Avro 1.0.0"
+date: 2010-07-15
+---
+
+
+
+The first release of Avro is now available. To download it, use the "Download" link below.
diff --git a/doc/content/en/blog/releases/avro-1.1.0-released.md b/doc/content/en/blog/releases/avro-1.1.0-released.md
new file mode 100755
index 00000000000..f8876219ced
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.1.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.1.0"
+linkTitle: "Avro 1.1.0"
+date: 2009-09-15
+---
+
+
+
+Apache Avro 1.1.0 is now available!
diff --git a/doc/content/en/blog/releases/avro-1.10.0-released.md b/doc/content/en/blog/releases/avro-1.10.0-released.md
new file mode 100755
index 00000000000..d7a41f5e39d
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.10.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.10.0"
+linkTitle: "Avro 1.10.0"
+date: 2020-06-29
+---
+
+
+
+Apache Avro 1.10.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.10.1-released.md b/doc/content/en/blog/releases/avro-1.10.1-released.md
new file mode 100755
index 00000000000..b8e6b199bd2
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.10.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.10.1"
+linkTitle: "Avro 1.10.1"
+date: 2020-12-03
+---
+
+
+
+Apache Avro 1.10.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.10.2-released.md b/doc/content/en/blog/releases/avro-1.10.2-released.md
new file mode 100755
index 00000000000..ea1a74a7256
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.10.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.10.2"
+linkTitle: "Avro 1.10.2"
+date: 2021-03-15
+---
+
+
+
+Apache Avro 1.10.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.11.0-released.md b/doc/content/en/blog/releases/avro-1.11.0-released.md
new file mode 100755
index 00000000000..54fd8b7293c
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.11.0-released.md
@@ -0,0 +1,70 @@
+---
+title: "Avro 1.11.0"
+linkTitle: "Avro 1.11.0"
+date: 2021-10-31
+---
+
+
+
+The Apache Avro community is pleased to announce the release of Avro 1.11.0!
+
+All signed release artifacts, signatures and verification instructions can
+be found here.
+
+This release includes 120 Jira issues, among them some interesting features:
+
+* Specification: AVRO-3212 Support documentation tags for FIXED types
+* C#: AVRO-2961 Support dotnet framework 5.0
+* C#: AVRO-3225 Prevent memory errors when deserializing untrusted data
+* C++: AVRO-2923 Logical type corrections
+* Java: AVRO-2863 Support Avro core on android
+* Javascript: AVRO-3131 Drop support for node.js 10
+* Perl: AVRO-3190 Fix error when reading from EOF
+* Python: AVRO-2906 Improved performance validating deep record data
+* Python: AVRO-2914 Drop Python 2 support
+* Python: AVRO-3004 Drop Python 3.5 support
+* Ruby: AVRO-3108 Drop Ruby 2.5 support
+
+For the first time, the 1.11.0 release includes experimental support for
+**Rust**. Work is continuing on this donated SDK, but we have not versioned and
+published official artifacts for this release.
+
+**Python**: The avro package fully supports Python 3. We will no longer publish a
+separate avro-python3 package.
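+
+For example, installing this release now needs only the single `avro` package
+(shown with pip; pinning the version is just for illustration):
+
+```
+# Install the unified Python 3 package for this release
+pip install avro==1.11.0
+```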
+
+And of course, there are upgraded dependencies to the latest versions, CVE fixes and more:
+https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.0
+
+The link to all fixed JIRA issues and a brief summary can be found at:
+https://github.com/apache/avro/releases/tag/release-1.11.0
+
+In addition, language-specific release artifacts are available:
+
+* C#: https://www.nuget.org/packages/Apache.Avro/1.11.0
+* Java: from Maven Central
+* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.0
+* Perl: https://metacpan.org/release/Avro
+* Python 3: https://pypi.org/project/avro/1.11.0
+* Ruby: https://rubygems.org/gems/avro/versions/1.11.0
+
+Thanks to everyone for contributing!
+
diff --git a/doc/content/en/blog/releases/avro-1.11.1-released.md b/doc/content/en/blog/releases/avro-1.11.1-released.md
new file mode 100755
index 00000000000..d78d172930d
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.11.1-released.md
@@ -0,0 +1,130 @@
+---
+title: "Avro 1.11.1"
+linkTitle: "Avro 1.11.1"
+date: 2022-07-31
+---
+
+
+
+The Apache Avro community is pleased to announce the release of Avro 1.11.1!
+
+All signed release artifacts, signatures and verification instructions can
+be found here.
+
+## Most interesting
+
+This release includes 256 Jira issues, among them some interesting features:
+
+Avro specification
+- [AVRO-3436](https://issues.apache.org/jira/browse/AVRO-3436) Clarify which names are allowed to be qualified with namespaces
+- [AVRO-3370](https://issues.apache.org/jira/browse/AVRO-3370) Inconsistent behaviour on types as invalid names
+- [AVRO-3275](https://issues.apache.org/jira/browse/AVRO-3275) Clarify how fullnames are created, with example
+- [AVRO-3257](https://issues.apache.org/jira/browse/AVRO-3257) IDL: add syntax to create optional fields
+- [AVRO-2019](https://issues.apache.org/jira/browse/AVRO-2019) Improve docs for logical type annotation
+
+C++
+- [AVRO-2722](https://issues.apache.org/jira/browse/AVRO-2722) Use of boost::mt19937 is not thread safe
+
+C#
+- [AVRO-3383](https://issues.apache.org/jira/browse/AVRO-3383) Many completed subtasks for modernizing C# coding style
+- [AVRO-3481](https://issues.apache.org/jira/browse/AVRO-3481) Input and output variable type mismatch
+- [AVRO-3475](https://issues.apache.org/jira/browse/AVRO-3475) Enforce time-millis and time-micros specification
+- [AVRO-3469](https://issues.apache.org/jira/browse/AVRO-3469) Build and test using .NET SDK 7.0
+- [AVRO-3468](https://issues.apache.org/jira/browse/AVRO-3468) Default values for logical types not supported
+- [AVRO-3467](https://issues.apache.org/jira/browse/AVRO-3467) Use oracle-actions to test with Early Access JDKs
+- [AVRO-3453](https://issues.apache.org/jira/browse/AVRO-3453) Avrogen Add Generated Code Attribute
+- [AVRO-3432](https://issues.apache.org/jira/browse/AVRO-3432) Add command line option to skip creation of directories
+- [AVRO-3411](https://issues.apache.org/jira/browse/AVRO-3411) Add Visual Studio Code Devcontainer support
+- [AVRO-3388](https://issues.apache.org/jira/browse/AVRO-3388) Implement extra codecs for C# as separate nuget packages
+- [AVRO-3265](https://issues.apache.org/jira/browse/AVRO-3265) avrogen generates uncompilable code when namespace ends with ".Avro"
+- [AVRO-3219](https://issues.apache.org/jira/browse/AVRO-3219) Support nullable enum type fields
+
+Java
+- [AVRO-3531](https://issues.apache.org/jira/browse/AVRO-3531) GenericDatumReader in multithread lead to infinite loop
+- [AVRO-3482](https://issues.apache.org/jira/browse/AVRO-3482) Reuse MAGIC in DataFileReader
+- [AVRO-3586](https://issues.apache.org/jira/browse/AVRO-3586) Make Avro Build Reproducible
+- [AVRO-3441](https://issues.apache.org/jira/browse/AVRO-3441) Automatically register LogicalTypeFactory classes
+- [AVRO-3375](https://issues.apache.org/jira/browse/AVRO-3375) Add union branch, array index and map key "path"
+information to serialization errors
+- [AVRO-3374](https://issues.apache.org/jira/browse/AVRO-3374) Fully qualified type reference "ns.int" loses namespace
+- [AVRO-3294](https://issues.apache.org/jira/browse/AVRO-3294) IDL parsing allows doc comments in strange places
+- [AVRO-3273](https://issues.apache.org/jira/browse/AVRO-3273) avro-maven-plugin breaks on old versions of Maven
+- [AVRO-3266](https://issues.apache.org/jira/browse/AVRO-3266) Output stream incompatible with MagicS3GuardCommitter
+- [AVRO-3243](https://issues.apache.org/jira/browse/AVRO-3243) Lock conflicts when using computeIfAbsent
+- [AVRO-3120](https://issues.apache.org/jira/browse/AVRO-3120) Support Next Java LTS (Java 17)
+- [AVRO-2498](https://issues.apache.org/jira/browse/AVRO-2498) UUID generation is not working
+
+Javascript
+- [AVRO-3489](https://issues.apache.org/jira/browse/AVRO-3489) Replace istanbul with nyc for code coverage
+- [AVRO-3322](https://issues.apache.org/jira/browse/AVRO-3322) Buffer is not defined in browser environment
+- [AVRO-3084](https://issues.apache.org/jira/browse/AVRO-3084) Fix JavaScript interop test to work with other languages on CI
+
+Perl
+- [AVRO-3263](https://issues.apache.org/jira/browse/AVRO-3263) Schema validation warning on invalid schema with a long field
+
+Python
+- [AVRO-3542](https://issues.apache.org/jira/browse/AVRO-3542) Scale assignment optimization
+- [AVRO-3521](https://issues.apache.org/jira/browse/AVRO-3521) "Scale" property from decimal object
+- [AVRO-3380](https://issues.apache.org/jira/browse/AVRO-3380) Byte reading in avro.io does not assert bytes read
+- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229) validate the default value of an enum field
+- [AVRO-3218](https://issues.apache.org/jira/browse/AVRO-3218) Pass LogicalType to BytesDecimalSchema
+
+Ruby
+- [AVRO-3277](https://issues.apache.org/jira/browse/AVRO-3277) Test against Ruby 3.1
+
+Rust
+- [AVRO-3558](https://issues.apache.org/jira/browse/AVRO-3558) Add a demo crate that shows usage as WebAssembly
+- [AVRO-3526](https://issues.apache.org/jira/browse/AVRO-3526) Improve resolving Bytes and Fixed from string
+- [AVRO-3506](https://issues.apache.org/jira/browse/AVRO-3506) Implement Single Object Writer
+- [AVRO-3507](https://issues.apache.org/jira/browse/AVRO-3507) Implement Single Object Reader
+- [AVRO-3405](https://issues.apache.org/jira/browse/AVRO-3405) Add API for user-provided metadata to file
+- [AVRO-3339](https://issues.apache.org/jira/browse/AVRO-3339) Rename crate from avro-rs to apache-avro
+- [AVRO-3479](https://issues.apache.org/jira/browse/AVRO-3479) Derive Avro Schema macro
+
+Website
+- [AVRO-2175](https://issues.apache.org/jira/browse/AVRO-2175) Website refactor
+- [AVRO-3450](https://issues.apache.org/jira/browse/AVRO-3450) Document IDL support in IDEs
+
+
+## Rust
+
+This is the first release that provides the `apache-avro` crate at [crates.io](https://crates.io/crates/apache-avro)!
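+
+As a quick sketch, the crate can be pulled into a project with `cargo add`
+(available in recent Cargo versions; 0.14.0 is the crate version published
+alongside this release):
+
+```
+# Add the newly published Rust SDK to a Cargo project
+cargo add apache-avro@0.14.0
+```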
+
+## JIRA
+
+A list of all JIRA tickets fixed in 1.11.1 can be found [here](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.1)
+
+## Language repositories
+
+In addition, language-specific release artifacts are available:
+
+* C#: https://www.nuget.org/packages/Apache.Avro/1.11.1
+* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.1/
+* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.1
+* Perl: https://metacpan.org/release/Avro
+* Python 3: https://pypi.org/project/avro/1.11.1
+* Ruby: https://rubygems.org/gems/avro/versions/1.11.1
+* Rust: https://crates.io/crates/apache-avro/0.14.0
+
+Thanks to everyone for contributing!
+
diff --git a/doc/content/en/blog/releases/avro-1.11.2-released.md b/doc/content/en/blog/releases/avro-1.11.2-released.md
new file mode 100755
index 00000000000..3949d5f52ed
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.11.2-released.md
@@ -0,0 +1,98 @@
+---
+title: "Avro 1.11.2"
+linkTitle: "Avro 1.11.2"
+date: 2023-07-03
+---
+
+
+
+The Apache Avro community is pleased to announce the release of Avro 1.11.2!
+
+All signed release artifacts, signatures and verification instructions can
+be found here.
+
+This release addresses 89 [Avro JIRA issues](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.2).
+
+## Highlights
+
+C#
+- [AVRO-3434](https://issues.apache.org/jira/browse/AVRO-3434): Support logical schemas in reflect reader and writer
+- [AVRO-3670](https://issues.apache.org/jira/browse/AVRO-3670): Add NET 7.0 support
+- [AVRO-3724](https://issues.apache.org/jira/browse/AVRO-3724): Fix C# JsonEncoder for nested array of records
+- [AVRO-3756](https://issues.apache.org/jira/browse/AVRO-3756): Add a method to return types instead of writing them to disk
+
+C++
+- [AVRO-3601](https://issues.apache.org/jira/browse/AVRO-3601): C++ API header contains breaking include
+- [AVRO-3705](https://issues.apache.org/jira/browse/AVRO-3705): C++17 support
+
+Java
+- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): Add new GenericData String/Utf8 ARRAY comparison test
+- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): improve GenericRecord MAP type comparison
+- [AVRO-3473](https://issues.apache.org/jira/browse/AVRO-3473): Use ServiceLoader to discover Conversion
+- [AVRO-3536](https://issues.apache.org/jira/browse/AVRO-3536): Inherit conversions for Union type
+- [AVRO-3597](https://issues.apache.org/jira/browse/AVRO-3597): Allow custom readers to override string creation
+- [AVRO-3560](https://issues.apache.org/jira/browse/AVRO-3560): Throw SchemaParseException on dangling content beyond end of schema
+- [AVRO-3602](https://issues.apache.org/jira/browse/AVRO-3602): Support Map(with non-String keys) and Set in ReflectDatumReader
+- [AVRO-3676](https://issues.apache.org/jira/browse/AVRO-3676): Produce valid toString() for UUID JSON
+- [AVRO-3698](https://issues.apache.org/jira/browse/AVRO-3698): SpecificData.getClassName must replace reserved words
+- [AVRO-3700](https://issues.apache.org/jira/browse/AVRO-3700): Publish Java SBOM artifacts with CycloneDX
+- [AVRO-3783](https://issues.apache.org/jira/browse/AVRO-3783): Read LONG length for bytes, only allow INT sizes
+- [AVRO-3706](https://issues.apache.org/jira/browse/AVRO-3706): accept space in folder name
+
+Python
+- [AVRO-3761](https://issues.apache.org/jira/browse/AVRO-3761): Fix broken validation of nullable UUID field
+- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229): Raise on invalid enum default only if validation enabled
+- [AVRO-3622](https://issues.apache.org/jira/browse/AVRO-3622): Fix compatibility check for schemas having or missing namespace
+- [AVRO-3669](https://issues.apache.org/jira/browse/AVRO-3669): Add py.typed marker file (PEP561 compliance)
+- [AVRO-3672](https://issues.apache.org/jira/browse/AVRO-3672): Add CI testing for Python 3.11
+- [AVRO-3680](https://issues.apache.org/jira/browse/AVRO-3680): allow to disable name validation
+
+Ruby
+- [AVRO-3775](https://issues.apache.org/jira/browse/AVRO-3775): Fix decoded default value of logical type
+- [AVRO-3697](https://issues.apache.org/jira/browse/AVRO-3697): Test against Ruby 3.2
+- [AVRO-3722](https://issues.apache.org/jira/browse/AVRO-3722): Eagerly initialize instance variables for better inline cache hits
+
+Rust
+- Many, many bug fixes and implementation progress in this experimental SDK.
+- Rust CI builds and lints are passing, and the crate has been released to crates.io as version 0.15.0.
+
+In addition:
+- Upgrade dependencies to latest versions, including CVE fixes.
+- Testing and build improvements.
+- Performance fixes, other bug fixes, better documentation and more...
+
+
+Known issues
+- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789) Java: Problem when comparing empty MAP types.
+
+## Language SDK / Convenience artifacts
+
+* C#: https://www.nuget.org/packages/Apache.Avro/1.11.2
+* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.2/
+* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.2
+* Perl: https://metacpan.org/release/Avro
+* Python 3: https://pypi.org/project/avro/1.11.2
+* Ruby: https://rubygems.org/gems/avro/versions/1.11.2
+* Rust: https://crates.io/crates/apache-avro/0.15.0
+
+Thanks to everyone for contributing!
+
diff --git a/doc/content/en/blog/releases/avro-1.11.3-released.md b/doc/content/en/blog/releases/avro-1.11.3-released.md
new file mode 100755
index 00000000000..50a0eef3fcf
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.11.3-released.md
@@ -0,0 +1,79 @@
+---
+title: "Avro 1.11.3"
+linkTitle: "Avro 1.11.3"
+date: 2023-09-22
+---
+
+
+
+The Apache Avro community is pleased to announce the release of Avro 1.11.3!
+
+All signed release artifacts, signatures and verification instructions can
+be found here.
+
+This release [addresses 39 Jira issues](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.3).
+
+## Highlights
+
+Java
+- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789): Comparing maps in GenericData is wrong for certain combinations and fails for empty maps
+- [AVRO-3713](https://issues.apache.org/jira/browse/AVRO-3713): Thread scalability problem with the use of SynchronizedMap
+- [AVRO-3486](https://issues.apache.org/jira/browse/AVRO-3486): Protocol namespace not parsed correctly if protocol is defined by full name
+- [AVRO-2771](https://issues.apache.org/jira/browse/AVRO-2771): Allow having Error in a Record
+- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation
+
+Python
+- [AVRO-3819](https://issues.apache.org/jira/browse/AVRO-3819): Rationalize the system properties that limit allocation
+- [AVRO-312](https://issues.apache.org/jira/browse/AVRO-312): Generate documentation for Python with Sphinx
+
+Rust
+- [AVRO-3853](https://issues.apache.org/jira/browse/AVRO-3853): Support local-timestamp logical types for the Rust SDK
+- [AVRO-3851](https://issues.apache.org/jira/browse/AVRO-3851): Validate default value for record fields and enums on parsing
+- [AVRO-3847](https://issues.apache.org/jira/browse/AVRO-3847): Record field doesn't accept default value if field type is union and the type of default value is pre-defined name
+- [AVRO-3846](https://issues.apache.org/jira/browse/AVRO-3846): Race condition can happen among serde tests
+- [AVRO-3838](https://issues.apache.org/jira/browse/AVRO-3838): Replace regex crate with regex-lite
+- [AVRO-3837](https://issues.apache.org/jira/browse/AVRO-3837): Disallow invalid namespaces for the Rust binding
+- [AVRO-3835](https://issues.apache.org/jira/browse/AVRO-3835): Get rid of byteorder and zerocopy dependencies
+- [AVRO-3830](https://issues.apache.org/jira/browse/AVRO-3830): Handle namespace properly if a name starts with dot
+- [AVRO-3827](https://issues.apache.org/jira/browse/AVRO-3827): Disallow duplicate field names
+- [AVRO-3787](https://issues.apache.org/jira/browse/AVRO-3787): Deserialization fails to use default if an enum in a record in a union is given an unknown symbol
+- [AVRO-3786](https://issues.apache.org/jira/browse/AVRO-3786): Deserialization results in FindUnionVariant error if the writer and reader have the same symbol but at different positions
+
+In addition:
+- Upgrade dependencies to latest versions, including CVE fixes.
+- Testing and build improvements.
+- Performance fixes, other bug fixes, better documentation and more.
+
+Known issues: none.
+
+## Language SDK / Convenience artifacts
+
+* C#: https://www.nuget.org/packages/Apache.Avro/1.11.3
+* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.3/
+* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.3
+* Perl: https://metacpan.org/release/Avro
+* Python 3: https://pypi.org/project/avro/1.11.3
+* Ruby: https://rubygems.org/gems/avro/versions/1.11.3
+* Rust: https://crates.io/crates/apache-avro/0.16.0
+
+Thanks to everyone for contributing!
diff --git a/doc/content/en/blog/releases/avro-1.2.0-released.md b/doc/content/en/blog/releases/avro-1.2.0-released.md
new file mode 100755
index 00000000000..24fc57ad92e
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.2.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.2.0"
+linkTitle: "Avro 1.2.0"
+date: 2009-10-15
+---
+
+
+
+Apache Avro 1.2.0 is now available!
diff --git a/doc/content/en/blog/releases/avro-1.3.0-released.md b/doc/content/en/blog/releases/avro-1.3.0-released.md
new file mode 100755
index 00000000000..dc29d337dc5
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.3.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.3.0"
+linkTitle: "Avro 1.3.0"
+date: 2010-02-26
+---
+
+
+
+Apache Avro 1.3.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.3.1-released.md b/doc/content/en/blog/releases/avro-1.3.1-released.md
new file mode 100755
index 00000000000..f767d50dd49
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.3.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.3.1"
+linkTitle: "Avro 1.3.1"
+date: 2010-03-19
+---
+
+
+
+Apache Avro 1.3.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.3.2-released.md b/doc/content/en/blog/releases/avro-1.3.2-released.md
new file mode 100755
index 00000000000..3a0492df121
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.3.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.3.2"
+linkTitle: "Avro 1.3.2"
+date: 2010-03-31
+---
+
+
+
+Apache Avro 1.3.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.3.3-released.md b/doc/content/en/blog/releases/avro-1.3.3-released.md
new file mode 100755
index 00000000000..15eeabd4ecf
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.3.3-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.3.3"
+linkTitle: "Avro 1.3.3"
+date: 2010-07-07
+---
+
+
+
+Apache Avro 1.3.3 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.4.0-released.md b/doc/content/en/blog/releases/avro-1.4.0-released.md
new file mode 100755
index 00000000000..ca9df708ba5
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.4.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.4.0"
+linkTitle: "Avro 1.4.0"
+date: 2010-09-08
+---
+
+
+
+Apache Avro 1.4.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.4.1-released.md b/doc/content/en/blog/releases/avro-1.4.1-released.md
new file mode 100755
index 00000000000..b2ef0836f84
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.4.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.4.1"
+linkTitle: "Avro 1.4.1"
+date: 2010-10-13
+---
+
+
+
+Apache Avro 1.4.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.5.0-released.md b/doc/content/en/blog/releases/avro-1.5.0-released.md
new file mode 100755
index 00000000000..94daaccc724
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.5.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.5.0"
+linkTitle: "Avro 1.5.0"
+date: 2011-03-11
+---
+
+
+
+Apache Avro 1.5.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.5.1-released.md b/doc/content/en/blog/releases/avro-1.5.1-released.md
new file mode 100755
index 00000000000..24354286ad1
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.5.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.5.1"
+linkTitle: "Avro 1.5.1"
+date: 2011-05-06
+---
+
+
+
+Apache Avro 1.5.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.5.2-released.md b/doc/content/en/blog/releases/avro-1.5.2-released.md
new file mode 100755
index 00000000000..25fb5b7549c
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.5.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.5.2"
+linkTitle: "Avro 1.5.2"
+date: 2011-08-12
+---
+
+
+
+Apache Avro 1.5.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.5.3-released.md b/doc/content/en/blog/releases/avro-1.5.3-released.md
new file mode 100755
index 00000000000..5be5225a925
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.5.3-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.5.3"
+linkTitle: "Avro 1.5.3"
+date: 2011-08-29
+---
+
+
+
+Apache Avro 1.5.3 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.5.4-released.md b/doc/content/en/blog/releases/avro-1.5.4-released.md
new file mode 100755
index 00000000000..8eeab4f4b54
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.5.4-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.5.4"
+linkTitle: "Avro 1.5.4"
+date: 2011-09-12
+---
+
+
+
+Apache Avro 1.5.4 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.6.0-released.md b/doc/content/en/blog/releases/avro-1.6.0-released.md
new file mode 100755
index 00000000000..e131f4534f1
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.6.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.6.0"
+linkTitle: "Avro 1.6.0"
+date: 2011-11-02
+---
+
+
+
+Apache Avro 1.6.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.6.1-released.md b/doc/content/en/blog/releases/avro-1.6.1-released.md
new file mode 100755
index 00000000000..724b9b04003
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.6.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.6.1"
+linkTitle: "Avro 1.6.1"
+date: 2011-11-14
+---
+
+
+
+Apache Avro 1.6.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.6.2-released.md b/doc/content/en/blog/releases/avro-1.6.2-released.md
new file mode 100755
index 00000000000..15fae250b4e
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.6.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.6.2"
+linkTitle: "Avro 1.6.2"
+date: 2012-02-14
+---
+
+
+
+Apache Avro 1.6.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.6.3-released.md b/doc/content/en/blog/releases/avro-1.6.3-released.md
new file mode 100755
index 00000000000..3029f9e4739
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.6.3-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.6.3"
+linkTitle: "Avro 1.6.3"
+date: 2012-03-19
+---
+
+
+
+Apache Avro 1.6.3 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.0-released.md b/doc/content/en/blog/releases/avro-1.7.0-released.md
new file mode 100755
index 00000000000..02e5c15156a
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.0"
+linkTitle: "Avro 1.7.0"
+date: 2012-06-11
+---
+
+
+
+Apache Avro 1.7.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.1-released.md b/doc/content/en/blog/releases/avro-1.7.1-released.md
new file mode 100755
index 00000000000..6ef9278dae3
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.1"
+linkTitle: "Avro 1.7.1"
+date: 2012-07-18
+---
+
+
+
+Apache Avro 1.7.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.2-released.md b/doc/content/en/blog/releases/avro-1.7.2-released.md
new file mode 100755
index 00000000000..94e5719ed0e
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.2"
+linkTitle: "Avro 1.7.2"
+date: 2012-09-25
+---
+
+
+
+Apache Avro 1.7.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.3-released.md b/doc/content/en/blog/releases/avro-1.7.3-released.md
new file mode 100755
index 00000000000..63ff58392f8
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.3-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.3"
+linkTitle: "Avro 1.7.3"
+date: 2012-12-07
+---
+
+
+
+Apache Avro 1.7.3 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.4-released.md b/doc/content/en/blog/releases/avro-1.7.4-released.md
new file mode 100755
index 00000000000..a91a8ebc18b
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.4-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.4"
+linkTitle: "Avro 1.7.4"
+date: 2013-02-26
+---
+
+
+
+Apache Avro 1.7.4 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.5-released.md b/doc/content/en/blog/releases/avro-1.7.5-released.md
new file mode 100755
index 00000000000..44288ccf66e
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.5-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.5"
+linkTitle: "Avro 1.7.5"
+date: 2013-08-19
+---
+
+
+
+Apache Avro 1.7.5 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.6-released.md b/doc/content/en/blog/releases/avro-1.7.6-released.md
new file mode 100755
index 00000000000..fe93cd5c2ff
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.6-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.6"
+linkTitle: "Avro 1.7.6"
+date: 2014-01-22
+---
+
+
+
+Apache Avro 1.7.6 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.7.7-released.md b/doc/content/en/blog/releases/avro-1.7.7-released.md
new file mode 100755
index 00000000000..07a378ec000
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.7.7-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.7.7"
+linkTitle: "Avro 1.7.7"
+date: 2014-07-23
+---
+
+
+
+Apache Avro 1.7.7 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.8.0-released.md b/doc/content/en/blog/releases/avro-1.8.0-released.md
new file mode 100755
index 00000000000..9ca4a129d1f
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.8.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.8.0"
+linkTitle: "Avro 1.8.0"
+date: 2016-01-29
+---
+
+
+
+Apache Avro 1.8.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.8.1-released.md b/doc/content/en/blog/releases/avro-1.8.1-released.md
new file mode 100755
index 00000000000..ed20e60219d
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.8.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.8.1"
+linkTitle: "Avro 1.8.1"
+date: 2016-05-19
+---
+
+
+
+Apache Avro 1.8.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.8.2-released.md b/doc/content/en/blog/releases/avro-1.8.2-released.md
new file mode 100755
index 00000000000..07720e05053
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.8.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.8.2"
+linkTitle: "Avro 1.8.2"
+date: 2017-05-20
+---
+
+
+
+Apache Avro 1.8.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.9.0-released.md b/doc/content/en/blog/releases/avro-1.9.0-released.md
new file mode 100755
index 00000000000..0833216066a
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.9.0-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.9.0"
+linkTitle: "Avro 1.9.0"
+date: 2019-05-14
+---
+
+
+
+Apache Avro 1.9.0 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.9.1-released.md b/doc/content/en/blog/releases/avro-1.9.1-released.md
new file mode 100755
index 00000000000..6ae614e5b3b
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.9.1-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.9.1"
+linkTitle: "Avro 1.9.1"
+date: 2019-09-02
+---
+
+
+
+Apache Avro 1.9.1 has been released!
diff --git a/doc/content/en/blog/releases/avro-1.9.2-released.md b/doc/content/en/blog/releases/avro-1.9.2-released.md
new file mode 100755
index 00000000000..e4580fa42bd
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-1.9.2-released.md
@@ -0,0 +1,28 @@
+---
+title: "Avro 1.9.2"
+linkTitle: "Avro 1.9.2"
+date: 2020-02-19
+---
+
+
+
+Apache Avro 1.9.2 has been released!
diff --git a/doc/content/en/blog/releases/avro-joins-apache.md b/doc/content/en/blog/releases/avro-joins-apache.md
new file mode 100755
index 00000000000..dbc1872644d
--- /dev/null
+++ b/doc/content/en/blog/releases/avro-joins-apache.md
@@ -0,0 +1,28 @@
+---
+title: "Avro joins Apache"
+linkTitle: "Avro joins Apache"
+date: 2009-04-10
+---
+
+
+
+Avro has joined the Apache Software Foundation as a Hadoop subproject.
diff --git a/doc/content/en/community/_index.md b/doc/content/en/community/_index.md
new file mode 100644
index 00000000000..643c532589a
--- /dev/null
+++ b/doc/content/en/community/_index.md
@@ -0,0 +1,35 @@
+---
+title: Community
+menu:
+ main:
+ weight: 40
+aliases:
+- /irc.html
+- /issue_tracking.html
+- /mailing_lists.html
+- /mail/
+- /version_control.html
+---
+
+
+
+
\ No newline at end of file
diff --git a/doc/content/en/docs/++version++/Getting started (Java)/_index.md b/doc/content/en/docs/++version++/Getting started (Java)/_index.md
new file mode 100644
index 00000000000..429e9837641
--- /dev/null
+++ b/doc/content/en/docs/++version++/Getting started (Java)/_index.md
@@ -0,0 +1,289 @@
+---
+categories: []
+tags: ["java"]
+title: "Getting Started (Java)"
+linkTitle: "Getting Started (Java)"
+weight: 2
+---
+
+
+
+This is a short guide for getting started with Apache Avro™ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC.
+
+## Download
+
+Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avro™ Download]({{< relref "/project/download" >}}) page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. For the examples in this guide, download avro-{{< avro_version >}}.jar and avro-tools-{{< avro_version >}}.jar.
+
+Alternatively, if you are using Maven, add the following dependency to your POM:
+
+```xml
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro</artifactId>
+  <version>{{< avro_version >}}</version>
+</dependency>
+```
+
+As well as the Avro Maven plugin (for performing code generation):
+
+```xml
+<plugin>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-maven-plugin</artifactId>
+  <version>{{< avro_version >}}</version>
+  <configuration>
+    <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
+    <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
+  </configuration>
+  <executions>
+    <execution>
+      <phase>generate-sources</phase>
+      <goals>
+        <goal>schema</goal>
+      </goals>
+    </execution>
+  </executions>
+</plugin>
+<plugin>
+  <groupId>org.apache.maven.plugins</groupId>
+  <artifactId>maven-compiler-plugin</artifactId>
+  <configuration>
+    <source>1.8</source>
+    <target>1.8</target>
+  </configuration>
+</plugin>
+```
+
+You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information.
+
+## Defining a schema
+
+Avro schemas are defined using JSON or IDL (the latter requires an extra dependency). Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc:
+
+```json
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+```
+
+This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case).
+
+Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field.
+
+## Serializing and deserializing with code generation
+
+### Compiling the schema
+Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows:
+
+```shell
+java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema <schema file> <destination>
+```
+
+This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run
+
+```shell
+java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema user.avsc .
+```
+
+Note that if you are using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory.
+
+### Creating Users
+Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects.
+
+First let's create some Users and set their fields.
+
+```java
+User user1 = new User();
+user1.setName("Alyssa");
+user1.setFavoriteNumber(256);
+// Leave favorite color null
+
+// Alternate constructor
+User user2 = new User("Ben", 7, "red");
+
+// Construct via builder
+User user3 = User.newBuilder()
+ .setName("Charlie")
+ .setFavoriteColor("blue")
+ .setFavoriteNumber(null)
+ .build();
+```
+
+As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it is set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the data structure before it is written.
+
+Note that we do not set user1's favorite color. Since that field is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null).
+
+### Serializing
+Now let's serialize our Users to disk.
+
+```java
+// Serialize user1, user2 and user3 to disk
+DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
+DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
+dataFileWriter.create(user1.getSchema(), new File("users.avro"));
+dataFileWriter.append(user1);
+dataFileWriter.append(user2);
+dataFileWriter.append(user3);
+dataFileWriter.close();
+```
+
+We create a DatumWriter, which converts Java objects into an in-memory serialized format. The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type.
+
+Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file.
+
+### Deserializing
+Finally, let's deserialize the data file we just created.
+
+```java
+// Deserialize Users from disk
+File file = new File("users.avro"); // the file we serialized to above
+DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
+DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader);
+User user = null;
+while (dataFileReader.hasNext()) {
+  // Reuse user object by passing it to next(). This saves us from
+  // allocating and garbage collecting many objects for files with
+  // many items.
+  user = dataFileReader.next(user);
+  System.out.println(user);
+}
+```
+
+This snippet will output:
+
+```json
+{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
+{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
+{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"}
+```
+
+Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification.
+
+Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern.
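+
+For example, when object reuse is not a concern, the same loop can be written with the enhanced for syntax:
+
+```java
+// Allocates a new User for each iteration
+for (User u : dataFileReader) {
+  System.out.println(u);
+}
+```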
+
+### Compiling and running the example code
+This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example:
+
+```shell
+$ mvn compile # includes code generation via Avro Maven plugin
+$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain
+```
+
+### Beta feature: Generating faster code
+In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. In the sample above, for example, you could enable the faster coders as follows:
+
+```shell
+$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \
+      -Dorg.apache.avro.specific.use_custom_coders=true
+```
+
+Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks.
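+
+If setting the flag on the command line is awkward, it can also be set programmatically; this is a minimal sketch, assuming it runs early enough that no Avro serialization has taken place before the property is read:
+
+```java
+// Enable the experimental coders for this JVM; set this before any
+// Avro serialization or deserialization takes place.
+System.setProperty("org.apache.avro.specific.use_custom_coders", "true");
+```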
+
+We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying on-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code).
+
+## Serializing and deserializing without code generation
+Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation.
+
+Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the users objects.
+
+### Creating users
+First, we use a SchemaParser to read our schema definition and create a Schema object.
+
+```java
+Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema();
+```
+
+Using this schema, let's create some users.
+
+```java
+GenericRecord user1 = new GenericData.Record(schema);
+user1.put("name", "Alyssa");
+user1.put("favorite_number", 256);
+// Leave favorite color null
+
+GenericRecord user2 = new GenericData.Record(schema);
+user2.put("name", "Ben");
+user2.put("favorite_number", 7);
+user2.put("favorite_color", "red");
+```
+
+Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program.
+
+Note that we do not set user1's favorite color. Since that field is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional.
+
+### Serializing
+Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers.
+
+First we'll serialize our users to a data file on disk.
+
+```java
+// Serialize user1 and user2 to disk
+File file = new File("users.avro");
+DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
+DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+dataFileWriter.create(schema, file);
+dataFileWriter.append(user1);
+dataFileWriter.append(user2);
+dataFileWriter.close();
+```
+
+We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present.
+
+As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file.
+
+### Deserializing
+Finally, we'll deserialize the data file we just created.
+
+```java
+// Deserialize users from disk
+DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
+DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
+GenericRecord user = null;
+while (dataFileReader.hasNext()) {
+  // Reuse user object by passing it to next(). This saves us from
+  // allocating and garbage collecting many objects for files with
+  // many items.
+  user = dataFileReader.next(user);
+  System.out.println(user);
+}
+```
+
+This outputs:
+
+```json
+{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
+{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
+```
+
+Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification.
+
+Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern.
+
+### Compiling and running the example code
+This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example:
+
+```shell
+$ mvn compile
+$ mvn -q exec:java -Dexec.mainClass=example.GenericMain
+```
diff --git a/doc/content/en/docs/++version++/Getting started (Python)/_index.md b/doc/content/en/docs/++version++/Getting started (Python)/_index.md
new file mode 100644
index 00000000000..44e3a8f37fd
--- /dev/null
+++ b/doc/content/en/docs/++version++/Getting started (Python)/_index.md
@@ -0,0 +1,147 @@
+---
+categories: []
+tags: ["python"]
+title: "Getting Started (Python)"
+linkTitle: "Getting Started (Python)"
+weight: 3
+---
+
+
+
+This is a short guide for getting started with Apache Avro™ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC.
+
+## Notice for Python 3 users
+A separate package called "avro-python3" previously provided Python 3 support, but the codebase has since been consolidated into the "avro" package, which now supports both Python 2 and 3. The avro-python3 package will be removed in the near future, so users should switch to the "avro" package. The two are mostly API compatible, but there are a few minor differences (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse).
+
+## Download
+For Python, the easiest way to get started is to install the avro package from PyPI:
+
+```shell
+$ python3 -m pip install avro
+```
+
+The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avro™ Releases page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. Download and unzip avro-{{< avro_version >}}.tar.gz, and install via `python setup.py install` (this may require root privileges). Ensure that you can import avro from a Python prompt.
+
+```shell
+$ tar xvf avro-{{< avro_version >}}.tar.gz
+$ cd avro-{{< avro_version >}}
+$ python setup.py install
+$ python
+>>> import avro # should not raise ImportError
+```
+
+Alternatively, you may build the Avro Python library from source. From the root Avro directory, run the commands
+
+```shell
+$ cd lang/py/
+$ python3 -m pip install -e .
+$ python
+```
+
+## Defining a schema
+Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc:
+
+```json
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+```
+
+This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.) At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case).
+
+Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field.
+
+## Serializing and deserializing without code generation
+Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation.
+
+Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file:
+
+```python
+import avro.schema
+from avro.datafile import DataFileReader, DataFileWriter
+from avro.io import DatumReader, DatumWriter
+
+schema = avro.schema.parse(open("user.avsc", "rb").read())
+
+writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
+writer.append({"name": "Alyssa", "favorite_number": 256})
+writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
+writer.close()
+
+reader = DataFileReader(open("users.avro", "rb"), DatumReader())
+for user in reader:
+ print(user)
+reader.close()
+```
+
+This outputs:
+
+```json
+{'favorite_color': None, 'favorite_number': 256, 'name': 'Alyssa'}
+{'favorite_color': 'red', 'favorite_number': 7, 'name': 'Ben'}
+```
+
+Do make sure that you open your files in binary mode (i.e., using the modes `wb` or `rb` respectively); otherwise you might generate corrupt files due to automatic replacement of newline characters with platform-specific representations.
+
+Let's take a closer look at what's going on here.
+
+```python
+schema = avro.schema.parse(open("user.avsc", "rb").read())
+```
+
+avro.schema.parse takes a string containing a JSON schema definition as input and outputs an avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here.
+
+```python
+writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
+```
+
+We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments:
+
+* The file we'll serialize to
+* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro; see the sketch after this list).
+* The schema we're using. The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields.
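+
+As an aside to the second point above, here is a minimal sketch of using a DatumWriter and DatumReader directly on an in-memory buffer, without a data file; it assumes the user.avsc schema from above and is illustrative rather than part of the original example:
+
+```python
+import io
+
+import avro.io
+import avro.schema
+
+schema = avro.schema.parse(open("user.avsc", "rb").read())
+
+# Serialize one record to raw Avro bytes (no container file, no embedded schema)
+buf = io.BytesIO()
+encoder = avro.io.BinaryEncoder(buf)
+avro.io.DatumWriter(schema).write({"name": "Alyssa", "favorite_number": 256}, encoder)
+raw = buf.getvalue()
+
+# Deserialize; without a container file, we must supply the writer's schema ourselves
+decoder = avro.io.BinaryDecoder(io.BytesIO(raw))
+print(avro.io.DatumReader(schema).read(decoder))
+```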
+
+```python
+writer.append({"name": "Alyssa", "favorite_number": 256})
+writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
+```
+
+We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries in the dict that do not correspond to a schema field are ignored.
+
+```python
+reader = DataFileReader(open("users.avro", "rb"), DatumReader())
+```
+
+We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analogous to the DataFileWriter and DatumWriter above.
+
+```python
+for user in reader:
+ print(user)
+```
+
+The DataFileReader is an iterator that returns dicts corresponding to the serialized items.
diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md
new file mode 100644
index 00000000000..7d0121274a9
--- /dev/null
+++ b/doc/content/en/docs/++version++/IDL Language/_index.md
@@ -0,0 +1,511 @@
+---
+title: "IDL Language"
+linkTitle: "IDL Language"
+weight: 201
+---
+
+
+
+## Introduction
+This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro.
+
+## Overview
+
+### Purpose
+The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA.
+
+### Usage
+Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc.
+
+To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example:
+```shell
+$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr
+$ head /tmp/namespaces.avpr
+{
+ "protocol" : "TestNamespace",
+ "namespace" : "avro.test.protocol",
+```
+To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. For example:
+```shell
+$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc
+$ head /tmp/schema_syntax.avsc
+{
+ "type": "array",
+ "items": {
+ "type": "record",
+ "name": "StatusUpdate",
+```
+The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information.
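+
+For example, assuming the tool reads stdin and writes stdout when the file arguments are omitted (check `idl --help` for the exact syntax of your version):
+```shell
+$ java -jar avro-tools.jar idl < input.avdl > output.avpr
+```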
+
+A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml:
+```xml
+<build>
+  <plugins>
+    <plugin>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro-maven-plugin</artifactId>
+      <executions>
+        <execution>
+          <goals>
+            <goal>idl</goal>
+          </goals>
+        </execution>
+      </executions>
+    </plugin>
+  </plugins>
+</build>
+```
+
+## Defining a Schema in Avro IDL
+An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code:
+```java
+schema int;
+```
+This is equivalent to (and generates) the following JSON schema definition:
+```json
+{
+ "type": "int"
+}
+```
+More complex schemata can also be defined, for example by adding named schemata like this:
+```java
+namespace default.namespace.for.named.schemata;
+schema Message;
+
+record Message {
+ string? title = null;
+ string message;
+}
+```
+This is equivalent to (and generates) the following JSON schema definition:
+```json
+{
+ "type" : "record",
+ "name" : "Message",
+ "namespace" : "default.namespace.for.named.schemata",
+ "fields" : [ {
+ "name" : "title",
+ "type" : [ "null", "string" ],
+ "default": null
+ }, {
+ "name" : "message",
+ "type" : "string"
+ } ]
+}
+```
+Schemata in Avro IDL can contain the following items:
+
+* Imports of external protocol and schema files (only named schemata are imported).
+* Definitions of named schemata, including records, errors, enums, and fixeds.
+
+## Defining a Protocol in Avro IDL
+An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code:
+```java
+protocol MyProtocol {
+}
+```
+This is equivalent to (and generates) the following JSON protocol definition:
+```json
+{
+"protocol" : "MyProtocol",
+ "types" : [ ],
+ "messages" : {
+ }
+}
+```
+The namespace of the protocol may be changed using the @namespace annotation:
+```java
+@namespace("mynamespace")
+protocol MyProtocol {
+}
+```
+This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document.
+
+Protocols in Avro IDL can contain the following items:
+
+* Imports of external protocol and schema files.
+* Definitions of named schemata, including records, errors, enums, and fixeds.
+* Definitions of RPC messages
+
+## Imports
+Files may be imported in one of three formats:
+
+* An IDL file may be imported with a statement like:
+
+ `import idl "foo.avdl";`
+
+* A JSON protocol file may be imported with a statement like:
+
+ `import protocol "foo.avpr";`
+
+* A JSON schema file may be imported with a statement like:
+
+ `import schema "foo.avsc";`
+
+When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well.
+
+Imported file names are resolved relative to the current IDL file.
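+
+For example, a protocol that uses all three import forms might look like this (the imported file names are illustrative):
+```java
+@namespace("org.example")
+protocol ImportExample {
+  import idl "common.avdl"; // resolved relative to this IDL file
+  import protocol "service.avpr";
+  import schema "user.avsc";
+}
+```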
+
+## Defining an Enumeration
+Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing.
+
+Example Writer Enum Definition
+```java
+enum Shapes {
+ SQUARE, TRIANGLE, CIRCLE, OVAL
+}
+```
+Example Reader Enum Definition
+```java
+enum Shapes {
+ SQUARE, TRIANGLE, CIRCLE
+} = CIRCLE;
+```
+In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined.
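+
+For reference, the reader definition above is equivalent to the following JSON schema, where the default symbol is carried in the `default` attribute:
+```json
+{
+  "type": "enum",
+  "name": "Shapes",
+  "symbols": ["SQUARE", "TRIANGLE", "CIRCLE"],
+  "default": "CIRCLE"
+}
+```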
+
+## Defining a Fixed Length Field
+Fixed fields are defined using the following syntax:
+```
+fixed MD5(16);
+```
+This example defines a fixed-length type called MD5, which contains 16 bytes.
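+
+This is equivalent to (and generates) the following JSON schema definition:
+```json
+{
+  "type": "fixed",
+  "name": "MD5",
+  "size": 16
+}
+```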
+
+## Defining Records and Errors
+Records are defined in Avro IDL using a syntax similar to a struct definition in C:
+```java
+record Employee {
+ string name;
+ boolean active = true;
+ long salary;
+}
+```
+The above example defines a record named "Employee" with three fields.
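+
+This is equivalent to (and generates) the following JSON schema definition:
+```json
+{
+  "type": "record",
+  "name": "Employee",
+  "fields": [
+    {"name": "name", "type": "string"},
+    {"name": "active", "type": "boolean", "default": true},
+    {"name": "salary", "type": "long"}
+  ]
+}
+```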
+
+To define an error, simply use the keyword _error_ instead of _record_. For example:
+```java
+error Kaboom {
+ string explanation;
+ int result_code = -1;
+}
+```
+Each field in a record or error consists of a type and a name, optional property annotations and an optional default value.
+
+A type reference in Avro IDL must be one of:
+
+* A primitive type
+* A logical type
+* A named schema (either defined or imported)
+* A complex type (array, map, or union)
+
+### Primitive Types
+The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_.
+
+### Logical Types
+Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. The currently supported types are:
+
+* _decimal_ (logical type [decimal]({{< relref "specification#decimal" >}}))
+* _date_ (logical type [date]({{< relref "specification#date" >}}))
+* _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}}))
+* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}}))
+* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "specification#local_timestamp_ms" >}}))
+* _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}}))
+
+For example:
+```java
+record Job {
+ string jobid;
+ date submitDate;
+ time_ms submitTime;
+ timestamp_ms finishTime;
+ decimal(9,2) finishRatio;
+ uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8";
+}
+```
+
+Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist:
+
+```java
+record Job {
+ string jobid;
+ @logicalType("timestamp-micros")
+ long finishTime;
+}
+```
+
+### References to Named Schemata
+If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type:
+```java
+record Card {
+ Suit suit; // refers to the enum Suit defined above
+ int number;
+}
+```
+
+### Default Values
+Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "specification#schema-record" >}}).
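+
+For example, defaults are written as JSON literals of the matching type:
+```java
+record Defaults {
+  int count = 0;
+  string label = "unknown";
+  array<string> tags = [];
+}
+```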
+
+### Complex Types
+
+#### Arrays
+Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array<t>`. For example, an array of strings is denoted `array<string>`, and a multidimensional array of Foo records would be `array<array<Foo>>`.
+
+#### Maps
+Map types are written similarly to array types. A map that contains values of type t is written `map<t>`. As in the JSON schema format, all maps contain `string`-type keys.
+
+#### Unions
+Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or an imprecise number:
+```java
+record RecordWithUnion {
+ union { null, string } optionalString;
+ union { decimal(12, 6), float } number;
+}
+```
+Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type.
+
+Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields:
+
+```java
+record RecordWithUnion {
+ union { null, string } optionalString1 = null;
+ string? optionalString2 = null;
+ string? optionalString3; // No default value
+
+ union { string, null } optionalString4 = "something";
+ string? optionalString5 = "something else";
+}
+```
+
+Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above.
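+
+For example, `optionalString2` and `optionalString5` above correspond to the following JSON fields:
+```json
+{ "name": "optionalString2", "type": ["null", "string"], "default": null }
+{ "name": "optionalString5", "type": ["string", "null"], "default": "something else" }
+```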
+
+## Defining RPC Messages
+The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol:
+```java
+int add(int foo, int bar = 0);
+```
+Message arguments, like record fields, may specify default values.
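+
+For reference, the `add` message above corresponds to this entry in the generated protocol's JSON `messages` section:
+```json
+"add": {
+  "request": [
+    {"name": "foo", "type": "int"},
+    {"name": "bar", "type": "int", "default": 0}
+  ],
+  "response": "int"
+}
+```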
+
+To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type:
+```java
+void logMessage(string message);
+```
+If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax:
+```java
+void goKaboom() throws Kaboom;
+```
+To define a one-way message, use the keyword `oneway` after the parameter list, for example:
+```java
+void fireAndForget(string message) oneway;
+```
+
+## Other Language Features
+
+### Comments and documentation
+All Java-style comments are supported within an Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines.
+
+Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment.
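+
+For example:
+```java
+/** Documentation for the record type Person, emitted as its "doc" attribute. */
+record Person {
+  /** Documentation for the name field */
+  string name;
+}
+```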
+
+### Escaping Identifiers
+Occasionally, one may want to distinguish between identifiers and language keywords. In order to do so, backticks (`) may be used to escape
+the identifier. For example, to define a message with the literal name error, you may write:
+```java
+void `error`();
+```
+This syntax is allowed anywhere an identifier is expected.
+
+### Annotations for Ordering and Namespaces
+Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or
+special properties as used in the JSON-format Avro Schema and Protocol files.
+
+For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows:
+```java
+record MyRecord {
+ string @order("ascending") myAscendingSortField;
+ string @order("descending") myDescendingField;
+ string @order("ignore") myIgnoredField;
+}
+```
+A field's type (with the exception of type references) may also be preceded by annotations, e.g.:
+```java
+record MyRecord {
+ @java-class("java.util.ArrayList") array<string> myStrings;
+}
+```
+This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.:
+```java
+record MyRecord {
+ @java-class("java.math.BigDecimal") string value;
+ @java-key-class("java.io.File") map<string> fileStates;
+ array<@java-class("java.math.BigDecimal") string> weights;
+}
+```
+Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example:
+```java
+@namespace("org.apache.avro.firstNamespace")
+protocol MyProto {
+ @namespace("org.apache.avro.someOtherNamespace")
+ record Foo {}
+
+ record Bar {}
+}
+```
+will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container.
+
+Type and field aliases are specified with the `@aliases` annotation as follows:
+```java
+@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"])
+record MyRecord {
+ string @aliases(["oldField", "ancientField"]) myNewField;
+}
+```
+Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as property name.
+
+## Complete Example
+The following is an example of two Avro IDL files that together show most of the above features:
+
+### schema.avdl
+```java
+/*
+ * Header with license information.
+ */
+// Optional default namespace (if absent, the default namespace is the null namespace).
+namespace org.apache.avro.test;
+// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file.
+schema TestRecord;
+
+/** Documentation for the enum type Kind */
+@aliases(["org.foo.KindOf"])
+enum Kind {
+ FOO,
+ BAR, // the bar enum value
+ BAZ
+} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO.
+
+/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */
+fixed MD5(16);
+
+record TestRecord {
+ /** Record name; has no intrinsic order */
+ string @order("ignore") name;
+
+ Kind @order("descending") kind;
+
+ MD5 hash;
+
+ /*
+ Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type.
+ */
+ union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null;
+ // Shorthand syntax; the null in this union is placed based on the default value (or first if there's no default).
+ MD5? anotherNullableHash = null;
+
+ array<long> arrayOfLongs;
+}
+```
+
+### protocol.avdl
+```java
+/*
+ * Header with license information.
+ */
+
+/**
+ * An example protocol in Avro IDL
+ */
+@namespace("org.apache.avro.test")
+protocol Simple {
+ // Import the example file above
+ import idl "schema.avdl";
+
+ /** Errors are records that can be thrown from a method */
+ error TestError {
+ string message;
+ }
+
+ string hello(string greeting);
+ /** Return what was given. Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */
+ TestRecord echo(TestRecord `record`);
+ int add(int arg1, int arg2);
+ bytes echoBytes(bytes data);
+ void `error`() throws TestError;
+ // The oneway keyword forces the method to return null.
+ void ping() oneway;
+}
+```
+
+Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory.
+
+## IDE support
+
+There are several editors and IDEs that support Avro IDL files, usually via plugins.
+
+### JetBrains
+
+Apache Avro IDL Schema Support 203.1.2 was released on 9 December 2021.
+
+Features:
+* Syntax Highlighting
+* Code Completion
+* Code Formatting
+* Error Highlighting
+* Inspections & quick fixes
+* JSON schemas for .avpr and .avsc files
+
+It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)
+and on [GitHub](https://github.com/opwvhk/avro-schema-support).
+
+The plugin supports almost all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client.
+
+Only JetBrains Gateway does not support this plugin directly, but the backend (JetBrains) IDE that it connects to does.
+
+### Eclipse
+
+Avroclipse 0.0.11 was released on 4 December 2019.
+
+Features:
+* Syntax Highlighting
+* Error Highlighting
+* Code Completion
+
+It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse)
+and [GitHub](https://github.com/dvdkruk/avroclipse).
+
+### Visual Studio Code
+
+avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting.
+
+It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro)
+and [GitHub](https://github.com/Jason3S/vscode-avro-ext)
+
+### Atom.io
+
+atom-language-avro 0.0.13 was released on 14 August 2015. It provides syntax highlighting.
+
+It is available as [Atom.io package](https://atom.io/packages/atom-language-avro)
+and [GitHub](https://github.com/jonesetc/atom-language-avro)
+
+### Vim
+
+A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016)
+
+[avro-idl.vim](https://github.com/apache/avro/blob/main/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010)
+
+Both provide syntax highlighting.
diff --git a/doc/content/en/docs/++version++/MapReduce guide/_index.md b/doc/content/en/docs/++version++/MapReduce guide/_index.md
new file mode 100644
index 00000000000..f262bc6e2a7
--- /dev/null
+++ b/doc/content/en/docs/++version++/MapReduce guide/_index.md
@@ -0,0 +1,396 @@
+---
+title: "MapReduce guide"
+linkTitle: "MapReduce guide"
+weight: 200
+---
+
+
+
+Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record.
+
+This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`).
+
+## Setup
+The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. In particular, the POM includes the following dependencies:
+```xml
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro</artifactId>
+  <version>{{< avro_version >}}</version>
+</dependency>
+<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-mapred</artifactId>
+  <version>{{< avro_version >}}</version>
+</dependency>
+<dependency>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-client</artifactId>
+  <version>3.1.2</version>
+</dependency>
+```
+And the following plugin:
+```xml
+<plugin>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>avro-maven-plugin</artifactId>
+  <version>{{< avro_version >}}</version>
+  <executions>
+    <execution>
+      <phase>generate-sources</phase>
+      <goals>
+        <goal>schema</goal>
+      </goals>
+      <configuration>
+        <sourceDirectory>${project.basedir}/../</sourceDirectory>
+        <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory>
+      </configuration>
+    </execution>
+  </executions>
+</plugin>
+```
+
+If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout.
+
+Alternatively, Avro jars can be downloaded directly from the Apache Avro™ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-{{< avro_version >}}.jar* and *avro-mapred-{{< avro_version >}}.jar*, as well as *avro-tools-{{< avro_version >}}.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce.
+
+## Example: ColorCount
+Below is a simple example of a MapReduce job that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples.
+
+MapredColorCount.java:
+```java
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.*;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.*;
+
+import example.avro.User;
+
+public class MapredColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
+    @Override
+    public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
+        throws IOException {
+      CharSequence color = user.getFavoriteColor();
+      // We need this check because the User.favorite_color field has type ["string", "null"]
+      if (color == null) {
+        color = "none";
+      }
+      collector.collect(new Pair<CharSequence, Integer>(color, 1));
+    }
+  }
+
+  public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, Pair<CharSequence, Integer>> {
+    @Override
+    public void reduce(CharSequence key, Iterable<Integer> values,
+                       AvroCollector<Pair<CharSequence, Integer>> collector,
+                       Reporter reporter)
+        throws IOException {
+      int sum = 0;
+      for (Integer value : values) {
+        sum += value;
+      }
+      collector.collect(new Pair<CharSequence, Integer>(key, sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapredColorCount <input path> <output path>");
+      return -1;
+    }
+
+    JobConf conf = new JobConf(getConf(), MapredColorCount.class);
+    conf.setJobName("colorcount");
+
+    FileInputFormat.setInputPaths(conf, new Path(args[0]));
+    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+    AvroJob.setMapperClass(conf, ColorCountMapper.class);
+    AvroJob.setReducerClass(conf, ColorCountReducer.class);
+
+    // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
+    // relevant config options such as input/output format, map output
+    // classes, and output key class.
+    AvroJob.setInputSchema(conf, User.getClassSchema());
+    AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+        Schema.create(Type.INT)));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args);
+    System.exit(res);
+  }
+}
+```
+
+MapReduceColorCount.java:
+```java
+package example;
+
+import java.io.IOException;
+
+import org.apache.avro.Schema;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.avro.mapred.AvroValue;
+import org.apache.avro.mapreduce.AvroJob;
+import org.apache.avro.mapreduce.AvroKeyInputFormat;
+import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import example.avro.User;
+
+public class MapReduceColorCount extends Configured implements Tool {
+
+  public static class ColorCountMapper extends
+      Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
+
+    @Override
+    public void map(AvroKey<User> key, NullWritable value, Context context)
+        throws IOException, InterruptedException {
+
+      CharSequence color = key.datum().getFavoriteColor();
+      if (color == null) {
+        color = "none";
+      }
+      context.write(new Text(color.toString()), new IntWritable(1));
+    }
+  }
+
+  public static class ColorCountReducer extends
+      Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
+
+    @Override
+    public void reduce(Text key, Iterable<IntWritable> values,
+        Context context) throws IOException, InterruptedException {
+
+      int sum = 0;
+      for (IntWritable value : values) {
+        sum += value.get();
+      }
+      context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
+    }
+  }
+
+  public int run(String[] args) throws Exception {
+    if (args.length != 2) {
+      System.err.println("Usage: MapReduceColorCount <input path> <output path>");
+      return -1;
+    }
+
+    Job job = new Job(getConf());
+    job.setJarByClass(MapReduceColorCount.class);
+    job.setJobName("Color Count");
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    job.setInputFormatClass(AvroKeyInputFormat.class);
+    job.setMapperClass(ColorCountMapper.class);
+    AvroJob.setInputKeySchema(job, User.getClassSchema());
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(IntWritable.class);
+
+    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
+    job.setReducerClass(ColorCountReducer.class);
+    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
+    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
+
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new MapReduceColorCount(), args);
+    System.exit(res);
+  }
+}
+```
+ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. The User schema is defined as follows:
+```json
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+ {"name": "name", "type": "string"},
+ {"name": "favorite_number", "type": ["int", "null"]},
+ {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+```
+This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up).
+
+*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair `<favoriteColor, 1>`. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file.
+
+## Running ColorCount
+The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run:
+```shell
+mvn compile
+```
+Next, run _GenerateData_ from `examples/mr-example` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list:
+```shell
+mvn exec:java -q -Dexec.mainClass=example.GenerateData
+```
+Besides creating the data file, GenerateData prints the JSON representation of each generated User to stdout, for example:
+```json
+{"name": "user", "favorite_number": null, "favorite_color": "red"}
+{"name": "user", "favorite_number": null, "favorite_color": "green"}
+{"name": "user", "favorite_number": null, "favorite_color": "purple"}
+{"name": "user", "favorite_number": null, "favorite_color": null}
+...
+```
+Now we're ready to run ColorCount. We specify our freshly generated _input_ folder as the input path and _output_ as the output folder (note that MapReduce will not start a job if the output folder already exists):
+```shell
+mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output"
+```
+Once ColorCount completes, checking the contents of the new output directory should yield the following:
+```shell
+$ ls output/
+part-00000.avro _SUCCESS
+```
+You can check the contents of the generated Avro file using the avro-tools jar:
+```shell
+$ java -jar /path/to/avro-tools-{{< avro_version >}}.jar tojson output/part-00000.avro
+{"value": 3, "key": "blue"}
+{"value": 7, "key": "green"}
+{"value": 1, "key": "none"}
+{"value": 2, "key": "orange"}
+{"value": 3, "key": "purple"}
+{"value": 2, "key": "red"}
+{"value": 2, "key": "yellow"}
+```
+Now let's go over the ColorCount example in detail.
+
+## AvroMapper - org.apache.hadoop.mapred API
+
+The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair<CharSequence, Integer>`, where the CharSequence key is the user's favorite color and the Integer value is 1.
+```java
+public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
+ @Override
+ public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
+ throws IOException {
+ CharSequence color = user.getFavoriteColor();
+ // We need this check because the User.favorite_color field has type ["string", "null"]
+ if (color == null) {
+ color = "none";
+ }
+ collector.collect(new Pair<CharSequence, Integer>(color, 1));
+ }
+}
+```
+In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema.
+```java
+AvroJob.setMapperClass(conf, ColorCountMapper.class);
+AvroJob.setInputSchema(conf, User.getClassSchema());
+```
+Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`.
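+
+For reference, a driver for the mapred API can wire these calls together roughly as follows. This is a sketch modeled on the _MapredColorCount_ example; the driver class name `MapredColorCountDriver` is illustrative, and the mapper and reducer are the ones defined in this section:
+```java
+package example;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.mapred.AvroJob;
+import org.apache.avro.mapred.Pair;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import example.avro.User; // generated from user.avsc
+
+// Assumes the ColorCountMapper and ColorCountReducer from this section are in scope.
+public class MapredColorCountDriver extends Configured implements Tool {
+  public int run(String[] args) throws Exception {
+    JobConf conf = new JobConf(getConf(), MapredColorCountDriver.class);
+    conf.setJobName("colorcount");
+
+    FileInputFormat.setInputPaths(conf, new Path(args[0]));
+    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+
+    AvroJob.setMapperClass(conf, ColorCountMapper.class);
+    AvroJob.setReducerClass(conf, ColorCountReducer.class);
+
+    // setInputSchema/setOutputSchema configure the input/output formats and
+    // map output classes, so the usual JobConf calls are unnecessary.
+    AvroJob.setInputSchema(conf, User.getClassSchema());
+    AvroJob.setOutputSchema(conf, Pair.getPairSchema(
+        Schema.create(Type.STRING), Schema.create(Type.INT)));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(new MapredColorCountDriver(), args));
+  }
+}
+```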
+
+## Mapper - org.apache.hadoop.mapreduce API
+This document does not go into all the differences between the mapred and mapreduce APIs; it describes only the main ones. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an `AvroKey<User>` as its key. Additionally, the AvroJob method calls have changed slightly.
+```java
+ public static class ColorCountMapper extends
+ Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
+
+ @Override
+ public void map(AvroKey<User> key, NullWritable value, Context context)
+ throws IOException, InterruptedException {
+
+ CharSequence color = key.datum().getFavoriteColor();
+ if (color == null) {
+ color = "none";
+ }
+ context.write(new Text(color.toString()), new IntWritable(1));
+ }
+ }
+```
+
+## AvroReducer - org.apache.hadoop.mapred API
+Analogously to AvroMapper, an AvroReducer defines a reduce function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable<Integer>` representing the counts for that color (they should all be 1 in this example) and adds up the counts.
+```java
+public static class ColorCountReducer extends AvroReducer<CharSequence, Integer, Pair<CharSequence, Integer>> {
+ @Override
+ public void reduce(CharSequence key, Iterable<Integer> values,
+ AvroCollector<Pair<CharSequence, Integer>> collector,
+ Reporter reporter)
+ throws IOException {
+ int sum = 0;
+ for (Integer value : values) {
+ sum += value;
+ }
+ collector.collect(new Pair<CharSequence, Integer>(key, sum));
+ }
+}
+```
+In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema.
+```java
+AvroJob.setReducerClass(conf, ColorCountReducer.class);
+AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
+ Schema.create(Type.INT)));
+```
+Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`.
+
+## Reducer - org.apache.hadoop.mapreduce API
+As before, we will not detail every difference between the APIs. As with the _Mapper_ change, _ColorCountReducer_ is now a subclass of _Reducer_, and an _AvroKey_ and an _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls have changed slightly.
+```java
+ public static class ColorCountReducer extends
+ Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
+
+ @Override
+ public void reduce(Text key, Iterable<IntWritable> values,
+ Context context) throws IOException, InterruptedException {
+
+ int sum = 0;
+ for (IntWritable value : values) {
+ sum += value.get();
+ }
+ context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
+ }
+ }
+```
+
+## Learning more
+The mapred API allows users to mix AvroMappers and AvroReducers with non-Avro Mappers and Reducers, and the mapreduce API allows users to input Avro data and output non-Avro data, or vice versa.
+
+API documentation is available for both the `org.apache.avro.mapred` package (which targets the old MapReduce API, `org.apache.hadoop.mapred`) and the `org.apache.avro.mapreduce` package (which targets the new MapReduce API, `org.apache.hadoop.mapreduce`). Similarly to the mapreduce package, it is possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API.
diff --git a/doc/content/en/docs/++version++/SASL profile/_index.md b/doc/content/en/docs/++version++/SASL profile/_index.md
new file mode 100644
index 00000000000..a938310414d
--- /dev/null
+++ b/doc/content/en/docs/++version++/SASL profile/_index.md
@@ -0,0 +1,93 @@
+---
+title: "SASL profile"
+linkTitle: "SASL profile"
+weight: 202
+---
+
+
+
+## Introduction
+SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile. This document provides a SASL profile for connection-based Avro RPC.
+
+## Overview
+SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned.
+
+Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodified.
+
+## Negotiation
+
+### Commands
+Avro SASL negotiation uses four one-byte commands.
+
+* 0: START Used in a client's initial message.
+* 1: CONTINUE Used while negotiation is ongoing.
+* 2: FAIL Terminates negotiation unsuccessfully.
+* 3: COMPLETE Terminates negotiation successfully.
+
+The format of a START message is:
+
+`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |`
+
+The format of a CONTINUE message is:
+
+`| 1 | 4-byte payload length | payload data |`
+
+The format of a FAIL message is:
+
+`| 2 | 4-byte message length | UTF-8 message |`
+
+The format of a COMPLETE message is:
+
+`| 3 | 4-byte payload length | payload data |`
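+
+As an illustration, here is a minimal Java sketch of building a START message. It assumes the four-byte lengths are big-endian (consistent with Avro's message framing) and that the mechanism name is UTF-8 text; the class name is illustrative:
+```java
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+public class SaslStartMessage {
+  static final byte START = 0;
+
+  static byte[] start(String mechanism, byte[] payload) throws IOException {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(bytes);
+    out.writeByte(START);                                  // one-byte command
+    byte[] name = mechanism.getBytes(StandardCharsets.UTF_8);
+    out.writeInt(name.length);                             // 4-byte mechanism name length
+    out.write(name);                                       // mechanism name
+    out.writeInt(payload.length);                          // 4-byte payload length
+    out.write(payload);                                    // payload data
+    return bytes.toByteArray();
+  }
+
+  public static void main(String[] args) throws IOException {
+    // The static ANONYMOUS prefix described below: | 0 | 0009 | ANONYMOUS | 0000 |
+    byte[] msg = start("ANONYMOUS", new byte[0]);
+    System.out.println(msg.length); // 18 bytes: 1 + 4 + 9 + 4
+  }
+}
+```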
+
+### Process
+Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data.
+
+The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message.
+
+Once either the client or server sends a FAIL message, negotiation has failed. UTF-8-encoded text is included in the failure message. Once a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease.
+
+Once either the client or server sends a COMPLETE message, negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side.
+
+## Session Data
+If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to and reads from this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form:
+
+`| 4-byte frame length | frame data | ... | 4 zero bytes |`
+
+If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame.
+
+If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease.
+
+## Anonymous Mechanism
+The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2245.txt)) is quite simple to implement. In particular, an initial anonymous request may be prefixed by the following static sequence:
+
+`| 0 | 0009 | ANONYMOUS | 0000 |`
+
+If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of:
+
+`| 3 | 0000 |`
+
+If an anonymous server receives some other mechanism name, then it may respond with a FAIL message as simple as:
+
+`| 2 | 0000 |`
+
+Note that the anonymous mechanism need not add any additional round-trip messages between client and server. The START message can be piggybacked on the initial request, and the COMPLETE or FAIL message can be piggybacked on the initial response.
diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md
new file mode 100755
index 00000000000..75eda7b7f62
--- /dev/null
+++ b/doc/content/en/docs/++version++/Specification/_index.md
@@ -0,0 +1,896 @@
+---
+title: "Specification"
+linkTitle: "Specification"
+weight: 4
+date: 2021-10-25
+aliases:
+- spec.html
+---
+
+
+
+## Introduction
+This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document.
+
+## Schema Declaration {#schema-declaration}
+A Schema is represented in [JSON](https://www.json.org/) by one of:
+
+* A JSON string, naming a defined type.
+* A JSON object, of the form:
+```js
+{"type": "typeName", ...attributes...}
+```
+where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data.
+* A JSON array, representing a union of embedded types.
+
+## Primitive Types
+The set of primitive type names is:
+
+* _null_: no value
+* _boolean_: a binary value
+* _int_: 32-bit signed integer
+* _long_: 64-bit signed integer
+* _float_: single precision (32-bit) IEEE 754 floating-point number
+* _double_: double precision (64-bit) IEEE 754 floating-point number
+* _bytes_: sequence of 8-bit unsigned bytes
+* _string_: unicode character sequence
+
+Primitive types have no specified attributes.
+
+Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to:
+```json
+{"type": "string"}
+```
+
+## Complex Types
+Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_.
+
+### Records {#schema-record}
+Records use the type name "record" and support the following attributes:
+
+* _name_: a JSON string providing the name of the record (required).
+* _namespace_: a JSON string that qualifies the name (optional).
+* _doc_: a JSON string providing documentation to the user of this schema (optional).
+* _aliases_: a JSON array of strings, providing alternate names for this record (optional).
+* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes:
+ * _name_: a JSON string providing the name of the field (required).
+ * _doc_: a JSON string describing this field for users (optional).
+ * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above
+ * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below.
+ * _aliases_: a JSON array of strings, providing alternate names for this field (optional).
+ * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default.
+
+*field default values*
+
+| **avro type** | **json type** | **example** |
+|---------------|----------------|-------------|
+| null | null | `null` |
+| boolean | boolean | `true` |
+| int,long | integer | `1` |
+| float,double | number | `1.1` |
+| bytes | string | `"\u00FF"` |
+| string | string | `"foo"` |
+| record | object | `{"a": 1}` |
+| enum | string | `"FOO"` |
+| array | array | `[1]` |
+| map | object | `{"a": 1}` |
+| fixed | string | `"\u00ff"` |
+
+For example, a linked-list of 64-bit values may be defined with:
+```jsonc
+{
+ "type": "record",
+ "name": "LongList",
+ "aliases": ["LinkedLongs"], // old name for this
+ "fields" : [
+ {"name": "value", "type": "long"}, // each element has a long
+ {"name": "next", "type": ["null", "LongList"]} // optional next element
+ ]
+}
+```
+
+### Enums
+Enums use the type name "enum" and support the following attributes:
+
+* _name_: a JSON string providing the name of the enum (required).
+* _namespace_: a JSON string that qualifies the name (optional).
+* _aliases_: a JSON array of strings, providing alternate names for this enum (optional).
+* _doc_: a JSON string providing documentation to the user of this schema (optional).
+* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression `[A-Za-z_][A-Za-z0-9_]*` (the same requirement as for [names]({{< ref "#names" >}} "Names")).
+* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used.
+
+For example, playing card suits might be defined with:
+```json
+{
+ "type": "enum",
+ "name": "Suit",
+ "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
+}
+```
+
+### Arrays
+Arrays use the type name "array" and support a single attribute:
+
+* _items_: the schema of the array's items.
+
+For example, an array of strings is declared with:
+```json
+{
+ "type": "array",
+ "items" : "string",
+ "default": []
+}
+```
+
+### Maps
+Maps use the type name "map" and support one attribute:
+
+* _values_: the schema of the map's values.
+
+Map keys are assumed to be strings.
+
+For example, a map from string to long is declared with:
+```json
+{
+ "type": "map",
+ "values" : "long",
+ "default": {}
+}
+```
+
+### Unions
+Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string.
+
+(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match one element of the union.)
+
+Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.)
+
+Unions may not immediately contain other unions.
+
+### Fixed
+Fixed uses the type name "fixed" and supports the following attributes:
+
+* _name_: a string naming this fixed (required).
+* _namespace_: a string that qualifies the name (optional).
+* _aliases_: a JSON array of strings, providing alternate names for this fixed (optional).
+* _size_: an integer, specifying the number of bytes per value (required).
+
+For example, a 16-byte quantity may be declared with:
+```json
+{"type": "fixed", "size": 16, "name": "md5"}
+```
+
+### Names
+Records, enums and fixed are named types. Each has a fullname that is composed of two parts: a name and a namespace, separated by a dot. Equality of names is defined on the fullname – it is an error to specify two different types with the same name.
+
+Record fields and enum symbols have names as well (but no namespace). Equality of field names and enum symbols is defined within their scope (the record/enum that defines them). It is an error to define multiple fields or enum symbols with the same name in a single type. Fields and enum symbols across scopes are never equal, so field names and enum symbols can be reused in a different type.
+
+The name portion of the fullname of named types, record field names, and enum symbols must:
+
+* start with `[A-Za-z_]`
+* subsequently contain only `[A-Za-z0-9_]`
+
+A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive.
+
+The null namespace may not be used in a dot-separated sequence of names. So the grammar for a namespace is:
+```
+<empty> | <name>[(<dot><name>)*]
+```
+
+In record, enum and fixed definitions, the fullname is determined according to the rules stated below the following example:
+
+```
+{
+ "type": "record",
+ "name": "Example",
+ "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.",
+ "fields": [
+ {
+ "name": "inheritNull",
+ "type": {
+ "type": "enum",
+ "name": "Simple",
+ "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.",
+ "symbols": ["a", "b"]
+ }
+ }, {
+ "name": "explicitNamespace",
+ "type": {
+ "type": "fixed",
+ "name": "Simple",
+ "namespace": "explicit",
+ "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).",
+ "size": 12
+ }
+ }, {
+ "name": "fullName",
+ "type": {
+ "type": "record",
+ "name": "a.full.Name",
+ "namespace": "ignored",
+ "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.",
+ "fields": [
+ {
+ "name": "inheritNamespace",
+ "type": {
+ "type": "enum",
+ "name": "Understanding",
+ "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.",
+ "symbols": ["d", "e"]
+ }
+ }
+ ]
+ }
+ }
+ ]
+}
+```
+
+The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this:
+
+* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X.
+* A simple name (a name that contains no dots) and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X.
+* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace).
+
+References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition.
+
+Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace.
+
+Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names.
+
+A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.)
+
+### Aliases
+Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets.
+
+Aliases function by re-writing the writer's schema using aliases from the reader's schema. For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading.
+
+A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y".
+
+Aliases are alternative names, and thus subject to the same uniqueness constraints as names. Aliases should be valid names, but this is not required: any string is accepted as an alias. When aliases are used "to map a writer's schema to the reader's" (see above), this allows schema evolution to correct illegal names in old schemata.
+
+## Fixing an invalid, but previously accepted, schema
+Over time, rules and validations on schemas have changed. It is therefore possible that a schema used to work with an older version of Avro, but now fails to parse.
+
+This can have several reasons, as listed below. Each reason also describes a fix, which can be applied using [schema resolution]({{< ref "#schema-resolution" >}}): you fix the problems in the schema in a way that is compatible, and then you can use the new schema to read the old data.
+
+### Invalid names
+Invalid names of types and fields can be corrected by renaming (using an [alias]({{< ref "#aliases" >}})). This works for simple names, namespaces and fullnames.
+
+This fix is twofold: first, you add the invalid name as an alias to the type/field. Then, you change the name to any valid name.
+
+### Invalid defaults
+Default values are only used to fill in missing data when reading. Invalid defaults create invalid values in these cases. The fix is to correct the default values.
+
+
+## Data Serialization and Deserialization
+Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema.
+
+Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}).
+
+In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (e.g., binary is more permissive than JSON because JSON includes field names, and a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form.
+
+### Encodings
+Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate.
+
+### Binary Encoding {#binary-encoding}
+Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded.
+
+#### Primitive Types
+Primitive types are encoded in binary as follows:
+
+* _null_ is written as zero bytes.
+* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true).
+* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding (a sketch follows at the end of this section). Some examples:
+
+| *value* | *hex* |
+|---|---|
+| 0 | 00 |
+|-1 | 01 |
+| 1 | 02 |
+|-2 | 03 |
+| 2 | 04 |
+|...|...|
+|-64 | 7f |
+|64 | 80 01|
+|...|...|
+
+* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToRawIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToRawIntBits-float-) and then encoded in little-endian format.
+* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToRawLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToRawLongBits-double-) and then encoded in little-endian format.
+* _bytes_ are encoded as a long followed by that many bytes of data.
+* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data.
+For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f):
+```
+06 66 6f 6f
+```
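+
+As a sketch of the int/long coding described above (a zig-zag mapping followed by a base-128 varint, under the standard interpretation of the references cited), the following Java reproduces the examples in the table; the class name is illustrative:
+```java
+import java.io.ByteArrayOutputStream;
+
+public final class ZigZagExample {
+  // Zig-zag maps signed values to unsigned so small magnitudes stay small:
+  // 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
+  static long zigZag(long n) {
+    return (n << 1) ^ (n >> 63);
+  }
+
+  // Write the zig-zagged value as a varint, 7 bits per byte,
+  // least-significant group first, high bit set on continuation bytes.
+  static byte[] writeLong(long n) {
+    long v = zigZag(n);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    while ((v & ~0x7FL) != 0) {
+      out.write((int) ((v & 0x7F) | 0x80));
+      v >>>= 7;
+    }
+    out.write((int) v);
+    return out.toByteArray();
+  }
+
+  public static void main(String[] args) {
+    // Matches the table above, e.g. -64 -> 7f and 64 -> 80 01.
+    for (long n : new long[] {0, -1, 1, -2, 2, -64, 64}) {
+      StringBuilder hex = new StringBuilder();
+      for (byte b : writeLong(n)) hex.append(String.format("%02x ", b));
+      System.out.println(n + " -> " + hex.toString().trim());
+    }
+  }
+}
+```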
+
+### Complex Types
+Complex types are encoded in binary as follows:
+
+#### Records
+A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema.
+
+For example, consider the record schema
+```json
+{
+ "type": "record",
+ "name": "test",
+ "fields" : [
+ {"name": "a", "type": "long"},
+ {"name": "b", "type": "string"}
+ ]
+}
+```
+
+An instance of this record whose _a_ field has value 27 (encoded as hex 36) and whose _b_ field has value "foo" (encoded as hex bytes 06 66 6f 6f) would be encoded simply as the concatenation of these, namely the hex byte sequence:
+```
+36 06 66 6f 6f
+```
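+
+The same bytes can be produced with the Java implementation's generic API; a minimal sketch, assuming the Avro Java library is on the classpath:
+```java
+import java.io.ByteArrayOutputStream;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.EncoderFactory;
+
+public class RecordEncodingExample {
+  public static void main(String[] args) throws Exception {
+    Schema schema = new Schema.Parser().parse(
+        "{\"type\":\"record\",\"name\":\"test\",\"fields\":["
+        + "{\"name\":\"a\",\"type\":\"long\"},"
+        + "{\"name\":\"b\",\"type\":\"string\"}]}");
+    GenericRecord record = new GenericData.Record(schema);
+    record.put("a", 27L);
+    record.put("b", "foo");
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
+    new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
+    encoder.flush();
+    for (byte b : out.toByteArray()) System.out.printf("%02x ", b); // 36 06 66 6f 6f
+  }
+}
+```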
+
+#### Enums
+An enum is encoded by an int, representing the zero-based position of the symbol in the schema.
+
+For example, consider the enum:
+```json
+{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
+```
+
+This would be encoded by an int between zero and three, with zero indicating "A" and three indicating "D".
+
+#### Arrays
+Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema.
+
+If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields.
+
+For example, given the array schema
+```json
+{"type": "array", "items": "long"}
+```
+an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04), followed by the long values 3 and 27 (encoded as hex 06 36), and terminated by zero:
+```
+04 06 36 00
+```
+
+The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array.
+
+#### Maps {#schema-maps}
+Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema.
+
+If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields.
+
+The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map.
+
+#### Unions
+A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union.
+
+For example, the union schema `["null","string"]` would encode:
+
+* _null_ as zero (the index of "null" in the union):
+`00`
+* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string:
+`02 02 61`
+
+NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members.
+
+#### Fixed
+Fixed instances are encoded using the number of bytes declared in the schema.
+
+### JSON Encoding
+Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}).
+
+The value of a union is encoded in JSON as follows:
+
+* if its type is _null_, then it is encoded as a JSON _null_;
+* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used.
+
+For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode:
+
+* _null_ as _null_;
+* the string "a" as `{"string": "a"}` and
+* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance.
+
+Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc.
+
+### Single-object encoding
+In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic.
+
+In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively.
+
+#### Single object encoding specification
+Single Avro objects are encoded as follows:
+
+1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1).
+1. The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema.
+1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}).
+
+Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload.
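+
+A minimal sketch of producing this wrapper in Java, assuming the Avro Java library is on the classpath for computing the CRC-64-AVRO fingerprint (via `SchemaNormalization`, discussed later in this document):
+```java
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaNormalization;
+
+public class SingleObjectExample {
+  static byte[] wrap(long schemaFingerprint, byte[] avroBinaryBody) throws IOException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    out.write(0xC3);                        // two-byte marker,
+    out.write(0x01);                        // format version 1
+    out.write(ByteBuffer.allocate(8)
+        .order(ByteOrder.LITTLE_ENDIAN)     // 8-byte little-endian fingerprint
+        .putLong(schemaFingerprint)
+        .array());
+    out.write(avroBinaryBody);              // standard Avro binary encoding
+    return out.toByteArray();
+  }
+
+  public static void main(String[] args) throws Exception {
+    Schema schema = new Schema.Parser().parse("{\"type\": \"string\"}");
+    long fp = SchemaNormalization.parsingFingerprint64(schema); // CRC-64-AVRO
+    byte[] body = {0x06, 0x66, 0x6f, 0x6f}; // "foo" in Avro binary (see above)
+    System.out.println(wrap(fp, body).length); // 2 + 8 + 4 = 14 bytes
+  }
+}
+```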
+
+## Sort Order
+Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects.
+
+Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items.
+
+Two items with the same schema are compared according to the following rules.
+
+* _null_ data is always equal.
+* _boolean_ data is ordered with false before true.
+* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value.
+* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values.
+* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical.
+* _array_ data is compared lexicographically by element.
+* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values.
+* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above.
+* _record_ data is ordered lexicographically by field. If a field specifies that its order is:
+ * "ascending", then the order of its values is unaltered.
+ * "descending", then the order of its values is reversed.
+ * "ignore", then its values are ignored when sorting.
+* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field.
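+
+In the Java implementation, these rules are exposed through `org.apache.avro.io.BinaryData.compare`, which orders binary-encoded data without deserializing it; a minimal sketch:
+```java
+import org.apache.avro.Schema;
+import org.apache.avro.io.BinaryData;
+
+public class SortOrderExample {
+  public static void main(String[] args) {
+    Schema schema = Schema.create(Schema.Type.INT);
+    byte[] one = {0x02}; // zig-zag encoding of 1
+    byte[] two = {0x04}; // zig-zag encoding of 2
+    // Negative result: the first datum sorts before the second.
+    System.out.println(BinaryData.compare(one, 0, two, 0, schema));
+  }
+}
+```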
+
+## Object Container Files
+Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Synchronization markers are used between blocks to permit efficient splitting of files for MapReduce processing.
+
+Files may include arbitrary user-specified metadata.
+
+A file consists of:
+
+* A file header, followed by
+* one or more file data blocks.
+
+A file header consists of:
+
+* Four bytes, ASCII 'O', 'b', 'j', followed by the byte 1 (the format version number).
+* File metadata, including the schema.
+* The 16-byte, randomly-generated sync marker for this file.
+
+File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema:
+```json
+{"type": "map", "values": "bytes"}
+```
+All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used:
+
+* **avro.schema** contains the schema of objects stored in the file, as JSON data (required).
+* **avro.codec**: the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described in more detail below.
+
+A file header is thus described by the following schema:
+```json
+{"type": "record", "name": "org.apache.avro.file.Header",
+ "fields" : [
+ {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}},
+ {"name": "meta", "type": {"type": "map", "values": "bytes"}},
+ {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}
+ ]
+}
+```
+
+A file data block consists of:
+
+* A long indicating the count of objects in this block.
+* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied.
+* The serialized objects. If a codec is specified, this is compressed by that codec.
+* The file's 16-byte sync marker.
+
+A file data block is thus described by the following schema:
+```json
+{"type": "record", "name": "org.apache.avro.file.DataBlock",
+ "fields" : [
+ {"name": "count", "type": "long"},
+ {"name": "data", "type": "bytes"},
+ {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}
+ ]
+}
+```
+
+Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enables detection of corrupt blocks and helps ensure data integrity.
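+
+As an illustration, writing and re-reading such a container file with the Java implementation might look like the following sketch (the file name and record contents are arbitrary; the deflate codec is described below):
+```java
+import java.io.File;
+import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileReader;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+
+public class ContainerFileExample {
+  public static void main(String[] args) throws Exception {
+    Schema schema = new Schema.Parser().parse(
+        "{\"type\":\"record\",\"name\":\"test\",\"fields\":["
+        + "{\"name\":\"a\",\"type\":\"long\"},{\"name\":\"b\",\"type\":\"string\"}]}");
+
+    GenericRecord record = new GenericData.Record(schema);
+    record.put("a", 27L);
+    record.put("b", "foo");
+
+    File file = new File("test.avro");
+    try (DataFileWriter<GenericRecord> writer =
+             new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
+      writer.setCodec(CodecFactory.deflateCodec(6)); // sets the avro.codec metadata
+      writer.create(schema, file);                   // writes magic, metadata, sync marker
+      writer.append(record);                         // buffered into a data block
+    }
+
+    // The reader recovers the schema from the avro.schema metadata property.
+    try (DataFileReader<GenericRecord> reader =
+             new DataFileReader<>(file, new GenericDatumReader<GenericRecord>())) {
+      System.out.println(reader.getSchema());
+      for (GenericRecord r : reader) System.out.println(r);
+    }
+  }
+}
+```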
+
+### Required Codecs
+
+_null_
+
+The "null" codec simply passes through data uncompressed.
+
+_deflate_
+
+The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum.
+
+### Optional Codecs
+_bzip2_
+
+The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library.
+
+_snappy_
+
+The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block.
+
+_xz_
+
+The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library.
+
+_zstandard_
+
+The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library.
+
+## Protocol Declaration
+Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text.
+
+A protocol is a JSON object with the following attributes:
+
+* _protocol_, a string, the name of the protocol (required);
+* _namespace_, an optional string that qualifies the name;
+* _doc_, an optional string describing this protocol;
+* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted.
+* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name.
+
+The name and namespace qualification rules defined for schema objects apply to protocols as well.
+
+### Messages
+A message has attributes:
+
+* a _doc_, an optional description of the message,
+* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration);
+* a _response_ schema;
+* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union.
+* an optional one-way boolean parameter.
+
+A request parameter list is processed equivalently to an anonymous record. Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences.
+
+The one-way parameter may only be true when the response type is `"null"` and no errors are listed.
+
+### Sample Protocol
+For example, one may define a simple HelloWorld protocol with:
+```json
+{
+ "namespace": "com.acme",
+ "protocol": "HelloWorld",
+ "doc": "Protocol Greetings",
+
+ "types": [
+ {"name": "Greeting", "type": "record", "fields": [
+ {"name": "message", "type": "string"}]},
+ {"name": "Curse", "type": "error", "fields": [
+ {"name": "message", "type": "string"}]}
+ ],
+
+ "messages": {
+ "hello": {
+ "doc": "Say hello.",
+ "request": [{"name": "greeting", "type": "Greeting" }],
+ "response": "Greeting",
+ "errors": ["Curse"]
+ }
+ }
+}
+```
+
+## Protocol Wire Format
+
+### Message Transport
+Messages may be transmitted via different transport mechanisms.
+
+To the transport, a _message_ is an opaque byte sequence.
+
+A transport is a system that supports:
+
+* **transmission of request messages**
+* **receipt of corresponding response messages**
+Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers.
+
+Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below.
+
+#### HTTP as Transport
+When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless, the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method.
+
+HTTP is used by Avro as a stateless transport.
+
+### Message Framing
+Avro messages are _framed_ as a list of buffers.
+
+Framing is a layer between messages and the transport. It exists to optimize certain operations.
+
+The format of framed message data is:
+
+* a series of buffers, where each buffer consists of:
+ * a four-byte, big-endian _buffer length_, followed by
+ * that many bytes of _buffer_ data.
+* a message is always terminated by a zero-length buffer.
+
+Framing is transparent to request and response message formats (described below). Any message may be presented as a single buffer or as multiple buffers.
+
+Framing can permit readers to more efficiently get different buffers from different sources and writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space.
+
+A simple, recommended framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer. Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object, the runtime can hand it an entire buffer directly, without having to copy it.
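+
+A minimal sketch of writing a framed message in Java, under the rules above (the class name is illustrative):
+```java
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+public class FramingExample {
+  static byte[] frame(byte[]... buffers) throws IOException {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(bytes);
+    for (byte[] buffer : buffers) {
+      out.writeInt(buffer.length); // four-byte, big-endian buffer length
+      out.write(buffer);           // buffer data
+    }
+    out.writeInt(0);               // zero-length buffer terminates the message
+    return bytes.toByteArray();
+  }
+
+  public static void main(String[] args) throws IOException {
+    // One 3-byte buffer plus the terminator: 4 + 3 + 4 = 11 bytes.
+    System.out.println(frame(new byte[] {1, 2, 3}).length);
+  }
+}
+```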
+
+### Handshake
+The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text.
+
+RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection.
+
+The handshake process uses the following record schemas:
+```json
+{
+ "type": "record",
+ "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
+ "fields": [
+ {"name": "clientHash",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}},
+ {"name": "clientProtocol", "type": ["null", "string"]},
+ {"name": "serverHash", "type": "MD5"},
+ {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
+ ]
+}
+{
+ "type": "record",
+ "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
+ "fields": [
+ {"name": "match",
+ "type": {"type": "enum", "name": "HandshakeMatch",
+ "symbols": ["BOTH", "CLIENT", "NONE"]}},
+ {"name": "serverProtocol",
+ "type": ["null", "string"]},
+ {"name": "serverHash",
+ "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
+ {"name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}
+ ]
+}
+```
+
+* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server.
+The server responds with a HandshakeResponse containing one of:
+ * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse.
+ * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server.
+ * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect.
+In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (`match=BOTH, serverProtocol=null, serverHash=null`) as above.
+
+The meta field is reserved for future handshake enhancements.
+
+### Call Format
+A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above.
+
+The format of a call request is:
+
+* _request metadata_, a map with values of type bytes
+* the _message name_, an Avro string, followed by
+* the _message parameters_. Parameters are serialized according to the message's request declaration.
+When the empty string is used as a message name, a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message.
+
+When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is:
+
+* _response metadata_, a map with values of type bytes
+* a one-byte error _flag_ boolean, followed by either:
+ * if the error flag is false, the message _response_, serialized per the message's response schema.
+ * if the error flag is true, the _error_, serialized per the message's effective error union schema.
+
+### Schema Resolution {#schema-resolution}
+A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than is used to read it, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved.
+
+We refer to the schema used to write the data as the writer's schema, and the schema that the application expects as the reader's schema. Differences between these should be resolved as follows:
+
+* It is an error if the two schemas do not _match_.
+To match, one of the following must hold:
+ * both schemas are arrays whose item types match
+ * both schemas are maps whose value types match
+ * both schemas are enums whose (unqualified) names match
+ * both schemas are fixed whose sizes and (unqualified) names match
+ * both schemas are records with the same (unqualified) name
+ * either schema is a union
+ * both schemas have the same primitive type
+ * the writer's schema may be promoted to the reader's as follows:
+ * int is promotable to long, float, or double
+ * long is promotable to float or double
+ * float is promotable to double
+ * string is promotable to bytes
+ * bytes is promotable to string
+* **if both are records**:
+ * the ordering of fields may be different: fields are matched by name.
+ * schemas for fields with the same name in both records are resolved recursively.
+ * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored.
+ * if the reader's record schema has a field that contains a default value, and the writer's schema does not have a field with the same name, then the reader should use the default value from its field.
+ * if the reader's record schema has a field with no default value, and the writer's schema does not have a field with the same name, an error is signalled.
+* **if both are enums**:
+If the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used; otherwise an error is signalled.
+
+* **if both are arrays**:
+This resolution algorithm is applied recursively to the reader's and writer's array item schemas.
+
+* **if both are maps**:
+This resolution algorithm is applied recursively to the reader's and writer's value schemas.
+
+* **if both are unions**:
+The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. If none match, an error is signalled.
+
+* **if reader's is a union, but writer's is not**
+The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled.
+
+* **if writer's is a union, but reader's is not**
+If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled.
+
+A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization.
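+
+As an illustration of record resolution, a minimal Java sketch that writes with one schema and reads with a schema that adds a defaulted field (assuming the Avro Java library is on the classpath; the schema contents are arbitrary):
+```java
+import java.io.ByteArrayOutputStream;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.BinaryDecoder;
+import org.apache.avro.io.BinaryEncoder;
+import org.apache.avro.io.DecoderFactory;
+import org.apache.avro.io.EncoderFactory;
+
+public class SchemaResolutionExample {
+  public static void main(String[] args) throws Exception {
+    Schema writer = new Schema.Parser().parse(
+        "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
+        + "{\"name\":\"a\",\"type\":\"long\"}]}");
+    Schema reader = new Schema.Parser().parse(
+        "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
+        + "{\"name\":\"a\",\"type\":\"long\"},"
+        + "{\"name\":\"b\",\"type\":\"string\",\"default\":\"none\"}]}");
+
+    // Write a datum with the writer's schema.
+    GenericRecord record = new GenericData.Record(writer);
+    record.put("a", 27L);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    BinaryEncoder enc = EncoderFactory.get().binaryEncoder(out, null);
+    new GenericDatumWriter<GenericRecord>(writer).write(record, enc);
+    enc.flush();
+
+    // Read it back with the reader's schema; the missing field takes its default.
+    BinaryDecoder dec = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
+    GenericRecord read = new GenericDatumReader<GenericRecord>(writer, reader).read(null, dec);
+    System.out.println(read); // {"a": 27, "b": "none"}
+  }
+}
+```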
+
+### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas}
+One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning. At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer?
+
+Parsing Canonical Form is a transformation of a writer's schema that lets us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writer using the other original schema. (We sketch a proof of this property in a companion document.)
+
+The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints.
+
+#### Transforming into Parsing Canonical Form
+Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form:
+
+* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., `"int"` instead of `{"type":"int"}`).
+* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so. Then eliminate namespace attributes, which are now redundant.
+* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_).
+* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields.
+* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents.
+* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas).
+* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.
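+
+For example (a schema invented for illustration), applying these transformations to:
+```json
+{
+  "type": "record",
+  "namespace": "example",
+  "name": "Test",
+  "doc": "A test record",
+  "fields": [
+    {"name": "a", "type": {"type": "int"}, "default": 0}
+  ]
+}
+```
+yields the Parsing Canonical Form:
+```json
+{"name":"example.Test","type":"record","fields":[{"name":"a","type":"int"}]}
+```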
+
+#### Schema Fingerprints {#schema-fingerprints}
+"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers.
+
+In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas:
+
+* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries.
+* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chance of collisions is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision).
+* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today.
+
+These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves.
+
+Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm:
+```java
+long fingerprint64(byte[] buf) {
+  if (FP_TABLE == null) initFPTable();
+  long fp = EMPTY;
+  for (int i = 0; i < buf.length; i++)
+    // fold in one byte at a time; the table applies eight bit-steps at once
+    fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff];
+  return fp;
+}
+
+// fingerprint of the empty message; also serves as the reversed polynomial below
+static long EMPTY = 0xc15d213aa4d7a795L;
+static long[] FP_TABLE = null;
+
+void initFPTable() {
+  // precompute the fingerprint contribution of every possible byte value
+  FP_TABLE = new long[256];
+  for (int i = 0; i < 256; i++) {
+    long fp = i;
+    for (int j = 0; j < 8; j++)
+      fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L));
+    FP_TABLE[i] = fp;
+  }
+}
+```
+
+Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.)
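+
+As a minimal, self-contained usage sketch (the class and variable names are ours, not part of the specification), the fingerprint of a schema is computed over the UTF-8 bytes of its Parsing Canonical Form:
+```java
+import java.nio.charset.StandardCharsets;
+
+public class Fingerprint64Demo {
+  static final long EMPTY = 0xc15d213aa4d7a795L;
+  static long[] FP_TABLE = null;
+
+  static long fingerprint64(byte[] buf) {
+    if (FP_TABLE == null) initFPTable();
+    long fp = EMPTY;
+    for (int i = 0; i < buf.length; i++)
+      fp = (fp >>> 8) ^ FP_TABLE[(int) (fp ^ buf[i]) & 0xff];
+    return fp;
+  }
+
+  static void initFPTable() {
+    FP_TABLE = new long[256];
+    for (int i = 0; i < 256; i++) {
+      long fp = i;
+      for (int j = 0; j < 8; j++)
+        fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L));
+      FP_TABLE[i] = fp;
+    }
+  }
+
+  public static void main(String[] args) {
+    // the Parsing Canonical Form of the int primitive schema is the JSON string "int"
+    byte[] bytes = "\"int\"".getBytes(StandardCharsets.UTF_8);
+    System.out.printf("%016x%n", fingerprint64(bytes));
+  }
+}
+```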
+
+## Logical Types
+A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types.
+
+A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required.
+
+Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type.
+
+### Decimal
+The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled_ × 10<sup>-scale</sup>.
+
+A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute.
+
+The following attributes are supported:
+
+* _scale_, a JSON integer representing the scale (optional). If not specified, the scale is 0.
+* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required).
+For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2:
+```json
+{
+ "type": "bytes",
+ "logicalType": "decimal",
+ "precision": 4,
+ "scale": 2
+}
+```
+Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length _n_ can store at most _floor(log10(2<sup>8 × n - 1</sup> - 1))_ base-10 digits of precision.
+
+Scale must be zero or a positive integer less than or equal to the precision.
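+
+As a worked example (a minimal sketch using Java's standard library; the class name is ours), the value 3.14 under the schema above (precision 4, scale 2) has the unscaled integer 314, whose two's-complement big-endian representation is the two bytes `0x01 0x3A`:
+```java
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+public class DecimalEncodingDemo {
+  public static void main(String[] args) {
+    BigDecimal value = new BigDecimal("3.14");    // scale 2, 3 significant digits (fits precision 4)
+    BigInteger unscaled = value.unscaledValue();  // 314
+    byte[] encoded = unscaled.toByteArray();      // {0x01, 0x3A}: big-endian two's complement
+    System.out.println(unscaled + " -> " + encoded.length + " bytes");
+  }
+}
+```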
+
+For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match.
+
+**Alternative**
+
+As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another decimal logical type, restricted to the Avro _bytes_ type.
+
+_Currently only available in Java and Rust_.
+
+```json
+{
+ "type": "bytes",
+ "logicalType": "big-decimal"
+}
+```
+Here, since the scale is stored in the value itself, this representation needs more bytes than the preceding `decimal` type, but it allows more flexibility.
+
+### UUID
+
+The `uuid` logical type represents a randomly generated universally unique identifier (UUID).
+
+A `uuid` logical type annotates an Avro `string` or `fixed` of length 16. Both the string and `fixed` byte layout have to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt).
+
+The following schemas represent a uuid:
+
+```json
+{
+ "type": "string",
+ "logicalType": "uuid"
+}
+```
+
+```json
+{
+ "type": "fixed",
+ "size": 16,
+ "logicalType": "uuid"
+}
+```
+
+### Date
+The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day.
+
+A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar).
+
+The following schema represents a date:
+```json
+{
+ "type": "int",
+ "logicalType": "date"
+}
+```
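+
+For example (a minimal sketch using Java's standard library; the class name is ours), the date 2000-01-01 is stored as the int 10957, the number of days since the epoch:
+```java
+import java.time.LocalDate;
+
+public class DateDemo {
+  public static void main(String[] args) {
+    long days = LocalDate.of(2000, 1, 1).toEpochDay(); // 10957
+    System.out.println(days);
+  }
+}
+```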
+
+### Time (millisecond precision) {#time_ms}
+The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond.
+
+A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000.
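+
+The following schema represents a time of day with millisecond precision:
+```json
+{
+  "type": "int",
+  "logicalType": "time-millis"
+}
+```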
+
+### Time (microsecond precision)
+The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond.
+
+A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000.
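+
+The following schema represents a time of day with microsecond precision:
+```json
+{
+  "type": "long",
+  "logicalType": "time-micros"
+}
+```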
+
+### Timestamps {#timestamps}
+
+The `timestamp-{millis,micros,nanos}` logical type represents an instant on the global timeline, independent of a particular time zone or calendar. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment.
+
+- `timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000.
+- `timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000.
+- `timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from the unix epoch, 1 January 1970 00:00:00.000000000.
+
+Example: given an event at noon local time (12:00) on January 1, 2000, in Helsinki, where local time was two hours east of UTC (UTC+2), the timestamp is first shifted to UTC (2000-01-01T10:00:00) and then converted to the Avro long 946720800000 (milliseconds) and written.
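+
+A minimal sketch of this conversion using Java's java.time API (the class and variable names are ours):
+```java
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+
+public class TimestampDemo {
+  public static void main(String[] args) {
+    ZonedDateTime noonInHelsinki =
+        ZonedDateTime.of(2000, 1, 1, 12, 0, 0, 0, ZoneId.of("Europe/Helsinki"));
+    // shift to UTC and count milliseconds from the epoch
+    long millis = noonInHelsinki.toInstant().toEpochMilli(); // 946720800000
+    System.out.println(millis);
+  }
+}
+```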
+
+### Local Timestamps {#local_timestamp}
+
+The `local-timestamp-{millis,micros,nanos}` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local.
+
+- `local-timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from 1 January 1970 00:00:00.000.
+- `local-timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from 1 January 1970 00:00:00.000000.
+- `local-timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from 1 January 1970 00:00:00.000000000.
+
+Example: given an event at noon local time (12:00) on January 1, 2000, in Helsinki, where local time was two hours east of UTC (UTC+2), the local timestamp is converted directly to the Avro long 946728000000 (milliseconds) and then written; no time-zone shift is applied.
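+
+A minimal sketch of this conversion using Java's java.time API (the class and variable names are ours):
+```java
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
+
+public class LocalTimestampDemo {
+  public static void main(String[] args) {
+    LocalDateTime noon = LocalDateTime.of(2000, 1, 1, 12, 0);
+    // the local wall-clock time is encoded as if it were UTC, so no shift is applied
+    long millis = noon.toInstant(ZoneOffset.UTC).toEpochMilli(); // 946728000000
+    System.out.println(millis);
+  }
+}
+```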
+
+### Duration
+The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods.
+
+A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds.
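+
+A minimal sketch of this layout (the values and class name are invented for illustration): a duration of 1 month, 15 days and 3000 milliseconds is encoded as three little-endian unsigned 32-bit integers:
+```java
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+public class DurationDemo {
+  public static void main(String[] args) {
+    ByteBuffer buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
+    buf.putInt(1)      // months
+       .putInt(15)     // days
+       .putInt(3000);  // milliseconds
+    // buf.array() now holds the 12-byte payload for the fixed(12) duration value
+  }
+}
+```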
diff --git a/doc/content/en/docs/++version++/_index.md b/doc/content/en/docs/++version++/_index.md
new file mode 100755
index 00000000000..13b815d86ee
--- /dev/null
+++ b/doc/content/en/docs/++version++/_index.md
@@ -0,0 +1,59 @@
+---
+title: "Apache Avroâĸ ++version++ Documentation"
+linkTitle: "++version++"
+type: docs
+weight: 10
+---
+
+
+
+## Introduction
+
+Apache Avro™ is a data serialization system.
+
+Avro provides:
+
+* Rich data structures.
+* A compact, fast, binary data format.
+* A container file, to store persistent data.
+* Remote procedure call (RPC).
+* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation is an optional optimization, only worth implementing for statically typed languages.
+
+## Schemas
+
+Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing.
+
+When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema, this can be easily resolved, since both schemas are present.
+
+When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since client and server both have the other's full schema, correspondence between same-named fields, missing fields, extra fields, etc. can all be easily resolved.
+
+Avro schemas are defined with JSON. This facilitates implementation in languages that already have JSON libraries.
+
+## Comparison with other systems
+
+Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects.
+
+* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages.
+* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size.
+* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names.
+
+
diff --git a/doc/content/en/docs/++version++/api-c++.md b/doc/content/en/docs/++version++/api-c++.md
new file mode 100644
index 00000000000..4382750a46a
--- /dev/null
+++ b/doc/content/en/docs/++version++/api-c++.md
@@ -0,0 +1,29 @@
+---
+title: "C++ API"
+linkTitle: "C++ API"
+weight: 102
+manualLink: /docs/++version++/api/cpp/html/
+---
+
+
+
+The C++ API documentation can be found [here](/docs/++version++/api/cpp/html/).
diff --git a/doc/content/en/docs/++version++/api-c.md b/doc/content/en/docs/++version++/api-c.md
new file mode 100644
index 00000000000..79a5209e526
--- /dev/null
+++ b/doc/content/en/docs/++version++/api-c.md
@@ -0,0 +1,29 @@
+---
+title: "C API"
+linkTitle: "C API"
+weight: 101
+manualLink: /docs/++version++/api/c/
+---
+
+
+
+The C API documentation can be found [here](/docs/++version++/api/c/).
diff --git a/doc/content/en/docs/++version++/api-csharp.md b/doc/content/en/docs/++version++/api-csharp.md
new file mode 100644
index 00000000000..cfad0d1e343
--- /dev/null
+++ b/doc/content/en/docs/++version++/api-csharp.md
@@ -0,0 +1,29 @@
+---
+title: "C# API"
+linkTitle: "C# API"
+weight: 103
+manualLink: /docs/++version++/api/csharp/html/
+---
+
+
+
+The C# API documentation can be found [here](/docs/++version++/api/csharp/html/).
diff --git a/doc/content/en/docs/++version++/api-java.md b/doc/content/en/docs/++version++/api-java.md
new file mode 100644
index 00000000000..12d743567df
--- /dev/null
+++ b/doc/content/en/docs/++version++/api-java.md
@@ -0,0 +1,29 @@
+---
+title: "Java API"
+linkTitle: "Java API"
+weight: 100
+manualLink: /docs/++version++/api/java/
+---
+
+
+
+The Javadocs can be found [here](/docs/++version++/api/java/).
diff --git a/doc/content/en/docs/++version++/api-py.md b/doc/content/en/docs/++version++/api-py.md
new file mode 100644
index 00000000000..fb4f4ba13b5
--- /dev/null
+++ b/doc/content/en/docs/++version++/api-py.md
@@ -0,0 +1,29 @@
+---
+title: "Python API"
+linkTitle: "Python API"
+weight: 104
+manualLink: /docs/++version++/api/py/html/
+---
+
+
+
+The Python API documentation can be found [here](/docs/++version++/api/py/html/).
diff --git a/doc/content/en/docs/_index.md b/doc/content/en/docs/_index.md
new file mode 100755
index 00000000000..541db8d6b76
--- /dev/null
+++ b/doc/content/en/docs/_index.md
@@ -0,0 +1,58 @@
+
+---
+title: "Documentation"
+linkTitle: "Documentation"
+---
+
+
+
+## Introduction
+
+Apache Avro™ is a data serialization system.
+
+Avro provides:
+
+* Rich data structures.
+* A compact, fast, binary data format.
+* A container file, to store persistent data.
+* Remote procedure call (RPC).
+* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation is an optional optimization, only worth implementing for statically typed languages.
+
+## Schemas
+
+Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing.
+
+When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema, this can be easily resolved, since both schemas are present.
+
+When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since client and server both have the other's full schema, correspondence between same-named fields, missing fields, extra fields, etc. can all be easily resolved.
+
+Avro schemas are defined with JSON. This facilitates implementation in languages that already have JSON libraries.
+
+## Comparison with other systems
+
+Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects.
+
+* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages.
+* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size.
+* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names.
+
+
diff --git a/doc/content/en/project/Articles/_index.md b/doc/content/en/project/Articles/_index.md
new file mode 100755
index 00000000000..e30d9ef1d07
--- /dev/null
+++ b/doc/content/en/project/Articles/_index.md
@@ -0,0 +1,84 @@
+---
+title: "Articles"
+linkTitle: "Articles"
+weight: 4
+---
+
+
+** **
+
+**Guide to Apache Avro**
+Feb 19, 2023, by baeldung.
+
+https://www.baeldung.com/java-apache-avro
+
+** **
+
+**Apache Avro IDL Schema Support**,
+Apr 11, 2022, by Oscar Westra van Holthe - Kind.
+
+https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support
+
+** **
+
+**Generate random JSON data from an AVRO schema using Java**,
+Jan 24, 2022, by Maarten Smeets.
+
+https://technology.amis.nl/soa/kafka/generate-random-json-data-from-an-avro-schema-using-java/
+
+** **
+
+**A Gentle (and Practical) Introduction to Apache Avro**,
+Dec 22, 2020, by Anton Rodriguez.
+
+https://dzone.com/articles/gentle-and-practical-introduction-to-apache-avro-part-1
+
+** **
+
+**Apache Avro – A data serialization system**
+Dec 09, 2018, by Dennis Vriend.
+
+https://binx.io/2018/12/09/apache-avro/
+
+** **
+
+**Introduction to Apache Avro**
+Mar 12, 2016, by Bartosz Konieczny.
+
+https://www.waitingforcode.com/apache-avro/introduction-to-apache-avro/read
+
+** **
+
+**Reading and Writing Avro Files from the Command Line**,
+Mar 17, 2013, by Michael G. Noll.
+
+https://www.michael-noll.com/blog/2013/03/17/reading-and-writing-avro-files-from-the-command-line/
+
+** **
+
+**Using Apache Avro**
+Jan 25, 2011, by Boris Lublinsky.
+
+https://www.infoq.com/articles/ApacheAvro/
+
+
+
diff --git a/doc/content/en/project/Committer onboarding guide/_index.md b/doc/content/en/project/Committer onboarding guide/_index.md
new file mode 100755
index 00000000000..eb865a42ae2
--- /dev/null
+++ b/doc/content/en/project/Committer onboarding guide/_index.md
@@ -0,0 +1,48 @@
+---
+title: "Committer onboarding guide"
+linkTitle: "Committer onboarding guide"
+weight: 7
+---
+
+
+** **
+For you, the new committer:
+
+1. File your ICLA and send it to secretary@apache.org
+2. Log in to https://whimsy.apache.org; that will confirm a working ASF account
+3. You can edit email routing for the account, and add other emails that you own
+4. You can directly edit mailing list subscriptions (for example, you might switch them to your ASF account - you can still post from any of your registered emails)
+5. Link your GitHub account with your ASF account at https://gitbox.apache.org/; once you see the big green "Merge" button on pull requests, this is working
+6. Read the ASF new committer guide: https://www.apache.org/dev/new-committers-guide.html
+
+** **
+
+A committer in JIRA can add a new contributor by following these steps:
+
+1. Log in to JIRA with your committer credentials.
+2. Navigate to the project where you want to add the new contributor.
+3. Click on the "People" tab at the top of the page.
+4. Click on the "Add People" button.
+5. Enter the email address of the new contributor in the "Email Address" field.
+6. Select the appropriate role for the new contributor from the "Role" dropdown menu.
+7. Click the "Add" button to add the new contributor to the project.
+8. An email will be sent to the new contributor asking them to accept the invitation to join the project.
diff --git a/doc/content/en/project/Contributors onboarding guide/_index.md b/doc/content/en/project/Contributors onboarding guide/_index.md
new file mode 100644
index 00000000000..e53b136c681
--- /dev/null
+++ b/doc/content/en/project/Contributors onboarding guide/_index.md
@@ -0,0 +1,39 @@
+---
+title: "Contributor onboarding guide"
+linkTitle: "Contributor onboarding guide"
+weight: 8
+---
+
+
+
+
+1. Familiarize yourself with Apache Avro: Before you start contributing to Apache Avro, it's essential to have a good understanding of what Apache Avro is and how it works. You can start by reading the Apache Avro documentation to get an overview of the project's features, use cases, and architecture.
+
+2. Join the Apache Avro community: Join the Apache Avro mailing lists, IRC channels, and forums to interact with other contributors and users. You can ask questions, discuss ideas, and get feedback on your contributions from experienced contributors.
+3. Set up your development environment: To contribute to Apache Avro, you need to set up your development environment. The Apache Avro project uses Git for version control, and Apache Maven for building. You can follow the instructions in the Apache Avro documentation to set up your environment.
+4. Choose a contribution: Apache Avro is an open-source project, and there are always new features, bug fixes, and improvements that can be made. You can choose from a wide range of contributions, from documentation updates to code changes.
+5. Review existing issues and pull requests: Before you start working on a contribution, it's important to review existing issues and pull requests to avoid duplicating efforts. You can use the Apache Avro issue tracker to search for issues and pull requests related to your contribution.
+6. Create a new issue or pull request: If you can't find an existing issue or pull request related to your contribution, you can create a new one. Make sure to provide detailed information about your contribution, including a description of the problem, proposed solution, and any relevant code changes.
+7. Work on your contribution: Once you have a clear understanding of the contribution you want to make, you can start working on it. Make sure to follow the Apache Avro coding guidelines and best practices to ensure that your code is of high quality.
+8. Submit your contribution: When you're ready to submit your contribution, create a pull request in the Apache Avro GitHub repository. Make sure to include a detailed description of your changes, and any relevant documentation or test cases.
+9. Participate in reviews: Once you've submitted your contribution, it will be reviewed by other contributors. You may need to make additional changes based on their feedback before your contribution is accepted.
+10. Celebrate your contribution: Once your contribution has been accepted, celebrate your achievement! You've helped improve Apache Avro and contributed to the open-source community.
\ No newline at end of file
diff --git a/doc/content/en/project/Credits/_index.md b/doc/content/en/project/Credits/_index.md
new file mode 100644
index 00000000000..4b3f892115b
--- /dev/null
+++ b/doc/content/en/project/Credits/_index.md
@@ -0,0 +1,67 @@
+---
+title: "Credits"
+linkTitle: "Credits"
+weight: 2
+aliases:
+- /credits.html
+---
+
+
+
+## Apache Avro credits
+
+### Committers
+
+Apache Avro's active committers are:
+
+| **username** | **name** | **organization** | **roles** | **timezone** |
+|:-------------|:--------------------|:------------------------|:----------------------:|:------------:|
+| blue | Ryan Blue | Netflix | spec, java, ruby | -8 |
+| brucem | Bruce Mitchener | Army of Bruce | c | +7 |
+| busbey | Sean Busbey | Cloudera | java, ruby | -6 |
+| cutting | Doug Cutting | Cloudera | spec, java | -8 |
+| dcreager | Douglas Creager | RedJack, LLC | c | -5 |
+| hammer | Jeff Hammerbacher | Cloudera | python | -8 |
+| iemejia | Ismaël Mejía | Talend | java, docker | +1 |
+| kojiromike | Michael A. Smith | Independent | python, docker | -5 |
+| massie | Matt Massie | UC Berkeley | c | -8 |
+| martinkl | Martin Kleppmann | University of Cambridge | c, ruby | 0 |
+| mgrigorov | Martin Grigorov | Huawei | rust | +2 |
+| nielsbasjes | Niels Basjes | Bol.com | java, docker | +1 |
+| philz | Philip Zeyliger | Cloudera | java | -8 |
+| rskraba | Ryan Skraba | Talend | java, docker | +1 |
+| sbanacho | Scott Banachowski | Microsoft | c++ | -8 |
+| scottcarey | Scott Carey | RichRelevance | java | -8 |
+| sekikn | Kengo Seki | NTT Data | perl, interoperability | +9 |
+| sharadag | Sharad Agarwal | InMobi | python | +5.5 |
+| thiru | Thiruvalluvan M. G. | VertiCloud | java | +5.5 |
+| tjwp | Tim Perkins | Shopify | ruby | -5 |
+| tomwhite | Tom White | Cloudera | java | 0 |
+-------------
+
+### Contributors
+A list of Avro contributors and their contributions is available from [Jira](http://s.apache.org/AvroFixed).
+
+### Emeriti
+Contributors who are no longer active on Avro are:
+
+* None
diff --git a/doc/content/en/project/Donate/_index.md b/doc/content/en/project/Donate/_index.md
new file mode 100755
index 00000000000..c87561fefc1
--- /dev/null
+++ b/doc/content/en/project/Donate/_index.md
@@ -0,0 +1,29 @@
+---
+title: "Donate"
+linkTitle: "Donate"
+weight: 13
+manualLink: https://www.apache.org/foundation/sponsorship.html
+---
+
+
+
+If you would like to donate please see the Apache Software Foundation [donation program](https://www.apache.org/foundation/sponsorship.html)
diff --git a/doc/content/en/project/Download/_index.md b/doc/content/en/project/Download/_index.md
new file mode 100755
index 00000000000..eff8aa3e621
--- /dev/null
+++ b/doc/content/en/project/Download/_index.md
@@ -0,0 +1,81 @@
+---
+title: "Download"
+linkTitle: "Download"
+weight: 1
+---
+
+
+
+## Download
+Releases may be downloaded from Apache mirrors: [Download](https://www.apache.org/dyn/closer.cgi/avro/)
+
+The latest release is: Avro {{< avro_version >}} (3.4M, source, [pgp](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.asc), [sha512](https://downloads.apache.org/avro/avro-{{< avro_version >}}/avro-src-{{< avro_version >}}.tar.gz.sha512))
+
+* C#: https://www.nuget.org/packages/Apache.Avro/{{< avro_version >}}
+* Java: from Maven Central
+* Javascript: https://www.npmjs.com/package/avro-js/v/{{< avro_version >}}
+* Perl: https://metacpan.org/release/Avro
+* Python 3: https://pypi.org/project/avro/{{< avro_version >}}
+* Ruby: https://rubygems.org/gems/avro/versions/{{< avro_version >}}
+
+
+## Release Notes
+Release notes for Avro releases are available in [Jira](https://issues.apache.org/jira/browse/AVRO?report=com.atlassian.jira.plugin.system.project:changelog-panel#selectedTab=com.atlassian.jira.plugin.system.project%3Achangelog-panel)
+
+## Verifying a release
+It is essential that you verify the integrity of the downloaded files using the PGP signatures or SHA512 checksums. Please read [How to verify downloaded files](https://www.apache.org/info/verification.html) for more information on why you should verify our releases.
+
+The PGP signatures can be verified using PGP or GPG. First download the [KEYS](https://downloads.apache.org/avro/KEYS) file as well as the .asc signature files for the relevant release packages. Make sure you get these files from the main distribution directory, rather than from a mirror. Then verify the signatures using:
+
+```shell
+% gpg --import KEYS
+% gpg --verify downloaded_file.asc downloaded_file
+```
+
+or
+
+```shell
+% pgpk -a KEYS
+% pgpv downloaded_file.asc
+```
+
+or
+
+```shell
+% pgp -ka KEYS
+% pgp downloaded_file.asc
+```
+Alternatively, you can verify the hash on the file.
+
+Hashes can be calculated using GPG:
+```shell
+% gpg --print-md SHA256 downloaded_file
+```
+The output should be compared with the contents of the SHA256 file. Similarly for other hashes (SHA512, SHA1, MD5, etc.) which may be provided.
+
+Windows 7 and later systems should all now have certUtil:
+```shell
+% certUtil -hashfile pathToFileToCheck
+```
+HashAlgorithm choices: _MD2 MD4 MD5 SHA1 SHA256 SHA384 SHA512_
+
+Unix-like systems (and macOS) will have a utility called _md5_, _md5sum_ or _shasum_.
diff --git a/doc/content/en/project/Events/_index.md b/doc/content/en/project/Events/_index.md
new file mode 100755
index 00000000000..7d8646283f0
--- /dev/null
+++ b/doc/content/en/project/Events/_index.md
@@ -0,0 +1,28 @@
+---
+title: "Events"
+linkTitle: "Events"
+weight: 12
+---
+
+
+
+Apache Avro members often participate in events organized by the [Apache Software Foundation](https://www.apache.org/events/current-event.html)
diff --git a/doc/content/en/project/How to contribute/_index.md b/doc/content/en/project/How to contribute/_index.md
new file mode 100755
index 00000000000..6514d7c3627
--- /dev/null
+++ b/doc/content/en/project/How to contribute/_index.md
@@ -0,0 +1,388 @@
+---
+title: "How to contribute"
+linkTitle: "How to contribute"
+weight: 5
+---
+
+
+
+## Getting the source code
+
+First of all, you need the Avro source code.
+
+The easiest way is to clone or fork the GitHub mirror:
+
+```shell
+git clone https://github.com/apache/avro.git -o github
+```
+
+## Making Changes
+
+Before you start, file an issue in [JIRA](https://issues.apache.org/jira/browse/AVRO) or discuss your ideas on the [Avro developer mailing list](http://avro.apache.org/mailing_lists.html). Describe your proposed changes and check that they fit in with what others are doing and have planned for the project. Be patient; it may take folks a while to understand your requirements.
+
+Modify the source code and add some (very) nice features using your favorite IDE.
+
+But take care of the following points:
+
+**All Languages**
+- Contributions should pass existing unit tests.
+- Contributions should document public facing APIs.
+- Contributions should add new tests to demonstrate bug fixes or test new features.
+
+**Java**
+
+- All public classes and methods should have informative [Javadoc comments](https://www.oracle.com/fr/technical-resources/articles/java/javadoc-tool.html).
+- Do not use @author tags.
+- Java code should be formatted according to [Oracle's conventions](https://www.oracle.com/java/technologies/javase/codeconventions-introduction.html), with one exception:
+ - Indent two spaces per level, not four.
+- [JUnit](http://www.junit.org/) is our test framework (a minimal sketch follows this list):
+  - You must implement a class whose name starts with `Test`.
+  - Define methods within your class and tag them with the `@Test` annotation. Call JUnit's many assert methods to verify conditions; these methods will be executed when you run `mvn test`.
+  - By default, do not let tests write any temporary files to `/tmp`. Instead, the tests should write to the location specified by the `test.dir` system property.
+  - Place your class in the `src/test/java/` tree.
+  - You can run all the unit tests with the command `mvn test`, or you can run a specific unit test with the command `mvn -Dtest=<TestClass> test` (for example `mvn -Dtest=TestFoo test`).
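+
+As a minimal sketch (the class and method names are invented for illustration):
+
+```java
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+public class TestFoo {
+  @Test
+  public void testAddition() {
+    // executed by "mvn test"; use JUnit's assert methods to verify conditions
+    assertEquals(4, 2 + 2);
+  }
+}
+```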
+
+
+## Code Style (Autoformatting)
+
+For Java code we use [Spotless](https://github.com/diffplug/spotless/) to format the code to comply with Avro's code style conventions (see above). Automatic formatting relies on [Avro's Eclipse JDT formatter definition](https://github.com/apache/avro/blob/main/lang/java/eclipse-java-formatter.xml). You can use the same definition to auto-format from Eclipse, or from IntelliJ by configuring the Eclipse formatter plugin.
+
+If you use Maven, code style issues are checked at the compile phase. If your code breaks because of bad formatting, you can format it automatically by running the command:
+```shell
+mvn spotless:apply
+```
+
+## Unit Tests
+
+Please make sure that all unit tests succeed before constructing your patch and that no new compiler warnings are introduced by your patch. Each language has its own directory and test process.
+
+**Java**
+
+```shell
+cd avro-trunk/lang/java
+mvn clean test
+```
+
+
+**Python**
+
+```shell
+cd avro-trunk/lang/py
+./setup.py build test
+```
+
+
+**Rust**
+
+```shell
+cd avro-trunk/lang/rust
+./build.sh clean test
+```
+
+
+**C#**
+
+```shell
+cd avro-trunk/lang/csharp
+./build.sh clean test
+```
+
+
+**C**
+
+```shell
+cd avro-trunk/lang/c
+./build.sh clean
+./build.sh test
+```
+
+
+**C++**
+
+```shell
+cd avro-trunk/lang/c++
+./build.sh clean test
+```
+
+
+**Ruby**
+
+```shell
+cd avro-trunk/lang/ruby
+gem install echoe
+rake clean test
+```
+
+
+**PHP**
+
+```shell
+cd avro-trunk/lang/php
+./build.sh clean
+./build.sh test
+```
+
+
+
+## Contributing your code
+
+Contributions can be made directly via GitHub with a pull request, or via a patch.
+
+**Via Github**
+
+The preferred method is to create a [pull request](https://help.github.com/articles/using-pull-requests/).
+
+On your fork, create a branch named after the JIRA issue (for example, avro-1234_fixNpe).
+In your working copy, switch to it:
+```shell
+git pull
+git switch avro-1234_fixNpe
+```
+
+Code your changes (following the preceding recommendations).
+
+Check and add the updated sources:
+```shell
+git status
+
+# Add any new or changed files with:
+git add src/.../MyNewClass.java
+git add src/.../TestMyNewClass.java
+```
+
+Finally, create a commit with your changes and a good log message, and push it:
+```shell
+git commit -m "AVRO-1234: Fix NPE by adding check to ..."
+git push
+```
+On your GitHub fork page, a button will offer to create the Pull Request.
+Click it, fill in the conversation form, and create the PR.
+Link the PR to the corresponding JIRA ticket (on the JIRA ticket, add the PR under "Issue Links" and add the label 'pull-request-available').
+
+
+
+## Jira Guidelines
+
+Please comment on issues in [Jira](https://issues.apache.org/jira/projects/AVRO/issues), making your concerns known. Please also vote for issues that are a high priority for you.
+
+Please refrain from editing descriptions and comments if possible, as edits spam the mailing list and clutter Jira's "All" display, which is otherwise very useful. Instead, preview descriptions and comments using the preview button (on the right) before posting them. Keep descriptions brief and save more elaborate proposals for comments, since descriptions are included in Jira's automatically sent messages. If you change your mind, note this in a new comment, rather than editing an older comment. The issue should preserve this history of the discussion.
+
+## Stay involved
+
+Contributors should join the Avro mailing lists. In particular, the commit list (to see changes as they are made), the dev list (to join discussions of changes) and the user list (to help others).
+
+## Workflow
+
+Building and running the site locally requires a recent extended version of Hugo. Install [Hugo](https://gohugo.io/installation/) for your environment. Once you've made your working copy of the site repo, from the repo root folder, run:
+
+```shell
+hugo server --navigateToChanged
+```
+Edit the .md and .html files in the content/ folder.
+
+Once satisfied with the changes, commit them:
+```shell
+git commit -a
+```
+To generate the HTML, stop `hugo server --navigateToChanged` (with Ctrl+C) and run:
+```shell
+hugo
+```
+This will generate the HTML files in the public/ folder; this is what is actually deployed.
+
+Add the modified HTML files to Git:
+
+```shell
+git add .
+git rm offline-search-index.<>.json
+git commit -a
+git push
+```
+This way, even when the PR modifies many files, we can review only the first commit, the meaningful one, with the modified files in the content/ folder.
+
+
+## Running a container locally
+You can also run avro-website inside a Docker container, the container runs with a volume bound to the avro-website folder. This approach doesn't require you to install any dependencies other than Docker Desktop on Windows and Mac, and Docker Compose on Linux.
+
+Build the docker image
+
+```shell
+docker-compose build
+```
+Run the built image
+ ```shell
+docker-compose up
+```
+NOTE: You can run both commands at once with `docker-compose up --build`.
+
+Verify that the service is working.
+
+Open your web browser and type http://localhost:1313 in the address bar. This opens a local instance of the website homepage. You can now make changes to the content, and those changes will immediately show up in your browser after you save.
+
+**Cleanup**
+
+To stop Docker Compose, on your terminal window, press Ctrl + C.
+
+To remove the produced images run:
+ ```shell
+docker-compose rm
+```
+
+## Troubleshooting
+As you run the website locally, you may run into the following error:
+ ```shell
+$ hugo server
+
+INFO 2021/01/21 21:07:55 Using config file:
+Building sites … INFO 2021/01/21 21:07:55 syncing static files to /
+Built in 288 ms
+Error: Error building site: TOCSS: failed to transform "scss/main.scss" (text/x-scss): resource "scss/scss/main.scss_9fadf33d895a46083cdd64396b57ef68" not found in file cache
+ ```
+This error occurs if you have not installed the extended version of Hugo. See the [Hugo installation instructions](https://gohugo.io/installation/) for how to install it.
+
+## Edit content
+The website content is in the content/en folder. It contains .md (Markdown) and .html (HTML) files.
+
+**Layouts**
+
+To change the layout of any page, edit layouts//**.html. If there is no layout for a given page at that location, copy the one provided by the theme and edit it:
+ ```shell
+ cp themes/docsy/layouts/ layouts/
+ ```
+**Avro version**
+
+When a new version of Apache Avro is released:
+
+1. Change the value of params.avroversion in config.toml
+2. Add a new entry to the Releases pages in the Blog section, for example:
+ ```shell
+cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md
+ ```
+**API documentation for C/C++/C# modules**
+
+The API documentation for C/C++/C# is built by the respective `build.sh dist` implementations. The final HTML should be copied to the external folder, for example:
+ ```shell
+cp ../avro/build/avro-doc-1.12.0-SNAPSHOT/api/c/* content/en/docs/external/c/
+ ```
+
+## JIRA conventions
+
+Issue types: JIRA issues are categorized into different types such as bugs, improvements, new features, etc. Each issue type has a unique icon and a set of fields that are specific to that type.
+
+Workflow: JIRA issues follow a predefined workflow that defines the steps that an issue goes through from creation to resolution. Each step in the workflow can have its own set of conditions and actions.
+
+Priority: JIRA allows users to set priorities for issues to help determine the order in which they should be addressed. The priority can be set to one of five levels: Blocker, Critical, Major, Minor, and Trivial. Blocker is the highest priority and Trivial is the lowest priority.
+
+Labels: Labels are used to tag issues with keywords or phrases that can help with searching and filtering.
+
+Components: Components are used to group related issues together. For example, a software project might have components for the user interface, database, and networking.
+
+## See Also
+
+- [Apache contributor documentation](http://www.apache.org/dev/contributors.html)
+- [Apache voting documentation](http://www.apache.org/foundation/voting.html)
+
diff --git a/doc/content/en/project/License/_index.md b/doc/content/en/project/License/_index.md
new file mode 100755
index 00000000000..1840ccfc0aa
--- /dev/null
+++ b/doc/content/en/project/License/_index.md
@@ -0,0 +1,29 @@
+---
+title: "License"
+linkTitle: "License"
+weight: 11
+manualLink: https://www.apache.org/licenses/
+---
+
+
+
+Apache Avro project is licensed under [Apache Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0)
diff --git a/doc/content/en/project/Papers/_index.md b/doc/content/en/project/Papers/_index.md
new file mode 100755
index 00000000000..beaace16894
--- /dev/null
+++ b/doc/content/en/project/Papers/_index.md
@@ -0,0 +1,73 @@
+---
+title: "Papers"
+linkTitle: "Papers"
+weight: 3
+---
+
+
+** **
+
+**A Benchmark of JSON-compatible Binary Serialization Specifications**
+Jan 9 2022, by Juan Cruz Viotti, Mital Kinderkhedia.
+
+https://arxiv.org/abs/2201.03051
+
+** **
+
+**A Survey of JSON-compatible Binary Serialization Specifications**
+Jan 6 2022, by Juan Cruz Viotti, Mital Kinderkhedia.
+
+https://arxiv.org/abs/2201.02089
+
+** **
+
+**Putting Avro into Hive**
+Apr 2017, by S. Sreekanth, A Sai Ram Pramodhini, Ch S Likita, Chiluka Manisha.
+
+https://journals.pen2print.org/index.php/ijr/article/view/7377/0
+
+
+** **
+
+**Benchmarking Performance of Data Serialization and RPC Frameworks in Microservices Architecture: gRPC vs. Apache Thrift vs. Apache Avro**
+Oct 27 2016, by Nguyen, Thuy.
+
+https://aaltodoc.aalto.fi/handle/123456789/23386
+
+** **
+
+**Apache Avro**
+Sep 30 2016, by Deepak Vohra.
+
+https://link.springer.com/chapter/10.1007/978-1-4842-2199-0_7
+
+** **
+
+
+**Object serialization vs relational data modelling in Apache Cassandra: a performance evaluation**
+Apr 2015, by Valdemar Johansen.
+
+https://www.diva-portal.org/smash/get/diva2:839521/FULLTEXT02.pdf
+
+
+
+
diff --git a/doc/content/en/project/Privacy policy/_index.md b/doc/content/en/project/Privacy policy/_index.md
new file mode 100755
index 00000000000..0be9694d7d4
--- /dev/null
+++ b/doc/content/en/project/Privacy policy/_index.md
@@ -0,0 +1,29 @@
+---
+title: "Privacy policy"
+linkTitle: "Privacy policy"
+weight: 9
+manualLink: https://privacy.apache.org/policies/privacy-policy-public.html
+---
+
+
+
+Apache Avro project shares the same privacy policy as the [Apache Software Foundation](https://privacy.apache.org/policies/privacy-policy-public.html)
diff --git a/doc/content/en/project/Security/_index.md b/doc/content/en/project/Security/_index.md
new file mode 100755
index 00000000000..baa55da933f
--- /dev/null
+++ b/doc/content/en/project/Security/_index.md
@@ -0,0 +1,29 @@
+---
+title: "Security"
+linkTitle: "Security"
+weight: 10
+manualLink: https://www.apache.org/security/
+---
+
+
+
+Apache Avro project shares the same security policy as the [Apache Software Foundation](https://www.apache.org/security/)
diff --git a/doc/content/en/project/Thanks/_index.md b/doc/content/en/project/Thanks/_index.md
new file mode 100755
index 00000000000..b2ae7dff925
--- /dev/null
+++ b/doc/content/en/project/Thanks/_index.md
@@ -0,0 +1,29 @@
+---
+title: "Thanks"
+linkTitle: "Thanks"
+weight: 14
+manualLink: https://www.apache.org/foundation/thanks.html
+---
+
+
+
+Apache Avro project could not exist without the continued generous support from the community! We would like to take this opportunity to thank the ASF [Sponsors](https://www.apache.org/foundation/thanks.html).
diff --git a/doc/content/en/project/_index.md b/doc/content/en/project/_index.md
new file mode 100755
index 00000000000..19a78e4205d
--- /dev/null
+++ b/doc/content/en/project/_index.md
@@ -0,0 +1,35 @@
+---
+title: "Project"
+linkTitle: "Project"
+weight: 1
+layout: project
+menu:
+ main:
+ weight: 1
+aliases:
+- /linkmap.html
+
+---
+
+
+
+Apache Avro project is a member of the Apache Software Foundation!
diff --git a/doc/content/en/project/pmc onboarding guide/_index.md b/doc/content/en/project/pmc onboarding guide/_index.md
new file mode 100644
index 00000000000..22d7545eda8
--- /dev/null
+++ b/doc/content/en/project/pmc onboarding guide/_index.md
@@ -0,0 +1,34 @@
+---
+title: "PMC onboarding guide"
+linkTitle: "PMC onboarding guide"
+weight: 6
+---
+
+
+
+1. Use https://whimsy.apache.org to check that you were added to the PMC list properly
+2. Validate you are in the PMC group in JIRA and the Confluence Wiki
+3. Subscribe to private@avro.apache.org; you can use whimsy to do this for whatever email account you want, or send mail from that mail address to private-subscribe@
+4. You should also have access to https://reporter.apache.org, which seeds our board reports
+5. You can now access and read the private list archive (for linking to vote threads, etc.) at https://lists.apache.org/list.html?private@avro.apache.org
+6. Review the ASF PMC guides. There are a few, but you should re-read what the responsibilities are.
+7. The PMC keeps a set of valuable resources in https://svn.apache.org/repos/private/pmc
\ No newline at end of file
diff --git a/doc/content/en/search.md b/doc/content/en/search.md
new file mode 100644
index 00000000000..5ac34d9ea48
--- /dev/null
+++ b/doc/content/en/search.md
@@ -0,0 +1,25 @@
+---
+title: Search Results
+layout: search
+---
+
+
diff --git a/doc/docker-compose.yaml b/doc/docker-compose.yaml
new file mode 100644
index 00000000000..833d8839a0b
--- /dev/null
+++ b/doc/docker-compose.yaml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+version: "3.3"
+
+services:
+
+ site:
+ image: docsy/docsy-example
+ build:
+ context: .
+ command: server
+ ports:
+ - "1313:1313"
+ volumes:
+ - .:/src
diff --git a/doc/examples/example.py b/doc/examples/example.py
index 7b88c1cc195..f81bbe67cc4 100644
--- a/doc/examples/example.py
+++ b/doc/examples/example.py
@@ -16,18 +16,28 @@
# specific language governing permissions and limitations
# under the License.
#
+from pathlib import Path
+
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
-schema = avro.schema.parse(open("user.avsc").read())
+# read in the schema file
+schema_text = Path("user.avsc").read_text()
+# then parse it
+schema = avro.schema.parse(schema_text)
-writer = DataFileWriter(open("/tmp/users.avro", "w"), DatumWriter(), schema)
-writer.append({"name": "Alyssa", "favorite_number": 256, "WTF": 2})
-writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
-writer.close()
+# create a DataFileWriter to write data to a file
+users_file = Path("/tmp/users.avro")
+with users_file.open("wb") as users_fh, DataFileWriter(
+ users_fh, DatumWriter(), schema
+) as writer:
+ writer.append({"name": "Alyssa", "favorite_number": 256})
+ writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
-reader = DataFileReader(open("/tmp/users.avro", "r"), DatumReader())
-for user in reader:
- print user
-reader.close()
+# create a DataFileReader to read data from a file
+with users_file.open("rb") as users_fh, DataFileReader(
+ users_fh, DatumReader()
+) as reader:
+ for user in reader:
+ print(user)
diff --git a/doc/examples/java-example/pom.xml b/doc/examples/java-example/pom.xml
index d741dfe7a4f..66502ed0b24 100644
--- a/doc/examples/java-example/pom.xml
+++ b/doc/examples/java-example/pom.xml
@@ -26,6 +26,7 @@
java-example
https://maven.apache.org
+ 1.11.3
UTF-8
@@ -38,7 +39,7 @@
org.apache.avro
avro
- 1.10.2
+ ${avro.version}
@@ -48,36 +49,27 @@
maven-compiler-plugin
3.8.1
- 1.8
- 1.8
+ 11
+ 11
org.apache.avro
avro-maven-plugin
- 1.10.2
+ ${avro.version}
+
+ ${project.basedir}/../
+ ${project.basedir}/src/main/java/
+
generate-sources
schema
-
- ${project.basedir}/../
- ${project.basedir}/src/main/java/
-
-
- org.apache.maven.plugins
- maven-plugin
- 1.10.2
-
- 1.8
- 1.8
-
-
@@ -92,7 +84,7 @@
org.apache.avro
avro-maven-plugin
- [1.10.2,)
+ [${avro.version},)
schema
diff --git a/doc/examples/mr-example/pom.xml b/doc/examples/mr-example/pom.xml
index 2f64b35ec8e..be6b689b07c 100644
--- a/doc/examples/mr-example/pom.xml
+++ b/doc/examples/mr-example/pom.xml
@@ -28,6 +28,7 @@
mr-example
+ 1.11.3
UTF-8
@@ -38,14 +39,14 @@
maven-compiler-plugin
3.8.1
- 1.8
- 1.8
+ 11
+ 11
org.apache.avro
avro-maven-plugin
- 1.10.0
+ ${avro.version}
generate-sources
@@ -73,7 +74,7 @@
org.apache.avro
avro-maven-plugin
- [1.10.0,)
+ [${avro.version},)
schema
@@ -94,12 +95,12 @@
org.apache.avro
avro
- 1.10.2
+ ${avro.version}
org.apache.avro
avro-mapred
- 1.10.2
+ ${avro.version}
org.apache.hadoop
diff --git a/doc/layouts/404.html b/doc/layouts/404.html
new file mode 100644
index 00000000000..4d5d5158a77
--- /dev/null
+++ b/doc/layouts/404.html
@@ -0,0 +1,29 @@
+
+
+{{ define "main"}}
+
+
+
Not found
+
Oops! This page doesn't exist. Try going back to our home page .
+
+
+{{ end }}
diff --git a/doc/layouts/partials/favicons.html b/doc/layouts/partials/favicons.html
new file mode 100644
index 00000000000..7ff1b9f01c1
--- /dev/null
+++ b/doc/layouts/partials/favicons.html
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/doc/layouts/partials/footer.html b/doc/layouts/partials/footer.html
new file mode 100644
index 00000000000..0638c0074de
--- /dev/null
+++ b/doc/layouts/partials/footer.html
@@ -0,0 +1,63 @@
+
+
+{{ $links := .Site.Params.links }}
+
+
+
+
+ {{ with $links }}
+ {{ with index . "user"}}
+ {{ template "footer-links-block" . }}
+ {{ end }}
+ {{ end }}
+
+
+ {{ with $links }}
+ {{ with index . "developer"}}
+ {{ template "footer-links-block" . }}
+ {{ end }}
+ {{ end }}
+
+
+ {{ with .Site.Params }}
© {{ now.Year}} {{ .copyright }} {{ T "footer_all_rights_reserved" }} {{ end }}
+ {{ if not .Site.Params.ui.footer_about_disable }}
+ {{ with .Site.GetPage "about" }}
{{ .Title }}
{{ end }}
+ {{ end }}
+
Apache Avro, Avro™, Apache®, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.
+
+
+ {{ with .Site.Params }}
{{ end }}
+
+
+
+
+{{ define "footer-links-block" }}
+
+ {{ range . }}
+
+
+
+
+
+ {{ end }}
+
+{{ end }}
diff --git a/doc/layouts/partials/navbar-asf-links.html b/doc/layouts/partials/navbar-asf-links.html
new file mode 100644
index 00000000000..54e3b8dcf08
--- /dev/null
+++ b/doc/layouts/partials/navbar-asf-links.html
@@ -0,0 +1,29 @@
+
+
+
+ ASF links
+
+
diff --git a/doc/layouts/partials/navbar-docs-selector.html b/doc/layouts/partials/navbar-docs-selector.html
new file mode 100644
index 00000000000..a60c934cb8c
--- /dev/null
+++ b/doc/layouts/partials/navbar-docs-selector.html
@@ -0,0 +1,29 @@
+
+
+
+ Documentation
+
+
diff --git a/doc/layouts/partials/navbar.html b/doc/layouts/partials/navbar.html
new file mode 100644
index 00000000000..a57cdd1f31c
--- /dev/null
+++ b/doc/layouts/partials/navbar.html
@@ -0,0 +1,61 @@
+
+
+{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }}
+
+
+{{/* AVRO-3555: Don't inline the existing logo for now.
+ {{ if .Site.Params.ui.navbar_logo }}{{ with resources.Get "icons/logo.svg" }}{{ ( . | minify).Content | safeHTML }}{{ end }}{{ end }} {{ .Site.Title }}
+*/}}
+ {{ .Site.Title }}
+
+
+
+
+ {{ $p := . }}
+ {{ range .Site.Menus.main }}
+
+ {{ $active := or ($p.IsMenuCurrent "main" .) ($p.HasMenuCurrent "main" .) }}
+ {{ with .Page }}
+ {{ $active = or $active ( $.IsDescendant .) }}
+ {{ end }}
+ {{ $pre := .Pre }}
+ {{ $post := .Post }}
+ {{ $url := urls.Parse .URL }}
+ {{ $baseurl := urls.Parse $.Site.Params.Baseurl }}
+ {{ with .Pre}}{{ $pre }}{{ end }}{{ .Name }} {{ with .Post}}{{ $post }}{{ end }}
+
+ {{ end }}
+
+ {{ partial "navbar-docs-selector.html" . }}
+
+
+ {{ partial "navbar-asf-links.html" . }}
+
+ {{ if (gt (len .Site.Home.Translations) 0) }}
+
+ {{ partial "navbar-lang-selector.html" . }}
+
+ {{ end }}
+
+
+ {{ partial "search-input.html" . }}
+
diff --git a/doc/layouts/project/baseof.html b/doc/layouts/project/baseof.html
new file mode 100644
index 00000000000..9ec1e4d1793
--- /dev/null
+++ b/doc/layouts/project/baseof.html
@@ -0,0 +1,53 @@
+
+
+
+
+
+ {{ partial "head.html" . }}
+
+
+
+ {{ partial "navbar.html" . }}
+
+
+
+
+
+
+
+ {{ partial "version-banner.html" . }}
+ {{ if not .Site.Params.ui.breadcrumb_disable }}{{ partial "breadcrumb.html" . }}{{ end }}
+ {{ block "main" . }}{{ end }}
+
+
+
+ {{ partial "footer.html" . }}
+
+ {{ partial "scripts.html" . }}
+
+
diff --git a/doc/layouts/project/baseof.print.html b/doc/layouts/project/baseof.print.html
new file mode 100644
index 00000000000..b74e38c0e2e
--- /dev/null
+++ b/doc/layouts/project/baseof.print.html
@@ -0,0 +1,47 @@
+
+
+
+
+
+ {{ partial "head.html" . }}
+
+
+
+ {{ partial "navbar.html" . }}
+
+
+
+
+
+
+
+
+ {{ block "main" . }}{{ end }}
+
+
+
+ {{ partial "footer.html" . }}
+
+ {{ partial "scripts.html" . }}
+
+
diff --git a/doc/layouts/project/list.html b/doc/layouts/project/list.html
new file mode 100644
index 00000000000..885d754e559
--- /dev/null
+++ b/doc/layouts/project/list.html
@@ -0,0 +1,52 @@
+{{ define "main" }}
+
+
+
{{ .Title }}
+ {{ with .Params.description }}
{{ . | markdownify }}
{{ end }}
+
+ {{ $context := . }}
+ {{ if .Site.Params.Taxonomy.taxonomyPageHeader }}
+ {{ range $index, $taxo := .Site.Params.Taxonomy.taxonomyPageHeader }}
+ {{ partial "taxonomy_terms_article.html" (dict "context" $context "taxo" $taxo ) }}
+ {{ end }}
+ {{ else }}
+ {{ range $taxo, $taxo_map := .Site.Taxonomies }}
+ {{ partial "taxonomy_terms_article.html" (dict "context" $context "taxo" $taxo ) }}
+ {{ end }}
+ {{ end }}
+ {{ if (and (not .Params.hide_readingtime) (.Site.Params.ui.readingtime.enable)) }}
+ {{ partial "reading-time.html" . }}
+ {{ end }}
+
+ {{ .Content }}
+ {{ partial "section-index.html" . }}
+ {{ if (and (not .Params.hide_feedback) (.Site.Params.ui.feedback.enable) (.Site.GoogleAnalytics)) }}
+ {{ partial "feedback.html" .Site.Params.ui.feedback }}
+
+ {{ end }}
+ {{ if (.Site.DisqusShortname) }}
+
+ {{ partial "disqus-comment.html" . }}
+ {{ end }}
+ {{ partial "page-meta-lastmod.html" . }}
+
+{{ end }}
diff --git a/doc/layouts/project/list.print.html b/doc/layouts/project/list.print.html
new file mode 100644
index 00000000000..33fa25d9fe1
--- /dev/null
+++ b/doc/layouts/project/list.print.html
@@ -0,0 +1,23 @@
+{{ define "main" }}
+
+{{ partial "print/render" . }}
+{{ end }}
diff --git a/doc/layouts/project/single.html b/doc/layouts/project/single.html
new file mode 100644
index 00000000000..bbc65acfe9d
--- /dev/null
+++ b/doc/layouts/project/single.html
@@ -0,0 +1,24 @@
+
+
+{{ define "main" }}
+{{ .Render "content" }}
+{{ end }}
diff --git a/doc/layouts/shortcodes/avro_version.html b/doc/layouts/shortcodes/avro_version.html
new file mode 100644
index 00000000000..04a4bf5ef24
--- /dev/null
+++ b/doc/layouts/shortcodes/avro_version.html
@@ -0,0 +1,24 @@
+{{/*
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+*/}}{{/*
+
+This file must not have a trailing newline.
+
+*/}}{{ $.Site.Params.avroversion }}
\ No newline at end of file
diff --git a/doc/layouts/shortcodes/project_logo.html b/doc/layouts/shortcodes/project_logo.html
new file mode 100644
index 00000000000..42503d33a25
--- /dev/null
+++ b/doc/layouts/shortcodes/project_logo.html
@@ -0,0 +1,22 @@
+{{/*
+
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+
+*/}}{{/*
+This file should not have a trailing newline.
+*/}}{{ with resources.Get "/icons/logo-text.svg" }}{{ (.|minify).Content | safeHTML }}{{ end }}
diff --git a/doc/package.json b/doc/package.json
new file mode 100644
index 00000000000..02c3dc72247
--- /dev/null
+++ b/doc/package.json
@@ -0,0 +1,7 @@
+{
+ "devDependencies": {
+ "autoprefixer": "^10.4.0",
+ "postcss": "^8.3.7",
+ "postcss-cli": "^11.0.0"
+ }
+}
diff --git a/doc/src/cli.xconf b/doc/src/cli.xconf
deleted file mode 100644
index 85712ac715b..00000000000
--- a/doc/src/cli.xconf
+++ /dev/null
@@ -1,328 +0,0 @@
-[Cocoon CLI configuration; the XML markup was lost in extraction. The
- surviving values are ".", "WEB-INF/cocoon.xconf", "../tmp/cocoon-work",
- "../site", "index.html", and the URI pattern "*/*".]
-
diff --git a/doc/src/content/htmldocs/canonical-completeness.html b/doc/src/content/htmldocs/canonical-completeness.html
deleted file mode 100644
index 0827d57812e..00000000000
--- a/doc/src/content/htmldocs/canonical-completeness.html
+++ /dev/null
@@ -1,204 +0,0 @@
-
-
-
-Completeness of "Parsing Canonical Form"
-
-
-
-Completeness of "Parsing Canonical Form"
-
-1.0 Introduction
-
-One of the defining characteristics of Avro is that a reader is assumed to have the "same" schema used by the writer of the data the reader is reading. This assumption leads to a data format that's compact and amenable to many forms of schema evolution. However, there are nuances to defining exactly what it means for the reader to have "the same" schema used by the writer. We want to allow, for example, trivial transformations, such as the insertion of whitespace. But we can't allow transformations that change the real meaning of schemas, such as a reordering of fields in a record.
-
-To clearly define what it means for a reader to have "the same" schema as a writer, the Avro specification defines Parsing Canonical Form (PCF), a set of transformations on Avro schemas that strip away irrelevancies (e.g., "doc" attributes) and normalize the JSON text (e.g., dealing with whitespace). Two schemas are defined to be "the same" as far as a reader is concerned if and only if their PCFs are textually equal.
-
-We believe that PCF is sound and complete. Soundness means that the PCF of a schema is logically equivalent to the original form, i.e., we can use the PCF in place of the original form without introducing bugs. Completeness is "maximal soundness:" if two schemas are logically equivalent, then their PCFs will be textually identical. The Avro specification claims that PCF is complete when it says: "[if two schemas have the same PCF, then] there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writer using the other original schema."
-
-We believe that the transformations that define PCF are "self-evidently" sound to people familiar with Avro. For example, fixing the order of fields in a JSON object, or eliminating irrelevant attributes like doc, or using the simple int in place of {"type":"int"} clearly don't change the meaning of a schema.
-
-Completeness, on the other hand, is much less obvious. How do we know that there aren't two logically equivalent schemas that happen to reduce to different canonical forms? All it takes is one such pair to foil our claim of completeness.
-
-In general, completeness properties like this can be tricky to prove. It turns out that, while soundness is critical to us, completeness is not. If two schemas are operationally equivalent (i.e., a reader can't tell their output apart), but we accidentally treat them as if they are different, then typically all that happens is that we'll do more work. For example, we might generate a decoder object to decode some incoming data when it turns out that we had already cached a decoder object that could do the job. This is not likely to happen often, and thus incompleteness isn't a huge problem.
-
-At the same time, if we knew that our canonical forms were complete, then we might take advantage of that fact in some circumstances (e.g., to serialize schemas). Also, the Schema.equals(Object) method provided in the Avro implementation makes many of the same assumptions made in the PCF definition. Thus, a completeness proof for our canonicalization would give us confidence in the correctness of this equality algorithm. So this issue is not entirely academic.
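
As a concrete illustration, the Avro Java library exposes Parsing Canonical Form through org.apache.avro.SchemaNormalization. Here is a minimal sketch (the schema literals are just examples) of checking that two textually different but equivalent schemas share one PCF:

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaNormalization;

    public class PcfDemo {
      public static void main(String[] args) {
        // The linked-list schema, compact form.
        Schema s1 = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"list\",\"fields\":["
                + "{\"name\":\"value\",\"type\":\"int\"},"
                + "{\"name\":\"tail\",\"type\":[\"null\",\"list\"]}]}");
        // Same schema with extra whitespace, a "doc" attribute, and the
        // long form {"type":"int"} in place of the simple "int".
        Schema s2 = new Schema.Parser().parse(
            "{ \"type\": \"record\", \"name\": \"list\", \"doc\": \"a linked list\","
                + " \"fields\": [ { \"name\": \"value\", \"type\": { \"type\": \"int\" } },"
                + " { \"name\": \"tail\", \"type\": [ \"null\", \"list\" ] } ] }");
        // Equal PCFs: a reader cannot distinguish data written with s1 from s2.
        System.out.println(SchemaNormalization.toParsingForm(s1)
            .equals(SchemaNormalization.toParsingForm(s2))); // prints "true"
      }
    }
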
-
-We haven't worked out a full, formal proof (we hope someone from the community will step up to that task!). However, we've been thinking about it quite a bit, and we thought we'd share our thoughts so far.
-
-
-2.0 Completeness argument for Parsing Canonical Form
-
-Our formalization of Avro schemas would be based on interpreting them as grammars. In this interpretation, Avro schemas are grammars that generate tagged data streams. Consider, for example, the following schema for a linked-list:
-
- {"type":"record", "name":"list", "fields":[
- {"name":"value", "type":"int"},
- {"name":"tail", "type":["null", "list"]}
- ]}
-
-Interpreted as a grammar, it can generate a tagged data-stream that looks like this:
-
- [record,"list"][field,"value"][int,10][field,"tail"][union,1]
- [record,"list"][field,"value"][int,22][field,"tail"][union,0]
-
-(this is a two-record linked list whose first cell contains the value "10" and second cell the value "22"). Avro schemas can trivially be interpreted as grammars for such tagged data streams. Formal proofs involving Avro schemas can be carried out as proofs about languages and grammars.
-
-So what does it mean for the canonical form of a schema to be "complete?" Let L(S) denote the language generated by the Avro schema S, and C(S) denote the canonical form of the schema. The canonicalization is complete if:
-
-For all schemas S1 and S2,
-    L(S1) = L(S2) ⇒ C(S1) = C(S2)
-
-That is, for any two schemas that generate the same language, their canonicalizations are textually equivalent.
-
-To prove this, we need to define some functions:
-
-J is a variable name we often use to denote a JSON expression representing an Avro schema
-C(J) is the Parsing Canonical Form of J as defined in the Avro specification
-P(J) is the ASG for an Avro schema generated by parsing J (think of P(J) as a Schema Java object)
-S is a variable name we often use to denote such ASGs
-L(S) is the language generated by a schema ASG
-
-With all these symbols defined, our completeness criterion is now rendered as:
-
-∀ J1, J2: L(P(J1)) = L(P(J2)) ⇒ C(J1) = C(J2)
-
-We'll prove this by breaking it into two parts:
-
-(1): ∀ S1, S2: L(S1) = L(S2) ⇒ S1 ≅ S2
-(2): ∀ J1, J2: P(J1) ≅ P(J2) ⇒ C(J1) = C(J2)
-
-
-In this two-step decomposition, we've introduced a new operator ≅, which compares the ASGs of two Avro schemas. The ASG of an Avro schema can be viewed as a rooted, labeled, directed graph. Because Avro schemas can be recursive, these graphs can be cyclic. The ≅ operator is "true" between two ASGs when the set of minimal labeled paths (no cycles, starting from the root) on the two ASGs are the same. (The Schema.equals(Object) method in the Avro implementation computes something close to this ≅ relation, except that ≅ ignores "irrelevant" attributes like doc and aliases.)
-
-It turns out that, implicit in the Avro Specification, there are "canonicalization" rules that are important to our proof of completeness. In particular, the Avro Specification says that a name must be defined "before" it is used, and that a name cannot be defined more than once in a schema. Consider the following redefinition of the linked-list schema, for example:
-
- {"type":"record", "name":"list", "fields":[
- {"name":"value", "type":"int"},
- {"name":"tail",
- "type":["null", {"type":"record", "name":"list", "fields":[
- {"name":"value", "type":"int"},
- {"name":"tail", "type":["null", "list"]}]}]}
- ]}
-
-In this redefinition, we've "unpacked" the recursion in the linked list by one level. In some sense, this is a perfectly fine definition of a linked list, and is operationally equivalent to the more compact version given earlier. So it makes sense that our claim of completeness is dependent upon this kind of "unpacking" not occurring in real schemas.
-
-To deal with this issue in our proof, we pretend that the Avro specification does not require that named schemas be defined just once, and be defined "before" they are used. Rather, we treat this requirement as an additional transformation rule in the definition of Parsing Canonical Form:
-
- [MINIMIZE] Eliminate redundant definitions of named types (records, enums, and fixeds). That is, for each named type, have a defining instance that appears at first use, and then use just the name (rather than the full schema) everywhere else.
-
-(As in the Avro spec, "first use" is defined as the first occurrence in a depth-first, left-to-right traversal of the schema abstract-syntax graph (ASG).)
-
-Getting back to the proof of (1) and (2) from above, we need to introduce more functions:
-
-P(J) = PA(PJ(J)) -- decomposing the parser into:
-    PJ, the JSON parser
-    PA, the Avro parser (which takes JSON ASTs as input)
-C(J) = CJ(CA(CM(J))) -- decomposing canonicalization into:
-    CM(J), the MINIMIZE step
-    CA(J), the Avro normalizations
-    CJ(J), the JSON normalizations
-M(S) is the "named-schema NFA minimization" of S
-
-"Named-schema NFA minimization" is similar to general NFA minimization, except that we only collapse nodes and edges related to named schema entities and not other nodes. For example, we would not collapse the nodes associated with int or union schemas.
-
- Our proof of (1) looks like this (this proof refers to lemmas (3) and (4), which are defined later):
-
-    ∀ S1, S2: L(S1) = L(S2)
-      ⇒ M(S1) = M(S2)                        [by (3)]
-      ⇒ S1 ≅ S2                              [by (4)]
-
-Here's the proof of (2) (this proof refers to lemmas (4)-(7), which are defined later):
-
-    ∀ J1, J2: P(J1) ≅ P(J2)
-      ⇒ M(P(J1)) = M(P(J2))                  [by (4)]
-      ⇒ P(CM(J1)) = P(CM(J2))                [by (5)]
-      ⇒ PA(PJ(CM(J1))) = PA(PJ(CM(J2)))      [by definition of P]
-      ⇒ PJ(CA(CM(J1))) = PJ(CA(CM(J2)))      [by (6)]
-      ⇒ CJ(CA(CM(J1))) = CJ(CA(CM(J2)))      [by (7)]
-      ⇒ C(J1) = C(J2)                        [by definition of C]
-
-Here are the lemmas needed above:
-
-(3): ∀ S1, S2: L(S1) = L(S2) ⇒ M(S1) = M(S2)
-
-(4): ∀ S1, S2: M(S1) = M(S2) ⇔ S1 ≅ S2
-
-(5): ∀ J: M(P(J)) = P(CM(J))
-
-(6): ∀ J1, J2: PA(PJ(J1)) = PA(PJ(J2)) ⇒ PJ(CA(J1)) = PJ(CA(J2))
-
-(7): ∀ J1, J2: PJ(J1) = PJ(J2) ⇒ CJ(J1) = CJ(J2)
-
-Proving the lemmas:
-
-(3) This says that the language-related part of our canonicalization is complete, i.e., M finds the equivalence classes of L. I would imagine one could prove this by modifying a proof that the equality of LL(1) grammars is a decidable problem. I haven't gotten very far in showing this, however.
-(4) The right-hand direction of this follows from the definition of minimization. The left-hand direction seems correct, but I'm not sure how to prove it (I think it also follows from the definition of minimization).
-(5) This shows that the MINIMIZE step (which is done on JSON expressions) is equivalent to doing a named-schema NFA minimization on the ASG representation. This should follow pretty directly from a detailed definition of M, if we provided one.
-(6) This says that the Avro-related part of our canonicalization is complete, i.e., that CA finds equivalence classes of PA.
-(7) This says that the JSON-related part of our canonicalization is complete, i.e., that CJ finds equivalence classes of PJ. Note that, implicitly, this lemma ranges over only JSON expressions that are legal Avro schemas with no doc strings or default values, and thus (for example) doesn't need to worry about normalization of floating-point literals.
-
-
-
-3.0 Concluding remarks
-
-Engineers have a history of running ahead of formal mathematical proofs when things "seem correct" to them. In this case, it seems pretty obvious that Parsing Canonical Form is complete as well as sound, and we should go ahead and treat it as such. At the same time, formal proofs often turn up corner cases and exceptions that are valuable to document and account for. Thus, it'd be nice if someone could provide a better completeness argument than we've been able to produce so far.
-
-
-
diff --git a/doc/src/content/htmldocs/performance-testing.html b/doc/src/content/htmldocs/performance-testing.html
deleted file mode 100644
index d98992e4118..00000000000
--- a/doc/src/content/htmldocs/performance-testing.html
+++ /dev/null
@@ -1,173 +0,0 @@
-
-
-
-Testing performance improvements
-
-
-
-
-(Note: This document pertains only to the Java implementation of Avro.)
-
-
-1.0 Introduction
-
-Recent work on improving the performance of "specific record" (AVRO-2090 and AVRO-2247) has highlighted the need for a benchmark that can be used to test the validity of alleged performance "improvements."
-
- As a starting point, the Avro project has a class called Perf (in the test source of the ipc subproject). Perf is a command-line tool containing close to 70 individual performance tests. These tests include tests for reading and writing primitive values, arrays, and maps, plus tests for reading and writing records through all of the APIs (generic, specific, reflect).
-
- When using Perf for some recent performance work, we encountered two problems. First, because it depends on build artifacts from across the Avro project, it can be tricky to invoke. Second, and more seriously, independent runs of the tests in Perf can vary in performance by as much as 40%. While typical variance is less than that, the variance is high enough that it makes it impossible to tell if a change in performance is simply this noise, or can be properly attributed to a proposed optimization.
-
- This document addresses both problems: the usability problem in Section 2 and the variability issue in Section 3. Regarding the variability issue, as you will see, we haven't really been able to manage it in a fundamental manner. As suggested by Zoltan Farkas, we should look into porting Perf over to the Java Microbenchmark Harness (JMH).
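
To give a flavor of what such a port might look like, here is a minimal sketch of one Perf-style test written against JMH. The schema and benchmark shape are illustrative, not taken from Perf itself:

    import java.io.ByteArrayOutputStream;
    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.avro.io.BinaryEncoder;
    import org.apache.avro.io.EncoderFactory;
    import org.openjdk.jmh.annotations.Benchmark;
    import org.openjdk.jmh.annotations.Scope;
    import org.openjdk.jmh.annotations.Setup;
    import org.openjdk.jmh.annotations.State;

    @State(Scope.Thread)
    public class GenericWriteBench {
      private Schema schema;
      private GenericRecord record;
      private GenericDatumWriter<GenericRecord> writer;

      @Setup
      public void setup() {
        // An illustrative one-field record; Perf uses larger, generated datasets.
        schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"R\",\"fields\":"
                + "[{\"name\":\"f\",\"type\":\"int\"}]}");
        record = new GenericData.Record(schema);
        record.put("f", 42);
        writer = new GenericDatumWriter<>(schema);
      }

      @Benchmark
      public byte[] genericWrite() throws Exception {
        // JMH handles the warmup, forking, and statistics that Perf manages by hand.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        writer.write(record, encoder);
        encoder.flush();
        return out.toByteArray();
      }
    }
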
-
-
-2.0 Invoking Perf
-
-2.1 Simple invocation
-
-Here is the easiest way we found to directly invoke Perf.
-
-As mentioned in the Introduction, Perf is dependent upon build artifacts from some of the other Avro subprojects. When you invoke Perf, it should be invoked with your most recent build of those artifacts (assuming you're performance-testing your current work). We have found that the easiest way to ensure the proper artifacts are used is to use Maven to invoke Perf.
-
-The recipe for using Maven in this way is simple. First, from the lang/java directory, you need to build and install Avro:
-
- mvn clean install
-
-(You can add -DskipTests to the above command line if you don't need to run the test suite.) When this is done, change your working directory to the lang/java/ipc directory. From there, you can invoke Perf with the following command line:
-
-
- mvn exec:java -Dexec.classpathScope=test -Dexec.mainClass=org.apache.avro.io.Perf -Dexec.args="..."
-
-
-The exec.args string contains the arguments you want to pass through to the Perf.main function.
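
For example, to run only the specific-record tests (using the -Sf flag described in Appendix A):

    mvn exec:java -Dexec.classpathScope=test -Dexec.mainClass=org.apache.avro.io.Perf -Dexec.args="-Sf"
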
-
-To speed up your edit-compile-test loop, you can do a selective build of Avro in addition to skipping tests:
-
-
- mvn clean && mvn -pl "avro,compiler,maven-plugin,ipc" install -DskipTests
-
-
-
-2.2 Using the run-perf.sh script
-
-If you're using Perf, chances are that you want to compare the performance of a proposed optimization against the performance of a baseline (that baseline most likely being the current master branch of Avro). Generating this comparative data can be tedious if you're running Perf by hand. To relieve this tedium, you can use the run-perf.sh script instead (found in the share/test directory from the Avro top-level directory).
-
-To use this script, you put different implementations of Avro onto different branches of your Avro git repository. One of these branches is designated the "baseline" branch and the others are the "treatment" branches. The script will run the baseline and all the treatments, and will generate a CSV file containing a comparison of the treatments against the baseline.
-
-Running run-perf.sh --help will output a detailed manual-page for this script. Appendix A of this document contains sample invocations of this test script for different use cases.
-
-NOTE: as mentioned in run-perf.sh --help, this script is designed to be run from the lang/java/ipc directory, which is the Maven project containing the Perf program.
-
-
-
-3.0 Managing variance
-
-As mentioned in the introduction, we tried a number of different mechanisms to reduce variance, including:
-
- Varying org.apache.avro.io.perf.count, org.apache.avro.io.perf.cycles, and org.apache.avro.io.perf.use-direct, as well as the number of times we run Perf.java within a single "run" of a test.
-
-
- Taking the minimum times across runs rather than the maximum times; using the second or third run as a baseline rather than the first; and using statistical methods to eliminate outlying values.
-
-
- Modifying the code slightly, for example: starting the timer of a cycle after, rather than before, encoders or decoders are constructed; caching encoders and decoders; and reusing record objects during read tests rather than constructing new ones for each record being read.
-
-
- Using Docker's --cpuset-cpus flag to force the tests onto a single core.
-
-
- Using a dedicated EC2 instance (c5d.2xlarge).
-
-Of the above, the only change that made a significant difference was the last: in going from a laptop and a desktop computer to a dedicated EC2 instance, we went from over 70 tests (out of 200) with a variance of 5% or more between runs down to 35. As mentioned in the introduction, we should switch to a framework like JMH to attack this problem more fundamentally.
-
- If you want to set up your own EC2 instance for testing, here's how we did it. We launched a dedicated EC2 c5d.2xlarge instance from the AWS console, using the "Amazon Linux 64-bit HVM GP2" AMI. We logged into this instance and ran the following commands to install Docker and Git (we did all our Avro building and testing inside the Docker image):
-
- sudo yum update
- sudo yum install -y git-all
- git config --global user.name "Your Name"
- git config --global user.email email-address-used@github.com
- git config --global core.editor emacs
- sudo yum install -y docker
- sudo usermod -aG docker ec2-user ## Need to log back in for this to take effect
- sudo service docker start
-
-At this point you can checkout Avro and launch your Docker container:
-
- git clone https://github.com/apache/avro.git
- cd avro
- screen
- ./build.sh docker --args "--cpuset-cpus 2,6"
-
-Note the use of screen here: executions of run-perf.sh can take a few hours, depending on the configuration. By running it inside of screen, you are protected from an SSH disconnection causing run-perf.sh to prematurely terminate.
-
-The --args flag in the last command deserves some explanation. In general, --args allows you to pass additional arguments to the docker run command executed inside build.sh. In this case, the --cpuset-cpus flag tells Docker to schedule the container exclusively on the listed (virtual) CPUs. We identified vCPUs 2 and 6 using the lscpu Linux command:
-
- [ec2-user@ip-0-0-0-0 avro]$ lscpu --extended
- CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE
- 0 0 0 0 0:0:0:0 yes
- 1 0 0 1 1:1:1:0 yes
- 2 0 0 2 2:2:2:0 yes
- 3 0 0 3 3:3:3:0 yes
- 4 0 0 0 0:0:0:0 yes
- 5 0 0 1 1:1:1:0 yes
- 6 0 0 2 2:2:2:0 yes
- 7 0 0 3 3:3:3:0 yes
-
-Notice that (v)CPUs 2 and 6 are both on core 2: it's sufficient to schedule the container on a single core, rather than restricting it to a single vCPU. One final tip: to confirm that your container is running on the expected CPUs, run top and then press the 1 key -- this will show you the load on each individual CPU.
-
-
-Appendix A: Sample uses of run-perf.sh
-
-A detailed explanation of run-perf.sh is printed when you give it the --help flag. To help you more quickly understand how to use run-perf.sh, we present here a few examples of how we used it in our recent testing efforts.
-
-
-To summarize, you invoke it as follows:
-
- ../../../share/test/run-perf.sh [--out-dir D] \
- [--perf-args STRING] [-Dkey=value]* [--] \
- [-Dkey=value]* branch_baseline[:name_baseline_run] \
- [-Dkey=value]* branch_1[:name_treatment_run_1] \
- ...
- [-Dkey=value]* branch_n[:name_treatment_run_n]
-
-The path given here is relative to the lang/java/ipc directory, which needs to be the current working directory when calling this script. The script executes multiple runs of testing. The first run is called the baseline run; the subsequent runs are the treatment runs. Each run consists of four identical executions of Perf.java. The running times for each Perf.java test are averaged to obtain the final running time for the test. For each treatment run, the final running times for each test are compared, as a percentage, to the running time for the test in the baseline run. These percentages are output in the file summary.csv.
-
-The following invocation is what we used to measure the variance of Perf.java:
-
-../../../share/test/run-perf.sh --out-dir ~/calibration \
- -Dorg.apache.avro.specific.use_custom_coders=true \
- AVRO-2269:baseline AVRO-2269:run1 AVRO-2269:run2 AVRO-2269:run3
-
-In this invocation, the baseline run and all three treatment runs come from the same Git branch: AVRO-2269. We need to give a name to each run: in this case the runs have been named "baseline"--the baseline run--and "run1", "run2", and "run3"--the treatment runs. Note that the name of the Git branch to be used for a run must always be provided, but the name for the run itself (e.g., "baseline") is optional. If a name for the run is not provided, then the name of the Git branch will be used as the name of the run. However, each run must have a unique name, so in this example we had to explicitly name the runs since they all use the same branch.
-
-run-perf.sh uses Maven to invoke Perf.java. The -D flag is used to pass system properties to Maven, which in turn passes them through to Perf.java. In the example above, we use this flag to turn on the custom-coders feature recently checked into Avro. Note that initial -D flags will be passed to all runs, while -D switches that come just before the name of the Git branch of a run apply only to that run. In the case of the baseline run, which comes first, if you want to pass -D flags to just that run, then use the -- flag to indicate that all global parameters for run-perf.sh have been provided, followed by the -D flags you want to pass to only the baseline run.
-
-
-Finally, note that run-perf.sh generates a lot of intermediate files as well as the final summary.csv file. Thus, it is recommended that the output of each execution of run-perf.sh is sent to a dedicated directory, provided by the --out-dir flag. If that directory does not exist, it will be created. (Observe that run-perf.sh outputs a file called command.txt containing the full command line used to invoke it. This can be helpful if you run a lot of experiments and forget the detailed setup of some of them along the way.)
-
-
-The next invocation is what we used to ensure that the new "custom coders" optimization for specific records does indeed improve performance:
-
-../../../share/test/run-perf.sh --out-dir ~/retest-codegen \
- --perf-args "-Sf" \
- AVRO-2269:baseline \
- -Dorg.apache.avro.specific.use_custom_coders=true AVRO-2269:custom-coders
-
-In this case, unlike the previous one, the -D flag that turns on the use of custom coders is applied specifically to the treatment run, and not globally. Also, since this flag only affects the Specific Record case, we use the --perf-args flag to pass additional arguments to Perf.java; in this case, the -Sf flag tells Perf.java to run just the specific-record related tests and not the entire test suite.
-
-This last example shows how we checked the performance impact of two new feature-branches we've been developing:
-
-../../../share/test/run-perf.sh --out-dir ~/new-branches \
- -Dorg.apache.avro.specific.use_custom_coders=true \
- AVRO-2269:baseline combined-opts full-refactor
-
-In this case, once again, we turn on custom coders for all runs, and again the Git branch AVRO-2269 is used for our baseline run. However, this time the treatment runs come from two other Git branches: combined-opts and full-refactor. We didn't provide run names for these runs because the Git branch names were fine to use as run names (we explicitly named the first run "baseline" not because we had to, but because we like the convention of using that name).
-
-Although we didn't state it before, in preparing for a run, run-perf.sh will check out the Git branch to be used for the run and use mvn install to build and install it. It does this for each branch, so the invocation just given will check out and build three different branches during its overall execution. (As an optimization, if one run uses the same branch as the previous run, then the branch is not checked out or rebuilt between runs.)
-
-
-
diff --git a/doc/src/content/mddocs/refactoring-resolution.md b/doc/src/content/mddocs/refactoring-resolution.md
deleted file mode 100644
index 860f5c802db..00000000000
--- a/doc/src/content/mddocs/refactoring-resolution.md
+++ /dev/null
@@ -1,143 +0,0 @@
-
-
-# Refactoring Resolution
-by Raymie Stata
-
-
-## Problem statement
-
-In the early days of Avro, Schema resolution was implemented in a
-number of places, e.g., `GenericDatumReader` as well as
-`ResolvingGrammarGenerator`. However, Schema resolution is
-complicated and thus error prone. Multiple implementations were hard
-to maintain, both for correctness and for updates to the
-schema-resolution spec.
-
-To address the problems of multiple implementations, we converged on
-the implementation found in `ResolvingGrammarGenerator` (together with
-`ResolvingDecoder`) as the single implementation, and refactored other
-parts of Avro to depend on this implementation.
-
-Converging on a single implementation solved the maintenance problem,
-and has served well for a number of years. However, the logic in
-`ResolvingGrammarGenerator` does _two_ things: it contains the logic
-for _schema resolution_ itself, and it contains the logic for
-embedding that logic into a grammar that can be used by
-`ResolvingDecoder`.
-
-Recently, Avro contributors have wanted access to the logic of schema
-resolution _apart from_ `ResolvingDecoder`. For example,
-[AVRO-2247](https://issues.apache.org/jira/browse/AVRO-2247) proposes
-a new, faster approach to implementing `DatumReaders`. The initial
-implementation of AVRO-2247 was forced to reimplement Schema
-resolution -- going back to the world of multiple implementations --
-because there isn't a reusable implementation of our resolution logic.
-
-Similarly, as I've been working on extending the performance
-improvements of
-[AVRO-2090](https://issues.apache.org/jira/browse/AVRO-2090) when
-writing data, I've been thinking about the possibilities of dynamic
-code generation. Here too, I can't reuse `ResolvingGrammarGenerator`,
-which would force me to reimplement the schema-resolution logic.
-
-
-## Proposed solution
-
-We introduce a new class to encapsulate the logic of schema resolution
-independent from the logic of implementing schema resolution as a
-`ResolvingDecoder` grammar. In particular, we introduce a new class
-`org.apache.avro.Resolver` with the following key function:
-
- public static Resolver.Action resolve(Schema writer, Schema reader);
-
-The subclasses of `Resolver.Action` encapsulate various ways to
-resolve schemas. The `resolve` function walks the reader's and
-writer's schema parse trees together, and generates a tree of
-`Resolver.Action` nodes indicating how to resolve each subtree of the
-writer's schema into the corresponding subtree of the reader's.
-(A sketch of walking this tree appears after the list of subclasses
-below.)
-
-`Resolver.Action` has the following subclasses:
-
- * `DoNothing` -- nothing needs to be done to resolve the writer's
- data into the reader's schema. That is, the reader should read
- the data written by the writer as if it were written using the
- reader's own schema. This can be generated for any kind of
- schema -- for example, if the reader's and writer's schemas are
- the exact same union schema, a `DoNothing` will be generated --
- so consumers of `Resolver` need to be able to handle `DoNothing`
- for all schemas.
-
- * `Promote` -- the writer's value needs to be promoted to the
- reader's schema. Generated only for numeric and byte/string
- types.
-
- * `ContainerAction` -- no resolution is needed directly on
- container schemas, but a `ContainerAction` contains the `Action`
- needed for the contained schema.
-
- * `EnumAdjust` -- resolution involves dealing with reordering of
- symbols and symbols that have been removed from the enumeration.
- An `EnumAdjust` object contains the information needed to do so.
-
- * `RecordAdjust` -- resolution involves recursively resolving the
- schemas for each field, and dealing with reordering and removal
- of fields. A `RecordAdjust` object contains the information
- needed to do so.
-
- * `SkipAction` -- only generated as a sub-action of a
- `RecordAdjust` action. Used to indicate that a writer's field
- does not appear in the reader's schema and thus should be
- skipped.
-
- * `WriterUnion` -- generated when the writer's schema is a union
- and the reader's schema is not the identical union. Has
- subactions for resolving each branch of the writer's union
- against the reader's schema.
-
- * `ReaderUnion` -- generated when the reader's schema is a union
- and the writer's was not. Has information indicating which of
- the reader's union branches is the best fit for the writer's
- schema, and a subaction for resolving the schema of that branch
- against the writer's schema.
-
- * `ErrorAction` -- generated when the (sub)schemas can't be
- resolved.
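
As promised above, here is a minimal sketch of walking the tree returned by `Resolver.resolve`. The subclass names come from the list above; the `fieldActions` member of `RecordAdjust` is an assumption about the implementation, so treat this as illustrative rather than definitive:

    import org.apache.avro.Resolver;
    import org.apache.avro.Schema;

    public class ResolverDemo {
      // Print the shape of the action tree, one node per line.
      static void describe(Resolver.Action action, String indent) {
        System.out.println(indent + action.getClass().getSimpleName());
        if (action instanceof Resolver.RecordAdjust) {
          // fieldActions is assumed to hold the per-field sub-actions.
          for (Resolver.Action field : ((Resolver.RecordAdjust) action).fieldActions) {
            describe(field, indent + "  ");
          }
        }
      }

      public static void main(String[] args) {
        Schema writer = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
                + "{\"name\":\"a\",\"type\":\"int\"},"
                + "{\"name\":\"b\",\"type\":\"string\"}]}");
        Schema reader = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"R\",\"fields\":["
                + "{\"name\":\"a\",\"type\":\"long\"}]}");
        // Expect a RecordAdjust with a Promote (int -> long) for "a"
        // and a SkipAction for the writer-only field "b".
        describe(Resolver.resolve(writer, reader), "");
      }
    }
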
-
-These new classes are similar to the family of `Symbol` objects we've
-defined for `ResolvingGrammarGenerator`. For example,
-`Action.RecordAdjust` is similar to `Symbol.FieldOrderAction`, and
-`Action.EnumAdjust` to `Symbol.EnumAdjustAction`. This similarity is
-not surprising, since those `Symbol` objects were designed to
-encapsulate the logic of schema resolution as well.
-
-However, `ResolvingGrammarGenerator` embeds those `Symbol` objects
-into flattened productions highly optimized for the LL(1) parser
-implemented by `ResolvingDecoder`. The `Resolver`, in contrast,
-captures the schema-resolution logic in a tree-like structure that
-closely mirrors the syntax tree of the schemas being resolved. This
-tree-like representation is easily consumed by multiple
-implementations of resolution -- be it the grammar-based
-implementation of `ResolvingDecoder`, the "action-sequence"-based
-implementation of AVRO-2247, or the dynamic code-gen implementation
-being considered as an extension to AVRO-2090.
-
-We have reimplemented `ResolvingGrammarGenerator` to eliminate its
-implementation of schema-resolution logic and instead consume the
-output of `Resolver.resolve`. Thus, it might be helpful to study
-`ResolvingGrammarGenerator` to better understand how to consume this
-output in other circumstances.
diff --git a/doc/src/content/xdocs/gettingstartedjava.xml b/doc/src/content/xdocs/gettingstartedjava.xml
deleted file mode 100644
index 5440b07efe4..00000000000
--- a/doc/src/content/xdocs/gettingstartedjava.xml
+++ /dev/null
@@ -1,527 +0,0 @@
-
-
-
- %avro-entities;
-]>
-
-
- Apache Avro &AvroVersion; Getting Started (Java)
-
-
-
- This is a short guide for getting started with Apache Avro using
- Java. This guide only covers using Avro for data serialization; see
- Patrick Hunt's Avro
- RPC Quick Start for a good introduction to using Avro for RPC.
-
-
- Download
-
- Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be
- downloaded from the Apache Avro
- Releases page. This guide uses Avro &AvroVersion;, the latest
- version at the time of writing. For the examples in this guide,
- download avro-&AvroVersion;.jar and
- avro-tools-&AvroVersion;.jar .
-
-
- Alternatively, if you are using Maven, add the following dependency to
- your POM:
-
-
-<dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <version>&AvroVersion;</version>
-</dependency>
-
-
- As well as the Avro Maven plugin (for performing code generation):
-
-
-<plugin>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-maven-plugin</artifactId>
- <version>&AvroVersion;</version>
- <executions>
- <execution>
- <phase>generate-sources</phase>
- <goals>
- <goal>schema</goal>
- </goals>
- <configuration>
- <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
- <outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
- </configuration>
- </execution>
- </executions>
-</plugin>
-<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <configuration>
- <source>1.8</source>
- <target>1.8</target>
- </configuration>
-</plugin>
-
-
- You may also build the required Avro jars from source. Building Avro is
- beyond the scope of this guide; see the Build
- Documentation page in the wiki for more information.
-
-
-
-
- Defining a schema
-
- Avro schemas are defined using JSON. Schemas are composed of primitive types
- (null, boolean, int,
- long, float, double,
- bytes, and string) and complex types (record,
- enum, array, map,
- union, and fixed). You can learn more about
- Avro schemas and types from the specification, but for now let's start
- with a simple schema example, user.avsc :
-
-
-{"namespace": "example.avro",
- "type": "record",
- "name": "User",
- "fields": [
- {"name": "name", "type": "string"},
- {"name": "favorite_number", "type": ["int", "null"]},
- {"name": "favorite_color", "type": ["string", "null"]}
- ]
-}
-
-
- This schema defines a record representing a hypothetical user. (Note
- that a schema file can only contain a single schema definition.) At
- minimum, a record definition must include its type ("type":
- "record"), a name ("name": "User"), and fields, in
- this case name, favorite_number, and
- favorite_color. We also define a namespace
- ("namespace": "example.avro"), which together with the name
- attribute defines the "full name" of the schema
- (example.avro.User in this case).
-
-
-
- Fields are defined via an array of objects, each of which defines a name
- and type (other attributes are optional, see the record specification for more
- details). The type attribute of a field is another schema object, which
- can be either a primitive or complex type. For example, the
- name field of our User schema is the primitive type
- string, whereas the favorite_number and
- favorite_color fields are both unions,
- represented by JSON arrays. unions are a complex type that
- can be any of the types listed in the array; e.g.,
- favorite_number can either be an int or
- null, essentially making it an optional field.
-
-
-
-
- Serializing and deserializing with code generation
-
- Compiling the schema
-
- Code generation allows us to automatically create classes based on our
- previously-defined schema. Once we have defined the relevant classes,
- there is no need to use the schema directly in our programs. We use the
- avro-tools jar to generate code as follows:
-
-
-java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema <schema file> <destination>
-
-
- This will generate the appropriate source files in a package based on
- the schema's namespace in the provided destination folder. For
- instance, to generate a User class in package
- example.avro from the schema defined above, run
-
-
-java -jar /path/to/avro-tools-&AvroVersion;.jar compile schema user.avsc .
-
-
- Note that if you are using the Avro Maven plugin, there is no need to
- manually invoke the schema compiler; the plugin automatically
- performs code generation on any .avsc files present in the configured
- source directory.
-
-
-
- Creating Users
-
- Now that we've completed the code generation, let's create some
- Users, serialize them to a data file on disk, and then
- read back the file and deserialize the User objects.
-
-
- First let's create some Users and set their fields.
-
-
-User user1 = new User();
-user1.setName("Alyssa");
-user1.setFavoriteNumber(256);
-// Leave favorite color null
-
-// Alternate constructor
-User user2 = new User("Ben", 7, "red");
-
-// Construct via builder
-User user3 = User.newBuilder()
- .setName("Charlie")
- .setFavoriteColor("blue")
- .setFavoriteNumber(null)
- .build();
-
-
- As shown in this example, Avro objects can be created either by
- invoking a constructor directly or by using a builder. Unlike
- constructors, builders will automatically set any default values
- specified in the schema. Additionally, builders validate the data as
- it is set, whereas objects constructed directly will not cause an error
- until the object is serialized. However, using constructors directly
- generally offers better performance, as builders create a copy of the
- data structure before it is written.
-
-
- Note that we do not set user1's favorite color. Since
- that field is of type ["string", "null"], we can either
- set it to a string or leave it null; it is
- essentially optional. Similarly, we set user3's favorite
- number to null (using a builder requires setting all fields, even if
- they are null).
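
For instance, a builder fails fast on invalid data. A quick sketch (the exact exception message may vary):

    // Builders validate eagerly. Building without the required "name"
    // field (which has no default) is expected to throw an
    // org.apache.avro.AvroRuntimeException at build() time.
    User bad = User.newBuilder()
        .setFavoriteNumber(7)
        .build(); // throws: "name" was never set and has no default
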
-
-
-
- Serializing
-
- Now let's serialize our Users to disk.
-
-
-// Serialize user1, user2 and user3 to disk
-DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
-DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
-dataFileWriter.create(user1.getSchema(), new File("users.avro"));
-dataFileWriter.append(user1);
-dataFileWriter.append(user2);
-dataFileWriter.append(user3);
-dataFileWriter.close();
-
-
- We create a DatumWriter, which converts Java objects into
- an in-memory serialized format. The SpecificDatumWriter
- class is used with generated classes and extracts the schema from the
- specified generated type.
-
-
- Next we create a DataFileWriter, which writes the
- serialized records, as well as the schema, to the file specified in the
- dataFileWriter.create call. We write our users to the file
- via calls to the dataFileWriter.append method. When we are
- done writing, we close the data file.
-
-
-
- Deserializing
-
- Finally, let's deserialize the data file we just created.
-
-
-// Deserialize Users from disk
-DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
-DataFileReader<User> dataFileReader = new DataFileReader<User>(file, userDatumReader);
-User user = null;
-while (dataFileReader.hasNext()) {
-// Reuse user object by passing it to next(). This saves us from
-// allocating and garbage collecting many objects for files with
-// many items.
-user = dataFileReader.next(user);
-System.out.println(user);
-}
-
-
- This snippet will output:
-
-
-{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
-{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
-{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"}
-
-
- Deserializing is very similar to serializing. We create a
- SpecificDatumReader, analogous to the
- SpecificDatumWriter we used in serialization, which
- converts in-memory serialized items into instances of our generated
- class, in this case User. We pass the
- DatumReader and the previously created File
- to a DataFileReader, analogous to the
- DataFileWriter, which reads both the schema used by the
- writer as well as the data from the file on disk. The data will be
- read using the writer's schema included in the file and the
- schema provided by the reader, in this case the User
- class. The writer's schema is needed to know the order in which
- fields were written, while the reader's schema is needed to know what
- fields are expected and how to fill in default values for fields
- added since the file was written. If there are differences between
- the two schemas, they are resolved according to the
- Schema Resolution
- specification.
-
-
- Next we use the DataFileReader to iterate through the
- serialized Users and print the deserialized object to
- stdout. Note how we perform the iteration: we create a single
- User object which we store the current deserialized user
- in, and pass this record object to every call of
- dataFileReader.next. This is a performance optimization
- that allows the DataFileReader to reuse the same
- User object rather than allocating a new
- User for every iteration, which can be very expensive in
- terms of object allocation and garbage collection if we deserialize a
- large data file. While this technique is the standard way to iterate
- through a data file, it's also possible to use for (User user :
- dataFileReader) if performance is not a concern.
-
-
-
- Compiling and running the example code
-
- This example code is included as a Maven project in the
- examples/java-example directory in the Avro docs. From this
- directory, execute the following commands to build and run the
- example:
-
-
-$ mvn compile # includes code generation via Avro Maven plugin
-$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain
-
-
-
- Beta feature: Generating faster code
-
- In this release we have introduced a new approach to
- generating code that speeds up decoding of objects by more
- than 10% and encoding by more than 30% (future performance
- enhancements are underway). To ensure a smooth introduction
- of this change into production systems, this feature is
- controlled by a feature flag, the system
- property org.apache.avro.specific.use_custom_coders.
- In this first release, this feature is off by default. To
- turn it on, set the system flag to true at
- runtime. In the sample above, for example, you could enable
- the faster coders as follows:
-
-
-$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \
- -Dorg.apache.avro.specific.use_custom_coders=true
-
-
- Note that you do not have to recompile your Avro
- schema to have access to this feature. The feature is
- compiled and built into your code, and you turn it on and
- off at runtime using the feature flag. As a result, you can
- turn it on during testing, for example, and then off in
- production. Or you can turn it on in production, and
- quickly turn it off if something breaks.
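
The flag can also be set programmatically. A minimal sketch, assuming it runs before any specific-record serialization takes place:

    // Equivalent to passing -Dorg.apache.avro.specific.use_custom_coders=true
    // on the command line.
    System.setProperty("org.apache.avro.specific.use_custom_coders", "true");
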
-
-
- We encourage the Avro community to exercise this new feature
- early to help build confidence. (For those paying
- on-demand for compute resources in the cloud, it can lead
- to meaningful cost savings.) As confidence builds, we will
- turn this feature on by default, and eventually eliminate
- the feature flag (and the old code).
-
-
-
-
-
- Serializing and deserializing without code generation
-
- Data in Avro is always stored with its corresponding schema, meaning we
- can always read a serialized item regardless of whether we know the
- schema ahead of time. This allows us to perform serialization and
- deserialization without code generation.
-
-
- Let's go over the same example as in the previous section, but without
- using code generation: we'll create some users, serialize them to a data
- file on disk, and then read back the file and deserialize the users
- objects.
-
-
- Creating users
-
- First, we use a Parser to read our schema definition and
- create a Schema object.
-
-
-Schema schema = new Schema.Parser().parse(new File("user.avsc"));
-
-
- Using this schema, let's create some users.
-
-
-GenericRecord user1 = new GenericData.Record(schema);
-user1.put("name", "Alyssa");
-user1.put("favorite_number", 256);
-// Leave favorite color null
-
-GenericRecord user2 = new GenericData.Record(schema);
-user2.put("name", "Ben");
-user2.put("favorite_number", 7);
-user2.put("favorite_color", "red");
-
-
- Since we're not using code generation, we use
- GenericRecords to represent users.
- GenericRecord uses the schema to verify that we only
- specify valid fields. If we try to set a non-existent field (e.g.,
- user1.put("favorite_animal", "cat")), we'll get an
- AvroRuntimeException when we run the program.
-
-
- Note that we do not set user1's favorite color. Since
- that field is of type ["string", "null"], we can either
- set it to a string or leave it null; it is
- essentially optional.
-
-
-
- Serializing
-
- Now that we've created our user objects, serializing and deserializing
- them is almost identical to the example above which uses code
- generation. The main difference is that we use generic instead of
- specific readers and writers.
-
-
- First we'll serialize our users to a data file on disk.
-
-
-// Serialize user1 and user2 to disk
-File file = new File("users.avro");
-DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
-DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
-dataFileWriter.create(schema, file);
-dataFileWriter.append(user1);
-dataFileWriter.append(user2);
-dataFileWriter.close();
-
-
- We create a DatumWriter, which converts Java objects into
- an in-memory serialized format. Since we are not using code
- generation, we create a GenericDatumWriter. It requires
- the schema both to determine how to write the
- GenericRecords and to verify that all non-nullable fields
- are present.
-
-
- As in the code generation example, we also create a
- DataFileWriter, which writes the serialized records, as
- well as the schema, to the file specified in the
- dataFileWriter.create call. We write our users to the
- file via calls to the dataFileWriter.append method. When
- we are done writing, we close the data file.
-
-
-
- Deserializing
-
- Finally, we'll deserialize the data file we just created.
-
-
-// Deserialize users from disk
-DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
-DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
-GenericRecord user = null;
-while (dataFileReader.hasNext()) {
-// Reuse user object by passing it to next(). This saves us from
-// allocating and garbage collecting many objects for files with
-// many items.
-user = dataFileReader.next(user);
-System.out.println(user);
-}
-
- This outputs:
-
-{"name": "Alyssa", "favorite_number": 256, "favorite_color": null}
-{"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
-
-
- Deserializing is very similar to serializing. We create a
- GenericDatumReader, analogous to the
- GenericDatumWriter we used in serialization, which
- converts in-memory serialized items into GenericRecords.
- We pass the DatumReader and the previously created
- File to a DataFileReader, analogous to the
- DataFileWriter, which reads both the schema used by the
- writer as well as the data from the file on disk. The data will be
- read using the writer's schema included in the file, and the reader's
- schema provided to the GenericDatumReader. The writer's
- schema is needed to know the order in which fields were written,
- while the reader's schema is needed to know what fields are expected
- and how to fill in default values for fields added since the file
- was written. If there are differences between the two schemas, they
- are resolved according to the
- Schema Resolution
- specification.
-
-
- Next, we use the DataFileReader to iterate through the
- serialized users and print the deserialized object to stdout. Note
- how we perform the iteration: we create a single
- GenericRecord object which we store the current
- deserialized user in, and pass this record object to every call of
- dataFileReader.next. This is a performance optimization
- that allows the DataFileReader to reuse the same record
- object rather than allocating a new GenericRecord for
- every iteration, which can be very expensive in terms of object
- allocation and garbage collection if we deserialize a large data file.
- While this technique is the standard way to iterate through a data
- file, it's also possible to use for (GenericRecord user :
- dataFileReader) if performance is not a concern.
-
-
-
- Compiling and running the example code
-
- This example code is included as a Maven project in the
- examples/java-example directory in the Avro docs. From this
- directory, execute the following commands to build and run the
- example:
-
-
-$ mvn compile
-$ mvn -q exec:java -Dexec.mainClass=example.GenericMain
-
-
-
-
-
diff --git a/doc/src/content/xdocs/gettingstartedpython.xml b/doc/src/content/xdocs/gettingstartedpython.xml
deleted file mode 100644
index f6216b116d8..00000000000
--- a/doc/src/content/xdocs/gettingstartedpython.xml
+++ /dev/null
@@ -1,258 +0,0 @@
-
-
-
- %avro-entities;
-]>
-
-
- Apache Avro &AvroVersion; Getting Started (Python)
-
-
-
- This is a short guide for getting started with Apache Avro using
- Python. This guide only covers using Avro for data serialization; see
- Patrick Hunt's Avro
- RPC Quick Start for a good introduction to using Avro for RPC.
-
-
-
- Notice for Python 3 users
-
- A package called "avro-python3" was previously provided to support
- Python 3, but its codebase has since been consolidated into
- the "avro" package, which now supports Python 3.
-
- The avro-python3 package will be removed in the near future,
- so users should use the "avro" package instead.
- They are mostly API compatible, but there are a few minor differences
- (e.g., function name capitalization,
- such as avro.schema.Parse vs avro.schema.parse).
-
-
-
-
- Download and Install
-
- The easiest way to get started in Python is to install avro from PyPI
- using pip , the Python Package Installer.
-
-
-$ python3 -m pip install avro
-
- Consider doing a local install or using a virtualenv to avoid permissions problems and interfering with system packages:
-
-$ python3 -m pip install --user avro
-
- or
-
- $ python3 -m venv avro-venv
- $ avro-venv/bin/pip install avro
-
-
- The official releases of the Avro implementations for C, C++, C#, Java,
- PHP, Python, and Ruby can be downloaded from the Apache Avro
- Releases page. This guide uses Avro &AvroVersion;, the latest
- version at the time of writing. Download and install
- avro-&AvroVersion;-py2.py3-none-any.whl or
- avro-&AvroVersion;.tar.gz via
- python3 -m pip install avro-&AvroVersion;-py2.py3-none-any.whl
- or
- python3 -m pip install avro-&AvroVersion;.tar.gz.
- (As above, consider using a virtualenv or user-local install.)
-
- Check that you can import avro from a Python prompt.
-
-$ python3 -c 'import avro; print(avro.__version__)'
-
- The above should print &AvroVersion;. It should not raise an ImportError.
-
- Alternatively, you may build the Avro Python library from source. From
- the root Avro directory, run the commands
-
-
-$ cd lang/py/
-$ python3 -m pip install -e .
-$ python3
-
-
-
-
- Defining a schema
-
- Avro schemas are defined using JSON. Schemas are composed of primitive types
- (null, boolean, int,
- long, float, double,
- bytes, and string) and complex types (record,
- enum, array, map,
- union, and fixed). You can learn more about
- Avro schemas and types from the specification, but for now let's start
- with a simple schema example, user.avsc :
-
-
-{"namespace": "example.avro",
- "type": "record",
- "name": "User",
- "fields": [
- {"name": "name", "type": "string"},
- {"name": "favorite_number", "type": ["int", "null"]},
- {"name": "favorite_color", "type": ["string", "null"]}
- ]
-}
-
-
- This schema defines a record representing a hypothetical user. (Note
- that a schema file can only contain a single schema definition.) At
- minimum, a record definition must include its type ("type":
- "record"), a name ("name": "User"), and fields, in
- this case name, favorite_number, and
- favorite_color. We also define a namespace
- ("namespace": "example.avro"), which together with the name
- attribute defines the "full name" of the schema
- (example.avro.User in this case).
-
-
-
- Fields are defined via an array of objects, each of which defines a name
- and type (other attributes are optional, see the record specification for more
- details). The type attribute of a field is another schema object, which
- can be either a primitive or complex type. For example, the
- name field of our User schema is the primitive type
- string, whereas the favorite_number and
- favorite_color fields are both unions,
- represented by JSON arrays. unions are a complex type that
- can be any of the types listed in the array; e.g.,
- favorite_number can either be an int or
- null, essentially making it an optional field.
-
-
-
-
- Serializing and deserializing without code generation
-
- Data in Avro is always stored with its corresponding schema, meaning we
- can always read a serialized item, regardless of whether we know the
- schema ahead of time. This allows us to perform serialization and
- deserialization without code generation. Note that the Avro Python
- library does not support code generation.
-
-
- Try running the following code snippet, which serializes two users to a
- data file on disk, and then reads back and deserializes the data file:
-
-
-import avro.schema
-from avro.datafile import DataFileReader, DataFileWriter
-from avro.io import DatumReader, DatumWriter
-
-schema = avro.schema.parse(open("user.avsc", "rb").read())
-
-writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
-writer.append({"name": "Alyssa", "favorite_number": 256})
-writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
-writer.close()
-
-reader = DataFileReader(open("users.avro", "rb"), DatumReader())
-for user in reader:
-    print(user)
-reader.close()
-
- This outputs:
-
-{'name': 'Alyssa', 'favorite_number': 256, 'favorite_color': None}
-{'name': 'Ben', 'favorite_number': 7, 'favorite_color': 'red'}
-
-
- Do make sure that you open your files in binary mode (i.e. using the modes
- wb or rb respectively). Otherwise you might
- generate corrupt files due to automatic replacement of newline characters
- with the platform-specific representations.
-
-
- Let's take a closer look at what's going on here.
-
-
-schema = avro.schema.parse(open("user.avsc", "rb").read())
-
-
- avro.schema.parse takes a string containing a JSON schema
- definition as input and outputs an avro.schema.Schema object
- (specifically a subclass of Schema, in this case
- RecordSchema). We're passing in the contents of our
- user.avsc schema file here.
-
-
-writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
-
-
- We create a DataFileWriter, which we'll use to write
- serialized items to a data file on disk. The
- DataFileWriter constructor takes three arguments:
-
-
- The file we'll serialize to
- A DatumWriter, which is responsible for actually
- serializing the items to Avro's binary format
- (DatumWriters can be used separately from
- DataFileWriters, e.g., to perform IPC with Avro).
- The schema we're using. The DataFileWriter needs the
- schema both to write the schema to the data file, and to verify that
- the items we write are valid and contain the appropriate
- fields.
-
-
-writer.append({"name": "Alyssa", "favorite_number": 256})
-writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
-
-
- We use DataFileWriter.append to add items to our data
- file. Avro records are represented as Python dicts.
- Since the field favorite_color has type ["string",
- "null"], we are not required to specify this field, as shown in
- the first append. Were we to omit the required name
- field, an exception would be raised. Any extra entries in the
- dict that do not correspond to a schema field are
- ignored.
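-
- For instance, the following sketch (assuming user.avsc as above) shows the
- writer rejecting a record that omits name; the Python library signals
- data that does not match the schema with avro.io.AvroTypeException:
-
-import avro.io
-import avro.schema
-from avro.datafile import DataFileWriter
-from avro.io import DatumWriter
-
-schema = avro.schema.parse(open("user.avsc", "rb").read())
-writer = DataFileWriter(open("bad.avro", "wb"), DatumWriter(), schema)
-try:
-    writer.append({"favorite_number": 3})  # missing the required "name" field
-except avro.io.AvroTypeException as e:
-    print("rejected:", e)
-writer.close()
-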
-
-
-reader = DataFileReader(open("users.avro", "rb"), DatumReader())
-
-
- We open the file again, this time for reading back from disk. We use
- a DataFileReader and DatumReader analogous
- to the DataFileWriter and DatumWriter above.
-
-
-for user in reader:
-    print(user)
-
-
- The DataFileReader is an iterator that returns
- dicts corresponding to the serialized items.
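-
- Recent versions of the Python library also implement the context-manager
- protocol, so, assuming your installed version supports it, the reader and
- writer can be used in a with block that closes the file for you:
-
-from avro.datafile import DataFileReader
-from avro.io import DatumReader
-
-with DataFileReader(open("users.avro", "rb"), DatumReader()) as reader:
-    for user in reader:
-        print(user)
-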
-
-
-
-
diff --git a/doc/src/content/xdocs/idl.xml b/doc/src/content/xdocs/idl.xml
deleted file mode 100644
index 52a607503b3..00000000000
--- a/doc/src/content/xdocs/idl.xml
+++ /dev/null
@@ -1,486 +0,0 @@
-
-
-
-
-
- Apache Avro &AvroVersion; IDL
-
-
-
-
- Introduction
-
- This document defines Avro IDL, a higher-level language for authoring Avro schemata.
- Before reading this document, you should have familiarity with the concepts of schemata and protocols,
- as well as the various primitive and complex types available in Avro.
-
-
-
-
- Overview
-
- Purpose
- The aim of the Avro IDL language is to enable developers to author schemata in a way that
- feels more similar to common programming languages like Java, C++, or Python. Additionally,
- the Avro IDL language may feel more familiar for those users who have previously used the
- interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA.
-
-
-
- Usage
-
- Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format
- Avro Protocol file with extension .avpr.
-
-
- To convert a .avdl file into a .avpr file, it may be processed by the
- idl tool. For example:
-
-
-$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr
-$ head /tmp/namespaces.avpr
-{
- "protocol" : "TestNamespace",
- "namespace" : "avro.test.protocol",
-
-
- The idl tool can also process input to and from stdin and stdout.
- See idl --help for full usage information. For example, the following
- should be roughly equivalent to the invocation above:
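-
-$ java -jar avro-tools.jar idl < src/test/idl/input/namespaces.avdl > /tmp/namespaces.avpr
-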
-
- A Maven plugin is also provided to compile .avdl files. To
- use it, add something like the following to your pom.xml:
-
-
-
-<plugin>
-  <groupId>org.apache.avro</groupId>
-  <artifactId>avro-maven-plugin</artifactId>
-  <executions>
-    <execution>
-      <goals>
-        <goal>idl-protocol</goal>
-      </goals>
-    </execution>
-  </executions>
-</plugin>
-
-
-
-
- Defining a Protocol in Avro IDL
-
- An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined
- by the following code:
-
-
-protocol MyProtocol {
-}
-
-
- This is equivalent to (and generates) the following JSON protocol definition:
-
-
-
-
-{
-  "protocol" : "MyProtocol",
-  "types" : [ ],
-  "messages" : { }
-}
-
-
- The namespace of the protocol may be changed using the @namespace annotation:
-
-
-@namespace("mynamespace")
-protocol MyProtocol {
-}
-
-
- This notation is used throughout Avro IDL as a way of specifying properties for the annotated element,
- as will be described later in this document.
-
-
- Protocols in Avro IDL can contain the following items:
-
-
- Imports of external protocol and schema files.
- Definitions of named schemata, including record, error, enum, and fixed types.
- Definitions of RPC messages.
-
-
-
- Imports
- Files may be imported in one of three formats:
-
- An IDL file may be imported with a statement like:
- import idl "foo.avdl";
-
- A JSON protocol file may be imported with a statement like:
- import protocol "foo.avpr";
-
- A JSON schema file may be imported with a statement like:
- import schema "foo.avsc";
-
-
- Messages and types in the imported file are added to this
- file's protocol.
- Imported file names are resolved relative to the current IDL file.
-
-
-
-
-
-
- Defining RPC Messages
- The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for
- a method declaration within a C header file or a Java interface. To define an RPC message
- named add which takes two arguments named foo and bar,
- returning an int, simply include the following definition within the protocol:
-
-
-int add(int foo, int bar = 0);
-
- Message arguments, like record fields, may specify default
- values.
- To define a message with no response, you may use the alias void, equivalent
- to the Avro null type:
-
-
-void logMessage(string message);
-
-
- If you have previously defined an error type within the same protocol, you may declare that
- a message can throw this error using the syntax:
-
-
-void goKaboom() throws Kaboom;
-
- To define a one-way message, use the
- keyword oneway after the parameter list, for example:
-
-
-void fireAndForget(string message) oneway;
-
-
-
- Other Language Features
-
-
- Escaping Identifiers
- Occasionally, one will need to use a reserved language keyword as an identifier. In order
- to do so, backticks (`) may be used to escape the identifier. For example, to define
- a message with the literal name error, you may write:
-
-
-void `error`();
-
- This syntax is allowed anywhere an identifier is expected.
-
-
- Annotations for Ordering and Namespaces
- Java-style annotations may be used to add additional
- properties to types and fields throughout Avro IDL.
-
- For example, to specify the sort order of a field within
- a record, one may use the @order annotation
- before the field name as follows:
-
-record MyRecord {
- string @order("ascending") myAscendingSortField;
- string @order("descending") myDescendingField;
- string @order("ignore") myIgnoredField;
-}
-
- A field's type may also be preceded by annotations, e.g.:
-
-record MyRecord {
- @java-class("java.util.ArrayList") array<string> myStrings;
-}
-
-
- This can be used to support Java classes that can be
- serialized/deserialized via their toString method and String constructor, e.g.:
-
-record MyRecord {
- @java-class("java.math.BigDecimal") string value;
- @java-key-class("java.io.File") map<string> fileStates;
- array<@java-class("java.math.BigDecimal") string> weights;
-}
-
-
- Similarly, a @namespace annotation may be used to modify the namespace
- when defining a named schema. For example:
-
-
-@namespace("org.apache.avro.firstNamespace")
-protocol MyProto {
- @namespace("org.apache.avro.someOtherNamespace")
- record Foo {}
-
- record Bar {}
-}
-
-
- will define a protocol in the firstNamespace namespace. The record Foo will be
- defined in someOtherNamespace and Bar will be defined in firstNamespace
- as it inherits its default from its container.
-
- Type and field aliases are specified with
- the @aliases annotation as follows:
-
-@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"])
-record MyRecord {
- string @aliases(["oldField", "ancientField"]) myNewField;
-}
-
- Some annotations like those listed above are handled
- specially. All other annotations are added as properties to
- the protocol, message, schema or field.
-
-
-
- Complete Example
- The following is a complete example of an Avro IDL file that shows most of the above features:
-
-/**
- * An example protocol in Avro IDL
- */
-@namespace("org.apache.avro.test")
-protocol Simple {
-
- @aliases(["org.foo.KindOf"])
- enum Kind {
- FOO,
- BAR, // the bar enum value
- BAZ
- }
-
- fixed MD5(16);
-
- record TestRecord {
- @order("ignore")
- string name;
-
- @order("descending")
- Kind kind;
-
- MD5 hash;
-
- union { MD5, null} @aliases(["hash"]) nullableHash;
-
- array<long> arrayOfLongs;
- }
-
- error TestError {
- string message;
- }
-
- string hello(string greeting);
- TestRecord echo(TestRecord `record`);
- int add(int arg1, int arg2);
- bytes echoBytes(bytes data);
- void `error`() throws TestError;
- void ping() oneway;
-}
-
- Additional examples may be found in the Avro source tree under the src/test/idl/input directory.
-
-
- Apache Avro, Avro, Apache, and the Avro and Apache logos are
- trademarks of The Apache Software Foundation.
-
-
-
diff --git a/doc/src/content/xdocs/index.xml b/doc/src/content/xdocs/index.xml
deleted file mode 100644
index 4247e212ec3..00000000000
--- a/doc/src/content/xdocs/index.xml
+++ /dev/null
@@ -1,96 +0,0 @@
-
-
-
-
-
- Apache Avro &AvroVersion; Documentation
-
-
-
- Introduction
- Apache Avro is a data serialization system.
- Avro provides:
-
- Rich data structures.
- A compact, fast, binary data format.
- A container file, to store persistent data.
- Remote procedure call (RPC).
- Simple integration with dynamic languages. Code
- generation is not required to read or write data files nor
- to use or implement RPC protocols. Code generation is an
- optional optimization, only worth implementing for
- statically typed languages.
-
-
-
- Schemas
- Avro relies on schemas. When Avro data is read, the
- schema used when writing it is always present. This permits
- each datum to be written with no per-value overheads, making
- serialization both fast and small. This also facilitates use
- with dynamic, scripting languages, since data, together with
- its schema, is fully self-describing.
- When Avro data is stored in a file, its schema is stored with
- it, so that files may be processed later by any program. If
- the program reading the data expects a different schema this
- can be easily resolved, since both schemas are present.
- When Avro is used in RPC, the client and server exchange
- schemas in the connection handshake. (This can be optimized
- so that, for most calls, no schemas are actually transmitted.)
- Since client and server both have the other's full
- schema, correspondence between same-named fields, missing
- fields, extra fields, etc. can all be easily resolved.
- Avro schemas are defined with
- JSON. This
- facilitates implementation in languages that already have
- JSON libraries.
-
-
- Comparison with other systems
- Avro provides functionality similar to systems such
- as Thrift,
- Protocol
- Buffers, etc. Avro differs from these systems in the
- following fundamental aspects.
-
- Dynamic typing: Avro does not require that code
- be generated. Data is always accompanied by a schema that
- permits full processing of that data without code
- generation, static datatypes, etc. This facilitates
- construction of generic data-processing systems and
- languages.
- Untagged data: Since the schema is present when
- data is read, considerably less type information need be
- encoded with data, resulting in smaller serialization size.
- No manually-assigned field IDs: When a schema
- changes, both the old and new schema are always present when
- processing data, so differences may be resolved
- symbolically, using field names.
-
-
-
- Apache Avro, Avro, Apache, and the Avro and Apache logos are
- trademarks of The Apache Software Foundation.
-
-
-
diff --git a/doc/src/content/xdocs/mr.xml b/doc/src/content/xdocs/mr.xml
deleted file mode 100644
index f5a70b95a58..00000000000
--- a/doc/src/content/xdocs/mr.xml
+++ /dev/null
@@ -1,580 +0,0 @@
-
-
-
-
-
- Apache Avro &AvroVersion; Hadoop MapReduce guide
-
-
-
- Avro provides a convenient way to represent complex data structures within
- a Hadoop MapReduce job. Avro data can be used as both input to and output
- from a MapReduce job, as well as the intermediate format. The example in
- this guide uses Avro data for all three, but it's possible to mix and
- match; for instance, MapReduce can be used to aggregate a particular field
- in an Avro record.
-
-
- This guide assumes basic familiarity with both Hadoop MapReduce and Avro.
- See the Hadoop
- documentation and the Avro getting
- started guide for introductions to these projects. This guide uses
- the old MapReduce API (org.apache.hadoop.mapred) and the new
- MapReduce API (org.apache.hadoop.mapreduce).
-
-
- Setup
-
- The code from this guide is included in the Avro docs under
- examples/mr-example. The example is set up as a Maven project
- that includes the necessary Avro and MapReduce dependencies and the Avro
- Maven plugin for code generation, so no external jars are needed to run
- the example. In particular, the POM includes the following dependencies:
-
-
-<dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <version>&AvroVersion;</version>
-</dependency>
-<dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-mapred</artifactId>
- <version>&AvroVersion;</version>
-</dependency>
-<dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>3.1.2</version>
-</dependency>
-
-
- And the following plugin:
-
-
-<plugin>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-maven-plugin</artifactId>
- <version>&AvroVersion;</version>
- <executions>
- <execution>
- <phase>generate-sources</phase>
- <goals>
- <goal>schema</goal>
- </goals>
- <configuration>
- <sourceDirectory>${project.basedir}/../</sourceDirectory>
- <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory>
- </configuration>
- </execution>
- </executions>
-</plugin>
-
-
- If you do not configure the sourceDirectory and outputDirectory
- properties, the defaults will be used. The sourceDirectory property
- defaults to src/main/avro. The outputDirectory property
- defaults to target/generated-sources. You can change the paths to
- match your project layout.
-
-
- Alternatively, Avro jars can be downloaded directly from the Apache Avro
- Releases page. The relevant Avro jars for this guide are
- avro-&AvroVersion;.jar and
- avro-mapred-&AvroVersion;.jar, as well as
- avro-tools-&AvroVersion;.jar for code generation and viewing
- Avro data files as JSON. In addition, you will need to install Hadoop
- in order to use MapReduce.
-
-
-
-
- Example: ColorCount
-
- Below is a simple example of a MapReduce job that uses Avro. There is an example
- for both the old (org.apache.hadoop.mapred) and new
- (org.apache.hadoop.mapreduce) APIs under
- examples/mr-example/src/main/java/example/. MapredColorCount
- is the example for the older mapred API while MapReduceColorCount is
- the example for the newer mapreduce API. Both examples are below, but
- we will detail the mapred API in our subsequent examples.
-
-
- MapredColorCount:
-
-package example;
-
-import java.io.IOException;
-
-import org.apache.avro.*;
-import org.apache.avro.Schema.Type;
-import org.apache.avro.mapred.*;
-import org.apache.hadoop.conf.*;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.*;
-import org.apache.hadoop.util.*;
-
-import example.avro.User;
-
-public class MapredColorCount extends Configured implements Tool {
-
- public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
- @Override
- public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
- throws IOException {
- CharSequence color = user.getFavoriteColor();
- // We need this check because the User.favorite_color field has type ["string", "null"]
- if (color == null) {
- color = "none";
- }
- collector.collect(new Pair<CharSequence, Integer>(color, 1));
- }
- }
-
- public static class ColorCountReducer extends AvroReducer<CharSequence, Integer,
- Pair<CharSequence, Integer>> {
- @Override
- public void reduce(CharSequence key, Iterable<Integer> values,
- AvroCollector<Pair<CharSequence, Integer>> collector,
- Reporter reporter)
- throws IOException {
- int sum = 0;
- for (Integer value : values) {
- sum += value;
- }
- collector.collect(new Pair<CharSequence, Integer>(key, sum));
- }
- }
-
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: MapredColorCount <input path> <output path>");
- return -1;
- }
-
- JobConf conf = new JobConf(getConf(), MapredColorCount.class);
- conf.setJobName("colorcount");
-
- FileInputFormat.setInputPaths(conf, new Path(args[0]));
- FileOutputFormat.setOutputPath(conf, new Path(args[1]));
-
- AvroJob.setMapperClass(conf, ColorCountMapper.class);
- AvroJob.setReducerClass(conf, ColorCountReducer.class);
-
- // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set
- // relevant config options such as input/output format, map output
- // classes, and output key class.
- AvroJob.setInputSchema(conf, User.getClassSchema());
- AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
- Schema.create(Type.INT)));
-
- JobClient.runJob(conf);
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args);
- System.exit(res);
- }
-}
-
-
- MapReduceColorCount:
-
-package example;
-
-import java.io.IOException;
-
-import org.apache.avro.Schema;
-import org.apache.avro.mapred.AvroKey;
-import org.apache.avro.mapred.AvroValue;
-import org.apache.avro.mapreduce.AvroJob;
-import org.apache.avro.mapreduce.AvroKeyInputFormat;
-import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import example.avro.User;
-
-public class MapReduceColorCount extends Configured implements Tool {
-
- public static class ColorCountMapper extends
- Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
-
- @Override
- public void map(AvroKey<User> key, NullWritable value, Context context)
- throws IOException, InterruptedException {
-
- CharSequence color = key.datum().getFavoriteColor();
- if (color == null) {
- color = "none";
- }
- context.write(new Text(color.toString()), new IntWritable(1));
- }
- }
-
- public static class ColorCountReducer extends
- Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
-
- @Override
- public void reduce(Text key, Iterable<IntWritable> values,
- Context context) throws IOException, InterruptedException {
-
- int sum = 0;
- for (IntWritable value : values) {
- sum += value.get();
- }
- context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
- }
- }
-
- public int run(String[] args) throws Exception {
- if (args.length != 2) {
- System.err.println("Usage: MapReduceColorCount <input path> <output path>");
- return -1;
- }
-
- Job job = new Job(getConf());
- job.setJarByClass(MapReduceColorCount.class);
- job.setJobName("Color Count");
-
- FileInputFormat.setInputPaths(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
-
- job.setInputFormatClass(AvroKeyInputFormat.class);
- job.setMapperClass(ColorCountMapper.class);
- AvroJob.setInputKeySchema(job, User.getClassSchema());
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(IntWritable.class);
-
- job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
- job.setReducerClass(ColorCountReducer.class);
- AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
- AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));
-
- return (job.waitForCompletion(true) ? 0 : 1);
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new MapReduceColorCount(), args);
- System.exit(res);
- }
-}
-
-
-
- ColorCount reads in data files containing User records,
- defined in examples/user.avsc, and counts the number of
- instances of each favorite color. (This example draws inspiration from
- the canonical WordCount MapReduce application.) This example uses the
- old MapReduce API. See MapReduceAvroWordCount, found under
- doc/examples/mr-example/src/main/java/example/, for the new MapReduce
- API example. The User
- schema is defined as follows:
-
-
-{"namespace": "example.avro",
- "type": "record",
- "name": "User",
- "fields": [
- {"name": "name", "type": "string"},
- {"name": "favorite_number", "type": ["int", "null"]},
- {"name": "favorite_color", "type": ["string", "null"]}
- ]
-}
-
-
- This schema is compiled into the User class used by
- ColorCount via the Avro Maven plugin (see
- examples/mr-example/pom.xml for how this is set up).
-
-
- ColorCountMapper essentially takes a User as input and
- extracts the User's favorite color, emitting the key-value
- pair <favoriteColor, 1>.
- ColorCountReducer then adds up how many occurrences of a particular
- favorite color were emitted, and outputs the result as a
- Pair record. These Pairs are serialized to an
- Avro data file.
-
-
- Running ColorCount
-
- The ColorCount application is provided as a Maven project in the Avro
- docs under examples/mr-example. To build the project,
- including the code generation of the User schema, run:
-
-
-mvn compile
-
-
- Next, run GenerateData from examples/mr-example to create an Avro data
- file, input/users.avro , containing 20 Users with
- favorite colors chosen randomly from a list:
-
-
-mvn exec:java -q -Dexec.mainClass=example.GenerateData
-
-
- Besides creating the data file, GenerateData prints the JSON
- representations of the Users generated to stdout, for example:
-
-
-{"name": "user", "favorite_number": null, "favorite_color": "red"}
-{"name": "user", "favorite_number": null, "favorite_color": "green"}
-{"name": "user", "favorite_number": null, "favorite_color": "purple"}
-{"name": "user", "favorite_number": null, "favorite_color": null}
-...
-
-
- Now we're ready to run ColorCount. We specify our freshly-generated
- input folder as the input path and output as our
- output folder (note that MapReduce will not start a job if the output
- folder already exists):
-
-
-mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output"
-
-
- Once ColorCount completes, checking the contents of the new
- output directory should yield the following:
-
-
-$ ls output/
-part-00000.avro _SUCCESS
-
-
- You can check the contents of the generated Avro file using the avro-tools jar:
-
-
-$ java -jar /path/to/avro-tools-&AvroVersion;.jar tojson output/part-00000.avro
-{"value": 3, "key": "blue"}
-{"value": 7, "key": "green"}
-{"value": 1, "key": "none"}
-{"value": 2, "key": "orange"}
-{"value": 3, "key": "purple"}
-{"value": 2, "key": "red"}
-{"value": 2, "key": "yellow"}
-
-
-
- Now let's go over the ColorCount example in detail.
-
- Mapper - org.apache.hadoop.mapred API
-
- The easiest way to use Avro data files as input to a MapReduce job is to
- subclass AvroMapper. An AvroMapper defines a
- map function that takes an Avro datum as input and outputs a key/value
- pair represented as a Pair record. In the ColorCount
- example, ColorCountMapper is an AvroMapper
- that takes a User as input and outputs a
- Pair<CharSequence, Integer>, where the
- CharSequence key is the user's favorite color and the
- Integer value is 1.
-
-
-public static class ColorCountMapper extends AvroMapper<User, Pair<CharSequence, Integer>> {
- @Override
- public void map(User user, AvroCollector<Pair<CharSequence, Integer>> collector, Reporter reporter)
- throws IOException {
- CharSequence color = user.getFavoriteColor();
- // We need this check because the User.favorite_color field has type ["string", "null"]
- if (color == null) {
- color = "none";
- }
- collector.collect(new Pair<CharSequence, Integer>(color, 1));
- }
-}
-
-
- In order to use our AvroMapper, we must call
- AvroJob.setMapperClass and
- AvroJob.setInputSchema.
-
-
-AvroJob.setMapperClass(conf, ColorCountMapper.class);
-AvroJob.setInputSchema(conf, User.getClassSchema());
-
-
- Note that AvroMapper does not implement the
- Mapper interface. Under the hood, the specified Avro data
- files are deserialized into AvroWrappers containing the
- actual data, which are processed by a Mapper that calls the
- configured AvroMapper's map function.
- AvroJob.setInputSchema sets up the relevant configuration
- parameters needed to make this happen, thus you should not need to call
- JobConf.setMapperClass,
- JobConf.setInputFormat,
- JobConf.setMapOutputKeyClass,
- JobConf.setMapOutputValueClass, or
- JobConf.setOutputKeyComparatorClass.
-
-
-
- Mapper - org.apache.hadoop.mapreduce API
-
- This document will not go into all the differences between the mapred and mapreduce APIs,
- but will describe the main ones. As you can see, ColorCountMapper is now a
- subclass of the Hadoop Mapper class and is passed an AvroKey as its key.
-
- Additionally, the AvroJob method calls were slightly changed.
-
-
- public static class ColorCountMapper extends
- Mapper<AvroKey<User>, NullWritable, Text, IntWritable> {
-
- @Override
- public void map(AvroKey<User> key, NullWritable value, Context context)
- throws IOException, InterruptedException {
-
- CharSequence color = key.datum().getFavoriteColor();
- if (color == null) {
- color = "none";
- }
- context.write(new Text(color.toString()), new IntWritable(1));
- }
- }
-
-
-
- Reducer - org.apache.hadoop.mapred API
-
- Analogously to AvroMapper, an AvroReducer
- defines a reducer function that takes the key/value types output by an
- AvroMapper (or any mapper that outputs Pairs)
- and outputs a key/value pair represented as a Pair record. In
- the ColorCount example, ColorCountReducer is an
- AvroReducer that takes the CharSequence key
- representing a favorite color and the Iterable<Integer>
- representing the counts for that color (they should all be 1 in this
- example) and adds up the counts.
-
-
-public static class ColorCountReducer extends AvroReducer<CharSequence, Integer,
- Pair<CharSequence, Integer>> {
- @Override
- public void reduce(CharSequence key, Iterable<Integer> values,
- AvroCollector<Pair<CharSequence, Integer>> collector,
- Reporter reporter)
- throws IOException {
- int sum = 0;
- for (Integer value : values) {
- sum += value;
- }
- collector.collect(new Pair<CharSequence, Integer>(key, sum));
- }
-}
-
-
- In order to use our AvroReducer, we must call
- AvroJob.setReducerClass and
- AvroJob.setOutputSchema.
-
-
-AvroJob.setReducerClass(conf, ColorCountReducer.class);
-AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING),
- Schema.create(Type.INT)));
-
-
- Note that AvroReducer does not implement the
- Reducer interface. The intermediate Pairs
- output by the mapper are split into AvroKeys and
- AvroValues, which are processed by a Reducer
- that calls the configured AvroReducer's reduce function.
- AvroJob.setOutputSchema sets up the relevant configuration
- parameters needed to make this happen, thus you should not need to call
- JobConf.setReducerClass,
- JobConf.setOutputFormat,
- JobConf.setOutputKeyClass,
- JobConf.setMapOutputKeyClass,
- JobConf.setMapOutputValueClass, or
- JobConf.setOutputKeyComparatorClass.
-
-
-
- Reducer - org.apache.hadoop.mapreduce API
-
- As before, we will not detail every difference between the APIs. As with the Mapper
- change, ColorCountReducer is now a subclass of Reducer, and AvroKey and AvroValue
- are emitted.
-
- Additionally, the AvroJob method calls were slightly changed.
-
-
- public static class ColorCountReducer extends
- Reducer<Text, IntWritable, AvroKey<CharSequence>, AvroValue<Integer>> {
-
- @Override
- public void reduce(Text key, Iterable<IntWritable> values,
- Context context) throws IOException, InterruptedException {
-
- int sum = 0;
- for (IntWritable value : values) {
- sum += value.get();
- }
- context.write(new AvroKey<CharSequence>(key.toString()), new AvroValue<Integer>(sum));
- }
- }
-
-
-
- Learning more
-
- The mapred API allows users to mix Avro AvroMappers and
- AvroReducers with non-Avro Mappers and
- Reducers, and the mapreduce API allows users to read Avro
- input and write non-Avro output, or vice versa.
-
-
-
- API documentation is available for both the
- org.apache.avro.mapred package and the
- org.apache.avro.mapreduce package.
- Similarly to the mapreduce package,
- it's possible with the mapred API to implement your own Mappers and
- Reducers directly using the public classes provided in
- these libraries. See the AvroWordCount application, found under
- examples/mr-example/src/main/java/example/AvroWordCount.java in
- the Avro documentation, for an example of implementing a
- Reducer that outputs Avro data using the old MapReduce API.
- See the MapReduceAvroWordCount application, found under
- examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java in
- the Avro documentation, for an example of implementing a
- Reducer that outputs Avro data using the new MapReduce API.
-
-
-
-
diff --git a/doc/src/content/xdocs/sasl.xml b/doc/src/content/xdocs/sasl.xml
deleted file mode 100644
index 514cca5aff1..00000000000
--- a/doc/src/content/xdocs/sasl.xml
+++ /dev/null
@@ -1,152 +0,0 @@
-
-
-
-
-
- Apache Avro &AvroVersion; SASL Profile
-
-
-
- Introduction
- SASL (RFC 2222)
- provides a framework for authentication and security of network
- protocols. Each protocol that uses SASL is meant to define a
- SASL profile. This document provides a SASL profile
- for connection-based Avro RPC.
-
-
-
- Overview
- SASL negotiation proceeds as a series of message interactions
- over a connection between a client and server using a selected
- SASL mechanism. The client starts this negotiation by
- sending its chosen mechanism name with an initial (possibly
- empty) message. Negotiation proceeds with the exchange of
- messages until either side indicates success or failure. The
- content of the messages is mechanism-specific. If the
- negotiation succeeds, then the session can proceed over the
- connection, otherwise it must be abandoned.
- Some mechanisms continue to process session data after
- negotiation (e.g., encrypting it), while some specify that
- further session data is transmitted unmodified.
-
-
-
- Negotiation
-
- Commands
- Avro SASL negotiation uses four one-byte commands.
-
- 0: START Used in a client's initial message.
- 1: CONTINUE Used while negotiation is ongoing.
- 2: FAIL Terminates negotiation unsuccessfully.
- 3: COMPLETE Terminates negotiation successfully.
-
-
- The format of a START message is:
- | 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |
-
- The format of a CONTINUE message is:
- | 1 | 4-byte payload length | payload data |
-
- The format of a FAIL message is:
- | 2 | 4-byte message length | UTF-8 message |
-
- The format of a COMPLETE message is:
- | 3 | 4-byte payload length | payload data |
-
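- As a concrete illustration, the sketch below packs a START message in
- Python, assuming the 4-byte lengths are big-endian (network order), as
- with Avro framing:
-
-import struct
-
-def start_message(mechanism: bytes, payload: bytes = b"") -> bytes:
-    # | 0 | 4-byte mechanism name length | mechanism name |
-    #     | 4-byte payload length | payload data |
-    return (b"\x00"
-            + struct.pack(">I", len(mechanism)) + mechanism
-            + struct.pack(">I", len(payload)) + payload)
-
-print(start_message(b"ANONYMOUS").hex())
-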
-
-
- Process
- Negotiation is initiated by a client sending a START command
- containing the client's chosen mechanism name and any
- mechanism-specific payload data.
-
- The server and client then interchange some number
- (possibly zero) of CONTINUE messages. Each message contains
- payload data that is processed by the security mechanism to
- generate the next message.
-
- Once either the client or server sends a FAIL message, then
- negotiation has failed. UTF-8-encoded text is included in
- the failure message. Once either a FAIL message has been
- sent or received, or any other error occurs in the
- negotiation, further communication on this connection must
- cease.
-
- Once either the client or server sends a COMPLETE message
- then negotiation has completed successfully. Session data
- may now be transmitted over the connection until it is
- closed by either side.
-
-
-
-
-
- Session Data
- If no SASL QOP (quality of protection) is negotiated, then
- all subsequent writes to/reads over this connection are
- written/read unmodified. In particular, messages use
- Avro framing, and are
- of the form:
- | 4-byte frame length | frame data | ... | 4 zero bytes |
- If a SASL QOP is negotiated, then it must be used by the
- connection for all subsequent messages. This is done by
- wrapping each non-empty frame written using the security
- mechanism and unwrapping each non-empty frame read. The
- length written in each non-empty frame is the length of the
- wrapped data. Complete frames must be passed to the security
- mechanism for unwrapping. Unwrapped data is then passed to
- the application as the content of the frame.
- If at any point processing fails due to wrapping, unwrapping
- or framing errors, then all further communication on this
- connection must cease.
-
-
-
- Anonymous Mechanism
- The SASL anonymous mechanism
- (RFC 2245) is
- quite simple to implement. In particular, an initial anonymous
- request may be prefixed by the following static sequence:
- | 0 | 0009 | ANONYMOUS | 0000 |
- If a server uses the anonymous mechanism, it should check
- that the mechanism name in the start message prefixing the first
- request received is 'ANONYMOUS', then simply prefix its initial
- response with a COMPLETE message of:
- | 3 | 0000 |
- If an anonymous server receives some other mechanism name,
- then it may respond with a FAIL message as simple as:
- | 2 | 0000 |
- Note that the anonymous mechanism need add no additional
- round-trip messages between client and server. The START
- message can be piggybacked on the initial request and the
- COMPLETE or FAIL message can be piggybacked on the initial
- response.
-
-
- Apache Avro, Avro, Apache, and the Avro and Apache logos are
- trademarks of The Apache Software Foundation.
-
-
-
diff --git a/doc/src/content/xdocs/site.xml b/doc/src/content/xdocs/site.xml
deleted file mode 100644
index d3dcbb9435c..00000000000
--- a/doc/src/content/xdocs/site.xml
+++ /dev/null
@@ -1,91 +0,0 @@
diff --git a/doc/src/content/xdocs/spec.xml b/doc/src/content/xdocs/spec.xml
deleted file mode 100644
index 09a9d353d12..00000000000
--- a/doc/src/content/xdocs/spec.xml
+++ /dev/null
@@ -1,1624 +0,0 @@
-
-
-
-
-
- Apache Avro &AvroVersion; Specification
-
-
-
-
- Introduction
-
- This document defines Apache Avro. It is intended to be the
- authoritative specification. Implementations of Avro must
- adhere to this document.
-
-
-
-
-
- Schema Declaration
- A Schema is represented in JSON by one of:
-
- A JSON string, naming a defined type.
-
- A JSON object, of the form:
-
- {"type": "typeName " ...attributes ...}
-
- where typeName is either a primitive or derived
- type name, as defined below. Attributes not defined in this
- document are permitted as metadata, but must not affect
- the format of serialized data.
-
- A JSON array, representing a union of embedded types.
-
-
-
- Primitive Types
- The set of primitive type names is:
-
- null: no value
- boolean: a binary value
- int: 32-bit signed integer
- long: 64-bit signed integer
- float: single precision (32-bit) IEEE 754 floating-point number
- double: double precision (64-bit) IEEE 754 floating-point number
- bytes: sequence of 8-bit unsigned bytes
- string: unicode character sequence
-
-
- Primitive types have no specified attributes.
-
- Primitive type names are also defined type names. Thus, for
- example, the schema "string" is equivalent to:
-
- {"type": "string"}
-
-
-
-
- Complex Types
-
- Avro supports six kinds of complex types: records, enums,
- arrays, maps, unions and fixed.
-
-
- Records
-
- Records use the type name "record" and support the following attributes:
-
- name: a JSON string providing the name
- of the record (required).
- namespace, a JSON string that qualifies the name;
- doc: a JSON string providing documentation to the
- user of this schema (optional).
- aliases: a JSON array of strings, providing
- alternate names for this record (optional).
- fields: a JSON array, listing fields (required).
- Each field is a JSON object with the following attributes:
-
- name: a JSON string providing the name
- of the field (required), and
- doc: a JSON string describing this field
- for users (optional).
- type: a schema , as defined above
- default: A default value for this
- field, only used when reading instances that lack
- the field for schema evolution purposes. The
- presence of a default value does not make the
- field optional at encoding time. Permitted values
- depend on the field's schema type, according to the
- table below. Default values for union fields correspond
- to the first schema in the union. Default values for bytes
- and fixed fields are JSON strings, where Unicode
- code points 0-255 are mapped to unsigned 8-bit byte
- values 0-255. Avro encodes a field even if its
- value is equal to its default.
-
- field default values
-
- avro type      json type    example
- null           null         null
- boolean        boolean      true
- int,long       integer      1
- float,double   number       1.1
- bytes          string       "\u00FF"
- string         string       "foo"
- record         object       {"a": 1}
- enum           string       "FOO"
- array          array        [1]
- map            object       {"a": 1}
- fixed          string       "\u00ff"
-
-
- order: specifies how this field
- impacts sort ordering of this record (optional).
- Valid values are "ascending" (the default),
- "descending", or "ignore". For more details on how
- this is used, see the sort
- order section below.
- aliases: a JSON array of strings, providing
- alternate names for this field (optional).
-
-
-
-
- For example, a linked-list of 64-bit values may be defined with:
-
-{
- "type": "record",
- "name": "LongList",
- "aliases": ["LinkedLongs"], // old name for this
- "fields" : [
- {"name": "value", "type": "long"}, // each element has a long
- {"name": "next", "type": ["null", "LongList"]} // optional next element
- ]
-}
-
-
-
-
- Enums
-
- Enums use the type name "enum" and support the following
- attributes:
-
- name: a JSON string providing the name
- of the enum (required).
- namespace, a JSON string that qualifies the name;
- aliases: a JSON array of strings, providing
- alternate names for this enum (optional).
- doc: a JSON string providing documentation to the
- user of this schema (optional).
- symbols: a JSON array, listing symbols,
- as JSON strings (required). All symbols in an enum must
- be unique; duplicates are prohibited. Every symbol must
- match the regular expression [A-Za-z_][A-Za-z0-9_]*
- (the same requirement as for names).
- default: A default value for this
- enumeration, used during resolution when the reader
- encounters a symbol from the writer that isn't defined
- in the reader's schema (optional). The value provided
- here must be a JSON string that's a member of
- the symbols array.
- See documentation on schema resolution for how this gets
- used.
-
- For example, playing card suits might be defined with:
-
-{
- "type": "enum",
- "name": "Suit",
- "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
-}
-
-
-
-
- Arrays
- Arrays use the type name "array" and support
- a single attribute:
-
- items: the schema of the array's items.
-
- For example, an array of strings is declared
- with:
-
-{
- "type": "array",
- "items" : "string",
- "default": []
-}
-
-
-
-
- Maps
- Maps use the type name "map" and support
- one attribute:
-
- values: the schema of the map's values.
-
- Map keys are assumed to be strings.
- For example, a map from string to long is declared
- with:
-
-{
- "type": "map",
- "values" : "long",
- "default": {}
-}
-
-
-
-
- Unions
- Unions, as mentioned above, are represented using JSON
- arrays. For example, ["null", "string"]
- declares a schema which may be either a null or string.
- (Note that when a default
- value is specified for a record field whose type is a
- union, the type of the default value must match the
- first element of the union. Thus, for unions
- containing "null", the "null" is usually listed first, since
- the default value of such unions is typically null.)
- Unions may not contain more than one schema with the same
- type, except for the named types record, fixed and enum. For
- example, unions containing two array types or two map types
- are not permitted, but two types with different names are
- permitted. (Names permit efficient resolution when reading
- and writing unions.)
- Unions may not immediately contain other unions.
-
-
-
- Fixed
- Fixed uses the type name "fixed" and supports
- the following attributes:
-
- name: a string naming this fixed (required).
- namespace, a string that qualifies the name;
- aliases: a JSON array of strings, providing
- alternate names for this fixed (optional).
- doc: a JSON string providing documentation to the
- user of this schema (optional).
- size: an integer, specifying the number
- of bytes per value (required).
-
- For example, a 16-byte quantity may be declared with:
- {"type": "fixed", "size": 16, "name": "md5"}
-
-
-
-
-
-
- Names
- Records, enums and fixed are named types. Each has
- a fullname that is composed of two parts:
- a name and a namespace. Equality of names
- is defined on the fullname.
- The name portion of a fullname, record field names, and
- enum symbols must:
-
- start with [A-Za-z_]
- subsequently contain only [A-Za-z0-9_]
-
- A namespace is a dot-separated sequence of such names.
- The empty string may also be used as a namespace to indicate the
- null namespace.
- Equality of names (including field names and enum symbols)
- as well as fullnames is case-sensitive.
- The null namespace may not be used in a dot-separated
- sequence of names. So the grammar for a namespace
- is:
- <empty> | <name>[(<dot><name>)*]
- In record, enum and fixed definitions, the fullname is
- determined in one of the following ways:
-
- A name and namespace are both specified. For example,
- one might use "name": "X", "namespace":
- "org.foo" to indicate the
- fullname org.foo.X.
- A fullname is specified. If the name specified contains
- a dot, then it is assumed to be a fullname, and any
- namespace also specified is ignored. For example,
- use "name": "org.foo.X" to indicate the
- fullname org.foo.X.
- A name only is specified, i.e., a name that contains no
- dots. In this case the namespace is taken from the most
- tightly enclosing schema or protocol. For example,
- if "name": "X" is specified, and this occurs
- within a field of the record definition
- of org.foo.Y, then the fullname
- is org.foo.X. If there is no enclosing
- namespace then the null namespace is used.
-
- References to previously defined names are as in the latter
- two cases above: if they contain a dot they are a fullname, if
- they do not contain a dot, the namespace is the namespace of
- the enclosing definition.
- Primitive type names have no namespace and their names may
- not be defined in any namespace.
- A schema or protocol may not contain multiple definitions
- of a fullname. Further, a name must be defined before it is
- used ("before" in the depth-first, left-to-right traversal of
- the JSON parse tree, where the types attribute of
- a protocol is always deemed to come "before" the
- messages attribute.)
-
-
-
-
- Aliases
- Named types and fields may have aliases. An implementation
- may optionally use aliases to map a writer's schema to the
- reader's. This facilitates both schema evolution as well as
- processing disparate datasets.
- Aliases function by re-writing the writer's schema using
- aliases from the reader's schema. For example, if the
- writer's schema was named "Foo" and the reader's schema is
- named "Bar" and has an alias of "Foo", then the implementation
- would act as though "Foo" were named "Bar" when reading.
- Similarly, if data was written as a record with a field named
- "x" and is read as a record with a field named "y" with alias
- "x", then the implementation would act as though "x" were
- named "y" when reading.
- A type alias may be specified either as a fully
- namespace-qualified name, or relative to the namespace of the name
- it is an alias for. For example, if a type named "a.b" has
- aliases of "c" and "x.y", then the fully qualified names of
- its aliases are "a.c" and "x.y".
-
-
-
-
-
- Data Serialization and Deserialization
-
- Binary encoded Avro data does not include type information or
- field names. The benefit is that the serialized data is small, but
- as a result a schema must always be used in order to read Avro data
- correctly. The best way to ensure that the schema is structurally
- identical to the one used to write the data is to use the exact same
- schema.
-
- Therefore, files or systems that store Avro data should always
- include the writer's schema for that data. Avro-based remote procedure
- call (RPC) systems must also guarantee that remote recipients of data
- have a copy of the schema used to write that data. In general, it is
- advisable that any reader of Avro data should use a schema that is
- the same (as defined more fully in
- Parsing Canonical Form for
- Schemas) as the schema that was used to write the data in order to
- deserialize it correctly. Deserializing data into a newer schema is
- accomplished by specifying an additional schema, the results of which are
- described in Schema Resolution.
-
- In general, both serialization and deserialization proceed as a
- depth-first, left-to-right traversal of the schema, serializing or
- deserializing primitive types as they are encountered. Therefore, it is
- possible, though not advisable, to read Avro data with a schema that
- does not have the same Parsing Canonical Form as the schema with which
- the data was written. In order for this to work, the serialized primitive
- values must be compatible, in order value by value, with the items in the
- deserialization schema. For example, int and long are always serialized
- the same way, so an int could be deserialized as a long. Since the
- compatibility of two schemas depends on both the data and the
- serialization format (e.g., binary is more permissive than JSON because JSON
- includes field names; e.g., a long that is too large will overflow an int),
- it is simpler and more reliable to use schemas with identical Parsing
- Canonical Form.
-
-
- Encodings
- Avro specifies two serialization encodings: binary and
- JSON. Most applications will use the binary encoding, as it
- is smaller and faster. But, for debugging and web-based
- applications, the JSON encoding may sometimes be
- appropriate.
-
-
-
- Binary Encoding
- Binary encoding does not include field names, self-contained
- information about the types of individual bytes, nor field or
- record separators. Therefore readers are wholly reliant on
- the schema used when the data was encoded.
-
-
- Primitive Types
- Primitive types are encoded in binary as follows:
-
- null is written as zero bytes.
- a boolean is written as a single byte whose
- value is either 0 (false) or 1
- (true).
- int and long values are written
- using variable-length
- zig-zag coding. Some examples:
-
-                value  hex
-                    0  00
-                   -1  01
-                    1  02
-                   -2  03
-                    2  04
-                  ...
-                  -64  7f
-                   64  80 01
-                  ...
-                (A sketch implementation of this encoding appears after this list.)
-
-
- a float is written as 4 bytes. The float is
- converted into a 32-bit integer using a method equivalent
- to Java's floatToIntBits and then encoded
- in little-endian format.
- a double is written as 8 bytes. The double
- is converted into a 64-bit integer using a method equivalent
- to Java's
- doubleToLongBits and then encoded in little-endian
- format.
- bytes are encoded as
- a long followed by that many bytes of data.
-
- a string is encoded as
- a long followed by that many bytes of UTF-8
- encoded character data.
- For example, the three-character string "foo" would
- be encoded as the long value 3 (encoded as
- hex 06) followed by the UTF-8 encoding of
- 'f', 'o', and 'o' (the hex bytes 66 6f
- 6f):
-
- 06 66 6f 6f
-
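- The sketch below implements the zig-zag variable-length encoding in
- Python and checks it against the examples in the table above:
-
-def zigzag(n):
-    # Map signed to unsigned: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
-    # (n >> 63 is 0 for non-negative n and all ones for negative n)
-    return (n << 1) ^ (n >> 63)
-
-def encode_long(n):
-    # Little-endian base-128: the high bit of each byte marks a continuation.
-    n = zigzag(n)
-    out = bytearray()
-    while n > 0x7F:
-        out.append((n & 0x7F) | 0x80)
-        n >>= 7
-    out.append(n)
-    return bytes(out)
-
-assert encode_long(0) == b"\x00"
-assert encode_long(-1) == b"\x01"
-assert encode_long(-64) == b"\x7f"
-assert encode_long(64) == b"\x80\x01"
-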
-
-
-
-
-
-
- Complex Types
- Complex types are encoded in binary as follows:
-
-
- Records
- A record is encoded by encoding the values of its
- fields in the order that they are declared. In other
- words, a record is encoded as just the concatenation of
- the encodings of its fields. Field values are encoded per
- their schema.
- For example, the record schema
-
- {
- "type": "record",
- "name": "test",
- "fields" : [
- {"name": "a", "type": "long"},
- {"name": "b", "type": "string"}
- ]
- }
-
- An instance of this record whose a field has
- value 27 (encoded as hex 36) and
- whose b field has value "foo" (encoded as hex
- bytes 06 66 6f 6f), would be encoded simply
- as the concatenation of these, namely the hex byte
- sequence:
- 36 06 66 6f 6f
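-
- This can be checked against the Python implementation; the sketch below
- uses the library's low-level DatumWriter and BinaryEncoder classes:
-
-import io
-
-import avro.schema
-from avro.io import BinaryEncoder, DatumWriter
-
-schema = avro.schema.parse(
-    '{"type": "record", "name": "test", "fields": ['
-    ' {"name": "a", "type": "long"},'
-    ' {"name": "b", "type": "string"}]}')
-
-buf = io.BytesIO()
-DatumWriter(schema).write({"a": 27, "b": "foo"}, BinaryEncoder(buf))
-print(buf.getvalue().hex())  # prints 3606666f6f
-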
-
-
-
- Enums
- An enum is encoded by an int, representing
- the zero-based position of the symbol in the schema.
- For example, consider the enum:
-
- {"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
-
- This would be encoded by an int between
- zero and three, with zero indicating "A", and 3 indicating
- "D".
-
-
-
-
- Arrays
- Arrays are encoded as a series of blocks.
- Each block consists of a long count
- value, followed by that many array items. A block with
- count zero indicates the end of the array. Each item is
- encoded per the array's item schema.
-
- If a block's count is negative, its absolute value is used,
- and the count is followed immediately by a long
- block size indicating the number of bytes in the
- block. This block size permits fast skipping through data,
- e.g., when projecting a record to a subset of its fields.
-
- For example, given the array schema
- {"type": "array", "items": "long"},
- an array containing the items 3 and 27 could be encoded
- as the long value 2 (encoded as hex 04) followed by long
- values 3 and 27 (encoded as hex 06 36)
- terminated by zero:
- 04 06 36 00
-
- The blocked representation permits one to read and write
- arrays larger than can be buffered in memory, since one can
- start writing items without knowing the full length of the
- array.
-
-
-
-
- Maps
- Maps are encoded as a series of blocks. Each
- block consists of a long count
- value, followed by that many key/value pairs. A block
- with count zero indicates the end of the map. Each item
- is encoded per the map's value schema.
-
- If a block's count is negative, its absolute value is used,
- and the count is followed immediately by a long
- block size indicating the number of bytes in the
- block. This block size permits fast skipping through data,
- e.g., when projecting a record to a subset of its fields.
-
- The blocked representation permits one to read and write
- maps larger than can be buffered in memory, since one can
- start writing items without knowing the full length of the
- map.
-
-
-
-
- Unions
- A union is encoded by first writing an int
- value indicating the zero-based position within the
- union of the schema of its value. The value is then
- encoded per the indicated schema within the union.
- For example, the union
- schema ["null","string"] would encode:
-
- null as zero (the index of "null" in the union):
- 00
- the string "a" as one (the index of
- "string" in the union, encoded as hex 02),
- followed by the serialized string:
- 02 02 61
-
- NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long.
- In reality, we don't expect unions with 215M members.
-
-
-
- Fixed
- Fixed instances are encoded using the number of bytes
- declared in the schema.
-
-
-
-
-
-
-
- JSON Encoding
-
- Except for unions, the JSON encoding is the same as is used
- to encode field default
- values.
-
- The value of a union is encoded in JSON as follows:
-
-
- if its type is null, then it is encoded as
- a JSON null;
- otherwise it is encoded as a JSON object with one
- name/value pair whose name is the type's name and whose
- value is the recursively encoded value. For Avro's named
- types (record, fixed or enum) the user-specified name is
- used, for other types the type name is used.
-
-
- For example, the union
- schema ["null","string","Foo"], where Foo is a
- record name, would encode:
-
- null as null;
- the string "a" as
- {"string": "a"}; and
- a Foo instance as {"Foo": {...}},
- where {...} indicates the JSON encoding of a
- Foo instance.
-
-
- Note that the original schema is still required to correctly
- process JSON-encoded data. For example, the JSON encoding does not
- distinguish between int
- and long, float
- and double, records and maps, enums and strings,
- etc.
-
-
-
-
- Single-object encoding
-
- In some situations a single Avro serialized object is to be stored for a
- longer period of time. One very common example is storing Avro records
- for several weeks in an Apache Kafka topic.
- In the period after a schema change this persistence system will contain records
- that have been written with different schemas. So the need arises to know which schema
- was used to write a record to support schema evolution correctly.
- In most cases the schema itself is too large to include in the message,
- so this binary wrapper format supports the use case more effectively.
-
-
- Single object encoding specification
- Single Avro objects are encoded as follows:
-
- A two-byte marker, C3 01, to show that the message is Avro and uses this single-record format (version 1).
- The 8-byte little-endian CRC-64-AVRO fingerprint of the object's schema
- The Avro object encoded using Avro's binary encoding
-
-
-
- Implementations use the 2-byte marker to determine whether a payload is Avro.
- This check helps avoid expensive lookups that resolve the schema from a
- fingerprint, when the message is not an encoded Avro payload.
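-
- A minimal sketch of this format in Python follows. It assumes the
- CRC-64-AVRO (Rabin) fingerprint defined by this specification, computed
- over the schema's Parsing Canonical Form and written little-endian:
-
-EMPTY = 0xC15D213AA4D7A795  # CRC-64-AVRO empty value
-
-_TABLE = []
-for i in range(256):
-    fp = i
-    for _ in range(8):
-        fp = (fp >> 1) ^ (EMPTY & -(fp & 1) & 0xFFFFFFFFFFFFFFFF)
-    _TABLE.append(fp)
-
-def fingerprint64(canonical_schema: bytes) -> int:
-    fp = EMPTY
-    for byte in canonical_schema:
-        fp = (fp >> 8) ^ _TABLE[(fp ^ byte) & 0xFF]
-    return fp
-
-def single_object_message(canonical_schema: bytes, encoded_datum: bytes) -> bytes:
-    # C3 01 marker, 8-byte little-endian schema fingerprint, then the datum
-    header = b"\xc3\x01" + fingerprint64(canonical_schema).to_bytes(8, "little")
-    return header + encoded_datum
-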
-
-
-
-
-
-
- Sort Order
-
- Avro defines a standard sort order for data. This permits
- data written by one system to be efficiently sorted by another
- system. This can be an important optimization, as sort order
- comparisons are sometimes the most frequent per-object
- operation. Note also that Avro binary-encoded data can be
- efficiently ordered without deserializing it to objects.
-
- Data items may only be compared if they have identical
- schemas. Pairwise comparisons are implemented recursively
- with a depth-first, left-to-right traversal of the schema.
- The first mismatch encountered determines the order of the
- items.
-
- Two items with the same schema are compared according to the
- following rules.
-
- null data is always equal.
- boolean data is ordered with false before true.
- int, long, float
- and double data is ordered by ascending numeric
- value.
- bytes and fixed data are
- compared lexicographically by unsigned 8-bit values.
- string data is compared lexicographically by
- Unicode code point. Note that since UTF-8 is used as the
- binary encoding for strings, sorting of bytes and string
- binary data is identical.
- array data is compared lexicographically by
- element.
- enum data is ordered by the symbol's position
- in the enum schema. For example, an enum whose symbols are
- ["z", "a"] would sort "z" values
- before "a" values.
- union data is first ordered by the branch
- within the union, and, within that, by the type of the
- branch. For example, an ["int", "string"]
- union would order all int values before all string values,
- with the ints and strings themselves ordered as defined
- above.
- record data is ordered lexicographically by
- field. If a field specifies that its order is:
-
- "ascending", then the order of its values
- is unaltered.
- "descending", then the order of its values
- is reversed.
- "ignore", then its values are ignored
- when sorting.
-
-
- map data may not be compared. It is an error
- to attempt to compare data containing maps unless those maps
- are in an "order":"ignore" record field.
-
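-
- For example, in the following record schema, comparisons would order
- records by descending key, while the value field (a map,
- which cannot be compared) is excluded from ordering entirely:
-
-{"type": "record", "name": "Pair", "fields": [
-    {"name": "key", "type": "string", "order": "descending"},
-    {"name": "value", "type": {"type": "map", "values": "long"},
-     "order": "ignore"}
-]}
-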
-
-
-
-
- Object Container Files
- Avro includes a simple object container file format. A file
- has a schema, and all objects stored in the file must be written
- according to that schema, using binary encoding. Objects are
- stored in blocks that may be compressed. Synchronization markers
- are used between blocks to permit efficient splitting of files
- for MapReduce processing.
-
- Files may include arbitrary user-specified metadata.
-
- A file consists of:
-
- A file header, followed by
- one or more file data blocks.
-
-
- A file header consists of:
-
- Four bytes, ASCII 'O', 'b', 'j', followed by the byte 1 (the format version number).
- file metadata, including the schema.
- The 16-byte, randomly-generated sync marker for this file.
-
-
- File metadata is written as if defined by the following map schema:
- {"type": "map", "values": "bytes"}
-
- All metadata properties that start with "avro." are reserved.
- The following file metadata properties are currently used:
-
- avro.schema contains the schema of objects
- stored in the file, as JSON data (required).
- avro.codec the name of the compression codec
- used to compress blocks, as a string. Implementations
- are required to support the following codecs: "null" and "deflate".
- If codec is absent, it is assumed to be "null". The codecs
- are described with more detail below.
-
-
- A file header is thus described by the following schema:
-
-{"type": "record", "name": "org.apache.avro.file.Header",
- "fields" : [
- {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}},
- {"name": "meta", "type": {"type": "map", "values": "bytes"}},
- {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}},
- ]
-}
-
-
- A file data block consists of:
-
- A long indicating the count of objects in this block.
- A long indicating the size in bytes of the serialized objects
- in the current block, after any codec is applied.
- The serialized objects. If a codec is specified, this is
- compressed by that codec.
- The file's 16-byte sync marker.
-
- Thus, each block's binary data can be efficiently extracted or skipped without
- deserializing the contents. The combination of block size, object counts, and
- sync markers enables detection of corrupt blocks and helps ensure data integrity.
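-
- As a non-normative illustration, a reader can skip a block without
- decoding it (Java; BlockSkipper is an illustrative name, and readLong
- implements Avro's variable-length zig-zag encoding of longs):
-
-import java.io.DataInputStream;
-import java.io.IOException;
-
-class BlockSkipper {
-    // Reads an Avro zig-zag encoded variable-length long.
-    static long readLong(DataInputStream in) throws IOException {
-        long n = 0;
-        int shift = 0, b;
-        do {
-            b = in.readUnsignedByte();
-            n |= (long) (b & 0x7f) << shift; // 7 data bits per byte
-            shift += 7;
-        } while ((b & 0x80) != 0);           // high bit set means more bytes
-        return (n >>> 1) ^ -(n & 1);         // zig-zag decode
-    }
-
-    // Skips one data block, returning its object count.
-    static long skipBlock(DataInputStream in) throws IOException {
-        long count = readLong(in);      // objects in this block
-        long size = readLong(in);       // byte length after any codec
-        in.skipBytes((int) size);       // skip without deserializing
-        byte[] sync = new byte[16];
-        in.readFully(sync);             // must match the file's sync marker
-        return count;
-    }
-}
-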
-
- Required Codecs
-
- null
- The "null" codec simply passes through data uncompressed.
-
-
-
- deflate
- The "deflate" codec writes the data block using the
- deflate algorithm as specified in
- RFC 1951,
- and typically implemented using the zlib library. Note that this
- format (unlike the "zlib format" in RFC 1950) does not have a
- checksum.
-
-
-
-
- Optional Codecs
-
- bzip2
- The "bzip2" codec uses the bzip2
- compression library.
-
-
-
- snappy
- The "snappy" codec uses
- Google's Snappy
- compression library. Each compressed block is followed
- by the 4-byte, big-endian CRC32 checksum of the
- uncompressed data in the block.
-
-
-
- xz
- The "xz" codec uses the XZ
- compression library.
-
-
-
- zstandard
- The "zstandard" codec uses
- Facebook's Zstandard
- compression library.
-
-
-
-
-
- Protocol Declaration
- Avro protocols describe RPC interfaces. Like schemas, they are
- defined with JSON text.
-
- A protocol is a JSON object with the following attributes:
-
- protocol, a string, the name of the protocol
- (required);
- namespace, an optional string that qualifies the name;
- doc, an optional string describing this protocol;
- types, an optional list of definitions of named types
- (records, enums, fixed and errors). An error definition is
- just like a record definition except it uses "error" instead
- of "record". Note that forward references to named types
- are not permitted.
- messages, an optional JSON object whose keys are
- message names and whose values are objects whose attributes
- are described below. No two messages may have the same
- name.
-
- The name and namespace qualification rules defined for schema objects
- apply to protocols as well.
-
-
- Messages
- A message has attributes:
-
- a doc, an optional description of the message;
- a request, a list of named,
- typed parameter schemas (this has the same form
- as the fields of a record declaration);
- a response schema;
- an optional union of declared error schemas.
- The effective union has "string"
- prepended to the declared union, to permit transmission of
- undeclared "system" errors. For example, if the declared
- error union is ["AccessError"], then the
- effective union is ["string", "AccessError"].
- When no errors are declared, the effective error union
- is ["string"]. Errors are serialized using
- the effective union; however, a protocol's JSON
- declaration contains only the declared union.
-
- an optional one-way boolean parameter.
-
- A request parameter list is processed equivalently to an
- anonymous record. Since record field lists may vary between
- reader and writer, request parameters may also differ
- between the caller and responder, and such differences are
- resolved in the same manner as record field differences.
- The one-way parameter may only be true when the response type
- is "null" and no errors are listed.
-
-
- Sample Protocol
- For example, one may define a simple HelloWorld protocol with:
-
-{
- "namespace": "com.acme",
- "protocol": "HelloWorld",
- "doc": "Protocol Greetings",
-
- "types": [
- {"name": "Greeting", "type": "record", "fields": [
- {"name": "message", "type": "string"}]},
- {"name": "Curse", "type": "error", "fields": [
- {"name": "message", "type": "string"}]}
- ],
-
- "messages": {
- "hello": {
- "doc": "Say hello.",
- "request": [{"name": "greeting", "type": "Greeting" }],
- "response": "Greeting",
- "errors": ["Curse"]
- }
- }
-}
-
-
-
-
-
- Protocol Wire Format
-
-
- Message Transport
- Messages may be transmitted via
- different transport mechanisms.
-
- To the transport, a message is an opaque byte sequence.
-
- A transport is a system that supports:
-
- transmission of request messages
-
- receipt of corresponding response messages
- Servers may send a response message back to the client
- corresponding to a request message. The mechanism of
- correspondence is transport-specific. For example, in
- HTTP it is implicit, since HTTP directly supports requests
- and responses. But a transport that multiplexes many
- client threads over a single socket would need to tag
- messages with unique identifiers.
-
-
-
- Transports may be either stateless
- or stateful. In a stateless transport, messaging
- assumes no established connection state, while stateful
- transports establish connections that may be used for multiple
- messages. This distinction is discussed further in
- the handshake section below.
-
-
- HTTP as Transport
- When
- HTTP
- is used as a transport, each Avro message exchange is an
- HTTP request/response pair. All messages of an Avro
- protocol should share a single URL at an HTTP server.
- Other protocols may also use that URL. Both normal and
- error Avro response messages should use the 200 (OK)
- response code. The chunked encoding may be used for
- requests and responses, but, regardless, the Avro request
- and response are the entire content of an HTTP request and
- response. The HTTP Content-Type of requests and responses
- should be specified as "avro/binary". Requests should be
- made using the POST method.
- HTTP is used by Avro as a stateless transport.
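-
- An exchange might therefore look as follows on the wire (the URL
- path is arbitrary and chosen by the service; only the method, the
- status code and the Content-Type are prescribed above):
-
-POST /avro HTTP/1.1
-Content-Type: avro/binary
-Content-Length: ...
-
-(framed Avro request bytes)
-
-HTTP/1.1 200 OK
-Content-Type: avro/binary
-Content-Length: ...
-
-(framed Avro response bytes)
-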
-
-
-
-
- Message Framing
- Avro messages are framed as a list of buffers.
- Framing is a layer between messages and the transport.
- It exists to optimize certain operations.
-
- The format of framed message data is:
-
- a series of buffers, where each buffer consists of:
-
- a four-byte, big-endian buffer length, followed by
- that many bytes of buffer data.
-
-
- A message is always terminated by a zero-length buffer.
-
-
- Framing is transparent to request and response message
- formats (described below). Any message may be presented as a
- single or multiple buffers.
-
- Framing can permit readers to more efficiently get
- different buffers from different sources and for writers to
- more efficiently store different buffers to different
- destinations. In particular, it can reduce the number of
- times large binary objects are copied. For example, if an RPC
- parameter consists of a megabyte of file data, that data can
- be copied directly to a socket from a file descriptor, and, on
- the other end, it could be written directly to a file
- descriptor, never entering user space.
-
- A simple, recommended framing policy is for writers to
- create a new segment whenever a single binary object is
- written that is larger than a normal output buffer. Small
- objects are then appended in buffers, while larger objects are
- written as their own buffers. When a reader then tries to
- read a large object the runtime can hand it an entire buffer
- directly, without having to copy it.
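-
- As a concrete, non-normative sketch of the framing layer in Java
- (Framing and writeFramed are illustrative names; the message is
- assumed to be available as a list of byte arrays):
-
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.List;
-
-class Framing {
-    // Writes each buffer as a 4-byte big-endian length followed by its
-    // bytes, then terminates the message with a zero-length buffer.
-    static void writeFramed(List<byte[]> buffers, OutputStream raw) throws IOException {
-        DataOutputStream out = new DataOutputStream(raw);
-        for (byte[] buffer : buffers) {
-            out.writeInt(buffer.length); // big-endian, per the framing format
-            out.write(buffer);
-        }
-        out.writeInt(0);                 // zero-length buffer ends the message
-        out.flush();
-    }
-}
-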
-
-
-
- Handshake
-
- The purpose of the handshake is to ensure that the client
- and the server have each other's protocol definition, so that
- the client can correctly deserialize responses, and the server
- can correctly deserialize requests. Both clients and servers
- should maintain a cache of recently seen protocols, so that,
- in most cases, a handshake will be completed without extra
- round-trip network exchanges or the transmission of full
- protocol text.
-
- RPC requests and responses may not be processed until a
- handshake has been completed. With a stateless transport, all
- requests and responses are prefixed by handshakes. With a
- stateful transport, handshakes are only attached to requests
- and responses until a successful handshake response has been
- returned over a connection. After this, request and response
- payloads are sent without handshakes for the lifetime of that
- connection.
-
- The handshake process uses the following record schemas:
-
-
-{
- "type": "record",
- "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc",
- "fields": [
- {"name": "clientHash",
- "type": {"type": "fixed", "name": "MD5", "size": 16}},
- {"name": "clientProtocol", "type": ["null", "string"]},
- {"name": "serverHash", "type": "MD5"},
- {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}
- ]
-}
-{
- "type": "record",
- "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc",
- "fields": [
- {"name": "match",
- "type": {"type": "enum", "name": "HandshakeMatch",
- "symbols": ["BOTH", "CLIENT", "NONE"]}},
- {"name": "serverProtocol",
- "type": ["null", "string"]},
- {"name": "serverHash",
- "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]},
- {"name": "meta",
- "type": ["null", {"type": "map", "values": "bytes"}]}
- ]
-}
-
-
-
- A client first prefixes each request with
- a HandshakeRequest containing just the hash of
- its protocol and of the server's protocol
- (clientHash!=null, clientProtocol=null,
- serverHash!=null), where the hashes are 128-bit MD5
- hashes of the JSON protocol text. If a client has never
- connected to a given server, it sends its hash as a guess of
- the server's hash, otherwise it sends the hash that it
- previously obtained from this server.
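-
- Concretely, the hash can be computed as in this minimal Java sketch
- (class and method names are illustrative):
-
-import java.nio.charset.StandardCharsets;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-
-class ProtocolHash {
-    // The hashes exchanged in the handshake are 128-bit MD5 digests
-    // of the JSON protocol text.
-    static byte[] hash(String protocolJson) throws NoSuchAlgorithmException {
-        return MessageDigest.getInstance("MD5")
-                .digest(protocolJson.getBytes(StandardCharsets.UTF_8));
-    }
-}
-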
-
- The server responds with
- a HandshakeResponse containing one of:
-
- match=BOTH, serverProtocol=null,
- serverHash=null if the client sent the valid hash
- of the server's protocol and the server knows what
- protocol corresponds to the client's hash. In this case,
- the request is complete and the response data
- immediately follows the HandshakeResponse.
-
- match=CLIENT, serverProtocol!=null,
- serverHash!=null if the server has previously
- seen the client's protocol, but the client sent an
- incorrect hash of the server's protocol. The request is
- complete and the response data immediately follows the
- HandshakeResponse. The client must use the returned
- protocol to process the response and should also cache
- that protocol and its hash for future interactions with
- this server.
-
- match=NONE if the server has not
- previously seen the client's protocol.
- The serverHash
- and serverProtocol may also be non-null if
- the server's protocol hash was incorrect.
-
- In this case the client must then re-submit its request
- with its protocol text (clientHash!=null,
- clientProtocol!=null, serverHash!=null) and the
- server should respond with a successful match
- (match=BOTH, serverProtocol=null,
- serverHash=null) as above.
-
-
-
-
-
- The meta field is reserved for future
- handshake enhancements.
-
-
-
-
- Call Format
- A call consists of a request message paired with
- its resulting response or error message. Requests and
- responses contain extensible metadata, and both kinds of
- messages are framed as described above.
-
- The format of a call request is:
-
- request metadata, a map with values of
- type bytes
- the message name, an Avro string,
- followed by
- the message parameters. Parameters are
- serialized according to the message's request
- declaration.
-
-
- When the empty string is used as a message name, a server
- should ignore the parameters and return an empty response. A
- client may use this to ping a server or to perform a handshake
- without sending a protocol message.
-
- When a message is declared one-way and a stateful
- connection has been established by a successful handshake
- response, no response data is sent. Otherwise the format of
- the call response is:
-
- response metadata, a map with values of
- type bytes
- a one-byte error flag boolean, followed by either:
-
- if the error flag is false, the message response ,
- serialized per the message's response schema.
- if the error flag is true, the error ,
- serialized per the message's effective error union
- schema.
-
-
-
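-
- A sketch of dispatching on the error flag that follows the response
- metadata (Java; readResponse and readError are placeholders for
- schema-driven Avro decoding, not a specific library API):
-
-import java.io.DataInputStream;
-import java.io.IOException;
-
-class CallResponse {
-    static Object read(DataInputStream in) throws IOException {
-        boolean isError = in.readBoolean(); // one byte: 0 = response, 1 = error
-        return isError ? readError(in)      // per the effective error union
-                       : readResponse(in);  // per the message's response schema
-    }
-    static Object readResponse(DataInputStream in) { return null; } // stub
-    static Object readError(DataInputStream in) { return null; }    // stub
-}
-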
-
-
-
-
-
- Schema Resolution
-
- A reader of Avro data, whether from an RPC or a file, can
- always parse that data because the original schema must be
- provided along with the data. However, the reader may be
- programmed to read data into a different schema.
- For example, if the data was written with a different version
- of the software than it is read, then fields may have been
- added or removed from records. This section specifies how such
- schema differences should be resolved.
-
- We refer to the schema used to write the data as
- the writer's schema, and the schema that the
- application expects as the reader's schema. Differences
- between these should be resolved as follows:
-
-
- It is an error if the two schemas do not match .
- To match, one of the following must hold:
-
- both schemas are arrays whose item types match
- both schemas are maps whose value types match
- both schemas are enums whose (unqualified) names match
- both schemas are fixed whose sizes and (unqualified) names match
- both schemas are records with the same (unqualified) name
- either schema is a union
- both schemas have the same primitive type
- the writer's schema may be promoted to the
- reader's as follows:
-
- int is promotable to long, float, or double
- long is promotable to float or double
- float is promotable to double
- string is promotable to bytes
- bytes is promotable to string
-
-
-
-
-
- if both are records:
-
- the ordering of fields may be different: fields are
- matched by name.
-
- schemas for fields with the same name in both records
- are resolved recursively.
-
- if the writer's record contains a field with a name
- not present in the reader's record, the writer's value
- for that field is ignored.
-
- if the reader's record schema has a field that
- contains a default value, and the writer's schema does not
- have a field with the same name, then the reader should
- use the default value from its field.
-
- if the reader's record schema has a field with no
- default value, and the writer's schema does not have a field
- with the same name, an error is signalled.
-
-
-
- if both are enums:
- if the writer's symbol is not present in the reader's
- enum and the reader has a default value, then
- that value is used; otherwise, an error is signalled.
-
-
- if both are arrays:
- This resolution algorithm is applied recursively to the reader's and
- writer's array item schemas.
-
-
- if both are maps:
- This resolution algorithm is applied recursively to the reader's and
- writer's value schemas.
-
-
- if both are unions:
- The first schema in the reader's union that matches the
- selected writer's union schema is recursively resolved
- against it. If none match, an error is signalled.
-
-
- if reader's is a union, but writer's is not
- The first schema in the reader's union that matches the
- writer's schema is recursively resolved against it. If none
- match, an error is signalled.
-
-
- if writer's is a union, but reader's is not
- If the reader's schema matches the selected writer's schema,
- it is recursively resolved against it. If they do not
- match, an error is signalled.
-
-
-
-
- A schema's "doc" fields are ignored for the purposes of schema resolution. Hence,
- the "doc" portion of a schema may be dropped at serialization.
-
-
-
-
- Parsing Canonical Form for Schemas
-
- One of the defining characteristics of Avro is that a reader
- must use the schema used by the writer of the data in
- order to know how to read the data. This assumption results in a data
- format that's compact and also amenable to many forms of schema
- evolution. However, the specification so far has not defined
- what it means for the reader to have the "same" schema as the
- writer. Does the schema need to be textually identical? Well,
- clearly adding or removing whitespace in a JSON expression
- does not change its meaning. At the same time, reordering the
- fields of records clearly does change the meaning. So
- what does it mean for a reader to have "the same" schema as a
- writer?
-
- Parsing Canonical Form is a transformation of a
- writer's schema that lets us define what it means for two
- schemas to be "the same" for the purpose of reading data written
- against the schema. It is called Parsing Canonical Form
- because the transformations strip away parts of the schema, like
- "doc" attributes, that are irrelevant to readers trying to parse
- incoming data. It is called Canonical Form because the
- transformations normalize the JSON text (such as the order of
- attributes) in a way that eliminates unimportant differences
- between schemas. If the Parsing Canonical Forms of two
- different schemas are textually equal, then those schemas are
- "the same" as far as any reader is concerned, i.e., there is no
- serialized data that would allow a reader to distinguish data
- generated by a writer using one of the original schemas from
- data generated by a writer using the other original schema.
- (We sketch a proof of this property in a companion
- document.)
-
- The next subsection specifies the transformations that define
- Parsing Canonical Form. But with a well-defined canonical form,
- it can be convenient to go one step further, transforming these
- canonical forms into simple integers ("fingerprints") that can
- be used to uniquely identify schemas. The subsection after next
- recommends some standard practices for generating such
- fingerprints.
-
-
- Transforming into Parsing Canonical Form
-
- Assuming an input schema (in JSON form) that's already
- UTF-8 text for a valid Avro schema (including all
- quotes as required by JSON), the following transformations
- will produce its Parsing Canonical Form:
-
- [PRIMITIVES] Convert primitive schemas to their simple
- form (e.g., int instead of
- {"type":"int"}).
-
- [FULLNAMES] Replace short names with fullnames, using
- applicable namespaces to do so. Then eliminate
- namespace attributes, which are now redundant.
-
- [STRIP] Keep only attributes that are relevant to
- parsing data, which are: type,
- name, fields,
- symbols, items,
- values, size. Strip all others
- (e.g., doc and aliases).
-
- [ORDER] Order the appearance of fields of JSON objects
- as follows: name, type,
- fields, symbols,
- items, values, size.
- For example, if an object has type,
- name, and size fields, then the
- name field should appear first, followed by the
- type and then the size fields.
-
- [STRINGS] For all JSON string literals in the schema
- text, replace any escaped characters (e.g., \uXXXX escapes)
- with their UTF-8 equivalents.
-
- [INTEGERS] Eliminate quotes around and any leading
- zeros in front of JSON integer literals (which appear in the
- size attributes of fixed schemas).
-
- [WHITESPACE] Eliminate all whitespace in JSON outside of string literals.
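-
- For example, applying these transformations to the schema
-
-{"type": "record", "name": "Test", "namespace": "example",
- "doc": "The doc attribute is stripped",
- "fields": [{"type": {"type": "int"}, "name": "n"}]}
-
- yields the Parsing Canonical Form
-
-{"name":"example.Test","type":"record","fields":[{"name":"n","type":"int"}]}
-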
-
-
-
-
- Schema Fingerprints
-
- "[A] fingerprinting algorithm is a procedure that maps an
- arbitrarily large data item (such as a computer file) to a
- much shorter bit string, its fingerprint, that
- uniquely identifies the original data for all practical
- purposes" (quoted from [Wikipedia ]).
- In the Avro context, fingerprints of Parsing Canonical Form
- can be useful in a number of applications; for example, to
- cache encoder and decoder objects, to tag data items with a
- short substitute for the writer's full schema, and to quickly
- negotiate common-case schemas between readers and writers.
-
- In designing fingerprinting algorithms, there is a
- fundamental trade-off between the length of the fingerprint
- and the probability of collisions. To help application
- designers find appropriate points within this trade-off space,
- while encouraging interoperability and ease of implementation,
- we recommend using one of the following three algorithms when
- fingerprinting Avro schemas:
-
-
- When applications can tolerate longer fingerprints, we
- recommend using the SHA-256 digest
- algorithm to generate 256-bit fingerprints of Parsing
- Canonical Forms. Most languages today have SHA-256
- implementations in their libraries.
-
- At the opposite extreme, the smallest fingerprint we
- recommend is a 64-bit Rabin
- fingerprint. Below, we provide pseudo-code for this
- algorithm that can be easily translated into any programming
- language. 64-bit fingerprints should guarantee uniqueness
- for schema caches of up to a million entries (for such a
- cache, the chance of a collision is 3E-8). We don't
- recommend shorter fingerprints, as the chance of a collision
- is too great (for example, with 32-bit fingerprints, a cache
- with as few as 100,000 schemas has a 50% chance of having a
- collision).
-
- Between these two extremes, we recommend using the MD5 message
- digest to generate 128-bit fingerprints. These make
- sense only where very large numbers of schemas are being
- manipulated (tens of millions); otherwise, 64-bit
- fingerprints should be sufficient. As with SHA-256, MD5
- implementations are found in most libraries today.
-
-
- These fingerprints are not meant to provide any
- security guarantees, even the longer SHA-256-based ones. Most
- Avro applications should be surrounded by security measures
- that prevent attackers from writing random data and otherwise
- interfering with the consumers of schemas. We recommend that
- these surrounding mechanisms be used to prevent collision and
- pre-image attacks (i.e., "forgery") on schema fingerprints,
- rather than relying on the security properties of the
- fingerprints themselves.
-
- Rabin fingerprints are cyclic
- redundancy checks computed using irreducible polynomials.
- In the style of the Appendix of RFC 1952
- (pg 10), which defines the CRC-32 algorithm, here's our
- definition of the 64-bit AVRO fingerprinting algorithm:
-
-
-long fingerprint64(byte[] buf) {
-  if (FP_TABLE == null) initFPTable();         // build the table on first use
-  long fp = EMPTY;                             // starting from EMPTY prepends a one bit
-  for (int i = 0; i < buf.length; i++)
-    fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff];
-  return fp;
-}
-
-static long EMPTY = 0xc15d213aa4d7a795L;       // the CRC-64-AVRO polynomial
-static long[] FP_TABLE = null;
-
-void initFPTable() {
-  FP_TABLE = new long[256];                    // one entry per byte value
-  for (int i = 0; i < 256; i++) {
-    long fp = i;
-    for (int j = 0; j < 8; j++)
-      fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L));  // conditionally XOR in the polynomial
-    FP_TABLE[i] = fp;
-  }
-}
-
-
- Readers interested in the mathematics behind this
- algorithm may want to read
- Chapter 14 of the Second Edition of Hacker's Delight.
- (Unlike RFC-1952 and the book chapter, we prepend
- a single one bit to messages. We do this because CRCs ignore
- leading zero bits, which can be problematic. Our code
- prepends a one-bit by initializing fingerprints using
- EMPTY, rather than initializing using zero as in
- RFC-1952 and the book chapter.)
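-
- To fingerprint a schema, fingerprint64 is applied to the UTF-8 bytes
- of the schema's Parsing Canonical Form; for example, in Java:
- long fp = fingerprint64(canonicalForm.getBytes(StandardCharsets.UTF_8));
-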
-
-
-
-
- Logical Types
-
- A logical type is an Avro primitive or complex type with extra attributes to
- represent a derived type. The attribute logicalType must
- always be present for a logical type, and is a string with the name of one of
- the logical types listed later in this section. Other attributes may be defined
- for particular logical types.
-
- A logical type is always serialized using its underlying Avro type so
- that values are encoded in exactly the same way as the equivalent Avro
- type that does not have a logicalType attribute. Language
- implementations may choose to represent logical types with an
- appropriate native type, although this is not required.
-
- Language implementations must ignore unknown logical types when
- reading, and should use the underlying Avro type. If a logical type is
- invalid, for example a decimal with scale greater than its precision,
- then implementations should ignore the logical type and use the
- underlying Avro type.
-
-
- Decimal
- The decimal logical type represents an arbitrary-precision signed
- decimal number of the form unscaled × 10^(-scale).
-
- A decimal logical type annotates Avro
- bytes or fixed types. The byte array must
- contain the two's-complement representation of the unscaled integer
- value in big-endian byte order. The scale is fixed, and is specified
- using an attribute.
-
- The following attributes are supported:
-
- scale, a JSON integer representing the scale
- (optional). If not specified, the scale is 0.
- precision, a JSON integer representing the (maximum)
- precision of decimals stored in this type (required).
-
-
- For example, the following schema represents decimal numbers with a
- maximum precision of 4 and a scale of 2:
-
-{
- "type": "bytes",
- "logicalType": "decimal",
- "precision": 4,
- "scale": 2
-}
-
-
- Precision must be a positive integer. If the
- underlying type is a fixed, then the precision is
- limited by its size. An array of length n can store at
- most floor(log_10(2^(8 × n - 1) - 1))
- base-10 digits of precision. For example, a fixed of
- size 4 can hold at most floor(log_10(2^31 - 1)) = 9 digits.
-
- Scale must be zero or a positive integer less than or equal to the
- precision.
-
- For the purposes of schema resolution, two schemas that are
- decimal logical types match if their scales and
- precisions match.
-
-
-
-
- UUID
-
- The uuid logical type represents a randomly generated universally unique identifier (UUID).
-
-
-
- A uuid logical type annotates an Avro string. The string must conform to RFC 4122.
-
-
-
-
- Date
-
- The date logical type represents a date within the calendar, with no reference to a particular time zone or time of day.
-
-
- A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar).
-
- The following schema represents a date:
-
-{
- "type": "int",
- "logicalType": "date"
-}
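-
- For example, the date 2012-03-04 would be stored as the int 15403,
- the number of days since the epoch.
-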
-
-
-
-
- Time (millisecond precision)
-
- The time-millis logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond.
-
-
- A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds after midnight, 00:00:00.000.
-
-
-
-
- Time (microsecond precision)
-
- The time-micros logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond.
-
-
- A time-micros logical type annotates an Avro long, where the long stores the number of microseconds after midnight, 00:00:00.000000.
-
-
-
-
- Timestamp (millisecond precision)
-
- The timestamp-millis logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond.
- Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation.
- In practice, such timestamps are typically displayed to users in their local time zones; therefore, they may be displayed differently depending on the execution environment.
-
-
- A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC.
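-
- For example, using java.time.Instant in Java (a non-normative
- sketch), such a value can be obtained with:
-
-long millis = Instant.parse("2012-03-04T05:06:07.089Z").toEpochMilli();
-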
-
-
-
-
- Timestamp (microsecond precision)
-
- The timestamp-micros logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond.
- Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation.
- In practice, such timestamps are typically displayed to users in their local time zones; therefore, they may be displayed differently depending on the execution environment.
-
-
- A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC.
-
-
-
-
- Local timestamp (millisecond precision)
-
- The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond.
-
-
- A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from 1 January 1970 00:00:00.000.
-
-
-
-
- Local timestamp (microsecond precision)
-
- The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one microsecond.
-
-
- A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from 1 January 1970 00:00:00.000000.
-
-
-
-
- Duration
-
- The duration logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods.
-
-
- A duration logical type annotates an Avro fixed type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds.
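-
- A non-normative sketch of packing such a value in Java:
-
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-
-class DurationCodec {
-    // Packs months/days/millis as three little-endian unsigned 32-bit
-    // integers into the 12-byte fixed.
-    static byte[] pack(int months, int days, int millis) {
-        return ByteBuffer.allocate(12)
-                .order(ByteOrder.LITTLE_ENDIAN)
-                .putInt(months).putInt(days).putInt(millis)
-                .array();
-    }
-}
-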
-
-
-
-
-
- Apache Avro, Avro, Apache, and the Avro and Apache logos are
- trademarks of The Apache Software Foundation.
-
-
-
diff --git a/doc/src/content/xdocs/tabs.xml b/doc/src/content/xdocs/tabs.xml
deleted file mode 100644
index 97e7d2c3813..00000000000
--- a/doc/src/content/xdocs/tabs.xml
+++ /dev/null
@@ -1,39 +0,0 @@
-
-
-
- %avro-entities;
-]>
-
-
-
-
-
-
-
-
-
-
diff --git a/doc/src/resources/images/apache_feather.gif b/doc/src/resources/images/apache_feather.gif
deleted file mode 100644
index 1a0c3e6b6d1..00000000000
Binary files a/doc/src/resources/images/apache_feather.gif and /dev/null differ
diff --git a/doc/src/resources/images/avro-logo.png b/doc/src/resources/images/avro-logo.png
deleted file mode 100644
index 4cbe12dd97b..00000000000
Binary files a/doc/src/resources/images/avro-logo.png and /dev/null differ
diff --git a/doc/src/resources/images/favicon.ico b/doc/src/resources/images/favicon.ico
deleted file mode 100644
index 161bcf7841c..00000000000
Binary files a/doc/src/resources/images/favicon.ico and /dev/null differ
diff --git a/doc/src/skinconf.xml b/doc/src/skinconf.xml
deleted file mode 100644
index 2328639c8ce..00000000000
--- a/doc/src/skinconf.xml
+++ /dev/null
@@ -1,350 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
- true
-
- false
-
- true
-
- true
-
-
- true
-
-
- true
-
-
- true
-
-
- false
-
-
- true
-
-
- Avro
- Serialization System
- https://avro.apache.org/
- images/avro-logo.png
-
-
- Apache
- The Apache Software Foundation
- https://www.apache.org/
- images/apache_feather.gif
-
-
-
-
-
-
- images/favicon.ico
-
-
- 2012
- The Apache Software Foundation.
- https://www.apache.org/licenses/
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- p.quote {
- margin-left: 2em;
- padding: .5em;
- background-color: #f0f0f0;
- font-family: monospace;
- }
- table.right {
- text-align: right;
- display: block;
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1in
- 1in
- 1.25in
- 1in
-
-
-
- false
-
-
- false
-
-
-
-
-
- Built with Apache Forrest
- https://forrest.apache.org/
- images/built-with-forrest-button.png
- 88
- 31
-
-
-
-
-
-
diff --git a/doc/themes/docsy b/doc/themes/docsy
new file mode 160000
index 00000000000..a77761a6de8
--- /dev/null
+++ b/doc/themes/docsy
@@ -0,0 +1 @@
+Subproject commit a77761a6de8c5d4bb284dab5d0b47447883eb6d2
diff --git a/lang/c++/CMakeLists.txt b/lang/c++/CMakeLists.txt
index 4a3793152e6..19059a41b13 100644
--- a/lang/c++/CMakeLists.txt
+++ b/lang/c++/CMakeLists.txt
@@ -16,17 +16,21 @@
# specific language governing permissions and limitations
# under the License.
#
-cmake_minimum_required (VERSION 3.1)
+cmake_minimum_required (VERSION 3.5)
set (CMAKE_LEGACY_CYGWIN_WIN32 0)
if (NOT DEFINED CMAKE_CXX_STANDARD)
- set(CMAKE_CXX_STANDARD 11)
+ set(CMAKE_CXX_STANDARD 17)
+endif()
+
+if (CMAKE_CXX_STANDARD LESS 17)
+ message(FATAL_ERROR "Avro requires at least C++17")
endif()
set(CMAKE_CXX_STANDARD_REQUIRED ON)
-if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.0)
+if (APPLE)
# Enable MACOSX_RPATH by default
cmake_policy (SET CMP0042 NEW)
endif()
@@ -64,7 +68,7 @@ if (WIN32 AND NOT CYGWIN AND NOT MSYS)
endif()
if (CMAKE_COMPILER_IS_GNUCXX)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wduplicated-cond -Wduplicated-branches -Wlogical-op -Wuseless-cast -Wconversion -pedantic -Werror")
if (AVRO_ADD_PROTECTOR_FLAGS)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fstack-protector-all -D_GLIBCXX_DEBUG")
# Unset _GLIBCXX_DEBUG for avrogencpp.cc because using Boost Program Options
@@ -78,6 +82,16 @@ endif ()
find_package (Boost 1.38 REQUIRED
COMPONENTS filesystem iostreams program_options regex system)
+include(FetchContent)
+FetchContent_Declare(
+ fmt
+ GIT_REPOSITORY https://github.com/fmtlib/fmt.git
+ GIT_TAG 10.2.1
+ GIT_PROGRESS TRUE
+ USES_TERMINAL_DOWNLOAD TRUE
+)
+FetchContent_MakeAvailable(fmt)
+
find_package(Snappy)
if (SNAPPY_FOUND)
set(SNAPPY_PKG libsnappy)
@@ -92,7 +106,9 @@ endif (SNAPPY_FOUND)
add_definitions (${Boost_LIB_DIAGNOSTIC_DEFINITIONS})
-include_directories (api ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS})
+add_definitions (-DAVRO_VERSION="${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH}")
+
+include_directories (include/avro ${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIRS})
set (AVRO_SOURCE_FILES
impl/Compiler.cc impl/Node.cc impl/LogicalType.cc
@@ -109,6 +125,7 @@ set (AVRO_SOURCE_FILES
impl/json/JsonIO.cc
impl/json/JsonDom.cc
impl/Resolver.cc impl/Validator.cc
+ impl/CustomAttributes.cc
)
add_library (avrocpp SHARED ${AVRO_SOURCE_FILES})
@@ -118,6 +135,7 @@ set_property (TARGET avrocpp
add_library (avrocpp_s STATIC ${AVRO_SOURCE_FILES})
target_include_directories(avrocpp_s PRIVATE ${SNAPPY_INCLUDE_DIR})
+target_link_libraries(avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES} fmt::fmt-header-only)
set_property (TARGET avrocpp avrocpp_s
APPEND PROPERTY COMPILE_DEFINITIONS AVRO_SOURCE)
@@ -128,19 +146,19 @@ set_target_properties (avrocpp PROPERTIES
set_target_properties (avrocpp_s PROPERTIES
VERSION ${AVRO_VERSION_MAJOR}.${AVRO_VERSION_MINOR}.${AVRO_VERSION_PATCH})
-target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
+target_link_libraries (avrocpp ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES} fmt::fmt-header-only)
target_include_directories(avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
add_executable (precompile test/precompile.cc)
-target_link_libraries (precompile avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
+target_link_libraries (precompile avrocpp_s)
macro (gen file ns)
add_custom_command (OUTPUT ${file}.hh
COMMAND avrogencpp
-p -
-i ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file}
- -o ${file}.hh -n ${ns} -U
+ -o ${file}.hh -n ${ns}
DEPENDS avrogencpp ${CMAKE_CURRENT_SOURCE_DIR}/jsonschemas/${file})
add_custom_target (${file}_hh DEPENDS ${file}.hh)
endmacro (gen)
@@ -153,6 +171,7 @@ gen (tweet testgen3)
gen (union_array_union uau)
gen (union_map_union umu)
gen (union_conflict uc)
+gen (union_empty_record uer)
gen (recursive rec)
gen (reuse ru)
gen (circulardep cd)
@@ -161,9 +180,19 @@ gen (tree2 tr2)
gen (crossref cr)
gen (primitivetypes pt)
gen (cpp_reserved_words cppres)
+gen (cpp_reserved_words_union_typedef cppres_union)
add_executable (avrogencpp impl/avrogencpp.cc)
-target_link_libraries (avrogencpp avrocpp_s ${Boost_LIBRARIES} ${SNAPPY_LIBRARIES})
+target_link_libraries (avrogencpp avrocpp_s)
+
+target_include_directories(avrocpp_s PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+)
+target_include_directories(avrocpp PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:include>
+)
enable_testing()
@@ -186,6 +215,7 @@ unittest (JsonTests)
unittest (AvrogencppTests)
unittest (CompilerTests)
unittest (AvrogencppTestReservedWords)
+unittest (CommonsSchemasTests)
add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh)
@@ -193,7 +223,8 @@ add_dependencies (AvrogencppTests bigrecord_hh bigrecord_r_hh bigrecord2_hh
tweet_hh
union_array_union_hh union_map_union_hh union_conflict_hh
recursive_hh reuse_hh circulardep_hh tree1_hh tree2_hh crossref_hh
- primitivetypes_hh empty_record_hh)
+ primitivetypes_hh empty_record_hh cpp_reserved_words_union_typedef_hh
+ union_empty_record_hh)
include (InstallRequiredSystemLibraries)
@@ -208,7 +239,7 @@ install (TARGETS avrocpp avrocpp_s
install (TARGETS avrogencpp RUNTIME DESTINATION bin)
-install (DIRECTORY api/ DESTINATION include/avro
+install (DIRECTORY include/avro DESTINATION include
FILES_MATCHING PATTERN *.hh)
if (NOT CMAKE_BUILD_TYPE)
diff --git a/lang/c++/LICENSE b/lang/c++/LICENSE
index d641439cded..d7f066e1d81 100644
--- a/lang/c++/LICENSE
+++ b/lang/c++/LICENSE
@@ -201,36 +201,6 @@
See the License for the specific language governing permissions and
limitations under the License.
-----------------------------------------------------------------------
-License for the m4 macros used by the C++ implementation:
-
-Files:
-* lang/c++/m4/m4_ax_boost_system.m4
- Copyright (c) 2008 Thomas Porschberg
- Copyright (c) 2008 Michael Tindal
- Copyright (c) 2008 Daniel Casimiro
-* lang/c++/m4/m4_ax_boost_asio.m4
- Copyright (c) 2008 Thomas Porschberg
- Copyright (c) 2008 Pete Greenwell
-* lang/c++/m4/m4_ax_boost_filesystem.m4
- Copyright (c) 2009 Thomas Porschberg
- Copyright (c) 2009 Michael Tindal
- Copyright (c) 2009 Roman Rybalko
-* lang/c++/m4/m4_ax_boost_thread.m4
- Copyright (c) 2009 Thomas Porschberg
- Copyright (c) 2009 Michael Tindal
-* lang/c++/m4/m4_ax_boost_regex.m4
- Copyright (c) 2008 Thomas Porschberg
- Copyright (c) 2008 Michael Tindal
-* lang/c++/m4/m4_ax_boost_base.m4
- Copyright (c) 2008 Thomas Porschberg
-
-License text:
-| Copying and distribution of this file, with or without modification, are
-| permitted in any medium without royalty provided the copyright notice
-| and this notice are preserved. This file is offered as-is, without any
-| warranty.
-
----------------------------------------------------------------------
License for the AVRO_BOOT_NO_TRAIT code in the C++ implementation:
File: lang/c++/api/Boost.hh
diff --git a/lang/c++/MainPage.dox b/lang/c++/MainPage.dox
index eab49d3e627..91977fca2f1 100644
--- a/lang/c++/MainPage.dox
+++ b/lang/c++/MainPage.dox
@@ -55,15 +55,14 @@ One should be able to build Avro C++ on (1) any UNIX flavor including cygwin for
In order to build Avro C++, one needs the following:
- A C++ compiler and runtime libraries.
+ A C++17 or later compiler and runtime libraries.
Boost library version 1.38 or later. Apart from the header-only libraries of Boost, Avro C++ requires filesystem, iostreams, system and program_options libraries. Please see https://www.boost.org or your platform's documentation for details on how to set up Boost for your platform.
- CMake build tool version 2.6 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system.
+ CMake build tool version 3.5 or later. Please see https://www.cmake.org or your platform's documentation for details on how to set up CMake for your system.
Python. If not already present, please consult your platform-specific documentation on how to install Python on your system.
For Ubuntu Linux, for example, you can have these by doing
apt-get install for the following packages:
-\ul
\li cmake
\li g++
\li libboost-dev
@@ -73,7 +72,6 @@ For Ubuntu Linux, for example, you can have these by doing
\li libboost-system-dev
For Windows native builds, you need to install the following:
-\ul
\li cmake
\li boost distribution from Boost consulting
\li Visual studio
@@ -336,4 +334,3 @@ corresponding to a given schema.
Please see DataFile.hh for more details.
*/
-
diff --git a/lang/c++/README b/lang/c++/README
index 6b081f13a86..be5f2ff62d7 100644
--- a/lang/c++/README
+++ b/lang/c++/README
@@ -29,9 +29,9 @@ INSTRUCTIONS
Pre-requisites:
-To compile requires boost headers, and the boost regex library. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS).
+To compile requires boost headers. Optionally, it requires Snappy compression library. If Snappy is available, it builds support for Snappy compression and skips it otherwise. (Please see your OS-specific instructions on how to install Boost and Snappy for your OS).
-To build one requires cmake 2.6 or later.
+To build one requires cmake 3.5 or later and a compiler supporting C++17 or later.
To generate a Makefile under Unix, MacOS (using GNU) or Cygwin use:
@@ -39,8 +39,8 @@ mkdir build
cd build
cmake -G "Unix Makefiles" ..
-If it doesn't work, either you are missing some packages (boost, flex or bison),
-or you need to help configure locate them.
+If it doesn't work, either you are missing the boost package or you need to help
+configure locate it.
If the Makefile is configured correctly, then you can make and run tests:
diff --git a/lang/c++/build.sh b/lang/c++/build.sh
index ac9964c75e5..11e1599d1d9 100755
--- a/lang/c++/build.sh
+++ b/lang/c++/build.sh
@@ -58,8 +58,8 @@ function do_doc() {
function do_dist() {
rm -rf $BUILD_CPP/
mkdir -p $BUILD_CPP
- cp -r api AUTHORS build.sh CMakeLists.txt ChangeLog \
- LICENSE NOTICE impl jsonschemas NEWS parser README test examples \
+ cp -r include AUTHORS build.sh CMakeLists.txt ChangeLog \
+ LICENSE NOTICE impl jsonschemas NEWS README test examples \
$BUILD_CPP
find $BUILD_CPP -name '.svn' | xargs rm -rf
cp ../../share/VERSION.txt $BUILD_CPP
@@ -71,10 +71,10 @@ function do_dist() {
fi
}
-(mkdir -p build; cd build; cmake --version; cmake -G "Unix Makefiles" ..)
for target in "$@"
do
+cmake -S . -B build
case "$target" in
lint)
# some versions of cppcheck seem to require an explicit
@@ -83,16 +83,20 @@ case "$target" in
;;
test)
- (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 .. && make && cd .. \
+ (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Debug -D AVRO_ADD_PROTECTOR_FLAGS=1 && cmake --build build -- -k \
&& ./build/buffertest \
&& ./build/unittest \
+ && ./build/AvrogencppTestReservedWords \
+ && ./build/AvrogencppTests \
&& ./build/CodecTests \
+ && ./build/CommonsSchemasTests \
&& ./build/CompilerTests \
- && ./build/StreamTests \
- && ./build/SpecificTests \
- && ./build/AvrogencppTests \
&& ./build/DataFileTests \
- && ./build/SchemaTests)
+ && ./build/JsonTests \
+ && ./build/LargeSchemaTests \
+ && ./build/SchemaTests \
+ && ./build/SpecificTests \
+ && ./build/StreamTests)
;;
xcode-test)
@@ -104,7 +108,7 @@ case "$target" in
;;
dist)
- (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release ..)
+ (cd build && cmake -D CMAKE_BUILD_TYPE=Release ..)
do_dist
do_doc
;;
@@ -118,12 +122,12 @@ case "$target" in
;;
clean)
- (cd build && make clean)
+ (cmake --build build --target clean)
rm -rf doc test.avro test?.df test??.df test_skip.df test_lastSync.df test_readRecordUsingLastSync.df
;;
install)
- (cd build && cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Release .. && make install)
+ (cmake -S. -Bbuild -D CMAKE_BUILD_TYPE=Release && cmake --build build --target install)
;;
*)
diff --git a/lang/c++/examples/cpx.hh b/lang/c++/examples/cpx.hh
index 53c6ee130db..e240abb568a 100644
--- a/lang/c++/examples/cpx.hh
+++ b/lang/c++/examples/cpx.hh
@@ -16,14 +16,12 @@
* limitations under the License.
*/
-
#ifndef CPX_HH_1278398428__H_
#define CPX_HH_1278398428__H_
-
-#include "avro/Specific.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
+#include "avro/Specific.hh"
namespace c {
struct cpx {
@@ -31,18 +29,19 @@ struct cpx {
double im;
};
-}
+} // namespace c
namespace avro {
-template<> struct codec_traits<c::cpx> {
-    static void encode(Encoder& e, const c::cpx& v) {
+template<>
+struct codec_traits<c::cpx> {
+ static void encode(Encoder &e, const c::cpx &v) {
avro::encode(e, v.re);
avro::encode(e, v.im);
}
- static void decode(Decoder& d, c::cpx& v) {
+ static void decode(Decoder &d, c::cpx &v) {
avro::decode(d, v.re);
avro::decode(d, v.im);
}
};
-}
+} // namespace avro
#endif
diff --git a/lang/c++/examples/custom.cc b/lang/c++/examples/custom.cc
index e3aa32da8c4..baf6d0cf796 100644
--- a/lang/c++/examples/custom.cc
+++ b/lang/c++/examples/custom.cc
@@ -18,19 +18,19 @@
#include <complex>
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
#include "avro/Specific.hh"
namespace avro {
template<typename T>
-struct codec_traits<std::complex<T> > {
-    static void encode(Encoder& e, const std::complex<T>& c) {
+struct codec_traits<std::complex<T>> {
+    static void encode(Encoder &e, const std::complex<T> &c) {
avro::encode(e, std::real(c));
avro::encode(e, std::imag(c));
}
-    static void decode(Decoder& d, std::complex<T>& c) {
+    static void decode(Decoder &d, std::complex<T> &c) {
T re, im;
avro::decode(d, re);
avro::decode(d, im);
@@ -38,10 +38,8 @@ struct codec_traits<std::complex<T> > {
}
};
-}
-int
-main()
-{
+} // namespace avro
+int main() {
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
diff --git a/lang/c++/examples/datafile.cc b/lang/c++/examples/datafile.cc
index 2875e8fc738..6942a3074fa 100644
--- a/lang/c++/examples/datafile.cc
+++ b/lang/c++/examples/datafile.cc
@@ -18,25 +18,21 @@
#include <fstream>
-#include "cpx.hh"
-#include "avro/Encoder.hh"
-#include "avro/Decoder.hh"
-#include "avro/ValidSchema.hh"
#include "avro/Compiler.hh"
#include "avro/DataFile.hh"
+#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
+#include "avro/ValidSchema.hh"
+#include "cpx.hh"
-
-avro::ValidSchema loadSchema(const char* filename)
-{
+avro::ValidSchema loadSchema(const char *filename) {
std::ifstream ifs(filename);
avro::ValidSchema result;
avro::compileJsonSchema(ifs, result);
return result;
}
-int
-main()
-{
+int main() {
avro::ValidSchema cpxSchema = loadSchema("cpx.json");
{
@@ -59,4 +55,3 @@ main()
}
return 0;
}
-
diff --git a/lang/c++/examples/generated.cc b/lang/c++/examples/generated.cc
index f1394ee602a..42a8dd923f1 100644
--- a/lang/c++/examples/generated.cc
+++ b/lang/c++/examples/generated.cc
@@ -16,14 +16,11 @@
* limitations under the License.
*/
-#include "cpx.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
+#include "cpx.hh"
-
-int
-main()
-{
+int main() {
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::binaryEncoder();
e->init(*out);
@@ -41,4 +38,3 @@ main()
std::cout << '(' << c2.re << ", " << c2.im << ')' << std::endl;
return 0;
}
-
diff --git a/lang/c++/examples/generic.cc b/lang/c++/examples/generic.cc
index 2675564e8ba..3abd37eccbc 100644
--- a/lang/c++/examples/generic.cc
+++ b/lang/c++/examples/generic.cc
@@ -16,20 +16,18 @@
* limitations under the License.
*/
-#include <fstream>
#include <complex>
+#include <fstream>
#include "cpx.hh"
#include "avro/Compiler.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
-#include "avro/Specific.hh"
+#include "avro/Encoder.hh"
#include "avro/Generic.hh"
+#include "avro/Specific.hh"
-int
-main()
-{
+int main() {
std::ifstream ifs("cpx.json");
avro::ValidSchema cpxSchema;
@@ -51,14 +49,14 @@ main()
avro::decode(*d, datum);
std::cout << "Type: " << datum.type() << std::endl;
if (datum.type() == avro::AVRO_RECORD) {
-        const avro::GenericRecord& r = datum.value<avro::GenericRecord>();
+        const avro::GenericRecord &r = datum.value<avro::GenericRecord>();
std::cout << "Field-count: " << r.fieldCount() << std::endl;
if (r.fieldCount() == 2) {
- const avro::GenericDatum& f0 = r.fieldAt(0);
+ const avro::GenericDatum &f0 = r.fieldAt(0);
if (f0.type() == avro::AVRO_DOUBLE) {
std::cout << "Real: " << f0.value() << std::endl;
}
- const avro::GenericDatum& f1 = r.fieldAt(1);
+ const avro::GenericDatum &f1 = r.fieldAt(1);
if (f1.type() == avro::AVRO_DOUBLE) {
std::cout << "Imaginary: " << f1.value() << std::endl;
}
diff --git a/lang/c++/examples/imaginary.hh b/lang/c++/examples/imaginary.hh
index 774aefd1172..a268ea1c835 100644
--- a/lang/c++/examples/imaginary.hh
+++ b/lang/c++/examples/imaginary.hh
@@ -16,31 +16,30 @@
* limitations under the License.
*/
-
#ifndef IMAGINARY_HH_3460301992__H_
#define IMAGINARY_HH_3460301992__H_
-
-#include "boost/any.hpp"
-#include "avro/Specific.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
+#include "avro/Specific.hh"
+#include "boost/any.hpp"
namespace i {
struct cpx {
double im;
};
-}
+} // namespace i
namespace avro {
-template<> struct codec_traits<i::cpx> {
-    static void encode(Encoder& e, const i::cpx& v) {
+template<>
+struct codec_traits<i::cpx> {
+ static void encode(Encoder &e, const i::cpx &v) {
avro::encode(e, v.im);
}
- static void decode(Decoder& d, i::cpx& v) {
+ static void decode(Decoder &d, i::cpx &v) {
avro::decode(d, v.im);
}
};
-}
+} // namespace avro
#endif
diff --git a/lang/c++/examples/resolving.cc b/lang/c++/examples/resolving.cc
index 8ce9addb894..a3aec70bc05 100644
--- a/lang/c++/examples/resolving.cc
+++ b/lang/c++/examples/resolving.cc
@@ -22,24 +22,19 @@
#include "imaginary.hh"
#include "avro/Compiler.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
-#include "avro/Specific.hh"
+#include "avro/Encoder.hh"
#include "avro/Generic.hh"
+#include "avro/Specific.hh"
-
-
-avro::ValidSchema load(const char* filename)
-{
+avro::ValidSchema load(const char *filename) {
std::ifstream ifs(filename);
avro::ValidSchema result;
avro::compileJsonSchema(ifs, result);
return result;
}
-int
-main()
-{
+int main() {
avro::ValidSchema cpxSchema = load("cpx.json");
avro::ValidSchema imaginarySchema = load("imaginary.json");
@@ -53,11 +48,10 @@ main()
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::resolvingDecoder(cpxSchema, imaginarySchema,
- avro::binaryDecoder());
+ avro::binaryDecoder());
d->init(*in);
i::cpx c2;
avro::decode(*d, c2);
std::cout << "Imaginary: " << c2.im << std::endl;
-
}
diff --git a/lang/c++/examples/schemaload.cc b/lang/c++/examples/schemaload.cc
index d6b442dd960..63375af9a54 100644
--- a/lang/c++/examples/schemaload.cc
+++ b/lang/c++/examples/schemaload.cc
@@ -18,13 +18,10 @@
#include <fstream>
-#include "avro/ValidSchema.hh"
#include "avro/Compiler.hh"
+#include "avro/ValidSchema.hh"
-
-int
-main()
-{
+int main() {
std::ifstream in("cpx.json");
avro::ValidSchema cpxSchema;
diff --git a/lang/c++/examples/validating.cc b/lang/c++/examples/validating.cc
index 64f0649fa5d..5479edeb3d4 100644
--- a/lang/c++/examples/validating.cc
+++ b/lang/c++/examples/validating.cc
@@ -16,23 +16,23 @@
* limitations under the License.
*/
-#include <fstream>
#include <complex>
+#include <fstream>
#include "avro/Compiler.hh"
-#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
+#include "avro/Encoder.hh"
#include "avro/Specific.hh"
namespace avro {
template<typename T>
-struct codec_traits<std::complex<T> > {
-    static void encode(Encoder& e, const std::complex<T>& c) {
+struct codec_traits<std::complex<T>> {
+    static void encode(Encoder &e, const std::complex<T> &c) {
avro::encode(e, std::real(c));
avro::encode(e, std::imag(c));
}
-    static void decode(Decoder& d, std::complex<T>& c) {
+    static void decode(Decoder &d, std::complex<T> &c) {
T re, im;
avro::decode(d, re);
avro::decode(d, im);
@@ -40,10 +40,8 @@ struct codec_traits<std::complex<T> > {
}
};
-}
-int
-main()
-{
+} // namespace avro
+int main() {
std::ifstream ifs("cpx.json");
avro::ValidSchema cpxSchema;
@@ -51,14 +49,14 @@ main()
std::unique_ptr<avro::OutputStream> out = avro::memoryOutputStream();
avro::EncoderPtr e = avro::validatingEncoder(cpxSchema,
- avro::binaryEncoder());
+ avro::binaryEncoder());
e->init(*out);
std::complex c1(1.0, 2.0);
avro::encode(*e, c1);
std::unique_ptr<avro::InputStream> in = avro::memoryInputStream(*out);
avro::DecoderPtr d = avro::validatingDecoder(cpxSchema,
- avro::binaryDecoder());
+ avro::binaryDecoder());
d->init(*in);
std::complex c2;
diff --git a/lang/c++/impl/BinaryDecoder.cc b/lang/c++/impl/BinaryDecoder.cc
index 248b503342a..a970d605207 100644
--- a/lang/c++/impl/BinaryDecoder.cc
+++ b/lang/c++/impl/BinaryDecoder.cc
@@ -74,14 +74,13 @@ bool BinaryDecoder::decodeBool() {
} else if (v == 1) {
return true;
}
- throw Exception(boost::format("Invalid value for bool: %1%") % v);
+ throw Exception("Invalid value for bool: {}", v);
}
int32_t BinaryDecoder::decodeInt() {
auto val = doDecodeLong();
if (val < INT32_MIN || val > INT32_MAX) {
- throw Exception(
- boost::format("Value out of range for Avro int: %1%") % val);
+ throw Exception("Value out of range for Avro int: {}", val);
}
return static_cast<int32_t>(val);
}
@@ -105,8 +104,7 @@ double BinaryDecoder::decodeDouble() {
size_t BinaryDecoder::doDecodeLength() {
ssize_t len = decodeInt();
if (len < 0) {
- throw Exception(
- boost::format("Cannot have negative length: %1%") % len);
+ throw Exception("Cannot have negative length: {}", len);
}
return len;
}
diff --git a/lang/c++/impl/Compiler.cc b/lang/c++/impl/Compiler.cc
index d76546f317d..3b287c9eeb0 100644
--- a/lang/c++/impl/Compiler.cc
+++ b/lang/c++/impl/Compiler.cc
@@ -17,9 +17,12 @@
*/
#include
#include
+#include <unordered_set>
#include
#include "Compiler.hh"
+#include "CustomAttributes.hh"
+#include "NodeConcepts.hh"
#include "Schema.hh"
#include "Stream.hh"
#include "Types.hh"
@@ -93,7 +96,7 @@ static NodePtr makeNode(const string &t, SymbolTable &st, const string &ns) {
if (it != st.end()) {
return NodePtr(new NodeSymbolic(asSingleAttribute(n), it->second));
}
- throw Exception(boost::format("Unknown type: %1%") % n.fullname());
+ throw Exception("Unknown type: {}", n);
}
/** Returns "true" if the field is in the container */
@@ -109,7 +112,7 @@ json::Object::const_iterator findField(const Entity &e,
template<typename T>
void ensureType(const Entity &e, const string &name) {
if (e.type() != json::type_traits::type()) {
-        throw Exception(boost::format("Json field \"%1%\" is not a %2%: %3%") % name % json::type_traits<T>::name() % e.toString());
+        throw Exception("Json field \"{}\" is not a {}: {}", name, json::type_traits<T>::name(), e.toString());
}
}
@@ -144,16 +147,20 @@ string getDocField(const Entity &e, const Object &m) {
struct Field {
const string name;
+    const vector<string> aliases;
const NodePtr schema;
const GenericDatum defaultValue;
- Field(string n, NodePtr v, GenericDatum dv) : name(std::move(n)), schema(std::move(v)), defaultValue(std::move(dv)) {}
+ const CustomAttributes customAttributes;
+
+    Field(string n, vector<string> a, NodePtr v, GenericDatum dv, const CustomAttributes &ca)
+ : name(std::move(n)), aliases(std::move(a)), schema(std::move(v)), defaultValue(std::move(dv)), customAttributes(ca) {}
};
static void assertType(const Entity &e, EntityType et) {
if (e.type() != et) {
- throw Exception(boost::format("Unexpected type for default value: "
- "Expected %1%, but found %2% in line %3%")
- % json::typeToString(et) % json::typeToString(e.type()) % e.line());
+ throw Exception(
+ "Unexpected type for default value: Expected {}, but found {} in line {}",
+ json::typeToString(et), json::typeToString(e.type()), e.line());
}
}
@@ -212,9 +219,9 @@ static GenericDatum makeGenericDatum(NodePtr n,
for (size_t i = 0; i < n->leaves(); ++i) {
auto it = v.find(n->nameAt(i));
if (it == v.end()) {
- throw Exception(boost::format(
- "No value found in default for %1%")
- % n->nameAt(i));
+ throw Exception(
+ "No value found in default for {}",
+ n->nameAt(i));
}
result.setFieldAt(i,
makeGenericDatum(n->leafAt(i), it->second, st));
@@ -252,13 +259,39 @@ static GenericDatum makeGenericDatum(NodePtr n,
case AVRO_FIXED:
assertType(e, json::EntityType::String);
return GenericDatum(n, GenericFixed(n, toBin(e.bytesValue())));
- default: throw Exception(boost::format("Unknown type: %1%") % t);
+ default: throw Exception("Unknown type: {}", t);
+ }
+}
+
+static const std::unordered_set<std::string> &getKnownFields() {
+    // Field names defined by the Avro spec; anything else is a custom attribute.
+    static const std::unordered_set<std::string> kKnownFields =
+ {"name", "type", "aliases", "default", "doc", "size", "logicalType",
+ "values", "precision", "scale", "namespace"};
+ return kKnownFields;
+}
+
+static void getCustomAttributes(const Object &m, CustomAttributes &customAttributes) {
+    // Skip field names known to the spec (including on primitive and fixed
+    // types); everything else is collected as a custom attribute.
+    const std::unordered_set<std::string> &kKnownFields = getKnownFields();
+ for (const auto &entry : m) {
+ if (kKnownFields.find(entry.first) == kKnownFields.end()) {
+ customAttributes.addAttribute(entry.first, entry.second.stringValue());
+ }
}
}
static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
const Object &m = e.objectValue();
- const string &n = getStringField(e, m, "name");
+ string n = getStringField(e, m, "name");
+ vector<string> aliases;
+ string aliasesName = "aliases";
+ if (containsField(m, aliasesName)) {
+ for (const auto &alias : getArrayField(e, m, aliasesName)) {
+ aliases.emplace_back(alias.stringValue());
+ }
+ }
auto it = findField(e, m, "type");
auto it2 = m.find("default");
NodePtr node = makeNode(it->second, st, ns);
@@ -266,31 +299,38 @@ static Field makeField(const Entity &e, SymbolTable &st, const string &ns) {
node->setDoc(getDocField(e, m));
}
GenericDatum d = (it2 == m.end()) ? GenericDatum() : makeGenericDatum(node, it2->second, st);
- return Field(n, node, d);
+ // Get custom attributes
+ CustomAttributes customAttributes;
+ getCustomAttributes(m, customAttributes);
+ return Field(std::move(n), std::move(aliases), node, d, customAttributes);
}
// Extended makeRecordNode (with doc).
static NodePtr makeRecordNode(const Entity &e, const Name &name,
const string *doc, const Object &m,
SymbolTable &st, const string &ns) {
- const Array &v = getArrayField(e, m, "fields");
concepts::MultiAttribute<string> fieldNames;
+ vector<vector<string>> fieldAliases;
concepts::MultiAttribute<NodePtr> fieldValues;
+ concepts::MultiAttribute<CustomAttributes> customAttributes;
vector<GenericDatum> defaultValues;
-
- for (const auto &it : v) {
+ string fields = "fields";
+ for (const auto &it : getArrayField(e, m, fields)) {
Field f = makeField(it, st, ns);
fieldNames.add(f.name);
+ fieldAliases.push_back(f.aliases);
fieldValues.add(f.schema);
defaultValues.push_back(f.defaultValue);
+ customAttributes.add(f.customAttributes);
}
+
NodeRecord *node;
if (doc == nullptr) {
node = new NodeRecord(asSingleAttribute(name), fieldValues, fieldNames,
- defaultValues);
+ fieldAliases, defaultValues, customAttributes);
} else {
node = new NodeRecord(asSingleAttribute(name), asSingleAttribute(*doc),
- fieldValues, fieldNames, defaultValues);
+ fieldValues, fieldNames, fieldAliases, defaultValues, customAttributes);
}
return NodePtr(node);
}
@@ -305,9 +345,10 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) {
if (typeField == "decimal") {
LogicalType decimalType(LogicalType::DECIMAL);
try {
- decimalType.setPrecision(getLongField(e, m, "precision"));
+ // Precision probably won't go over 38 and scale beyond -77/+77
+ decimalType.setPrecision(static_cast<int32_t>(getLongField(e, m, "precision")));
if (containsField(m, "scale")) {
- decimalType.setScale(getLongField(e, m, "scale"));
+ decimalType.setScale(static_cast<int32_t>(getLongField(e, m, "scale")));
}
} catch (Exception &ex) {
// If any part of the logical type is malformed, per the standard we
@@ -337,11 +378,12 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) {
static NodePtr makeEnumNode(const Entity &e,
const Name &name, const Object &m) {
- const Array &v = getArrayField(e, m, "symbols");
+ string symbolsName = "symbols";
+ const Array &v = getArrayField(e, m, symbolsName);
concepts::MultiAttribute<string> symbols;
for (const auto &it : v) {
if (it.type() != json::EntityType::String) {
- throw Exception(boost::format("Enum symbol not a string: %1%") % it.toString());
+ throw Exception("Enum symbol not a string: {}", it.toString());
}
symbols.add(it.stringValue());
}
@@ -354,12 +396,12 @@ static NodePtr makeEnumNode(const Entity &e,
static NodePtr makeFixedNode(const Entity &e,
const Name &name, const Object &m) {
- int v = static_cast<int>(getLongField(e, m, "size"));
+ int64_t v = getLongField(e, m, "size");
if (v <= 0) {
- throw Exception(boost::format("Size for fixed is not positive: %1%") % e.toString());
+ throw Exception("Size for fixed is not positive: {}", e.toString());
}
NodePtr node =
- NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(v)));
+ NodePtr(new NodeFixed(asSingleAttribute(name), asSingleAttribute(static_cast<int>(v))));
if (containsField(m, "doc")) {
node->setDoc(getDocField(e, m));
}
@@ -392,21 +434,31 @@ static NodePtr makeMapNode(const Entity &e, const Object &m,
static Name getName(const Entity &e, const Object &m, const string &ns) {
const string &name = getStringField(e, m, "name");
+ Name result;
if (isFullName(name)) {
- return Name(name);
+ result = Name(name);
} else {
auto it = m.find("namespace");
if (it != m.end()) {
if (it->second.type() != json::type_traits<string>::type()) {
- throw Exception(boost::format(
- "Json field \"%1%\" is not a %2%: %3%")
- % "namespace" % json::type_traits::name() % it->second.toString());
+ throw Exception(
+ "Json field \"namespace\" is not a string: {}",
+ it->second.toString());
}
- Name result = Name(name, it->second.stringValue());
- return result;
+ result = Name(name, it->second.stringValue());
+ } else {
+ result = Name(name, ns);
}
- return Name(name, ns);
}
+
+ std::string aliases = "aliases";
+ if (containsField(m, aliases)) {
+ for (const auto &alias : getArrayField(e, m, aliases)) {
+ result.addAlias(alias.stringValue());
+ }
+ }
+
+ return result;
}
static NodePtr makeNode(const Entity &e, const Object &m,
@@ -452,11 +504,10 @@ static NodePtr makeNode(const Entity &e, const Object &m,
return result;
}
- throw Exception(boost::format("Unknown type definition: %1%")
- % e.toString());
+ throw Exception("Unknown type definition: %1%", e.toString());
}
-static NodePtr makeNode(const Entity &e, const Array &m,
+static NodePtr makeNode(const Entity &, const Array &m,
SymbolTable &st, const string &ns) {
concepts::MultiAttribute<NodePtr> mm;
for (const auto &it : m) {
@@ -470,13 +521,13 @@ static NodePtr makeNode(const json::Entity &e, SymbolTable &st, const string &ns
case json::EntityType::String: return makeNode(e.stringValue(), st, ns);
case json::EntityType::Obj: return makeNode(e, e.objectValue(), st, ns);
case json::EntityType::Arr: return makeNode(e, e.arrayValue(), st, ns);
- default: throw Exception(boost::format("Invalid Avro type: %1%") % e.toString());
+ default: throw Exception("Invalid Avro type: {}", e.toString());
}
}
json::Object::const_iterator findField(const Entity &e, const Object &m, const string &fieldName) {
auto it = m.find(fieldName);
if (it == m.end()) {
- throw Exception(boost::format("Missing Json field \"%1%\": %2%") % fieldName % e.toString());
+ throw Exception("Missing Json field \"{}\": {}", fieldName, e.toString());
} else {
return it;
}
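Net effect of the Compiler.cc changes: field-level "aliases" and unrecognized attributes are now kept instead of silently dropped. A hedged usage sketch ("cpp_class" is an arbitrary attribute name chosen for illustration):

    #include <iostream>
    #include <sstream>
    #include "Compiler.hh"
    #include "ValidSchema.hh"

    int main() {
        // "identifier" is a field alias; "cpp_class" is not in getKnownFields(),
        // so getCustomAttributes() collects it.
        std::istringstream schema(R"({
            "type": "record", "name": "Rec",
            "fields": [
                {"name": "id", "type": "long",
                 "aliases": ["identifier"], "cpp_class": "MyId"}
            ]
        })");
        avro::ValidSchema vs;
        avro::compileJsonSchema(schema, vs);
        vs.toJson(std::cout); // round-trips the aliases and the custom attribute
        return 0;
    }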
diff --git a/lang/c++/impl/CustomAttributes.cc b/lang/c++/impl/CustomAttributes.cc
new file mode 100644
index 00000000000..13c56a5e926
--- /dev/null
+++ b/lang/c++/impl/CustomAttributes.cc
@@ -0,0 +1,53 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "CustomAttributes.hh"
+#include "Exception.hh"
+#include <map>
+#include <utility>
+
+namespace avro {
+
+boost::optional<std::string> CustomAttributes::getAttribute(const std::string &name) const {
+    boost::optional<std::string> result;
+    std::map<std::string, std::string>::const_iterator iter =
+ attributes_.find(name);
+ if (iter == attributes_.end()) {
+ return result;
+ }
+ result = iter->second;
+ return result;
+}
+
+void CustomAttributes::addAttribute(const std::string &name,
+ const std::string &value) {
+ auto iter_and_find =
+ attributes_.insert(std::pair<std::string, std::string>(name, value));
+ if (!iter_and_find.second) {
+ throw Exception(name + " already exists and cannot be added");
+ }
+}
+
+void CustomAttributes::printJson(std::ostream &os,
+ const std::string &name) const {
+ if (attributes().find(name) == attributes().end()) {
+ throw Exception(name + " doesn't exist");
+ }
+ os << "\"" << name << "\": \"" << attributes().at(name) << "\"";
+}
+} // namespace avro
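Behavior sketch for the new class (lookups return boost::optional, duplicate names throw):

    #include <iostream>
    #include "CustomAttributes.hh"

    int main() {
        avro::CustomAttributes attrs;
        attrs.addAttribute("cpp_class", "MyId");
        if (boost::optional<std::string> v = attrs.getAttribute("cpp_class")) {
            std::cout << *v << '\n';             // prints: MyId
        }
        attrs.printJson(std::cout, "cpp_class"); // prints: "cpp_class": "MyId"
        // attrs.addAttribute("cpp_class", "X"); // would throw: name already exists
        return 0;
    }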
diff --git a/lang/c++/impl/DataFile.cc b/lang/c++/impl/DataFile.cc
index 18fb3f61a68..66281ae9820 100644
--- a/lang/c++/impl/DataFile.cc
+++ b/lang/c++/impl/DataFile.cc
@@ -93,9 +93,9 @@ DataFileWriterBase::DataFileWriterBase(std::unique_ptr<OutputStream> outputStream
void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
if (syncInterval < minSyncInterval || syncInterval > maxSyncInterval) {
- throw Exception(boost::format("Invalid sync interval: %1%. "
- "Should be between %2% and %3%")
- % syncInterval % minSyncInterval % maxSyncInterval);
+ throw Exception(
+ "Invalid sync interval: {}. Should be between {} and {}",
+ syncInterval, minSyncInterval, maxSyncInterval);
}
setMetadata(AVRO_CODEC_KEY, AVRO_NULL_CODEC);
@@ -108,7 +108,7 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
setMetadata(AVRO_CODEC_KEY, AVRO_SNAPPY_CODEC);
#endif
} else {
- throw Exception(boost::format("Unknown codec: %1%") % codec);
+ throw Exception("Unknown codec: {}", int(codec));
}
setMetadata(AVRO_SCHEMA_KEY, schema.toJson(false));
@@ -120,7 +120,9 @@ void DataFileWriterBase::init(const ValidSchema &schema, size_t syncInterval, const Codec &codec) {
DataFileWriterBase::~DataFileWriterBase() {
if (stream_) {
- close();
+ try {
+ close();
+ } catch (...) {}
}
}
@@ -193,10 +195,10 @@ void DataFileWriterBase::sync() {
os.push(boost::iostreams::back_inserter(temp));
boost::iostreams::write(os, compressed.c_str(), compressed_size);
}
- temp.push_back((checksum >> 24) & 0xFF);
- temp.push_back((checksum >> 16) & 0xFF);
- temp.push_back((checksum >> 8) & 0xFF);
- temp.push_back(checksum & 0xFF);
+ temp.push_back(static_cast<char>((checksum >> 24) & 0xFF));
+ temp.push_back(static_cast<char>((checksum >> 16) & 0xFF));
+ temp.push_back(static_cast<char>((checksum >> 8) & 0xFF));
+ temp.push_back(static_cast<char>(checksum & 0xFF));
std::unique_ptr<InputStream> in = memoryInputStream(
    reinterpret_cast<const uint8_t *>(temp.data()), temp.size());
int64_t byteCount = temp.size();
@@ -232,9 +234,8 @@ void DataFileWriterBase::flush() {
sync();
}
-boost::mt19937 random(static_cast<uint32_t>(time(nullptr)));
-
DataFileSync DataFileWriterBase::makeSync() {
+ boost::mt19937 random(static_cast<uint32_t>(time(nullptr)));
DataFileSync sync;
std::generate(sync.begin(), sync.end(), random);
return sync;
@@ -257,14 +258,14 @@ void DataFileWriterBase::setMetadata(const string &key, const string &value) {
metadata_[key] = v;
}
-DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), codec_(NULL_CODEC), stream_(fileSeekableInputStream(filename)),
- decoder_(binaryDecoder()), objectCount_(0), eof_(false), blockStart_(-1),
- blockEnd_(-1) {
+DataFileReaderBase::DataFileReaderBase(const char *filename) : filename_(filename), stream_(fileSeekableInputStream(filename)),
+ decoder_(binaryDecoder()), objectCount_(0), eof_(false),
+ codec_(NULL_CODEC), blockStart_(-1), blockEnd_(-1) {
readHeader();
}
-DataFileReaderBase::DataFileReaderBase(std::unique_ptr<InputStream> inputStream) : codec_(NULL_CODEC), stream_(std::move(inputStream)),
- decoder_(binaryDecoder()), objectCount_(0), eof_(false) {
+DataFileReaderBase::DataFileReaderBase(std::unique_ptr<InputStream> inputStream) : stream_(std::move(inputStream)),
+ decoder_(binaryDecoder()), objectCount_(0), eof_(false), codec_(NULL_CODEC) {
readHeader();
}
@@ -392,6 +393,9 @@ void DataFileReaderBase::readDataBlock() {
compressed_.insert(compressed_.end(), data, data + len);
}
len = compressed_.size();
+ if (len < 4)
+ throw Exception("Cannot read compressed data, expected at least 4 bytes, got " + std::to_string(len));
+
int b1 = compressed_[len - 4] & 0xFF;
int b2 = compressed_[len - 3] & 0xFF;
int b3 = compressed_[len - 2] & 0xFF;
@@ -407,8 +411,8 @@ void DataFileReaderBase::readDataBlock() {
uint32_t c = crc();
if (checksum != c) {
throw Exception(
- boost::format("Checksum did not match for Snappy compression: Expected: %1%, computed: %2%") % checksum
- % c);
+ "Checksum did not match for Snappy compression: Expected: {}, computed: {}",
+ checksum, c);
}
os_.reset(new boost::iostreams::filtering_istream());
os_->push(
@@ -451,7 +455,7 @@ static ValidSchema makeSchema(const vector<uint8_t> &v) {
istringstream iss(toString(v));
ValidSchema vs;
compileJsonSchema(iss, vs);
- return ValidSchema(vs);
+ return vs;
}
void DataFileReaderBase::readHeader() {
@@ -523,8 +527,7 @@ void DataFileReaderBase::sync(int64_t position) {
eof_ = true;
return;
}
- int len =
- std::min(static_cast<size_t>(SyncSize - i), n);
+ size_t len = std::min(SyncSize - i, n);
memcpy(&sync_buffer[i], p, len);
p += len;
n -= len;
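Context for the new len < 4 guard in readDataBlock(): the Snappy codec appends a big-endian CRC32 of the uncompressed payload, written byte-by-byte in sync() and re-assembled by the reader. A sketch of that symmetry (helper names are illustrative only):

    #include <cstdint>
    #include <vector>

    // Writer side: append the CRC as four big-endian bytes.
    void appendCrc(std::vector<char> &buf, uint32_t checksum) {
        buf.push_back(static_cast<char>((checksum >> 24) & 0xFF));
        buf.push_back(static_cast<char>((checksum >> 16) & 0xFF));
        buf.push_back(static_cast<char>((checksum >> 8) & 0xFF));
        buf.push_back(static_cast<char>(checksum & 0xFF));
    }

    // Reader side: rebuild it from the last four bytes; the new length check
    // guarantees buf.size() >= 4 before this runs.
    uint32_t readCrc(const std::vector<char> &buf) {
        size_t n = buf.size();
        return (static_cast<uint32_t>(buf[n - 4] & 0xFF) << 24)
             | (static_cast<uint32_t>(buf[n - 3] & 0xFF) << 16)
             | (static_cast<uint32_t>(buf[n - 2] & 0xFF) << 8)
             | static_cast<uint32_t>(buf[n - 1] & 0xFF);
    }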
diff --git a/lang/c++/impl/FileStream.cc b/lang/c++/impl/FileStream.cc
index 749fd835abd..9063cf1f734 100644
--- a/lang/c++/impl/FileStream.cc
+++ b/lang/c++/impl/FileStream.cc
@@ -49,9 +49,9 @@ struct BufferCopyIn {
struct FileBufferCopyIn : public BufferCopyIn {
#ifdef _WIN32
HANDLE h_;
- FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
+ explicit FileBufferCopyIn(const char *filename) : h_(::CreateFileA(filename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
if (h_ == INVALID_HANDLE_VALUE) {
- throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError());
+ throw Exception("Cannot open file: {}", ::GetLastError());
}
}
@@ -59,16 +59,16 @@ struct FileBufferCopyIn : public BufferCopyIn {
::CloseHandle(h_);
}
- void seek(size_t len) {
+ void seek(size_t len) override {
if (::SetFilePointer(h_, len, NULL, FILE_CURRENT) == INVALID_SET_FILE_POINTER && ::GetLastError() != NO_ERROR) {
- throw Exception(boost::format("Cannot skip file: %1%") % ::GetLastError());
+ throw Exception("Cannot skip file: {}", ::GetLastError());
}
}
- bool read(uint8_t *b, size_t toRead, size_t &actual) {
+ bool read(uint8_t *b, size_t toRead, size_t &actual) override {
DWORD dw = 0;
if (!::ReadFile(h_, b, toRead, &dw, NULL)) {
- throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError());
+ throw Exception("Cannot read file: {}", ::GetLastError());
}
actual = static_cast<size_t>(dw);
return actual != 0;
@@ -78,7 +78,7 @@ struct FileBufferCopyIn : public BufferCopyIn {
explicit FileBufferCopyIn(const char *filename) : fd_(open(filename, O_RDONLY | O_BINARY)) {
if (fd_ < 0) {
- throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno));
+ throw Exception("Cannot open file: {}", strerror(errno));
}
}
@@ -89,12 +89,12 @@ struct FileBufferCopyIn : public BufferCopyIn {
void seek(size_t len) final {
off_t r = ::lseek(fd_, len, SEEK_CUR);
if (r == static_cast(-1)) {
- throw Exception(boost::format("Cannot skip file: %1%") % strerror(errno));
+ throw Exception("Cannot skip file: {}", strerror(errno));
}
}
bool read(uint8_t *b, size_t toRead, size_t &actual) final {
- int n = ::read(fd_, b, toRead);
+ auto n = ::read(fd_, b, toRead);
if (n > 0) {
actual = n;
return true;
@@ -232,9 +232,9 @@ struct BufferCopyOut {
struct FileBufferCopyOut : public BufferCopyOut {
#ifdef _WIN32
HANDLE h_;
- FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
+ explicit FileBufferCopyOut(const char *filename) : h_(::CreateFileA(filename, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL)) {
if (h_ == INVALID_HANDLE_VALUE) {
- throw Exception(boost::format("Cannot open file: %1%") % ::GetLastError());
+ throw Exception("Cannot open file: {}", ::GetLastError());
}
}
@@ -242,11 +242,11 @@ struct FileBufferCopyOut : public BufferCopyOut {
::CloseHandle(h_);
}
- void write(const uint8_t *b, size_t len) {
+ void write(const uint8_t *b, size_t len) override {
while (len > 0) {
DWORD dw = 0;
if (!::WriteFile(h_, b, len, &dw, NULL)) {
- throw Exception(boost::format("Cannot read file: %1%") % ::GetLastError());
+ throw Exception("Cannot read file: {}", ::GetLastError());
}
b += dw;
len -= dw;
@@ -258,7 +258,7 @@ struct FileBufferCopyOut : public BufferCopyOut {
explicit FileBufferCopyOut(const char *filename) : fd_(::open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644)) {
if (fd_ < 0) {
- throw Exception(boost::format("Cannot open file: %1%") % ::strerror(errno));
+ throw Exception("Cannot open file: {}", ::strerror(errno));
}
}
@@ -268,7 +268,7 @@ struct FileBufferCopyOut : public BufferCopyOut {
void write(const uint8_t *b, size_t len) final {
if (::write(fd_, b, len) < 0) {
- throw Exception(boost::format("Cannot write file: %1%") % ::strerror(errno));
+ throw Exception("Cannot write file: {}", ::strerror(errno));
}
}
#endif
diff --git a/lang/c++/impl/Generic.cc b/lang/c++/impl/Generic.cc
index 6e0436ae344..1535c604be7 100644
--- a/lang/c++/impl/Generic.cc
+++ b/lang/c++/impl/Generic.cc
@@ -29,7 +29,7 @@ typedef vector<uint8_t> bytes;
void GenericContainer::assertType(const NodePtr &schema, Type type) {
if (schema->type() != type) {
- throw Exception(boost::format("Schema type %1 expected %2") % toString(schema->type()) % toString(type));
+ throw Exception("Schema type {} expected {}", schema->type(), type);
}
}
@@ -129,7 +129,7 @@ void GenericReader::read(GenericDatum &datum, Decoder &d, bool isResolving) {
}
} break;
default:
- throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type()));
+ throw Exception("Unknown schema type {}", datum.type());
}
}
@@ -217,7 +217,7 @@ void GenericWriter::write(const GenericDatum &datum, Encoder &e) {
e.mapEnd();
} break;
default:
- throw Exception(boost::format("Unknown schema type %1%") % toString(datum.type()));
+ throw Exception("Unknown schema type {}", datum.type());
}
}
diff --git a/lang/c++/impl/GenericDatum.cc b/lang/c++/impl/GenericDatum.cc
index 7b2bf93bca9..49700a927f5 100644
--- a/lang/c++/impl/GenericDatum.cc
+++ b/lang/c++/impl/GenericDatum.cc
@@ -83,7 +83,7 @@ void GenericDatum::init(const NodePtr &schema) {
value_ = GenericUnion(sc);
break;
default:
- throw Exception(boost::format("Unknown schema type %1%") % toString(type_));
+ throw Exception("Unknown schema type {}", toString(type_));
}
}
diff --git a/lang/c++/impl/LogicalType.cc b/lang/c++/impl/LogicalType.cc
index 1aa24bf20de..5e03a313d8f 100644
--- a/lang/c++/impl/LogicalType.cc
+++ b/lang/c++/impl/LogicalType.cc
@@ -28,22 +28,22 @@ LogicalType::Type LogicalType::type() const {
return type_;
}
-void LogicalType::setPrecision(int precision) {
+void LogicalType::setPrecision(int32_t precision) {
if (type_ != DECIMAL) {
throw Exception("Only logical type DECIMAL can have precision");
}
if (precision <= 0) {
- throw Exception(boost::format("Precision cannot be: %1%") % precision);
+ throw Exception("Precision cannot be: {}", precision);
}
precision_ = precision;
}
-void LogicalType::setScale(int scale) {
+void LogicalType::setScale(int32_t scale) {
if (type_ != DECIMAL) {
throw Exception("Only logical type DECIMAL can have scale");
}
if (scale < 0) {
- throw Exception(boost::format("Scale cannot be: %1%") % scale);
+ throw Exception("Scale cannot be: {}", scale);
}
scale_ = scale;
}
diff --git a/lang/c++/impl/Node.cc b/lang/c++/impl/Node.cc
index 46310d0f9ef..14ce6ecf05b 100644
--- a/lang/c++/impl/Node.cc
+++ b/lang/c++/impl/Node.cc
@@ -17,6 +17,7 @@
*/
#include
+#include <unordered_set>
#include "Node.hh"
@@ -26,12 +27,44 @@ using std::string;
Node::~Node() = default;
+struct Name::Aliases {
+    std::vector<std::string> raw;
+    std::unordered_set<std::string> fullyQualified;
+};
+
+Name::Name() = default;
+
Name::Name(const std::string &name) {
fullname(name);
}
+Name::Name(std::string simpleName, std::string ns) : ns_(std::move(ns)), simpleName_(std::move(simpleName)) {
+ check();
+}
+
+Name::Name(const Name &other) {
+ *this = other;
+}
+
+Name &Name::operator=(const Name &other) {
+ if (this != &other) {
+ ns_ = other.ns_;
+ simpleName_ = other.simpleName_;
+ if (other.aliases_) {
+ aliases_ = std::make_unique<Aliases>(*other.aliases_);
+ }
+ }
+ return *this;
+}
+
+Name::Name(Name &&other) = default;
+
+Name &Name::operator=(Name &&other) = default;
+
+Name::~Name() = default;
+
string Name::fullname() const {
- return (ns_.empty()) ? simpleName_ : ns_ + "." + simpleName_;
+ return ns_.empty() ? simpleName_ : ns_ + "." + simpleName_;
}
void Name::fullname(const string &name) {
@@ -46,6 +79,23 @@ void Name::fullname(const string &name) {
check();
}
+const std::vector<std::string> &Name::aliases() const {
+    static const std::vector<std::string> emptyAliases;
+ return aliases_ ? aliases_->raw : emptyAliases;
+}
+
+void Name::addAlias(const std::string &alias) {
+ if (!aliases_) {
+ aliases_ = std::make_unique<Aliases>();
+ }
+ aliases_->raw.push_back(alias);
+ if (!ns_.empty() && alias.find_last_of('.') == string::npos) {
+ aliases_->fullyQualified.emplace(ns_ + "." + alias);
+ } else {
+ aliases_->fullyQualified.insert(alias);
+ }
+}
+
bool Name::operator<(const Name &n) const {
return (ns_ < n.ns_) || (!(n.ns_ < ns_) && (simpleName_ < n.simpleName_));
}
@@ -72,6 +122,16 @@ bool Name::operator==(const Name &n) const {
return ns_ == n.ns_ && simpleName_ == n.simpleName_;
}
+bool Name::equalOrAliasedBy(const Name &n) const {
+ return *this == n || (n.aliases_ && n.aliases_->fullyQualified.find(fullname()) != n.aliases_->fullyQualified.end());
+}
+
+void Name::clear() {
+ ns_.clear();
+ simpleName_.clear();
+ aliases_.reset();
+}
+
void Node::setLogicalType(LogicalType logicalType) {
checkLock();
@@ -86,14 +146,13 @@ void Node::setLogicalType(LogicalType logicalType) {
if (type_ == AVRO_FIXED) {
// Max precision that can be supported by the current size of
// the FIXED type.
- long maxPrecision = floor(log10(2.0) * (8.0 * fixedSize() - 1));
+ auto maxPrecision = static_cast<int32_t>(floor(log10(2.0) * (8.0 * static_cast<double>(fixedSize()) - 1)));
if (logicalType.precision() > maxPrecision) {
throw Exception(
- boost::format(
- "DECIMAL precision %1% is too large for the "
- "FIXED type of size %2%, precision cannot be "
- "larger than %3%")
- % logicalType.precision() % fixedSize() % maxPrecision);
+ "DECIMAL precision {} is too large for the "
+ "FIXED type of size {}, precision cannot be "
+ "larger than {}",
+ logicalType.precision(), fixedSize(), maxPrecision);
}
}
if (logicalType.scale() > logicalType.precision()) {
diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc
index 810e1641ed9..e3073aaaef2 100644
--- a/lang/c++/impl/NodeImpl.cc
+++ b/lang/c++/impl/NodeImpl.cc
@@ -71,7 +71,7 @@ string escape(const string &unescaped) {
// Wrap an indentation in a struct for ostream operator<<
struct indent {
explicit indent(size_t depth) : d(depth) {}
- int d;
+ size_t d;
};
/// ostream operator for indent
@@ -83,6 +83,18 @@ std::ostream &operator<<(std::ostream &os, indent x) {
return os;
}
+void printCustomAttributes(const CustomAttributes &customAttributes, size_t depth,
+ std::ostream &os) {
+ std::map<std::string, std::string>::const_iterator iter =
+ customAttributes.attributes().begin();
+ while (iter != customAttributes.attributes().end()) {
+ os << ",\n"
+ << indent(depth);
+ customAttributes.printJson(os, iter->first);
+ ++iter;
+ }
+}
+
} // anonymous namespace
const int kByteStringSize = 6;
@@ -101,7 +113,7 @@ NodePrimitive::resolve(const Node &reader) const {
return RESOLVE_PROMOTABLE_TO_LONG;
}
- // fall-through intentional
+ [[fallthrough]];
case AVRO_LONG:
@@ -109,7 +121,7 @@ NodePrimitive::resolve(const Node &reader) const {
return RESOLVE_PROMOTABLE_TO_FLOAT;
}
- // fall-through intentional
+ [[fallthrough]];
case AVRO_FLOAT:
@@ -243,17 +255,35 @@ static void printName(std::ostream &os, const Name &n, size_t depth) {
void NodeRecord::printJson(std::ostream &os, size_t depth) const {
os << "{\n";
os << indent(++depth) << "\"type\": \"record\",\n";
- printName(os, nameAttribute_.get(), depth);
+ const Name &name = nameAttribute_.get();
+ printName(os, name, depth);
+
+ const auto &aliases = name.aliases();
+ if (!aliases.empty()) {
+ os << indent(depth) << "\"aliases\": [";
+ ++depth;
+ for (size_t i = 0; i < aliases.size(); ++i) {
+ if (i > 0) {
+ os << ',';
+ }
+ os << '\n'
+ << indent(depth) << "\"" << aliases[i] << "\"";
+ }
+ os << '\n'
+ << indent(--depth) << "]\n";
+ }
+
if (!getDoc().empty()) {
os << indent(depth) << R"("doc": ")"
<< escape(getDoc()) << "\",\n";
}
- os << indent(depth) << "\"fields\": [";
+ os << indent(depth) << "\"fields\": [";
size_t fields = leafAttributes_.size();
++depth;
- // Serialize "default" field:
- assert(defaultValues.empty() || (defaultValues.size() == fields));
+ assert(fieldsAliases_.empty() || (fieldsAliases_.size() == fields));
+ assert(fieldsDefaultValues_.empty() || (fieldsDefaultValues_.size() == fields));
+ assert(customAttributes_.size() == 0 || customAttributes_.size() == fields);
for (size_t i = 0; i < fields; ++i) {
if (i > 0) {
os << ',';
@@ -264,16 +294,37 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const {
os << indent(depth) << "\"type\": ";
leafAttributes_.get(i)->printJson(os, depth);
- if (!defaultValues.empty()) {
- if (!defaultValues[i].isUnion() && defaultValues[i].type() == AVRO_NULL) {
+ if (!fieldsAliases_.empty() && !fieldsAliases_[i].empty()) {
+ os << ",\n"
+ << indent(depth) << "\"aliases\": [";
+ ++depth;
+ for (size_t j = 0; j < fieldsAliases_[i].size(); ++j) {
+ if (j > 0) {
+ os << ',';
+ }
+ os << '\n'
+ << indent(depth) << "\"" << fieldsAliases_[i][j] << "\"";
+ }
+ os << '\n'
+ << indent(--depth) << ']';
+ }
+
+ // Serialize "default" field:
+ if (!fieldsDefaultValues_.empty()) {
+ if (!fieldsDefaultValues_[i].isUnion() && fieldsDefaultValues_[i].type() == AVRO_NULL) {
// No "default" field.
} else {
os << ",\n"
<< indent(depth) << "\"default\": ";
- leafAttributes_.get(i)->printDefaultToJson(defaultValues[i], os,
+ leafAttributes_.get(i)->printDefaultToJson(fieldsDefaultValues_[i], os,
depth);
}
}
+
+ if (customAttributes_.size() == fields) {
+ printCustomAttributes(customAttributes_.get(i), depth, os);
+ }
+
os << '\n';
os << indent(--depth) << '}';
}
@@ -283,7 +334,7 @@ void NodeRecord::printJson(std::ostream &os, size_t depth) const {
}
void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
+ size_t) const {
assert(isPrimitive(g.type()));
switch (g.type()) {
@@ -324,13 +375,13 @@ void NodePrimitive::printDefaultToJson(const GenericDatum &g, std::ostream &os,
}
void NodeEnum::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
+ size_t) const {
assert(g.type() == AVRO_ENUM);
os << "\"" << g.value().symbol() << "\"";
}
void NodeFixed::printDefaultToJson(const GenericDatum &g, std::ostream &os,
- size_t depth) const {
+ size_t) const {
assert(g.type() == AVRO_FIXED);
// ex: "\u00ff"
// Convert to a string
@@ -409,16 +460,38 @@ void NodeRecord::printDefaultToJson(const GenericDatum &g, std::ostream &os,
<< indent(--depth) << "}";
}
}
-NodeRecord::NodeRecord(const HasName &name,
- const MultiLeaves &fields,
- const LeafNames &fieldsNames,
- std::vector<GenericDatum> dv) : NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()),
- defaultValues(std::move(dv)) {
+
+NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields,
+ const LeafNames &fieldsNames, std::vector<GenericDatum> dv)
+ : NodeRecord(name, HasDoc(), fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {}
+
+NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
+ const LeafNames &fieldsNames, std::vector<GenericDatum> dv)
+ : NodeRecord(name, doc, fields, fieldsNames, {}, std::move(dv), MultiAttributes()) {}
+
+NodeRecord::NodeRecord(const HasName &name, const MultiLeaves &fields,
+ const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
+ std::vector<GenericDatum> dv, const MultiAttributes &customAttributes)
+ : NodeRecord(name, HasDoc(), fields, fieldsNames, std::move(fieldsAliases), std::move(dv), customAttributes) {}
+
+NodeRecord::NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields,
+ const LeafNames &fieldsNames, std::vector<std::vector<std::string>> fieldsAliases,
+ std::vector<GenericDatum> dv, const MultiAttributes &customAttributes)
+ : NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, customAttributes, NoSize()),
+ fieldsAliases_(std::move(fieldsAliases)),
+ fieldsDefaultValues_(std::move(dv)) {
+
for (size_t i = 0; i < leafNameAttributes_.size(); ++i) {
if (!nameIndex_.add(leafNameAttributes_.get(i), i)) {
- throw Exception(boost::format(
- "Cannot add duplicate field: %1%")
- % leafNameAttributes_.get(i));
+ throw Exception("Cannot add duplicate field: {}", leafNameAttributes_.get(i));
+ }
+
+ if (!fieldsAliases_.empty()) {
+ for (const auto &alias : fieldsAliases_[i]) {
+ if (!nameIndex_.add(alias, i)) {
+ throw Exception("Cannot add duplicate field: {}", alias);
+ }
+ }
}
}
}
@@ -458,9 +531,9 @@ void NodeEnum::printJson(std::ostream &os, size_t depth) const {
printName(os, nameAttribute_.get(), depth);
os << indent(depth) << "\"symbols\": [\n";
- int names = leafNameAttributes_.size();
+ auto names = leafNameAttributes_.size();
++depth;
- for (int i = 0; i < names; ++i) {
+ for (size_t i = 0; i < names; ++i) {
if (i > 0) {
os << ",\n";
}
@@ -504,9 +577,9 @@ NodeMap::NodeMap() : NodeImplMap(AVRO_MAP) {
void NodeUnion::printJson(std::ostream &os, size_t depth) const {
os << "[\n";
- int fields = leafAttributes_.size();
+ auto fields = leafAttributes_.size();
++depth;
- for (int i = 0; i < fields; ++i) {
+ for (size_t i = 0; i < fields; ++i) {
if (i > 0) {
os << ",\n";
}
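With the serialization added above, NodeRecord::printJson emits field aliases and custom attributes back out. For the record used in the Compiler.cc example earlier, the output is roughly (indentation approximate):

    {
        "type": "record",
        "name": "Rec",
        "fields": [
            {
                "name": "id",
                "type": "long",
                "aliases": [
                    "identifier"
                ],
                "cpp_class": "MyId"
            }
        ]
    }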
diff --git a/lang/c++/impl/Resolver.cc b/lang/c++/impl/Resolver.cc
index 919345e8a2d..5fdd551a317 100644
--- a/lang/c++/impl/Resolver.cc
+++ b/lang/c++/impl/Resolver.cc
@@ -51,7 +51,7 @@ class PrimitiveSkipper : public Resolver {
public:
PrimitiveSkipper() : Resolver() {}
- void parse(Reader &reader, uint8_t *address) const final {
+ void parse(Reader &reader, uint8_t *) const final {
T val;
reader.readValue(val);
DEBUG_OUT("Skipping " << val);
@@ -93,7 +93,7 @@ class PrimitivePromoter : public Resolver {
DEBUG_OUT("Promoting " << val);
}
- void parseIt(Reader &reader, uint8_t *, const std::false_type &) const {}
+ void parseIt(Reader &, uint8_t *, const std::false_type &) const {}
template
void parseIt(Reader &reader, uint8_t *address) const {
@@ -108,7 +108,7 @@ class PrimitiveSkipper<std::vector<uint8_t>> : public Resolver {
public:
PrimitiveSkipper() : Resolver() {}
- void parse(Reader &reader, uint8_t *address) const final {
+ void parse(Reader &reader, uint8_t *) const final {
std::vector<uint8_t> val;
reader.readBytes(val);
DEBUG_OUT("Skipping bytes");
@@ -276,9 +276,9 @@ class ArrayParser : public Resolver {
class EnumSkipper : public Resolver {
public:
- EnumSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {}
+ EnumSkipper(ResolverFactory &, const NodePtr &) : Resolver() {}
- void parse(Reader &reader, uint8_t *address) const final {
+ void parse(Reader &reader, uint8_t *) const final {
int64_t val = reader.readEnum();
DEBUG_OUT("Skipping enum" << val);
}
@@ -290,9 +290,9 @@ class EnumParser : public Resolver {
VAL
};
- EnumParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(),
- offset_(offsets.at(0).offset()),
- readerSize_(reader->names()) {
+ EnumParser(ResolverFactory &, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver(),
+ offset_(offsets.at(0).offset()),
+ readerSize_(reader->names()) {
const size_t writerSize = writer->names();
mapping_.reserve(writerSize);
@@ -307,7 +307,7 @@ class EnumParser : public Resolver {
void parse(Reader &reader, uint8_t *address) const final {
auto val = static_cast<size_t>(reader.readEnum());
- assert(static_cast<size_t>(val) < mapping_.size());
+ assert(val < mapping_.size());
if (mapping_[val] < readerSize_) {
auto *location = reinterpret_cast(address + offset_);
@@ -349,7 +349,7 @@ class UnionParser : public Resolver {
*readerChoice = choiceMapping_[writerChoice];
auto *setter = reinterpret_cast(address + setFuncOffset_);
- auto *value = reinterpret_cast<uint8_t *>(address + offset_);
+ uint8_t *value = address + offset_;
uint8_t *location = (*setter)(value, *readerChoice);
resolvers_[writerChoice]->parse(reader, location);
@@ -397,7 +397,7 @@ class NonUnionToUnionParser : public Resolver {
auto *choice = reinterpret_cast(address + choiceOffset_);
*choice = choice_;
auto *setter = reinterpret_cast(address + setFuncOffset_);
- auto *value = reinterpret_cast<uint8_t *>(address + offset_);
+ uint8_t *value = address + offset_;
uint8_t *location = (*setter)(value, choice_);
resolver_->parse(reader, location);
@@ -413,35 +413,35 @@ class NonUnionToUnionParser : public Resolver {
class FixedSkipper : public Resolver {
public:
- FixedSkipper(ResolverFactory &factory, const NodePtr &writer) : Resolver() {
+ FixedSkipper(ResolverFactory &, const NodePtr &writer) : Resolver() {
size_ = writer->fixedSize();
}
- void parse(Reader &reader, uint8_t *address) const final {
+ void parse(Reader &reader, uint8_t *) const final {
DEBUG_OUT("Skipping fixed");
std::unique_ptr<uint8_t[]> val(new uint8_t[size_]);
reader.readFixed(&val[0], size_);
}
protected:
- int size_;
+ size_t size_;
};
class FixedParser : public Resolver {
public:
- FixedParser(ResolverFactory &factory, const NodePtr &writer, const NodePtr &reader, const CompoundLayout &offsets) : Resolver() {
+ FixedParser(ResolverFactory &, const NodePtr &writer, const NodePtr &, const CompoundLayout &offsets) : Resolver() {
size_ = writer->fixedSize();
offset_ = offsets.at(0).offset();
}
void parse(Reader &reader, uint8_t *address) const final {
DEBUG_OUT("Reading fixed");
- auto *location = reinterpret_cast<uint8_t *>(address + offset_);
+ uint8_t *location = address + offset_;
reader.readFixed(location, size_);
}
protected:
- int size_;
+ size_t size_;
size_t offset_;
};
@@ -449,7 +449,7 @@ class ResolverFactory : private boost::noncopyable {
template <typename T>
unique_ptr<Resolver>
- constructPrimitiveSkipper(const NodePtr &writer) {
+ constructPrimitiveSkipper(const NodePtr &) {
return unique_ptr<Resolver>(new PrimitiveSkipper<T>());
}
@@ -710,8 +710,8 @@ NonUnionToUnionParser::NonUnionToUnionParser(ResolverFactory &factory,
const NodePtr &writer,
const NodePtr &reader,
const CompoundLayout &offsets) : Resolver(),
- offset_(offsets.offset()),
choice_(0),
+ offset_(offsets.offset()),
choiceOffset_(offsets.at(0).offset()),
setFuncOffset_(offsets.at(1).offset()) {
#ifndef NDEBUG
diff --git a/lang/c++/impl/Schema.cc b/lang/c++/impl/Schema.cc
index 42245292e67..8f42b850a09 100644
--- a/lang/c++/impl/Schema.cc
+++ b/lang/c++/impl/Schema.cc
@@ -18,6 +18,7 @@
#include
+#include "CustomAttributes.hh"
#include "Schema.hh"
namespace avro {
@@ -27,11 +28,18 @@ RecordSchema::RecordSchema(const std::string &name) : Schema(new NodeRecord) {
}
void RecordSchema::addField(const std::string &name, const Schema &fieldSchema) {
+ const CustomAttributes emptyCustomAttribute;
+ addField(name, fieldSchema, emptyCustomAttribute);
+}
+
+void RecordSchema::addField(const std::string &name, const Schema &fieldSchema, const CustomAttributes &customFields) {
// add the name first. it will throw if the name is a duplicate, preventing
// the leaf from being added
node_->addName(name);
node_->addLeaf(fieldSchema.root());
+
+ node_->addCustomAttributesForField(customFields);
}
std::string RecordSchema::getDoc() const {
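Usage sketch for the new three-argument overload (the builder types are the existing ones from Schema.hh):

    #include "CustomAttributes.hh"
    #include "Schema.hh"

    int main() {
        avro::CustomAttributes attrs;
        attrs.addAttribute("cpp_class", "MyId");

        avro::RecordSchema rec("Rec");
        rec.addField("id", avro::LongSchema(), attrs); // field carries the attribute
        rec.addField("name", avro::StringSchema());    // two-argument form unchanged
        return 0;
    }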
diff --git a/lang/c++/impl/Stream.cc b/lang/c++/impl/Stream.cc
index 63a8b4e8fc5..738b1e40520 100644
--- a/lang/c++/impl/Stream.cc
+++ b/lang/c++/impl/Stream.cc
@@ -117,7 +117,7 @@ class MemoryInputStream2 : public InputStream {
}
};
-class MemoryOutputStream : public OutputStream {
+class MemoryOutputStream final : public OutputStream {
public:
const size_t chunkSize_;
std::vector<uint8_t *> data_;
@@ -129,7 +129,7 @@ class MemoryOutputStream : public OutputStream {
~MemoryOutputStream() final {
for (std::vector<uint8_t *>::const_iterator it = data_.begin();
it != data_.end(); ++it) {
- delete[] * it;
+ delete[] *it;
}
}
diff --git a/lang/c++/impl/ValidSchema.cc b/lang/c++/impl/ValidSchema.cc
index 63a3bbee919..d99d7e24198 100644
--- a/lang/c++/impl/ValidSchema.cc
+++ b/lang/c++/impl/ValidSchema.cc
@@ -16,7 +16,6 @@
* limitations under the License.
*/
-#include <boost/format.hpp>
#include
#include
#include
@@ -25,7 +24,6 @@
#include "Schema.hh"
#include "ValidSchema.hh"
-using boost::format;
using std::make_pair;
using std::ostringstream;
using std::shared_ptr;
using SymbolMap = std::map<Name, NodePtr>;
static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
if (!node->isValid()) {
- throw Exception(format("Schema is invalid, due to bad node of type %1%")
- % node->type());
+ throw Exception("Schema is invalid, due to bad node of type {}", node->type());
}
if (node->hasName()) {
@@ -51,7 +48,7 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
if (node->type() == AVRO_SYMBOLIC) {
if (!found) {
- throw Exception(format("Symbolic name \"%1%\" is unknown") % node->name());
+ throw Exception("Symbolic name \"{}\" is unknown", node->name());
}
shared_ptr<NodeSymbolic> symNode =
@@ -69,8 +66,8 @@ static bool validate(const NodePtr &node, SymbolMap &symbolMap) {
}
node->lock();
- auto leaves = node->leaves();
- for (auto i = 0; i < leaves; ++i) {
+ size_t leaves = node->leaves();
+ for (size_t i = 0; i < leaves; ++i) {
const NodePtr &leaf(node->leafAt(i));
if (!validate(leaf, symbolMap)) {
diff --git a/lang/c++/impl/Validator.cc b/lang/c++/impl/Validator.cc
index 0e5fd8bedad..c00460480b1 100644
--- a/lang/c++/impl/Validator.cc
+++ b/lang/c++/impl/Validator.cc
@@ -62,7 +62,7 @@ bool Validator::countingSetup() {
compoundStack_.pop_back();
proceed = false;
} else {
- counters_.push_back(static_cast<size_t>(count_));
+ counters_.push_back(count_);
}
}
@@ -71,14 +71,14 @@ bool Validator::countingSetup() {
void Validator::countingAdvance() {
if (countingSetup()) {
- auto index = (compoundStack_.back().pos)++;
+ size_t index = (compoundStack_.back().pos)++;
const NodePtr &node = compoundStack_.back().node;
if (index < node->leaves()) {
setupOperation(node->leafAt(index));
} else {
compoundStack_.back().pos = 0;
- int count = --counters_.back();
+ size_t count = --counters_.back();
if (count == 0) {
counters_.pop_back();
compoundStarted_ = true;
@@ -100,14 +100,13 @@ void Validator::unionAdvance() {
waitingForCount_ = false;
NodePtr node = compoundStack_.back().node;
- if (count_ < static_cast<int64_t>(node->leaves())) {
+ if (count_ < node->leaves()) {
compoundStack_.pop_back();
setupOperation(node->leafAt(static_cast<size_t>(count_)));
} else {
throw Exception(
- boost::format("Union selection out of range, got %1%,"
- " expecting 0-%2%")
- % count_ % (node->leaves() - 1));
+ "Union selection out of range, got {}, expecting 0-{}",
+ count_, node->leaves() - 1);
}
}
}
@@ -117,7 +116,7 @@ void Validator::fixedAdvance() {
compoundStack_.pop_back();
}
-int Validator::nextSizeExpected() const {
+size_t Validator::nextSizeExpected() const {
return compoundStack_.back().node->fixedSize();
}
@@ -169,11 +168,9 @@ void Validator::advance() {
}
}
-void Validator::setCount(int64_t count) {
+void Validator::setCount(size_t count) {
if (!waitingForCount_) {
throw Exception("Not expecting count");
- } else if (count_ < 0) {
- throw Exception("Count cannot be negative");
}
count_ = count;
diff --git a/lang/c++/impl/Zigzag.cc b/lang/c++/impl/Zigzag.cc
index 538a89cbaa7..7875f789bd2 100644
--- a/lang/c++/impl/Zigzag.cc
+++ b/lang/c++/impl/Zigzag.cc
@@ -30,11 +30,11 @@ encodeInt64(int64_t input, std::array<uint8_t, 10> &output) noexcept {
auto v = val & mask;
size_t bytesOut = 0;
while (val >>= 7) {
- output[bytesOut++] = (v | 0x80);
+ output[bytesOut++] = static_cast<uint8_t>(v | 0x80);
v = val & mask;
}
- output[bytesOut++] = v;
+ output[bytesOut++] = static_cast<uint8_t>(v);
return bytesOut;
}
size_t
@@ -46,11 +46,11 @@ encodeInt32(int32_t input, std::array<uint8_t, 5> &output) noexcept {
auto v = val & mask;
size_t bytesOut = 0;
while (val >>= 7) {
- output[bytesOut++] = (v | 0x80);
+ output[bytesOut++] = static_cast<uint8_t>(v | 0x80);
v = val & mask;
}
- output[bytesOut++] = v;
+ output[bytesOut++] = static_cast<uint8_t>(v);
return bytesOut;
}
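For reference, the loops above implement the standard zigzag varint: the sign bit is folded into the low bit, then seven-bit groups are written with 0x80 as the continuation flag. A worked example:

    #include <cassert>
    #include <cstdint>

    int main() {
        int64_t n = -3;
        // Zigzag fold: (n << 1) ^ (n >> 63) maps -3 to 5.
        uint64_t zz = (static_cast<uint64_t>(n) << 1) ^ static_cast<uint64_t>(n >> 63);
        assert(zz == 5); // < 0x80, so encodeInt64(-3, buf) emits the single byte 0x05
        // 200 zigzags to 400 = 0b1'1001'0000 -> two bytes: 0x90 (0x10 | 0x80), then 0x03.
        return 0;
    }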
diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc
index 0b6b35a2f23..39da7af3539 100644
--- a/lang/c++/impl/avrogencpp.cc
+++ b/lang/c++/impl/avrogencpp.cc
@@ -32,8 +32,6 @@
#include
#include
-#include <boost/algorithm/string_regex.hpp>
-
#include "Compiler.hh"
#include "NodeImpl.hh"
#include "ValidSchema.hh"
@@ -53,12 +51,6 @@ using boost::lexical_cast;
using avro::compileJsonSchema;
using avro::ValidSchema;
-#if __cplusplus >= 201703L
-#define ANY_NS "std"
-#else
-#define ANY_NS "boost"
-#endif
-
struct PendingSetterGetter {
string structName;
string type;
@@ -108,6 +100,7 @@ class CodeGen {
void generateRecordTraits(const NodePtr &n);
void generateUnionTraits(const NodePtr &n);
void emitCopyright();
+ void emitGeneratedWarning();
public:
CodeGen(std::ostream &os, std::string ns,
@@ -117,7 +110,9 @@ class CodeGen {
schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)),
includePrefix_(std::move(includePrefix)), noUnion_(noUnion),
guardString_(std::move(guardString)),
- random_(static_cast<uint32_t>(::time(nullptr))) {}
+ random_(static_cast<uint32_t>(::time(nullptr))) {
+ }
+
void generate(const ValidSchema &schema);
};
@@ -125,7 +120,7 @@ static string decorate(const std::string &name) {
static const char *cppReservedWords[] = {
"alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor", "bool", "break",
"case", "catch", "char", "char8_t", "char16_t", "char32_t", "class", "compl", "concept",
- "const", "consteval", "constexpr", "const_cast", "continue", "co_await", "co_return",
+ "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return",
"co_yield", "decltype", "default", "delete", "do", "double", "dynamic_cast", "else",
"enum", "explicit", "export", "extern", "false", "float", "for", "friend", "goto", "if",
"import", "inline", "int", "long", "module", "mutable", "namespace", "new", "noexcept", "not",
@@ -250,6 +245,11 @@ string CodeGen::generateRecordType(const NodePtr &n) {
if (n->leafAt(i)->type() == avro::AVRO_UNION) {
os_ << " typedef " << types[i]
<< ' ' << n->nameAt(i) << "_t;\n";
+ types[i] = n->nameAt(i) + "_t";
+ }
+ if (n->leafAt(i)->type() == avro::AVRO_ARRAY && n->leafAt(i)->leafAt(0)->type() == avro::AVRO_UNION) {
+ os_ << " typedef " << types[i] << "::value_type"
+ << ' ' << n->nameAt(i) << "_item_t;\n";
}
}
}
@@ -257,11 +257,7 @@ string CodeGen::generateRecordType(const NodePtr &n) {
// the nameAt(i) does not take c++ reserved words into account
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
- if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) {
- os_ << " " << decoratedNameAt << "_t";
- } else {
- os_ << " " << types[i];
- }
+ os_ << " " << types[i];
os_ << ' ' << decoratedNameAt << ";\n";
}
@@ -275,13 +271,7 @@ string CodeGen::generateRecordType(const NodePtr &n) {
// so we need to call decorate on it
std::string decoratedNameAt = decorate(n->nameAt(i));
os_ << " " << decoratedNameAt << "(";
- if (!noUnion_ && n->leafAt(i)->type() == avro::AVRO_UNION) {
- // the nameAt(i) does not take c++ reserved words into account
- // so we need to call decorate on it
- os_ << decoratedNameAt << "_t";
- } else {
- os_ << types[i];
- }
+ os_ << types[i];
os_ << "())";
if (i != (c - 1)) {
os_ << ',';
@@ -326,9 +316,9 @@ static void generateGetterAndSetter(ostream &os,
os << type << sn << "get_" << name << "() const {\n"
<< " if (idx_ != " << idx << ") {\n"
<< " throw avro::Exception(\"Invalid type for "
- << "union\");\n"
+ << "union " << structName << "\");\n"
<< " }\n"
- << " return " << ANY_NS << "::any_cast<" << type << " >(value_);\n"
+ << " return std::any_cast<" << type << " >(value_);\n"
<< "}\n\n";
os << "inline\n"
@@ -385,7 +375,7 @@ string CodeGen::generateUnionType(const NodePtr &n) {
os_ << "struct " << result << " {\n"
<< "private:\n"
<< " size_t idx_;\n"
- << " " << ANY_NS << "::any value_;\n"
+ << " std::any value_;\n"
<< "public:\n"
<< " size_t idx() const { return idx_; }\n";
@@ -397,7 +387,7 @@ string CodeGen::generateUnionType(const NodePtr &n) {
<< " }\n"
<< " void set_null() {\n"
<< " idx_ = " << i << ";\n"
- << " value_ = " << ANY_NS << "::any();\n"
+ << " value_ = std::any();\n"
<< " }\n";
} else {
const string &type = types[i];
@@ -551,8 +541,22 @@ void CodeGen::generateRecordTraits(const NodePtr &n) {
}
string fn = fullname(decorate(n->name()));
- os_ << "template<> struct codec_traits<" << fn << "> {\n"
- << " static void encode(Encoder& e, const " << fn << "& v) {\n";
+ os_ << "template<> struct codec_traits<" << fn << "> {\n";
+
+ if (c == 0) {
+ os_ << " static void encode(Encoder&, const " << fn << "&) {}\n";
+ // ResolvingDecoder::fieldOrder mutates the state of the decoder, so if that decoder is
+ // passed in, we need to call the method even though it will return an empty vector.
+ os_ << " static void decode(Decoder& d, " << fn << "&) {\n";
+ os_ << " if (avro::ResolvingDecoder *rd = dynamic_cast(&d)) {\n";
+ os_ << " rd->fieldOrder();\n";
+ os_ << " }\n";
+ os_ << " }\n";
+ os_ << "};\n";
+ return;
+ }
+
+ os_ << " static void encode(Encoder& e, const " << fn << "& v) {\n";
for (size_t i = 0; i < c; ++i) {
// the nameAt(i) does not take c++ reserved words into account
@@ -702,17 +706,22 @@ void CodeGen::emitCopyright() {
" * See the License for the specific language governing "
"permissions and\n"
" * limitations under the License.\n"
- " */\n\n\n";
+ " */\n\n";
+}
+
+void CodeGen::emitGeneratedWarning() {
+ os_ << "/* This code was generated by avrogencpp " << AVRO_VERSION << ". Do not edit.*/\n\n";
}
string CodeGen::guard() {
string h = headerFile_;
makeCanonical(h, true);
- return h + "_" + lexical_cast(random_()) + "__H_";
+ return h + "_" + lexical_cast(random_()) + "_H";
}
void CodeGen::generate(const ValidSchema &schema) {
emitCopyright();
+ emitGeneratedWarning();
string h = guardString_.empty() ? guard() : guardString_;
@@ -720,24 +729,14 @@ void CodeGen::generate(const ValidSchema &schema) {
os_ << "#define " << h << "\n\n\n";
os_ << "#include \n"
-#if __cplusplus >= 201703L
<< "#include \n"
-#else
- << "#include \"boost/any.hpp\"\n"
-#endif
<< "#include \"" << includePrefix_ << "Specific.hh\"\n"
<< "#include \"" << includePrefix_ << "Encoder.hh\"\n"
<< "#include \"" << includePrefix_ << "Decoder.hh\"\n"
<< "\n";
- vector<string> nsVector;
if (!ns_.empty()) {
- boost::algorithm::split_regex(nsVector, ns_, boost::regex("::"));
- for (vector<string>::const_iterator it =
- nsVector.begin();
- it != nsVector.end(); ++it) {
- os_ << "namespace " << *it << " {\n";
- }
+ os_ << "namespace " << ns_ << " {\n";
inNamespace_ = true;
}
@@ -760,11 +759,7 @@ void CodeGen::generate(const ValidSchema &schema) {
if (!ns_.empty()) {
inNamespace_ = false;
- for (vector<string>::const_iterator it =
- nsVector.begin();
- it != nsVector.end(); ++it) {
- os_ << "}\n";
- }
+ os_ << "}\n";
}
os_ << "namespace avro {\n";
@@ -810,14 +805,32 @@ int main(int argc, char **argv) {
const string NO_UNION_TYPEDEF("no-union-typedef");
po::options_description desc("Allowed options");
- desc.add_options()("help,h", "produce help message")("include-prefix,p", po::value()->default_value("avro"),
- "prefix for include headers, - for none, default: avro")("no-union-typedef,U", "do not generate typedefs for unions in records")("namespace,n", po::value(), "set namespace for generated code")("input,i", po::value(), "input file")("output,o", po::value(), "output file to generate");
+ // clang-format off
+ desc.add_options()
+ ("help,h", "produce help message")
+ ("version,V", "produce version information")
+ ("include-prefix,p", po::value()->default_value("avro"), "prefix for include headers, - for none, default: avro")
+ ("no-union-typedef,U", "do not generate typedefs for unions in records")
+ ("namespace,n", po::value(), "set namespace for generated code")
+ ("input,i", po::value(), "input file")
+ ("output,o", po::value(), "output file to generate");
+ // clang-format on
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
- if (vm.count("help") || vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) {
+ if (vm.count("help")) {
+ std::cout << desc << std::endl;
+ return 0;
+ }
+
+ if (vm.count("version")) {
+ std::cout << AVRO_VERSION << std::endl;
+ return 0;
+ }
+
+ if (vm.count(IN_FILE) == 0 || vm.count(OUT_FILE) == 0) {
std::cout << desc << std::endl;
return 1;
}
@@ -827,6 +840,7 @@ int main(int argc, char **argv) {
string inf = vm.count(IN_FILE) > 0 ? vm[IN_FILE].as<string>() : string();
string incPrefix = vm[INCLUDE_PREFIX].as<string>();
bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0;
+
if (incPrefix == "-") {
incPrefix.clear();
} else if (*incPrefix.rbegin() != '/') {
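The option-table rewrite also changes the CLI surface: --help now exits 0, and a new -V/--version flag prints the AVRO_VERSION string. Illustrative invocations (output elided):

    // avrogencpp --version
    //     prints the AVRO_VERSION string and exits 0
    // avrogencpp -i cpx.json -o cpx.hh -n md::v
    //     the generated header now opens with a "Do not edit" banner and a
    //     single "namespace md::v {" line (a C++17 nested-namespace declaration)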
diff --git a/lang/c++/impl/json/JsonDom.cc b/lang/c++/impl/json/JsonDom.cc
index 5bffda2559c..c2696d827ad 100644
--- a/lang/c++/impl/json/JsonDom.cc
+++ b/lang/c++/impl/json/JsonDom.cc
@@ -25,9 +25,6 @@
#include "JsonIO.hh"
#include "Stream.hh"
-using boost::format;
-using std::string;
-
namespace avro {
namespace json {
const char *typeToString(EntityType t) {
@@ -142,8 +139,7 @@ void writeEntity(JsonGenerator &g, const Entity &n) {
void Entity::ensureType(EntityType type) const {
if (type_ != type) {
- format msg = format("Invalid type. Expected \"%1%\" actual %2%") % typeToString(type) % typeToString(type_);
- throw Exception(msg);
+ throw Exception("Invalid type. Expected \"{}\" actual {}", typeToString(type), typeToString(type_));
}
}
diff --git a/lang/c++/impl/json/JsonDom.hh b/lang/c++/impl/json/JsonDom.hh
index 3fb5670b70b..2a0695adff6 100644
--- a/lang/c++/impl/json/JsonDom.hh
+++ b/lang/c++/impl/json/JsonDom.hh
@@ -76,22 +76,22 @@ public:
explicit Entity(size_t line = 0) : type_(EntityType::Null), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {}
+ explicit Entity(Bool v, size_t line = 0) : type_(EntityType::Bool), value_(v), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {}
+ explicit Entity(Long v, size_t line = 0) : type_(EntityType::Long), value_(v), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {}
+ explicit Entity(Double v, size_t line = 0) : type_(EntityType::Double), value_(v), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(const std::shared_ptr<String> &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {}
+ explicit Entity(const std::shared_ptr<String> &v, size_t line = 0) : type_(EntityType::String), value_(v), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(const std::shared_ptr<Array> &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {}
+ explicit Entity(const std::shared_ptr<Array> &v, size_t line = 0) : type_(EntityType::Arr), value_(v), line_(line) {}
// Not explicit because do want implicit conversion
// NOLINTNEXTLINE(google-explicit-constructor)
- Entity(const std::shared_ptr<Object> &v, size_t line = 0) : type_(EntityType::Obj), value_(v), line_(line) {}
+ explicit Entity(const std::shared_ptr