diff --git a/.github/fork_workflows/fork_pr_integration_tests_aws.yml b/.github/fork_workflows/fork_pr_integration_tests_aws.yml
new file mode 100644
index 0000000000..ef53fc1c7d
--- /dev/null
+++ b/.github/fork_workflows/fork_pr_integration_tests_aws.yml
@@ -0,0 +1,159 @@
+name: fork-pr-integration-tests-aws
+
+on: [pull_request]
+
+jobs:
+ build-docker-image:
+    if: github.repository == 'your github repo' # swap here with your fork's repository, e.g. your-org/feast
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ # pull_request_target runs the workflow in the context of the base repo
+ # as such actions/checkout needs to be explicit configured to retrieve
+ # code from the PR.
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge
+ submodules: recursive
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v1
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+ with:
+ install: true
+ - name: Set up AWS SDK
+ uses: aws-actions/configure-aws-credentials@v1
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: us-west-2
+ - name: Login to Amazon ECR
+ id: login-ecr
+ uses: aws-actions/amazon-ecr-login@v1
+ - name: Set ECR image tag
+ id: image-tag
+ run: echo "::set-output name=DOCKER_IMAGE_TAG::`git rev-parse HEAD`"
+ - name: Cache Public ECR Image
+ id: lambda_python_3_9
+ uses: actions/cache@v2
+ with:
+ path: ~/cache
+ key: lambda_python_3_9
+ - name: Handle Cache Miss (pull public ECR image & save it to tar file)
+        if: steps.lambda_python_3_9.outputs.cache-hit != 'true'
+ run: |
+ mkdir -p ~/cache
+ docker pull public.ecr.aws/lambda/python:3.9
+ docker save public.ecr.aws/lambda/python:3.9 -o ~/cache/lambda_python_3_9.tar
+ - name: Handle Cache Hit (load docker image from tar file)
+        if: steps.lambda_python_3_9.outputs.cache-hit == 'true'
+ run: |
+ docker load -i ~/cache/lambda_python_3_9.tar
+ - name: Build and push
+ env:
+ ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
+ ECR_REPOSITORY: feast-python-server
+ run: |
+ docker build \
+ --file sdk/python/feast/infra/feature_servers/aws_lambda/Dockerfile \
+ --tag $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} \
+ --load \
+ .
+ docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }}
+ outputs:
+ DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }}
+ integration-test-python:
+    if: github.repository == 'your github repo' # swap here with your fork's repository, e.g. your-org/feast
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ "3.8" ]
+ os: [ ubuntu-latest ]
+ env:
+ OS: ${{ matrix.os }}
+ PYTHON: ${{ matrix.python-version }}
+ services:
+ redis:
+ image: redis
+ ports:
+ - 6379:6379
+ options: >-
+ --health-cmd "redis-cli ping"
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ # pull_request_target runs the workflow in the context of the base repo
+ # as such actions/checkout needs to be explicit configured to retrieve
+ # code from the PR.
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge
+ submodules: recursive
+ - name: Setup Python
+ uses: actions/setup-python@v2
+ id: setup-python
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: Setup Go
+ id: setup-go
+ uses: actions/setup-go@v2
+ with:
+ go-version: 1.18.0
+ - name: Set up AWS SDK
+ uses: aws-actions/configure-aws-credentials@v1
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: us-west-2
+ - name: Use AWS CLI
+ run: aws sts get-caller-identity
+ - name: Upgrade pip version
+ run: |
+ pip install --upgrade "pip>=21.3.1,<22.1"
+ - name: Get pip cache dir
+ id: pip-cache
+ run: |
+ echo "::set-output name=dir::$(pip cache dir)"
+ - name: pip cache
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ steps.pip-cache.outputs.dir }}
+ /opt/hostedtoolcache/Python
+ /Users/runner/hostedtoolcache/Python
+ key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+ restore-keys: |
+ ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+ - name: Install pip-tools
+ run: pip install pip-tools
+ - name: Install apache-arrow on ubuntu
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ sudo apt update
+ sudo apt install -y -V ca-certificates lsb-release wget
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt update
+ sudo apt install -y -V libarrow-dev
+ - name: Install apache-arrow on macos
+ if: matrix.os == 'macOS-latest'
+ run: brew install apache-arrow
+ - name: Install dependencies
+ run: make install-python-ci-dependencies
+ - name: Setup Redis Cluster
+ run: |
+ docker pull vishnunair/docker-redis-cluster:latest
+ docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster
+ - name: Test python
+ if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
+ env:
+ FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }}
+ run: |
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "aws and not Snowflake and not BigQuery"
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not Snowflake and not BigQuery"
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "dynamo and not Snowflake and not BigQuery"
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "Redshift and not Snowflake and not BigQuery"
+
+
diff --git a/.github/fork_workflows/fork_pr_integration_tests_gcp.yml b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml
new file mode 100644
index 0000000000..d53aef0155
--- /dev/null
+++ b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml
@@ -0,0 +1,97 @@
+name: fork-pr-integration-tests-gcp
+
+on: [pull_request]
+
+jobs:
+ integration-test-python:
+    if: github.repository == 'your github repo' # swap here with your fork's repository, e.g. your-org/feast
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ "3.8" ]
+ os: [ ubuntu-latest ]
+ env:
+ OS: ${{ matrix.os }}
+ PYTHON: ${{ matrix.python-version }}
+ services:
+ redis:
+ image: redis
+ ports:
+ - 6379:6379
+ options: >-
+ --health-cmd "redis-cli ping"
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ # pull_request_target runs the workflow in the context of the base repo
+ # as such actions/checkout needs to be explicit configured to retrieve
+ # code from the PR.
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge
+ submodules: recursive
+ - name: Setup Python
+ uses: actions/setup-python@v2
+ id: setup-python
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: Setup Go
+ id: setup-go
+ uses: actions/setup-go@v2
+ with:
+ go-version: 1.18.0
+ - name: Set up gcloud SDK
+ uses: google-github-actions/setup-gcloud@v0
+ with:
+ project_id: ${{ secrets.GCP_PROJECT_ID }}
+ service_account_key: ${{ secrets.GCP_SA_KEY }}
+ export_default_credentials: true
+ - name: Use gcloud CLI
+ run: gcloud info
+ - name: Upgrade pip version
+ run: |
+ pip install --upgrade "pip>=21.3.1,<22.1"
+ - name: Get pip cache dir
+ id: pip-cache
+ run: |
+ echo "::set-output name=dir::$(pip cache dir)"
+ - name: pip cache
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ steps.pip-cache.outputs.dir }}
+ /opt/hostedtoolcache/Python
+ /Users/runner/hostedtoolcache/Python
+ key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+ restore-keys: |
+ ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+ - name: Install pip-tools
+ run: pip install pip-tools
+ - name: Install apache-arrow on ubuntu
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ sudo apt update
+ sudo apt install -y -V ca-certificates lsb-release wget
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt update
+ sudo apt install -y -V libarrow-dev
+ - name: Install apache-arrow on macos
+ if: matrix.os == 'macOS-latest'
+ run: brew install apache-arrow
+ - name: Install dependencies
+ run: make install-python-ci-dependencies
+ - name: Setup Redis Cluster
+ run: |
+ docker pull vishnunair/docker-redis-cluster:latest
+ docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster
+ - name: Test python
+ if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
+ # Run only BigQuery and File tests without dynamo and redshift tests.
+ run: |
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "BigQuery and not dynamo and not Redshift and not Snowflake"
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not dynamo and not Redshift and not Snowflake"
+
diff --git a/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml
new file mode 100644
index 0000000000..8832c75fca
--- /dev/null
+++ b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml
@@ -0,0 +1,96 @@
+name: fork-pr-integration-tests-snowflake
+
+on: [pull_request]
+
+jobs:
+ integration-test-python:
+    if: github.repository == 'your github repo' # swap here with your fork's repository, e.g. your-org/feast
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [ "3.8" ]
+ os: [ ubuntu-latest ]
+ env:
+ OS: ${{ matrix.os }}
+ PYTHON: ${{ matrix.python-version }}
+ services:
+ redis:
+ image: redis
+ ports:
+ - 6379:6379
+ options: >-
+ --health-cmd "redis-cli ping"
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ # pull_request_target runs the workflow in the context of the base repo
+ # as such actions/checkout needs to be explicit configured to retrieve
+ # code from the PR.
+ ref: refs/pull/${{ github.event.pull_request.number }}/merge
+ submodules: recursive
+ - name: Setup Python
+ uses: actions/setup-python@v2
+ id: setup-python
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: Setup Go
+ id: setup-go
+ uses: actions/setup-go@v2
+ with:
+ go-version: 1.18.0
+
+ - name: Upgrade pip version
+ run: |
+ pip install --upgrade "pip>=21.3.1,<22.1"
+ - name: Get pip cache dir
+ id: pip-cache
+ run: |
+ echo "::set-output name=dir::$(pip cache dir)"
+ - name: pip cache
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ steps.pip-cache.outputs.dir }}
+ /opt/hostedtoolcache/Python
+ /Users/runner/hostedtoolcache/Python
+ key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+ restore-keys: |
+ ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+ - name: Install pip-tools
+ run: pip install pip-tools
+ - name: Install apache-arrow on ubuntu
+ if: matrix.os == 'ubuntu-latest'
+ run: |
+ sudo apt update
+ sudo apt install -y -V ca-certificates lsb-release wget
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+ sudo apt update
+ sudo apt install -y -V libarrow-dev
+ - name: Install apache-arrow on macos
+ if: matrix.os == 'macOS-latest'
+ run: brew install apache-arrow
+ - name: Install dependencies
+ run: make install-python-ci-dependencies
+ - name: Setup Redis Cluster
+ run: |
+ docker pull vishnunair/docker-redis-cluster:latest
+ docker run -d -p 6001:6379 -p 6002:6380 -p 6003:6381 -p 6004:6382 -p 6005:6383 -p 6006:6384 --name redis-cluster vishnunair/docker-redis-cluster
+ - name: Test python
+ if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
+ env:
+ SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }}
+ SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }}
+ SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }}
+ SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }}
+ SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }}
+        # Run only Snowflake and File tests, excluding dynamo, redshift, and BigQuery tests.
+ run: |
+          pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "Snowflake and not dynamo and not Redshift and not BigQuery and not gcp"
+          pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "File and not dynamo and not Redshift and not BigQuery and not gcp"
+
diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml
index fc2bb52387..f5297615f6 100644
--- a/.github/workflows/java_master_only.yml
+++ b/.github/workflows/java_master_only.yml
@@ -9,6 +9,7 @@ on:
jobs:
build-docker-images:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
strategy:
matrix:
@@ -46,6 +47,7 @@ jobs:
fi
lint-java:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -55,6 +57,7 @@ jobs:
run: make lint-java
unit-test-java:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -80,6 +83,7 @@ jobs:
path: ${{ github.workspace }}/docs/coverage/java/target/site/jacoco-aggregate/
integration-test:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml
index 39593f02ce..328a8e7c7b 100644
--- a/.github/workflows/java_pr.yml
+++ b/.github/workflows/java_pr.yml
@@ -9,6 +9,7 @@ on:
jobs:
lint-java:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -22,6 +23,7 @@ jobs:
run: make lint-java
unit-test-java:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
needs: lint-java
steps:
@@ -51,11 +53,40 @@ jobs:
name: java-coverage-report
path: ${{ github.workspace }}/docs/coverage/java/target/site/jacoco-aggregate/
- integration-test:
+ build-docker-image-java:
+ if: github.repository == 'feast-dev/feast'
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ component: [ feature-server-java ]
+ env:
+ MAVEN_CACHE: gs://feast-templocation-kf-feast/.m2.2020-08-19.tar
+ REGISTRY: gcr.io/kf-feast
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: 'true'
+ - uses: google-github-actions/setup-gcloud@v0
+ with:
+ version: '290.0.1'
+ export_default_credentials: true
+ project_id: ${{ secrets.GCP_PROJECT_ID }}
+ service_account_key: ${{ secrets.GCP_SA_KEY }}
+ - run: gcloud auth configure-docker --quiet
+ - name: Get m2 cache
+ run: |
+ infra/scripts/download-maven-cache.sh \
+ --archive-uri ${MAVEN_CACHE} \
+ --output-dir .
+ - name: Build image
+ run: make build-${{ matrix.component }}-docker REGISTRY=${REGISTRY} VERSION=${GITHUB_SHA}
+
+ integration-test-java-pr:
# all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes.
if:
- (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'ok-to-test')) ||
- (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved')))
+ ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'ok-to-test')) ||
+ (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved')))) &&
+ github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
needs: unit-test-java
steps:
diff --git a/.github/workflows/lint_pr.yml b/.github/workflows/lint_pr.yml
index 40c3dead00..f9af8b27c7 100644
--- a/.github/workflows/lint_pr.yml
+++ b/.github/workflows/lint_pr.yml
@@ -9,6 +9,7 @@ on:
jobs:
validate-title:
+ if: github.repository == 'feast-dev/feast'
name: Validate PR title
runs-on: ubuntu-latest
steps:
diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml
index c9ebcdaf04..51e3830fe6 100644
--- a/.github/workflows/master_only.yml
+++ b/.github/workflows/master_only.yml
@@ -7,6 +7,7 @@ on:
jobs:
build-lambda-docker-image:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
@@ -58,6 +59,7 @@ jobs:
outputs:
DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }}
integration-test-python-and-go:
+ if: github.repository == 'feast-dev/feast'
needs: build-lambda-docker-image
runs-on: ${{ matrix.os }}
strategy:
@@ -180,6 +182,7 @@ jobs:
run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmarks
build-all-docker-images:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
strategy:
matrix:
diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml
index fead512408..0c2ba6a66a 100644
--- a/.github/workflows/nightly-ci.yml
+++ b/.github/workflows/nightly-ci.yml
@@ -11,6 +11,7 @@ on:
jobs:
check_date:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
name: Check latest commit
outputs:
@@ -24,6 +25,7 @@ jobs:
if: ${{ github.event_name == 'schedule' }}
run: echo '::set-output name=WAS_EDITED::'$(test -n "$(git log --format=%H --since='24 hours ago')" && echo 'true' || echo 'false')
build-docker-image:
+ if: github.repository == 'feast-dev/feast'
needs: [check_date]
runs-on: ubuntu-latest
steps:
@@ -79,6 +81,7 @@ jobs:
outputs:
DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }}
integration-test-python:
+ if: github.repository == 'feast-dev/feast'
needs: [check_date, build-docker-image]
runs-on: ${{ matrix.os }}
strategy:
diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml
index 58bf45c687..ab8a79760f 100644
--- a/.github/workflows/pr_integration_tests.yml
+++ b/.github/workflows/pr_integration_tests.yml
@@ -16,8 +16,9 @@ jobs:
build-docker-image:
# all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes.
if:
- (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
- (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))
+ ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
+ (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) &&
+ github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -77,8 +78,9 @@ jobs:
integration-test-python:
# all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes.
if:
- (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
- (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))
+ ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
+ (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) &&
+ github.repository == 'feast-dev/feast'
needs: build-docker-image
runs-on: ${{ matrix.os }}
strategy:
diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml
index d4db8a3a7c..2a012c323b 100644
--- a/.github/workflows/pr_local_integration_tests.yml
+++ b/.github/workflows/pr_local_integration_tests.yml
@@ -12,9 +12,10 @@ jobs:
integration-test-python-local:
# all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes.
if:
- (github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
- (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))
- runs-on: ${{ matrix.os }}
+ ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) ||
+ (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) ||
+ github.repository != 'feast-dev/feast'
+    runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
@@ -67,14 +68,11 @@ jobs:
sudo apt install -y -V libarrow-dev
- name: Install dependencies
run: make install-python-ci-dependencies
- - name: Set up gcloud SDK # TODO(adchia): remove this dependency
- uses: google-github-actions/setup-gcloud@v0
- with:
- project_id: ${{ secrets.GCP_PROJECT_ID }}
- service_account_key: ${{ secrets.GCP_SA_KEY }}
- export_default_credentials: true
- - name: Use gcloud CLI
- run: gcloud info
- name: Test local integration tests
if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
- run: make test-python-integration-local
+ env:
+ FEAST_USAGE: "False"
+ IS_TEST: "True"
+ FEAST_LOCAL_ONLINE_CONTAINER: "True"
+ FEAST_IS_LOCAL_TEST: "True"
+ run: pytest -n 8 --cov=./ --cov-report=xml --color=yes --integration -k "not gcs_registry and not s3_registry and not test_lambda_materialization" sdk/python/tests
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 184fdb3cb6..9eb561263c 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -7,6 +7,7 @@ on:
jobs:
get-version:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
outputs:
release_version: ${{ steps.get_release_version.outputs.release_version }}
@@ -100,6 +101,7 @@ jobs:
fi
publish-helm-charts:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
needs: get-version
env:
@@ -129,6 +131,7 @@ jobs:
uses: ./.github/workflows/build_wheels.yml
publish-python-sdk:
+ if: github.repository == 'feast-dev/feast'
runs-on: ubuntu-latest
needs: [build_wheels]
steps:
@@ -142,6 +145,7 @@ jobs:
password: ${{ secrets.PYPI_PASSWORD }}
publish-java-sdk:
+ if: github.repository == 'feast-dev/feast'
container: maven:3.6-jdk-11
runs-on: ubuntu-latest
needs: get-version
@@ -177,23 +181,3 @@ jobs:
mkdir -p /root/.m2/
echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml
infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root
-
- publish-web-ui-npm:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v2
- - uses: actions/setup-node@v2
- with:
- node-version: '17.x'
- registry-url: 'https://registry.npmjs.org'
- - name: Install yarn dependencies
- working-directory: ./ui
- run: yarn install
- - name: Build yarn rollup
- working-directory: ./ui
- run: yarn build:lib
- - name: Publish UI package
- working-directory: ./ui
- run: npm publish
- env:
- NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 2f4d15590a..9fcbc1e052 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -15,6 +15,63 @@ on:
type: string
jobs:
+
+ get_dry_release_versions:
+ runs-on: ubuntu-latest
+ env:
+ GITHUB_TOKEN: ${{ github.event.inputs.token }}
+ outputs:
+ current_version: ${{ steps.get_versions.outputs.current_version }}
+ next_version: ${{ steps.get_versions.outputs.next_version }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ with:
+ persist-credentials: false
+ - name: Setup Node.js
+ uses: actions/setup-node@v2
+ with:
+ node-version: '16'
+ - name: Release (Dry Run)
+ id: get_versions
+ run: |
+ CURRENT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep "associated with version " | sed -E 's/.* version//' | sed -E 's/ on.*//')
+ NEXT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep 'The next release version is' | sed -E 's/.* ([[:digit:].]+)$/\1/')
+ echo ::set-output name=current_version::$CURRENT_VERSION
+ echo ::set-output name=next_version::$NEXT_VERSION
+ echo "Current version is ${CURRENT_VERSION}"
+ echo "Next version is ${NEXT_VERSION}"
+
+ publish-web-ui-npm:
+ if: github.repository == 'feast-dev/feast'
+ needs: get_dry_release_versions
+ runs-on: ubuntu-latest
+ env:
+ # This publish is working using an NPM automation token to bypass 2FA
+ NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+ CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }}
+ NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }}
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/setup-node@v2
+ with:
+ node-version: '17.x'
+ registry-url: 'https://registry.npmjs.org'
+ - name: Bump file versions (temporarily for Web UI publish)
+ run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION}
+ - name: Install yarn dependencies
+ working-directory: ./ui
+ run: yarn install
+ - name: Build yarn rollup
+ working-directory: ./ui
+ run: yarn build:lib
+ - name: Publish UI package
+ working-directory: ./ui
+ run: npm publish
+ env:
+ # This publish is working using an NPM automation token to bypass 2FA
+ NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+
release:
name: release
runs-on: ubuntu-latest
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 7bbe9ad6ac..de6d98d140 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -1,6 +1,6 @@
name: unit-tests
-on: [push, pull_request]
+on: [pull_request]
jobs:
unit-test-python:
runs-on: ${{ matrix.os }}
@@ -69,15 +69,10 @@ jobs:
- name: Install dependencies
run: make install-python-ci-dependencies
- name: Test Python
- env:
- SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }}
- SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }}
- SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }}
- SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }}
- SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }}
run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
+ if: github.repository == 'feast-dev/feast'
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.xml
diff --git a/.releaserc.js b/.releaserc.js
index 2acf9b7350..aadc4373e9 100644
--- a/.releaserc.js
+++ b/.releaserc.js
@@ -28,18 +28,26 @@ module.exports = {
"releaseRules": [
{breaking: true, release: 'minor'},
{tag: 'Breaking', release: 'minor'},
- ]
+ {type: '*!', release: 'minor'},
+ ],
+ // Ensure that the "BREAKING CHANGE" notes in commit footers are parsed
+ "parserOpts": {
+ "noteKeywords": ["BREAKING CHANGE", "BREAKING CHANGES"]
+ }
}],
["@semantic-release/exec", {
// Validate the type of release we are doing
"verifyReleaseCmd": "./infra/scripts/validate-release.sh ${nextRelease.type} " + current_branch,
- // Bump all version files
- "prepareCmd": "python ./infra/scripts/release/bump_file_versions.py ${lastRelease.version} ${nextRelease.version}"
+ // Bump all version files and build UI / update yarn.lock
+ "prepareCmd": "python ./infra/scripts/release/bump_file_versions.py ${lastRelease.version} ${nextRelease.version}; make build-ui"
}],
- "@semantic-release/release-notes-generator",
+ ["@semantic-release/release-notes-generator", {
+ // Ensure that a "Breaking Changes" section is added to the release notes
+ "preset": "angular"
+ }],
// Update the changelog
[
@@ -58,7 +66,8 @@ module.exports = {
"CHANGELOG.md",
"java/pom.xml",
"infra/charts/**/*.*",
- "ui/package.json"
+ "ui/package.json",
+ "sdk/python/feast/ui/yarn.lock"
],
message: "chore(release): release ${nextRelease.version}\n\n${nextRelease.notes}"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80852af83d..d4ea5f20d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
# Changelog
+## [0.23.1](https://github.com/feast-dev/feast/compare/v0.23.0...v0.23.1) (2022-08-10)
+
+
+### Bug Fixes
+
+* Check if on_demand_feature_views is an empty list rather than None for snowflake provider ([#3046](https://github.com/feast-dev/feast/issues/3046)) ([6af401f](https://github.com/feast-dev/feast/commit/6af401f09da5de1fdcb6ad7e74a42fc515307040))
+* Fix Feast Java inconsistency with int64 serialization vs python ([#3031](https://github.com/feast-dev/feast/issues/3031)) ([f340aeb](https://github.com/feast-dev/feast/commit/f340aeb020e1e6c51d8d4a8978924f51c585306c))
+* Fixing Web UI, which fails for the SQL registry ([#3028](https://github.com/feast-dev/feast/issues/3028)) ([d584ecd](https://github.com/feast-dev/feast/commit/d584ecdfffa4ef19ef8cfbb3da49cd7dd7b7de91))
+* More explicit error messages ([#2708](https://github.com/feast-dev/feast/issues/2708)) ([ca48963](https://github.com/feast-dev/feast/commit/ca48963c69a0d9eae103fb3c44e54edd5241593a))
+* Return an empty infra object from sql registry when it doesn't exist ([#3022](https://github.com/feast-dev/feast/issues/3022)) ([9a64e77](https://github.com/feast-dev/feast/commit/9a64e772d8682bb0fed78aa7af8b304bbf330d6b))
+
# [0.23.0](https://github.com/feast-dev/feast/compare/v0.22.0...v0.23.0) (2022-08-02)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a8671d9986..43ab6a58b8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,46 @@
-# Development Guide: Main Feast Repository
+
+# Development Guide: Main Feast Repository
+
> Please see [Development Guide](https://docs.feast.dev/project/development-guide) for project level development instructions.
+## Maintainer's Guide
+
+> Please see [Maintainer's Guide](https://docs.feast.dev/project/maintainers) for instructions for maintainers.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Community](#community)
+- [Making a pull request](#making-a-pull-request)
+ - [Pull request checklist](#pull-request-checklist)
+ - [Forking the repo](#forking-the-repo)
+ - [Pre-commit Hooks](#pre-commit-hooks)
+ - [Signing off commits](#signing-off-commits)
+ - [Incorporating upstream changes from master](#incorporating-upstream-changes-from-master)
+- [Feast Python SDK / CLI](#feast-python-sdk--cli)
+ - [Environment Setup](#environment-setup)
+ - [Code Style & Linting](#code-style--linting)
+ - [Unit Tests](#unit-tests)
+ - [Integration Tests](#integration-tests)
+ - [Local integration tests](#local-integration-tests)
+ - [(Advanced) Full integration tests](#advanced-full-integration-tests)
+ - [(Advanced) Running specific provider tests or running your test against specific online or offline stores](#advanced-running-specific-provider-tests-or-running-your-test-against-specific-online-or-offline-stores)
+ - [(Experimental) Run full integration tests against containerized services](#experimental-run-full-integration-tests-against-containerized-services)
+ - [Contrib integration tests](#contrib-integration-tests)
+ - [(Contrib) Running tests for Spark offline store](#contrib-running-tests-for-spark-offline-store)
+ - [(Contrib) Running tests for Trino offline store](#contrib-running-tests-for-trino-offline-store)
+ - [(Contrib) Running tests for Postgres offline store](#contrib-running-tests-for-postgres-offline-store)
+ - [(Contrib) Running tests for Postgres online store](#contrib-running-tests-for-postgres-online-store)
+ - [(Contrib) Running tests for HBase online store](#contrib-running-tests-for-hbase-online-store)
+- [(Experimental) Feast UI](#experimental-feast-ui)
+- [Feast Java Serving](#feast-java-serving)
+- [Feast Go Client](#feast-go-client)
+ - [Environment Setup](#environment-setup-1)
+ - [Building](#building)
+ - [Code Style & Linting](#code-style--linting-1)
+ - [Unit Tests](#unit-tests-1)
+ - [Testing with Github Actions workflows](#testing-with-github-actions-workflows)
+- [Issues](#issues)
+
## Overview
This guide is targeted at developers looking to contribute to Feast components in
the main Feast repository:
@@ -147,7 +187,7 @@ These tests create new temporary tables / datasets locally only, and they are cl
make test-python-integration-local
```
-#### Full integration tests
+#### (Advanced) Full integration tests
To test across clouds, on top of setting up Redis, you also need GCP / AWS / Snowflake setup.
> Note: you can manually control what tests are run today by inspecting
@@ -155,36 +195,47 @@ To test across clouds, on top of setting up Redis, you also need GCP / AWS / Sno
> and commenting out tests that are added to `DEFAULT_FULL_REPO_CONFIGS`
**GCP**
-1. Install the [Cloud SDK](https://cloud.google.com/sdk/docs/install).
-2. Then run login to gcloud:
+### Setup your GCP BigQuery Instance
+1. You can get free credits [here](https://cloud.google.com/free/docs/free-cloud-features#free-trial).
+2. You will need to setup a service account, enable the BigQuery API, and create a staging location for a bucket.
+ * Setup your service account and project using steps 1-5 [here](https://codelabs.developers.google.com/codelabs/cloud-bigquery-python#0).
+ * Remember to save your `PROJECT_ID` and your `key.json`. These will be your secrets that you will need to configure in Github actions. Namely, `secrets.GCP_PROJECT_ID` and `secrets.GCP_SA_KEY`. The `GCP_SA_KEY` value is the contents of your `key.json` file.
+ * Follow these [instructions](https://cloud.google.com/storage/docs/creating-buckets) in your project to create a bucket for running GCP tests and remember to save the bucket name.
+ * Make sure to add the service account email that you created in the previous step to the users that can access your bucket. Then, make sure to give the account the correct access roles, namely `objectCreator`, `objectViewer`, `objectAdmin`, and `admin`, so that your tests can use the bucket.
+3. Install the [Cloud SDK](https://cloud.google.com/sdk/docs/install).
+4. Login to gcloud if you haven't already:
```
gcloud auth login
gcloud auth application-default login
```
-- When you run `gcloud auth application-default login`, you should see some output of the form:
- ```
- Credentials saved to file: [$HOME/.config/gcloud/application_default_credentials.json]
- ```
-- You should run `export GOOGLE_APPLICATION_CREDENTIALS="$HOME/.config/gcloud/application_default_credentials.json”` to add the application credentials to your .zshrc or .bashrc.
-3. Run `export GCLOUD_PROJECT=[your project]` to your .zshrc or .bashrc.
-4. Running `gcloud config list` should give you something like this:
-```sh
-$ gcloud config list
-[core]
-account = [your email]
-disable_usage_reporting = True
-project = [your project]
+ - When you run `gcloud auth application-default login`, you should see some output of the form:
+ ```
+ Credentials saved to file: [$HOME/.config/gcloud/application_default_credentials.json]
+ ```
+  - You should run `export GOOGLE_APPLICATION_CREDENTIALS="$HOME/.config/gcloud/application_default_credentials.json"` to add the application credentials to your .zshrc or .bashrc.
+5. Add `export GCLOUD_PROJECT=[your project id from step 2]` to your .zshrc or .bashrc.
+6. Running `gcloud config list` should give you something like this:
+ ```sh
+ $ gcloud config list
+ [core]
+ account = [your email]
+ disable_usage_reporting = True
+ project = [your project id]
+
+ Your active configuration is: [default]
+ ```
+7. Export GCP specific environment variables in your workflow. Namely,
+ ```sh
+ export GCS_REGION='[your gcs region e.g US]'
+ export GCS_STAGING_LOCATION='[your gcs staging location]'
+ ```
+  **NOTE**: Your `GCS_STAGING_LOCATION` should be in the form `gs://[your bucket name]`, where the bucket name is the one you created in step 2.
-Your active configuration is: [default]
-```
-5. Export gcp specific environment variables. Namely,
-```sh
-export GCS_REGION='[your gcs region e.g US]'
-export GCS_STAGING_LOCATION='[your gcs staging location]'
-```
+8. Once authenticated, you should be able to run the integration tests for BigQuery without any failures.
**AWS**
-1. TODO(adchia): flesh out setting up AWS login (or create helper script)
+1. Set up AWS by creating an account, database, and cluster. You will need to enable Redshift and Dynamo.
+ * You can get free credits [here](https://aws.amazon.com/free/?all-free-tier.sort-by=item.additionalFields.SortRank&al[…]f.Free%20Tier%20Types=*all&awsf.Free%20Tier%20Categories=*all).
2. To run the AWS Redshift and Dynamo integration tests you will have to export your own AWS credentials. Namely,
```sh
@@ -200,18 +251,42 @@ export AWS_REGISTRY_PATH='[your aws registry path]'
**Snowflake**
1. See https://signup.snowflake.com/ to setup a trial.
-2. Then to run successfully, you'll need some environment variables setup:
-```sh
-export SNOWFLAKE_CI_DEPLOYMENT='[snowflake_deployment]'
-export SNOWFLAKE_CI_USER='[your user]'
-export SNOWFLAKE_CI_PASSWORD='[your pw]'
-export SNOWFLAKE_CI_ROLE='[your CI role e.g. SYSADMIN]'
-export SNOWFLAKE_CI_WAREHOUSE='[your warehouse]'
-```
+2. Set up your account. If you are not an `ACCOUNTADMIN` (if you created your own account, you should be), grant yourself the `SYSADMIN` role.
+ ```sql
+ grant role accountadmin, sysadmin to user user2;
+ ```
+ * Also remember to save your [account name](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#:~:text=organization_name%20is%20the%20name%20of,your%20account%20within%20your%20organization), username, and role.
+   * Your account name can be found in the account menu of the Snowflake web console.
+3. Create Dashboard and add a Tile.
+4. Create a warehouse and database named `FEAST` with the schemas `OFFLINE` and `ONLINE`.
+ ```sql
+ create or replace warehouse feast_tests_wh with
+ warehouse_size='MEDIUM' --set your warehouse size to whatever your budget allows--
+ auto_suspend = 180
+ auto_resume = true
+ initially_suspended=true;
+
+ create or replace database FEAST;
+ use database FEAST;
+ create schema OFFLINE;
+ create schema ONLINE;
+ ```
+5. You will need to create a data unloading location (either on S3, GCP, or Azure). Detailed instructions [here](https://docs.snowflake.com/en/user-guide/data-unload-overview.html). You will need to save the storage export location and the storage export name. You will need to create a [storage integration](https://docs.snowflake.com/en/sql-reference/sql/create-storage-integration.html) in your warehouse to make this work. Name this storage integration `FEAST_S3`.
+6. Then to run successfully, you'll need some environment variables setup:
+ ```sh
+ export SNOWFLAKE_CI_DEPLOYMENT='[your snowflake account name]'
+ export SNOWFLAKE_CI_USER='[your snowflake username]'
+ export SNOWFLAKE_CI_PASSWORD='[your snowflake pw]'
+ export SNOWFLAKE_CI_ROLE='[your CI role e.g. SYSADMIN]'
+ export SNOWFLAKE_CI_WAREHOUSE='[your warehouse]'
+ export BLOB_EXPORT_STORAGE_NAME='[your data unloading storage name]'
+  export BLOB_EXPORT_URI='[your data unloading blob uri]'
+ ```
+7. Once everything is setup, running snowflake integration tests should pass without failures.
-Then run `make test-python-integration`. Note that for Snowflake / GCP / AWS, this will create new temporary tables / datasets.
+Note that for Snowflake / GCP / AWS, running `make test-python-integration` will create new temporary tables / datasets in your cloud storage tables.
-#### Running specific provider tests or running your test against specific online or offline stores
+#### (Advanced) Running specific provider tests or running your test against specific online or offline stores
1. If you don't need to have your test run against all of the providers(`gcp`, `aws`, and `snowflake`) or don't need to run against all of the online stores, you can tag your test with specific providers or stores that you need(`@pytest.mark.universal_online_stores` or `@pytest.mark.universal_online_stores` with the `only` parameter). The `only` parameter selects specific offline providers and online stores that your test will test against. Example:
@@ -245,6 +320,26 @@ The services with containerized replacements currently implemented are:
You can run `make test-python-integration-container` to run tests against the containerized versions of dependencies.
+### Contrib integration tests
+#### (Contrib) Running tests for Spark offline store
+You can run `make test-python-universal-spark` to run all tests against the Spark offline store. (Note: you'll have to run `pip install -e ".[dev]"` first).
+
+Note that not all tests are passing yet.
+
+#### (Contrib) Running tests for Trino offline store
+You can run `make test-python-universal-trino` to run all tests against the Trino offline store. (Note: you'll have to run `pip install -e ".[dev]"` first)
+
+#### (Contrib) Running tests for Postgres offline store
+TODO
+
+#### (Contrib) Running tests for Postgres online store
+TODO
+
+#### (Contrib) Running tests for HBase online store
+TODO
+
+## (Experimental) Feast UI
+See [Feast contributing guide](ui/CONTRIBUTING.md)
## Feast Java Serving
See [Java contributing guide](java/CONTRIBUTING.md)
@@ -258,7 +353,7 @@ Setting up your development environment for Feast Go SDK:
### Building
Build the Feast Go Client with the `go` toolchain:
```sh
-go build
+make go build
```
### Code Style & Linting
diff --git a/Makefile b/Makefile
index ee2b7c8f1b..67be3ba248 100644
--- a/Makefile
+++ b/Makefile
@@ -63,7 +63,11 @@ benchmark-python-local:
FEAST_USAGE=False IS_TEST=True FEAST_IS_LOCAL_TEST=True python -m pytest --integration --benchmark --benchmark-autosave --benchmark-save-data sdk/python/tests
test-python:
- FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 sdk/python/tests
+ @(docker info > /dev/null 2>&1 && \
+ FEAST_USAGE=False \
+ IS_TEST=True \
+ python -m pytest -n 8 sdk/python/tests \
+ ) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";
test-python-integration:
FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration sdk/python/tests
@@ -75,13 +79,9 @@ test-python-integration-local:
FEAST_IS_LOCAL_TEST=True \
FEAST_LOCAL_ONLINE_CONTAINER=True \
python -m pytest -n 8 --integration \
- -k "not test_apply_entity_integration and \
- not test_apply_feature_view_integration and \
- not test_apply_data_source_integration and \
- not test_lambda_materialization and \
- not test_feature_view_inference_success and \
- not test_update_file_data_source_with_inferred_event_timestamp_col and \
- not test_nullable_online_store" \
+ -k "not gcs_registry and \
+ not s3_registry and \
+ not test_lambda_materialization" \
sdk/python/tests \
) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";
@@ -93,9 +93,32 @@ test-python-integration-container:
python -m pytest -n 8 --integration sdk/python/tests \
) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";
-test-python-universal-contrib:
+test-python-universal-spark:
+ PYTHONPATH='.' \
+ FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.spark_repo_configuration \
+ PYTEST_PLUGINS=feast.infra.offline_stores.contrib.spark_offline_store.tests \
+ FEAST_USAGE=False IS_TEST=True \
+ python -m pytest -n 8 --integration \
+ -k "not test_historical_retrieval_fails_on_validation and \
+ not test_historical_retrieval_with_validation and \
+ not test_historical_features_persisting and \
+ not test_historical_retrieval_fails_on_validation and \
+ not test_universal_cli and \
+ not test_go_feature_server and \
+ not test_feature_logging and \
+ not test_reorder_columns and \
+ not test_logged_features_validation and \
+ not test_lambda_materialization_consistency and \
+ not test_offline_write and \
+ not test_push_features_to_offline_store.py and \
+ not gcs_registry and \
+ not s3_registry and \
+ not test_universal_types" \
+ sdk/python/tests
+
+test-python-universal-trino:
PYTHONPATH='.' \
- FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.contrib_repo_configuration \
+ FULL_REPO_CONFIGS_MODULE=sdk.python.feast.infra.offline_stores.contrib.trino_repo_configuration \
PYTEST_PLUGINS=feast.infra.offline_stores.contrib.trino_offline_store.tests \
FEAST_USAGE=False IS_TEST=True \
python -m pytest -n 8 --integration \
@@ -106,6 +129,13 @@ test-python-universal-contrib:
not test_universal_cli and \
not test_go_feature_server and \
not test_feature_logging and \
+ not test_reorder_columns and \
+ not test_logged_features_validation and \
+ not test_lambda_materialization_consistency and \
+ not test_offline_write and \
+ not test_push_features_to_offline_store.py and \
+ not gcs_registry and \
+ not s3_registry and \
not test_universal_types" \
sdk/python/tests
diff --git a/README.md b/README.md
index ab69636a20..8ad37a83aa 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Feast (**Fea**ture **St**ore) is an open source feature store for machine learni
Feast allows ML platform teams to:
-* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (for serving pre-computed features online).
+* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (to serve pre-computed features online).
* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensure that future feature values do not leak to models during training.
* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another.
diff --git a/docs/README.md b/docs/README.md
index 1b70f8fedc..b838e5fe5b 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -2,11 +2,11 @@
## What is Feast?
-Feast (**Fea**ture **St**ore) is a customizable operational data system that re-uses existing infrastructure to manage and serve machine learning features to realtime models.
+Feast (**Fea**ture **St**ore) is a customizable operational data system that re-uses existing infrastructure to manage and serve machine learning features to realtime models.
Feast allows ML platform teams to:
-* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (for serving pre-computed features online).
+* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (to serve pre-computed features online).
* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensure that future feature values do not leak to models during training.
* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another.
@@ -20,8 +20,6 @@ Feast allows ML platform teams to:
Feast helps ML platform teams with DevOps experience productionize real-time models. Feast can also help these teams build towards a feature platform that improves collaboration between engineers and data scientists.
-
-
Feast is likely **not** the right tool if you
* are in an organization that’s just getting started with ML and is not yet sure what the business impact of ML is
@@ -67,7 +65,7 @@ Explore the following resources to get started with Feast:
* [Quickstart](getting-started/quickstart.md) is the fastest way to get started with Feast
* [Concepts](getting-started/concepts/) describes all important Feast API concepts
* [Architecture](getting-started/architecture-and-components/) describes Feast's overall architecture.
-* [Tutorials](tutorials/tutorials-overview.md) shows full examples of using Feast in machine learning applications.
+* [Tutorials](tutorials/tutorials-overview/) shows full examples of using Feast in machine learning applications.
* [Running Feast with Snowflake/GCP/AWS](how-to-guides/feast-snowflake-gcp-aws/) provides a more in-depth guide to using Feast.
* [Reference](reference/feast-cli-commands.md) contains detailed API and design documents.
* [Contributing](project/contributing.md) contains resources for anyone who wants to contribute to Feast.
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index b0e88b413f..aa95d40368 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -10,33 +10,32 @@
* [Quickstart](getting-started/quickstart.md)
* [Concepts](getting-started/concepts/README.md)
* [Overview](getting-started/concepts/overview.md)
- * [Data source](getting-started/concepts/data-source.md)
- * [Dataset](getting-started/concepts/dataset.md)
+ * [Data ingestion](getting-started/concepts/data-ingestion.md)
* [Entity](getting-started/concepts/entity.md)
* [Feature view](getting-started/concepts/feature-view.md)
- * [Stream feature view](getting-started/concepts/stream-feature-view.md)
* [Feature retrieval](getting-started/concepts/feature-retrieval.md)
* [Point-in-time joins](getting-started/concepts/point-in-time-joins.md)
* [Registry](getting-started/concepts/registry.md)
+ * [\[Alpha\] Saved dataset](getting-started/concepts/dataset.md)
* [Architecture](getting-started/architecture-and-components/README.md)
* [Overview](getting-started/architecture-and-components/overview.md)
* [Feature repository](getting-started/architecture-and-components/feature-repository.md)
* [Registry](getting-started/architecture-and-components/registry.md)
* [Offline store](getting-started/architecture-and-components/offline-store.md)
* [Online store](getting-started/architecture-and-components/online-store.md)
- * [Provider](getting-started/architecture-and-components/provider.md)
* [Batch Materialization Engine](getting-started/architecture-and-components/batch-materialization-engine.md)
+ * [Provider](getting-started/architecture-and-components/provider.md)
* [Learning by example](getting-started/feast-workshop.md)
* [Third party integrations](getting-started/third-party-integrations.md)
* [FAQ](getting-started/faq.md)
## Tutorials
-* [Overview](tutorials/tutorials-overview.md)
-* [Driver ranking](tutorials/driver-ranking-with-feast.md)
-* [Fraud detection on GCP](tutorials/fraud-detection.md)
-* [Real-time credit scoring on AWS](tutorials/real-time-credit-scoring-on-aws.md)
-* [Driver stats on Snowflake](tutorials/driver-stats-on-snowflake.md)
+* [Sample use-case tutorials](tutorials/tutorials-overview/README.md)
+ * [Driver ranking](tutorials/tutorials-overview/driver-ranking-with-feast.md)
+ * [Fraud detection on GCP](tutorials/tutorials-overview/fraud-detection.md)
+ * [Real-time credit scoring on AWS](tutorials/tutorials-overview/real-time-credit-scoring-on-aws.md)
+ * [Driver stats on Snowflake](tutorials/tutorials-overview/driver-stats-on-snowflake.md)
* [Validating historical features with Great Expectations](tutorials/validating-historical-features.md)
* [Using Scalable Registry](tutorials/using-scalable-registry.md)
* [Building streaming features](tutorials/building-streaming-features.md)
@@ -51,11 +50,12 @@
* [Load data into the online store](how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md)
* [Read features from the online store](how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md)
* [Running Feast in production](how-to-guides/running-feast-in-production.md)
-* [Upgrading from Feast 0.9](https://docs.google.com/document/u/1/d/1AOsr\_baczuARjCpmZgVd8mCqTF4AZ49OEyU4Cn-uTT0/edit)
* [Upgrading for Feast 0.20+](how-to-guides/automated-feast-upgrade.md)
-* [Adding a custom batch materialization engine](how-to-guides/creating-a-custom-materialization-engine.md)
-* [Adding a new online store](how-to-guides/adding-support-for-a-new-online-store.md)
-* [Adding a new offline store](how-to-guides/adding-a-new-offline-store.md)
+* [Customizing Feast](how-to-guides/customizing-feast/README.md)
+ * [Adding a custom batch materialization engine](how-to-guides/customizing-feast/creating-a-custom-materialization-engine.md)
+ * [Adding a new offline store](how-to-guides/customizing-feast/adding-a-new-offline-store.md)
+ * [Adding a new online store](how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md)
+ * [Adding a custom provider](how-to-guides/customizing-feast/creating-a-custom-provider.md)
* [Adding or reusing tests](how-to-guides/adding-or-reusing-tests.md)
## Reference
@@ -70,13 +70,16 @@
* [Kinesis](reference/data-sources/kinesis.md)
* [Spark (contrib)](reference/data-sources/spark.md)
* [PostgreSQL (contrib)](reference/data-sources/postgres.md)
+ * [Trino (contrib)](reference/data-sources/trino.md)
* [Offline stores](reference/offline-stores/README.md)
+ * [Overview](reference/offline-stores/overview.md)
* [File](reference/offline-stores/file.md)
* [Snowflake](reference/offline-stores/snowflake.md)
* [BigQuery](reference/offline-stores/bigquery.md)
* [Redshift](reference/offline-stores/redshift.md)
* [Spark (contrib)](reference/offline-stores/spark.md)
* [PostgreSQL (contrib)](reference/offline-stores/postgres.md)
+ * [Trino (contrib)](reference/offline-stores/trino.md)
* [Online stores](reference/online-stores/README.md)
* [SQLite](reference/online-stores/sqlite.md)
* [Snowflake](reference/online-stores/snowflake.md)
@@ -93,11 +96,11 @@
* [.feastignore](reference/feature-repository/feast-ignore.md)
* [Feature servers](reference/feature-servers/README.md)
* [Python feature server](reference/feature-servers/python-feature-server.md)
- * [Go feature server](reference/feature-servers/go-feature-server.md)
-* [\[Alpha\] Web UI](reference/alpha-web-ui.md)
-* [\[Alpha\] Data quality monitoring](reference/dqm.md)
+ * [\[Alpha\] Go feature server](reference/feature-servers/go-feature-server.md)
+ * [\[Alpha\] AWS Lambda feature server](reference/feature-servers/alpha-aws-lambda-feature-server.md)
+* [\[Beta\] Web UI](reference/alpha-web-ui.md)
* [\[Alpha\] On demand feature view](reference/alpha-on-demand-feature-view.md)
-* [\[Alpha\] AWS Lambda feature server](reference/alpha-aws-lambda-feature-server.md)
+* [\[Alpha\] Data quality monitoring](reference/dqm.md)
* [Feast CLI reference](reference/feast-cli-commands.md)
* [Python API reference](http://rtd.feast.dev)
* [Usage](reference/usage.md)
@@ -106,6 +109,7 @@
* [Contribution process](project/contributing.md)
* [Development guide](project/development-guide.md)
+ * [Maintainer Docs](project/maintainers.md)
* [Versioning policy](project/versioning-policy.md)
* [Release process](project/release-process.md)
* [Feast 0.9 vs Feast 0.10+](project/feast-0.9-vs-feast-0.10+.md)
diff --git a/docs/getting-started/architecture-and-components/README.md b/docs/getting-started/architecture-and-components/README.md
index 8a6e181ea7..6e6b5f6ee2 100644
--- a/docs/getting-started/architecture-and-components/README.md
+++ b/docs/getting-started/architecture-and-components/README.md
@@ -1,15 +1,29 @@
# Architecture
-{% page-ref page="overview.md" %}
+{% content-ref url="overview.md" %}
+[overview.md](overview.md)
+{% endcontent-ref %}
-{% page-ref page="feature-repository.md" %}
+{% content-ref url="feature-repository.md" %}
+[feature-repository.md](feature-repository.md)
+{% endcontent-ref %}
-{% page-ref page="registry.md" %}
+{% content-ref url="registry.md" %}
+[registry.md](registry.md)
+{% endcontent-ref %}
-{% page-ref page="offline-store.md" %}
+{% content-ref url="offline-store.md" %}
+[offline-store.md](offline-store.md)
+{% endcontent-ref %}
-{% page-ref page="online-store.md" %}
+{% content-ref url="online-store.md" %}
+[online-store.md](online-store.md)
+{% endcontent-ref %}
-{% page-ref page="provider.md" %}
+{% content-ref url="batch-materialization-engine.md" %}
+[batch-materialization-engine.md](batch-materialization-engine.md)
+{% endcontent-ref %}
-{% page-reg page="batch-materialization-engine.md" %}
+{% content-ref url="provider.md" %}
+[provider.md](provider.md)
+{% endcontent-ref %}
diff --git a/docs/getting-started/architecture-and-components/batch-materialization-engine.md b/docs/getting-started/architecture-and-components/batch-materialization-engine.md
index fb3c83ccb4..7be22fe125 100644
--- a/docs/getting-started/architecture-and-components/batch-materialization-engine.md
+++ b/docs/getting-started/architecture-and-components/batch-materialization-engine.md
@@ -4,7 +4,6 @@ A batch materialization engine is a component of Feast that's responsible for mo
A materialization engine abstracts over specific technologies or frameworks that are used to materialize data. It allows users to use a pure local serialized approach (which is the default LocalMaterializationEngine), or delegates the materialization to seperate components (e.g. AWS Lambda, as implemented by the the LambdaMaterializaionEngine).
-If the built-in engines are not sufficient, you can create your own custom materialization engine. Please see [this guide](../../how-to-guides/creating-a-custom-materialization-engine.md) for more details.
+If the built-in engines are not sufficient, you can create your own custom materialization engine. Please see [this guide](../../how-to-guides/customizing-feast/creating-a-custom-materialization-engine.md) for more details.
Please see [feature\_store.yaml](../../reference/feature-repository/feature-store-yaml.md#overview) for configuring engines.
-
diff --git a/docs/getting-started/architecture-and-components/offline-store.md b/docs/getting-started/architecture-and-components/offline-store.md
index 29a72bd5f0..c59a526a53 100644
--- a/docs/getting-started/architecture-and-components/offline-store.md
+++ b/docs/getting-started/architecture-and-components/offline-store.md
@@ -1,17 +1,17 @@
# Offline store
-Feast uses offline stores as storage and compute systems. Offline stores store historic time-series feature values. Feast does not generate these features, but instead uses the offline store as the interface for querying existing features in your organization.
-
-Offline stores are used primarily for two reasons
+An offline store is an interface for working with historical time-series feature values that are stored in [data sources](../../getting-started/concepts/data-ingestion.md).
+The `OfflineStore` interface has several different implementations, such as `BigQueryOfflineStore`, each of which is backed by a different storage and compute engine.
+For more details on which offline stores are supported, please see [Offline Stores](../../reference/offline-stores/).
+Offline stores are primarily used for two reasons:
1. Building training datasets from time-series features.
-2. Materializing \(loading\) features from the offline store into an online store in order to serve those features at low latency for prediction.
-
-Offline stores are configured through the [feature\_store.yaml](../../reference/offline-stores/). When building training datasets or materializing features into an online store, Feast will use the configured offline store along with the data sources you have defined as part of feature views to execute the necessary data operations.
-
-It is not possible to query all data sources from all offline stores, and only a single offline store can be used at a time. For example, it is not possible to query a BigQuery table from a `File` offline store, nor is it possible for a `BigQuery` offline store to query files from your local file system.
+2. Materializing \(loading\) features into an online store to serve those features at low latency in a production setting.
-Please see the [Offline Stores](../../reference/offline-stores/) reference for more details on configuring offline stores.
+Offline stores are configured through the [feature\_store.yaml](../../reference/offline-stores/).
+When building training datasets or materializing features into an online store, Feast will use the configured offline store along with your data sources to execute the necessary data operations.
-Please see the [Push Source](reference/data-sources/push.md) for reference on how to push features directly to the offline store in your feature store.
+Only a single offline store can be used at a time.
+Moreover, offline stores are not compatible with all data sources; for example, the `BigQuery` offline store cannot be used to query a file-based data source.
+Please see [Push Source](../../reference/data-sources/push.md) for more details on how to push features directly to the offline store in your feature store.
diff --git a/docs/getting-started/architecture-and-components/provider.md b/docs/getting-started/architecture-and-components/provider.md
index 9eadf73ded..89f01c4e5b 100644
--- a/docs/getting-started/architecture-and-components/provider.md
+++ b/docs/getting-started/architecture-and-components/provider.md
@@ -1,10 +1,9 @@
# Provider
-A provider is an implementation of a feature store using specific feature store components \(e.g. offline store, online store\) targeting a specific environment \(e.g. GCP stack\).
+A provider is an implementation of a feature store using specific feature store components (e.g. offline store, online store) targeting a specific environment (e.g. GCP stack).
-Providers orchestrate various components \(offline store, online store, infrastructure, compute\) inside an environment. For example, the `gcp` provider supports [BigQuery](https://cloud.google.com/bigquery) as an offline store and [Datastore](https://cloud.google.com/datastore) as an online store, ensuring that these components can work together seamlessly. Feast has three built-in providers \(`local`, `gcp`, and `aws`\) with default configurations that make it easy for users to start a feature store in a specific environment. These default configurations can be overridden easily. For instance, you can use the `gcp` provider but use Redis as the online store instead of Datastore.
+Providers orchestrate various components (offline store, online store, infrastructure, compute) inside an environment. For example, the `gcp` provider supports [BigQuery](https://cloud.google.com/bigquery) as an offline store and [Datastore](https://cloud.google.com/datastore) as an online store, ensuring that these components can work together seamlessly. Feast has three built-in providers (`local`, `gcp`, and `aws`) with default configurations that make it easy for users to start a feature store in a specific environment. These default configurations can be overridden easily. For instance, you can use the `gcp` provider but use Redis as the online store instead of Datastore.
-If the built-in providers are not sufficient, you can create your own custom provider. Please see [this guide](../../how-to-guides/creating-a-custom-provider.md) for more details.
+If the built-in providers are not sufficient, you can create your own custom provider. Please see [this guide](../../how-to-guides/customizing-feast/creating-a-custom-provider.md) for more details.
Please see [feature\_store.yaml](../../reference/feature-repository/feature-store-yaml.md#overview) for configuring providers.
-
diff --git a/docs/getting-started/concepts/README.md b/docs/getting-started/concepts/README.md
index 0fc415f059..e805e3b486 100644
--- a/docs/getting-started/concepts/README.md
+++ b/docs/getting-started/concepts/README.md
@@ -1,21 +1,33 @@
# Concepts
-{% page-ref page="overview.md" %}
+{% content-ref url="overview.md" %}
+[overview.md](overview.md)
+{% endcontent-ref %}
-{% page-ref page="data-source.md" %}
+{% content-ref url="data-ingestion.md" %}
+[data-ingestion.md](data-ingestion.md)
+{% endcontent-ref %}
-{% page-ref page="dataset.md" %}
+{% content-ref url="entity.md" %}
+[entity.md](entity.md)
+{% endcontent-ref %}
-{% page-ref page="entity.md" %}
+{% content-ref url="feature-view.md" %}
+[feature-view.md](feature-view.md)
+{% endcontent-ref %}
-{% page-ref page="feature-view.md" %}
+{% content-ref url="feature-retrieval.md" %}
+[feature-retrieval.md](feature-retrieval.md)
+{% endcontent-ref %}
-{% page-ref page="feature-view.md" %}
+{% content-ref url="point-in-time-joins.md" %}
+[point-in-time-joins.md](point-in-time-joins.md)
+{% endcontent-ref %}
-{% page-ref page="stream-feature-view.md" %}
+{% content-ref url="registry.md" %}
+[registry.md](registry.md)
+{% endcontent-ref %}
-{% page-ref page="feature-retrieval.md" %}
-
-{% page-ref page="point-in-time-joins.md" %}
-
-{% page-ref page="registry.md" %}
+{% content-ref url="dataset.md" %}
+[dataset.md](dataset.md)
+{% endcontent-ref %}
diff --git a/docs/getting-started/concepts/data-ingestion.md b/docs/getting-started/concepts/data-ingestion.md
new file mode 100644
index 0000000000..3599494f51
--- /dev/null
+++ b/docs/getting-started/concepts/data-ingestion.md
@@ -0,0 +1,95 @@
+# Data ingestion
+
+## Data source
+
+The data source refers to raw underlying data (e.g. a table in BigQuery).
+
+Feast uses a time-series data model to represent data. This data model is used to interpret feature data in data sources in order to build training datasets or when materializing features into an online store.
+
+Below is an example data source with a single entity (`driver`) and two features (`trips_today`, and `rating`).
+
+.png>)
+
+Feast supports primarily **time-stamped** tabular data as data sources. There are many kinds of possible data sources:
+
+* **Batch data sources:** ideally, these live in data warehouses (BigQuery, Snowflake, Redshift), but can be in data lakes (S3, GCS, etc). Feast supports ingesting and querying data across both.
+* **Stream data sources**: Feast does **not** have native streaming integrations. It does however facilitate making streaming features available in different environments. There are two kinds of sources:
+ * **Push sources** allow users to push features into Feast, and make them available for training / batch scoring ("offline"), for realtime feature serving ("online"), or both.
+ * **\[Alpha] Stream sources** allow users to register metadata from Kafka or Kinesis sources. The onus is on the user to ingest from these sources, though Feast provides some limited helper methods to ingest directly from Kafka / Kinesis topics.
+* **(Experimental) Request data sources:** This is data that is only available at request time (e.g. from a user action that needs an immediate model prediction response). This is primarily relevant as an input into **on-demand feature views**, which allow light-weight feature engineering and combining features across sources.
+
+## Batch data ingestion
+
+Ingesting from batch sources is only necessary to power real-time models. This is done through **materialization**. Under the hood, Feast manages an _offline store_ (to scalably generate training data from batch sources) and an _online store_ (to provide low-latency access to features for real-time models).
+
+A key command to use in Feast is the `materialize_incremental` command, which fetches the latest values for all entities in the batch source and ingests these values into the online store.
+
+Materialization can be called programmatically or through the CLI:
+
+
+
+Code example: programmatic scheduled materialization
+
+This snippet creates a feature store object which points to the registry (which knows of all defined features) and the online store (DynamoDB in this case), and then materializes the latest feature values from the batch source into the online store.
+
+```python
+# Define Python callable
+def materialize():
+ repo_config = RepoConfig(
+ registry=RegistryConfig(path="s3://[YOUR BUCKET]/registry.pb"),
+ project="feast_demo_aws",
+ provider="aws",
+ offline_store="file",
+ online_store=DynamoDBOnlineStoreConfig(region="us-west-2")
+ )
+ store = FeatureStore(config=repo_config)
+ store.materialize_incremental(datetime.datetime.now())
+
+# (In production) Use Airflow PythonOperator
+materialize_python = PythonOperator(
+ task_id='materialize_python',
+ python_callable=materialize,
+)
+```
+
+
+
+
+
+Code example: CLI based materialization
+
+
+
+#### How to run this in the CLI
+
+```bash
+CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%S")
+feast materialize-incremental $CURRENT_TIME
+```
+
+#### How to run this on Airflow
+
+```python
+# Use BashOperator
+materialize_bash = BashOperator(
+ task_id='materialize',
+ bash_command=f'feast materialize-incremental {datetime.datetime.now().replace(microsecond=0).isoformat()}',
+)
+```
+
+
+
+### Batch data schema inference
+
+If the `schema` parameter is not specified when defining a data source, Feast attempts to infer the schema of the data source during `feast apply`.
+The way it does this depends on the implementation of the offline store. For the offline stores that ship with Feast out of the box this inference is performed by inspecting the schema of the table in the cloud data warehouse,
+or if a query is provided to the source, by running the query with a `LIMIT` clause and inspecting the result.
+
+
+## Stream data ingestion
+
+Ingesting from stream sources happens either via a Push API or via a contrib processor that leverages an existing Spark context.
+
+* To **push data into the offline or online stores**: see [push sources](../../reference/data-sources/push.md) for details.
+* (experimental) To **use a contrib Spark processor** to ingest from a topic, see [Tutorial: Building streaming features](../../tutorials/building-streaming-features.md)
+
diff --git a/docs/getting-started/concepts/data-source.md b/docs/getting-started/concepts/data-source.md
deleted file mode 100644
index d468108ca1..0000000000
--- a/docs/getting-started/concepts/data-source.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Data source
-
-The data source refers to raw underlying data \(e.g. a table in BigQuery\).
-
-Feast uses a time-series data model to represent data. This data model is used to interpret feature data in data sources in order to build training datasets or when materializing features into an online store.
-
-Below is an example data source with a single entity \(`driver`\) and two features \(`trips_today`, and `rating`\).
-
-
-
-
-
diff --git a/docs/getting-started/concepts/dataset.md b/docs/getting-started/concepts/dataset.md
index 59f7168905..be8026a293 100644
--- a/docs/getting-started/concepts/dataset.md
+++ b/docs/getting-started/concepts/dataset.md
@@ -1,22 +1,18 @@
-# Dataset
+# \[Alpha] Saved dataset
-Feast datasets allow for conveniently saving dataframes that include both features and entities to be subsequently used for data analysis and model training.
-[Data Quality Monitoring](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98) was the primary motivation for creating dataset concept.
+Feast datasets allow for conveniently saving dataframes that include both features and entities to be subsequently used for data analysis and model training. [Data Quality Monitoring](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98) was the primary motivation for creating the dataset concept.
Dataset's metadata is stored in the Feast registry and raw data (features, entities, additional input keys and timestamp) is stored in the [offline store](../architecture-and-components/offline-store.md).
Dataset can be created from:
-1. Results of historical retrieval
-2. [planned] Logging request (including input for [on demand transformation](../../reference/alpha-on-demand-feature-view.md)) and response during feature serving
-3. [planned] Logging features during writing to online store (from batch source or stream)
+1. Results of historical retrieval
+2. \[planned] Logging request (including input for [on demand transformation](../../reference/alpha-on-demand-feature-view.md)) and response during feature serving
+3. \[planned] Logging features during writing to online store (from batch source or stream)
-### Creating Saved Dataset from Historical Retrieval
+### Creating a saved dataset from historical retrieval
-To create a saved dataset from historical features for later retrieval or analysis, a user needs to call `get_historical_features` method first and then pass the returned retrieval job to `create_saved_dataset` method.
-`create_saved_dataset` will trigger provided retrieval job (by calling `.persist()` on it) to store the data using specified `storage`.
-Storage type must be the same as globally configured offline store (eg, it's impossible to persist data to Redshift with BigQuery source).
-`create_saved_dataset` will also create SavedDataset object with all related metadata and will write it to the registry.
+To create a saved dataset from historical features for later retrieval or analysis, a user needs to call the `get_historical_features` method first and then pass the returned retrieval job to the `create_saved_dataset` method. `create_saved_dataset` will trigger the provided retrieval job (by calling `.persist()` on it) to store the data using the specified `storage`. The storage type must be the same as the globally configured offline store (e.g., it's impossible to persist data to Redshift with a BigQuery source). `create_saved_dataset` will also create a SavedDataset object with all related metadata and will write it to the registry.
```python
from feast import FeatureStore
@@ -40,11 +36,12 @@ dataset.to_df()
```
Saved dataset can be later retrieved using `get_saved_dataset` method:
+
```python
dataset = store.get_saved_dataset('my_training_dataset')
dataset.to_df()
```
----
+***
-Check out our [tutorial on validating historical features](../../tutorials/validating-historical-features.md) to see how this concept can be applied in real-world use case.
\ No newline at end of file
+Check out our [tutorial on validating historical features](../../tutorials/validating-historical-features.md) to see how this concept can be applied in a real-world use case.
diff --git a/docs/getting-started/concepts/entity.md b/docs/getting-started/concepts/entity.md
index 77cfc0aff2..1ea3037ef2 100644
--- a/docs/getting-started/concepts/entity.md
+++ b/docs/getting-started/concepts/entity.md
@@ -3,20 +3,41 @@
An entity is a collection of semantically related features. Users define entities to map to the domain of their use case. For example, a ride-hailing service could have customers and drivers as their entities, which group related features that correspond to these customers and drivers.
```python
-driver = Entity(name='driver', value_type=ValueType.STRING, join_keys=['driver_id'])
+driver = Entity(name='driver', join_keys=['driver_id'])
```
-Entities are typically defined as part of feature views. Entity name is used to reference the entity from a feature view definition and join key is used to identify the physical primary key on which feature values should be stored and retrieved. These keys are used during the lookup of feature values from the online store and the join process in point-in-time joins. It is possible to define composite entities \(more than one entity object\) in a feature view. It is also possible for feature views to have zero entities. See [feature view](feature-view.md) for more details.
+The _entity name_ is used to uniquely identify the entity (for example to show in the experimental Web UI). The _join key_ is used to identify the physical primary key on which feature values should be stored and retrieved.
-Entities should be reused across feature views.
+Entities are used by Feast in many contexts, as we explore below:
-## **Entity key**
+### Use case #1: Defining and storing features
-A related concept is an entity key. These are one or more entity values that uniquely describe a feature view record. In the case of an entity \(like a `driver`\) that only has a single entity field, the entity _is_ an entity key. However, it is also possible for an entity key to consist of multiple entity values. For example, a feature view with the composite entity of \(customer, country\) might have an entity key of \(1001, 5\).
+Feast's primary object for defining features is a _feature view,_ which is a collection of features. Feature views map to 0 or more entities, since a feature can be associated with:
-
+* zero entities (e.g. a global feature like _num\_daily\_global\_transactions_)
+* one entity (e.g. a user feature like _user\_age_ or _last\_5\_bought\_items_)
+* multiple entities, aka a composite key (e.g. a user + merchant category feature like _num\_user\_purchases\_in\_merchant\_category)_
-Entity keys act as primary keys. They are used during the lookup of features from the online store, and they are also used to match feature rows across feature views during point-in-time joins.
+Feast refers to this collection of entities for a feature view as an **entity key**.
+.png>)
+Entities should be reused across feature views. This helps with discovery of features, since it enables data scientists to understand how other teams build features for the entity they are most interested in.
+Feast will use the feature view concept to then define how to store groups of features in a low-latency online store.
+
+### Use case #2: Retrieving features
+
+At _training time_, users control what entities they want to look up, for example corresponding to train / test / validate splits. A user specifies a list of _entity keys + timestamps_ they want to fetch point-in-time correct features for to generate a training dataset.
+
+At _serving time_, users specify _entity key(s)_ to fetch the latest feature values for to power a real-time model prediction (e.g. a fraud detection model that needs to fetch the transaction user's features).
+
+{% hint style="info" %}
+**Q: Can I retrieve features for all entities?**
+
+Kind of.
+
+In practice, this is most relevant for _batch scoring models_ (e.g. predict user churn for all existing users) that are offline only. For these use cases, Feast supports generating features for a SQL-backed list of entities. There is an [open GitHub issue](https://github.com/feast-dev/feast/issues/1611) that welcomes contribution to make this a more intuitive API.
+
+For _real-time feature retrieval_, there is no out of the box support for this because it would promote expensive and slow scan operations. Users can still pass in a large list of entities for retrieval, but this does not scale well.
+{% endhint %}
diff --git a/docs/getting-started/concepts/feast-types.md b/docs/getting-started/concepts/feast-types.md
new file mode 100644
index 0000000000..72741f263e
--- /dev/null
+++ b/docs/getting-started/concepts/feast-types.md
@@ -0,0 +1,14 @@
+# Data Types in Feast
+
+Feast frequently has to mediate data across platforms and systems, each with its own unique type system.
+To make this possible, Feast itself has a type system for all the types it is able to handle natively.
+
+Feast's type system is built on top of [protobuf](https://github.com/protocolbuffers/protobuf). The messages that make up the type system can be found [here](https://github.com/feast-dev/feast/blob/master/protos/feast/types/Value.proto), and the corresponding python classes that wrap them can be found [here](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/types.py).
+
+Feast supports primitive data types (numerical values, strings, bytes, booleans and timestamps). The only complex data type Feast supports is Arrays, and arrays cannot contain other arrays.
+
+Each feature or schema field in Feast is associated with a data type, which is stored in Feast's [registry](registry.md). These types are also used to ensure that Feast operates on values correctly (e.g. making sure that timestamp columns used for [point-in-time correct joins](point-in-time-joins.md) actually have the timestamp type).
+
+As a result, each system that Feast interacts with needs a way to translate data types from the native platform into a Feast type. E.g., Snowflake SQL types are converted to Feast types [here](https://rtd.feast.dev/en/master/feast.html#feast.type_map.snowflake_python_type_to_feast_value_type). The onus is therefore on authors of offline or online store connectors to make sure that this type mapping happens correctly.
+
+**Note**: Feast currently does *not* support a null type in its type system.
\ No newline at end of file
diff --git a/docs/getting-started/concepts/feature-repo.md b/docs/getting-started/concepts/feature-repo.md
new file mode 100644
index 0000000000..0316019bea
--- /dev/null
+++ b/docs/getting-started/concepts/feature-repo.md
@@ -0,0 +1,13 @@
+# Feature Repository
+
+## Feature Repo
+
+A feature repository is the collection of Python files that define entities, feature views, and data sources. Feature repositories also have a `feature_store.yaml` file at their root.
+
+Users can collaborate by making and reviewing changes to Feast object definitions (feature views, entities, etc) in the feature repo.
+But these objects must be applied, either through the API or the CLI, for them to be available to downstream Feast actions (such as materialization or retrieving online features). Internally, Feast only looks at the registry when performing these actions, and not at the feature repo directly.
+
+## Declarative Feature Definitions
+
+When using the CLI to apply changes (via `feast apply`), the CLI determines the state of the feature repo from the source files and updates the registry state to reflect the definitions in the feature repo files.
+This means that new feature views are added to the registry, existing feature views are updated as necessary, and Feast objects removed from the source files are deleted from the registry.
\ No newline at end of file
diff --git a/docs/getting-started/concepts/feature-retrieval.md b/docs/getting-started/concepts/feature-retrieval.md
index 85b7d9c5b7..01dfe96344 100644
--- a/docs/getting-started/concepts/feature-retrieval.md
+++ b/docs/getting-started/concepts/feature-retrieval.md
@@ -1,14 +1,152 @@
# Feature retrieval
-## Dataset
+## Overview
-A dataset is a collection of rows that is produced by a historical retrieval from Feast in order to train a model. A dataset is produced by a join from one or more feature views onto an entity dataframe. Therefore, a dataset may consist of features from multiple feature views.
+Generally, Feast supports several patterns of feature retrieval:
-**Dataset vs Feature View:** Feature views contain the schema of data and a reference to where data can be found \(through its data source\). Datasets are the actual data manifestation of querying those data sources.
+1. Training data generation (via `feature_store.get_historical_features(...)`)
+2. Offline feature retrieval for batch scoring (via `feature_store.get_historical_features(...)`)
+3. Online feature retrieval for real-time model predictions (via `feature_store.get_online_features(...)`)
-**Dataset vs Data Source:** Datasets are the output of historical retrieval, whereas data sources are the inputs. One or more data sources can be used in the creation of a dataset.
+Each of these retrieval mechanisms accept:
+
+* some way of specifying entities (to fetch features for)
+* some way to specify the features to fetch (either via [feature services](feature-retrieval.md#feature-services), which group features needed for a model version, or [feature references](feature-retrieval.md#feature-references))
+
+
+
+How to: generate training data
+
+Feast abstracts away point-in-time join complexities with the `get_historical_features` API.
+
+It expects an **entity dataframe (or SQL query)** and a **list of feature references (or feature service)**
+
+#### **Option 1: using feature references (to pick individual features when exploring data)**
+
+```python
+entity_df = pd.DataFrame.from_dict(
+ {
+ "driver_id": [1001, 1002, 1003, 1004, 1001],
+ "event_timestamp": [
+ datetime(2021, 4, 12, 10, 59, 42),
+ datetime(2021, 4, 12, 8, 12, 10),
+ datetime(2021, 4, 12, 16, 40, 26),
+ datetime(2021, 4, 12, 15, 1, 12),
+ datetime.now()
+ ]
+ }
+)
+training_df = store.get_historical_features(
+ entity_df=entity_df,
+ features=[
+ "driver_hourly_stats:conv_rate",
+ "driver_hourly_stats:acc_rate",
+ "driver_daily_features:daily_miles_driven"
+ ],
+).to_df()
+print(training_df.head())
+```
+
+#### Option 2: using feature services (to version models)
+
+```python
+entity_df = pd.DataFrame.from_dict(
+ {
+ "driver_id": [1001, 1002, 1003, 1004, 1001],
+ "event_timestamp": [
+ datetime(2021, 4, 12, 10, 59, 42),
+ datetime(2021, 4, 12, 8, 12, 10),
+ datetime(2021, 4, 12, 16, 40, 26),
+ datetime(2021, 4, 12, 15, 1, 12),
+ datetime.now()
+ ]
+ }
+)
+training_df = store.get_historical_features(
+ entity_df=entity_df,
+ features=store.get_feature_service("model_v1"),
+).to_df()
+print(training_df.head())
+```
+
+
+
+
+
+How to: retrieve offline features for batch scoring
+
+The main difference here from training data generation is how to handle timestamps in the entity dataframe. You want to pass in the **current time** to get the latest feature values for all your entities.
+
+#### Option 1: fetching features with entity dataframe
+
+```python
+from feast import FeatureStore
+import pandas as pd
+
+store = FeatureStore(repo_path=".")
+
+# Get the latest feature values for unique entities
+entity_df = pd.DataFrame.from_dict({"driver_id": [1001, 1002, 1003, 1004, 1005],})
+entity_df["event_timestamp"] = pd.to_datetime("now", utc=True)
+batch_scoring_features = store.get_historical_features(
+ entity_df=entity_df, features=store.get_feature_service("model_v2"),
+).to_df()
+# predictions = model.predict(batch_scoring_features)
+```
+
+#### Option 2: fetching features using a SQL query to generate entities
+
+```python
+from feast import FeatureStore
+import pandas as pd
+
+store = FeatureStore(repo_path=".")
+
+# Get the latest feature values for unique entities
+batch_scoring_features = store.get_historical_features(
+ entity_df="""
+ SELECT
+ user_id,
+ CURRENT_TIME() as event_timestamp
+ FROM entity_source_table
+ WHERE user_last_active_time BETWEEN '2019-01-01' and '2020-12-31'
+ GROUP BY user_id
+ """
+ ,
+ features=store.get_feature_service("model_v2"),
+).to_df()
+# predictions = model.predict(batch_scoring_features)
+```
+
+
+
+
+
+How to: retrieve online features for real-time model inference
+
+Feast will ensure the latest feature values for registered features are available. At retrieval time, you need to supply a list of **entities** and the corresponding **features** to be retrieved. Similar to `get_historical_features`, we recommend using feature services as a mechanism for grouping features in a model version.
+
+_Note: unlike `get_historical_features`, the `entity_rows` **do not need timestamps** since you only want one feature value per entity key._
+
+```python
+features = store.get_online_features(
+ features=[
+ "driver_hourly_stats:conv_rate",
+ "driver_hourly_stats:acc_rate",
+ "driver_daily_features:daily_miles_driven",
+ ],
+ entity_rows=[
+ {
+ "driver_id": 1001,
+ }
+ ],
+).to_dict()
+```
+
+
## Feature Services
+
A feature service is an object that represents a logical group of features from one or more [feature views](feature-view.md#feature-view). Feature Services allows features from within a feature view to be used as needed by an ML model. Users can expect to create one feature service per model version, allowing for tracking of the features used by models.
{% tabs %}
@@ -38,6 +176,7 @@ Applying a feature service does not result in an actual service being deployed.
Feature services enable referencing all or some features from a feature view.
Retrieving from the online store with a feature service
+
```python
from feast import FeatureStore
feature_store = FeatureStore('.') # Initialize the feature store
@@ -49,6 +188,7 @@ features = feature_store.get_online_features(
```
Retrieving from the offline store with a feature service
+
```python
from feast import FeatureStore
feature_store = FeatureStore('.') # Initialize the feature store
@@ -78,7 +218,7 @@ online_features = fs.get_online_features(
)
```
-It is possible to retrieve features from multiple feature views with a single request, and Feast is able to join features from multiple tables in order to build a training dataset. However, It is not possible to reference \(or retrieve\) features from multiple projects at the same time.
+It is possible to retrieve features from multiple feature views with a single request, and Feast is able to join features from multiple tables in order to build a training dataset. However, it is not possible to reference (or retrieve) features from multiple projects at the same time.
{% hint style="info" %}
Note, if you're using [Feature views without entities](feature-view.md#feature-views-without-entities), then those features can be added here without additional entity values in the `entity_rows`
@@ -90,3 +230,10 @@ The timestamp on which an event occurred, as found in a feature view's data sour
Event timestamps are used during point-in-time joins to ensure that the latest feature values are joined from feature views onto entity rows. Event timestamps are also used to ensure that old feature values aren't served to models during online serving.
+## Dataset
+
+A dataset is a collection of rows that is produced by a historical retrieval from Feast in order to train a model. A dataset is produced by a join from one or more feature views onto an entity dataframe. Therefore, a dataset may consist of features from multiple feature views.
+
+**Dataset vs Feature View:** Feature views contain the schema of data and a reference to where data can be found (through its data source). Datasets are the actual data manifestation of querying those data sources.
+
+**Dataset vs Data Source:** Datasets are the output of historical retrieval, whereas data sources are the inputs. One or more data sources can be used in the creation of a dataset.
diff --git a/docs/getting-started/concepts/feature-view.md b/docs/getting-started/concepts/feature-view.md
index d0b8004828..dbfa89c086 100644
--- a/docs/getting-started/concepts/feature-view.md
+++ b/docs/getting-started/concepts/feature-view.md
@@ -2,7 +2,23 @@
## Feature views
-A feature view is an object that represents a logical group of time-series feature data as it is found in a [data source](data-source.md). Feature views consist of zero or more [entities](entity.md), one or more [features](feature-view.md#feature), and a [data source](data-source.md). Feature views allow Feast to model your existing feature data in a consistent way in both an offline (training) and online (serving) environment. Feature views generally contain features that are properties of a specific object, in which case that object is defined as an entity and included in the feature view. If the features are not related to a specific object, the feature view might not have entities; see [feature views without entities](feature-view.md#feature-views-without-entities) below.
+{% hint style="warning" %}
+**Note**: feature views do not work with non-timestamped data. A workaround is to insert dummy timestamps
+{% endhint %}
+
+A feature view is an object that represents a logical group of time-series feature data as it is found in a [data source](data-ingestion.md). Depending on the kind of feature view, it may contain some lightweight (experimental) feature transformations (see [\[Alpha\] On demand feature views](feature-view.md#alpha-on-demand-feature-views)).
+
+Feature views consist of:
+
+* a [data source](data-ingestion.md)
+* zero or more [entities](entity.md)
+ * If the features are not related to a specific object, the feature view might not have entities; see [feature views without entities](feature-view.md#feature-views-without-entities) below.
+* a name to uniquely identify this feature view in the project.
+* (optional, but recommended) a schema specifying one or more [features](feature-view.md#feature) (without this, Feast will infer the schema by reading from the data source)
+* (optional, but recommended) metadata (for example, description, or other free-form metadata via `tags`)
+* (optional) a TTL, which limits how far back Feast will look when generating historical datasets
+
+Feature views allow Feast to model your existing feature data in a consistent way in both an offline (training) and online (serving) environment. Feature views generally contain features that are properties of a specific object, in which case that object is defined as an entity and included in the feature view.
{% tabs %}
{% tab title="driver_trips_feature_view.py" %}
@@ -31,10 +47,6 @@ Feature views are used during
* Loading of feature values into an online store. Feature views determine the storage schema in the online store. Feature values can be loaded from batch sources or from [stream sources](../../reference/data-sources/push.md).
* Retrieval of features from the online store. Feature views provide the schema definition to Feast in order to look up features from the online store.
-{% hint style="info" %}
-Feast does not generate feature values. It acts as the ingestion and serving system. The data sources described within feature views should reference feature values in their already computed form.
-{% endhint %}
-
## Feature views without entities
If a feature view contains features that are not related to a specific entity, the feature view can be defined without entities (only event timestamps are needed for this feature view).
@@ -61,9 +73,7 @@ global_stats_fv = FeatureView(
## Feature inferencing
-If the `features` parameter is not specified in the feature view creation, Feast will infer the features during `feast apply` by creating a feature for each column in the
-underlying data source except the columns corresponding to the entities of the feature view or the columns corresponding to the timestamp columns of the feature view's
-data source. The names and value types of the inferred features will use the names and data types of the columns from which the features were inferred.
+If the `features` parameter is not specified in the feature view creation, Feast will infer the features during `feast apply` by creating a feature for each column in the underlying data source except the columns corresponding to the entities of the feature view or the columns corresponding to the timestamp columns of the feature view's data source. The names and value types of the inferred features will use the names and data types of the columns from which the features were inferred.
## Entity aliasing
@@ -133,16 +143,32 @@ trips_today = Field(
)
```
-Together with [data sources](data-source.md), they indicate to Feast where to find your feature values, e.g., in a specific parquet file or BigQuery table. Feature definitions are also used when reading features from the feature store, using [feature references](feature-retrieval.md#feature-references).
+Together with [data sources](data-ingestion.md), they indicate to Feast where to find your feature values, e.g., in a specific parquet file or BigQuery table. Feature definitions are also used when reading features from the feature store, using [feature references](feature-retrieval.md#feature-references).
Feature names must be unique within a [feature view](feature-view.md#feature-view).
+Each field can have additional metadata associated with it, specified as key-value [tags](https://rtd.feast.dev/en/master/feast.html#feast.field.Field).
+
## \[Alpha] On demand feature views
-On demand feature views allows users to use existing features and request time data (features only available at request time) to transform and create new features. Users define python transformation logic which is executed in both historical retrieval and online retrieval paths:
+On demand feature views allow data scientists to use existing features and request time data (features only available at request time) to transform and create new features. Users define Python transformation logic which is executed in both historical retrieval and online retrieval paths.
+
+Currently, these transformations are executed locally. This is fine for online serving, but does not scale well offline.
+
+### Why use on demand feature views?
+
+This enables data scientists to easily impact the online feature retrieval path. For example, a data scientist could:
+
+1. Call `get_historical_features` to generate a training dataframe
+2. Iterate in notebook on feature engineering in Pandas
+3. Copy transformation logic into on demand feature views and commit to a dev branch of the feature repository
+4. Verify with `get_historical_features` (on a small dataset) that the transformation gives expected output over historical data
+5. Verify with `get_online_features` on dev branch that the transformation correctly outputs online features
+6. Submit a pull request to the staging / prod branches which impact production traffic
```python
from feast import Field, RequestSource
+from feast.on_demand_feature_view import on_demand_feature_view
from feast.types import Float64
# Define a request data source which encodes features / information only
@@ -172,3 +198,58 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
df['conv_rate_plus_val2'] = (features_df['conv_rate'] + features_df['val_to_add_2'])
return df
```
+
+## \[Alpha] Stream feature views
+
+A stream feature view is an extension of a normal feature view. The primary difference is that stream feature views have both stream and batch data sources, whereas a normal feature view only has a batch data source.
+
+Stream feature views should be used instead of normal feature views when there are stream data sources (e.g. Kafka and Kinesis) available to provide fresh features in an online setting. Here is an example definition of a stream feature view with an attached transformation:
+
+```python
+from datetime import timedelta
+
+from feast import Field, FileSource, KafkaSource, stream_feature_view
+from feast.data_format import JsonFormat
+from feast.types import Float32
+
+driver_stats_batch_source = FileSource(
+ name="driver_stats_source",
+ path="data/driver_stats.parquet",
+ timestamp_field="event_timestamp",
+)
+
+driver_stats_stream_source = KafkaSource(
+ name="driver_stats_stream",
+ kafka_bootstrap_servers="localhost:9092",
+ topic="drivers",
+ timestamp_field="event_timestamp",
+ batch_source=driver_stats_batch_source,
+ message_format=JsonFormat(
+ schema_json="driver_id integer, event_timestamp timestamp, conv_rate double, acc_rate double, created timestamp"
+ ),
+ watermark_delay_threshold=timedelta(minutes=5),
+)
+
+@stream_feature_view(
+ entities=[driver],
+ ttl=timedelta(seconds=8640000000),
+ mode="spark",
+ schema=[
+ Field(name="conv_percentage", dtype=Float32),
+ Field(name="acc_percentage", dtype=Float32),
+ ],
+ timestamp_field="event_timestamp",
+ online=True,
+ source=driver_stats_stream_source,
+)
+def driver_hourly_stats_stream(df: DataFrame):
+ from pyspark.sql.functions import col
+
+ return (
+ df.withColumn("conv_percentage", col("conv_rate") * 100.0)
+ .withColumn("acc_percentage", col("acc_rate") * 100.0)
+ .drop("conv_rate", "acc_rate")
+ )
+```
+
+See [here](https://github.com/feast-dev/streaming-tutorial) for an example of how to use stream feature views.
diff --git a/docs/getting-started/concepts/overview.md b/docs/getting-started/concepts/overview.md
index 7134073792..ffbad86c03 100644
--- a/docs/getting-started/concepts/overview.md
+++ b/docs/getting-started/concepts/overview.md
@@ -1,14 +1,29 @@
# Overview
-The top-level namespace within Feast is a [project](overview.md#project). Users define one or more [feature views](feature-view.md) within a project. Each feature view contains one or more [features](feature-view.md#feature). These features typically relate to one or more [entities](entity.md). A feature view must always have a [data source](data-source.md), which in turn is used during the generation of training [datasets](feature-retrieval.md#dataset) and when materializing feature values into the online store.
+### Feast project structure
-
+The top-level namespace within Feast is a **project**. Users define one or more [feature views](feature-view.md) within a project. Each feature view contains one or more [features](feature-view.md#feature). These features typically relate to one or more [entities](entity.md). A feature view must always have a [data source](data-ingestion.md), which in turn is used during the generation of training [datasets](feature-retrieval.md#dataset) and when materializing feature values into the online store.
-## Project
+.png>)
-Projects provide complete isolation of feature stores at the infrastructure level. This is accomplished through resource namespacing, e.g., prefixing table names with the associated project. Each project should be considered a completely separate universe of entities and features. It is not possible to retrieve features from multiple projects in a single request. We recommend having a single feature store and a single project per environment \(`dev`, `staging`, `prod`\).
+**Projects** provide complete isolation of feature stores at the infrastructure level. This is accomplished through resource namespacing, e.g., prefixing table names with the associated project. Each project should be considered a completely separate universe of entities and features. It is not possible to retrieve features from multiple projects in a single request. We recommend having a single feature store and a single project per environment (`dev`, `staging`, `prod`).
-{% hint style="info" %}
-Projects are currently being supported for backward compatibility reasons. Projects may change in the future as we simplify the Feast API.
-{% endhint %}
+### Data ingestion
+For _offline use cases_ that only rely on batch data, Feast does not need to ingest data and can query your existing data (leveraging a compute engine, whether it be a data warehouse or (experimental) Spark / Trino). Feast can help manage **pushing** streaming features to a batch source to make features available for training.
+
+For _online use cases_, Feast supports **ingesting** features from batch sources to make them available online (through a process called **materialization**), and **pushing** streaming features to make them available both offline / online. We explore this more in the next concept page ([Data ingestion](data-ingestion.md)).
+
+### Feature registration and retrieval
+
+Features are _registered_ as code in a version controlled repository, and tie to data sources + model versions via the concepts of **entities, feature views,** and **feature services.** We explore these concepts more in the upcoming concept pages. These features are then _stored_ in a **registry**, which can be accessed across users and services. The features can then be _retrieved_ via SDK API methods or via a deployed **feature server** which exposes endpoints to query for online features (to power real time models).
+
+
+
+Feast supports several patterns of feature retrieval.
+
+| Use case | Example | API |
+| :------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: | :-----------------------: |
+| Training data generation | Fetching user and item features for (user, item) pairs when training a production recommendation model | `get_historical_features` |
+| Offline feature retrieval for batch predictions | Predicting user churn for all users on a daily basis | `get_historical_features` |
+| Online feature retrieval for real-time model predictions | Fetching pre-computed features to predict whether a real-time credit card transaction is fraudulent | `get_online_features` |
diff --git a/docs/getting-started/concepts/registry.md b/docs/getting-started/concepts/registry.md
index 2236f31931..4e85cb6e3c 100644
--- a/docs/getting-started/concepts/registry.md
+++ b/docs/getting-started/concepts/registry.md
@@ -1,9 +1,49 @@
# Registry
-The Feast registry is where all applied Feast objects (e.g. Feature views, entities, etc) are stored. The registry exposes methods to apply, list, retrieve and delete these objects. The registry is abstraction, with multiple possible implementations.
+Feast uses a registry to store all applied Feast objects (e.g. Feature views, entities, etc). The registry exposes methods to apply, list, retrieve and delete these objects, and is an abstraction with multiple implementations.
+
+### Options for registry implementations
By default, the registry Feast uses a file-based registry implementation, which stores the protobuf representation of the registry as a serialized file. This registry file can be stored in a local file system, or in cloud storage (in, say, S3 or GCS).
However, there's inherent limitations with a file-based registry, since changing a single field in the registry requires re-writing the whole registry file. With multiple concurrent writers, this presents a risk of data loss, or bottlenecks writes to the registry since all changes have to be serialized (e.g. when running materialization for multiple feature views or time ranges concurrently).
-Alternatively, a [SQL Registry](../../tutorials/using-scalable-registry.md) can be used for a more scalable registry.
\ No newline at end of file
+Alternatively, a [SQL Registry](../../tutorials/using-scalable-registry.md) can be used for a more scalable registry.
+
+### Updating the registry
+
+We recommend users store their Feast feature definitions in a version controlled repository, which then via CI/CD automatically stays synced with the registry. Users will often also want multiple registries to correspond to different environments (e.g. dev vs staging vs prod), with write access to the staging and production registries locked down since changes to them can impact real user traffic. See [Running Feast in Production](../../how-to-guides/running-feast-in-production.md#1.-automatically-deploying-changes-to-your-feature-definitions) for details on how to set this up.
+
+### Accessing the registry from clients
+
+Users can specify the registry through a `feature_store.yaml` config file, or programmatically. We often see teams preferring the programmatic approach because it makes notebook driven development very easy:
+
+#### Option 1: programmatically specifying the registry
+
+```python
+repo_config = RepoConfig(
+ registry=RegistryConfig(path="gs://feast-test-gcs-bucket/registry.pb"),
+ project="feast_demo_gcp",
+ provider="gcp",
+ offline_store="file", # Could also be the OfflineStoreConfig e.g. FileOfflineStoreConfig
+ online_store="null", # Could also be the OnlineStoreConfig e.g. RedisOnlineStoreConfig
+)
+store = FeatureStore(config=repo_config)
+```
+
+#### Option 2: specifying the registry in the project's `feature_store.yaml` file
+
+```yaml
+project: feast_demo_aws
+provider: aws
+registry: s3://feast-test-s3-bucket/registry.pb
+online_store: null
+offline_store:
+ type: file
+```
+
+Instantiating a `FeatureStore` object can then point to this:
+
+```python
+store = FeatureStore(repo_path=".")
+```
diff --git a/docs/getting-started/concepts/stream-feature-view.md b/docs/getting-started/concepts/stream-feature-view.md
deleted file mode 100644
index 2ce3993614..0000000000
--- a/docs/getting-started/concepts/stream-feature-view.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Stream feature view
-
-## Stream feature views
-
-A stream feature view is an extension of a normal feature view. The primary difference is that stream feature views have both stream and batch data sources, whereas a normal feature view only has a batch data source.
-
-Stream feature views should be used instead of normal feature views when there are stream data sources (e.g. Kafka and Kinesis) available to provide fresh features in an online setting. Here is an example definition of a stream feature view with an attached transformation:
-
-```python
-from datetime import timedelta
-
-from feast import Field, FileSource, KafkaSource, stream_feature_view
-from feast.data_format import JsonFormat
-from feast.types import Float32
-
-driver_stats_batch_source = FileSource(
- name="driver_stats_source",
- path="data/driver_stats.parquet",
- timestamp_field="event_timestamp",
-)
-
-driver_stats_stream_source = KafkaSource(
- name="driver_stats_stream",
- kafka_bootstrap_servers="localhost:9092",
- topic="drivers",
- timestamp_field="event_timestamp",
- batch_source=driver_stats_batch_source,
- message_format=JsonFormat(
- schema_json="driver_id integer, event_timestamp timestamp, conv_rate double, acc_rate double, created timestamp"
- ),
- watermark_delay_threshold=timedelta(minutes=5),
-)
-
-@stream_feature_view(
- entities=[driver],
- ttl=timedelta(seconds=8640000000),
- mode="spark",
- schema=[
- Field(name="conv_percentage", dtype=Float32),
- Field(name="acc_percentage", dtype=Float32),
- ],
- timestamp_field="event_timestamp",
- online=True,
- source=driver_stats_stream_source,
-)
-def driver_hourly_stats_stream(df: DataFrame):
- from pyspark.sql.functions import col
-
- return (
- df.withColumn("conv_percentage", col("conv_rate") * 100.0)
- .withColumn("acc_percentage", col("acc_rate") * 100.0)
- .drop("conv_rate", "acc_rate")
- )
-```
-
-See [here](https://github.com/feast-dev/streaming-tutorial) for a example of how to use stream feature views.
diff --git a/docs/getting-started/faq.md b/docs/getting-started/faq.md
index b2438fdf7a..a511ddb0dc 100644
--- a/docs/getting-started/faq.md
+++ b/docs/getting-started/faq.md
@@ -10,7 +10,7 @@ We encourage you to ask questions on [Slack](https://slack.feast.dev) or [GitHub
### Do you have any examples of how Feast should be used?
-The [quickstart](quickstart.md) is the easiest way to learn about Feast. For more detailed tutorials, please check out the [tutorials](../tutorials/tutorials-overview.md) page.
+The [quickstart](quickstart.md) is the easiest way to learn about Feast. For more detailed tutorials, please check out the [tutorials](../tutorials/tutorials-overview/) page.
## Concepts
@@ -19,13 +19,14 @@ The [quickstart](quickstart.md) is the easiest way to learn about Feast. For mor
No, there are [feature views without entities](concepts/feature-view.md#feature-views-without-entities).
### How does Feast handle model or feature versioning?
-Feast expects that each version of a model corresponds to a different feature service.
-Feature views once they are used by a feature service are intended to be immutable and not deleted (until a feature service is removed). In the future, `feast plan` and `feast apply will throw errors if it sees this kind of behavior.
+Feast expects that each version of a model corresponds to a different feature service.
+
+Feature views once they are used by a feature service are intended to be immutable and not deleted (until a feature service is removed). In the future, `feast plan` and `feast apply` will throw errors if it sees this kind of behavior.
### What is the difference between data sources and the offline store?
-The data source itself defines the underlying data warehouse table in which the features are stored. The offline store interface defines the APIs required to make an arbitrary compute layer work for Feast (e.g. pulling features given a set of feature views from their sources, exporting the data set results to different formats). Please see [data sources](concepts/data-source.md) and [offline store](architecture-and-components/offline-store.md) for more details.
+The data source itself defines the underlying data warehouse table in which the features are stored. The offline store interface defines the APIs required to make an arbitrary compute layer work for Feast (e.g. pulling features given a set of feature views from their sources, exporting the data set results to different formats). Please see [data sources](concepts/data-ingestion.md) and [offline store](architecture-and-components/offline-store.md) for more details.
### Is it possible to have offline and online stores from different providers?
@@ -34,6 +35,7 @@ Yes, this is possible. For example, you can use BigQuery as an offline store and
## Functionality
### How do I run `get_historical_features` without providing an entity dataframe?
+
Feast does not provide a way to do this right now. This is an area we're actively interested in contributions for. See [GitHub issue](https://github.com/feast-dev/feast/issues/1611)
### Does Feast provide security or access control?
@@ -44,19 +46,21 @@ It is a good idea though to lock down the registry file so only the CI/CD pipeli
### Does Feast support streaming sources?
-Yes. In earlier versions of Feast, we used Feast Spark to manage ingestion from stream sources. In the current version of Feast, we support [push based ingestion](../reference/data-sources/push.md). Streaming transformations are actively being worked on.
+Yes. In earlier versions of Feast, we used Feast Spark to manage ingestion from stream sources. In the current version of Feast, we support [push based ingestion](../reference/data-sources/push.md). Feast also defines a [stream processor](../tutorials/building-streaming-features.md) that allows a deeper integration with stream sources.
### Does Feast support feature transformation?
There are several kinds of transformations:
-- On demand transformations (See [docs](../reference/alpha-on-demand-feature-view.md))
- - These transformations are Pandas transformations run on batch data when you call `get_historical_features` and at online serving time when you call `get_online_features.
- - Note that if you use push sources to ingest streaming features, these transformations will execute on the fly as well
-- Batch transformations (WIP, see [RFC](https://docs.google.com/document/d/1964OkzuBljifDvkV-0fakp2uaijnVzdwWNGdz7Vz50A/edit#))
- - These will include SQL + PySpark based transformations on batch data sources.
-- Streaming transformations (RFC in progress)
+
+* On demand transformations (See [docs](../reference/alpha-on-demand-feature-view.md))
+ * These transformations are Pandas transformations run on batch data when you call `get_historical_features` and at online serving time when you call `get_online_features`.
+ * Note that if you use push sources to ingest streaming features, these transformations will execute on the fly as well
+* Batch transformations (WIP, see [RFC](https://docs.google.com/document/d/1964OkzuBljifDvkV-0fakp2uaijnVzdwWNGdz7Vz50A/edit))
+ * These will include SQL + PySpark based transformations on batch data sources.
+* Streaming transformations (RFC in progress)
### Does Feast have a Web UI?
+
Yes. See [documentation](../reference/alpha-web-ui.md).
### Does Feast support composite keys?
@@ -84,15 +88,26 @@ Yes. Specifically:
### Does Feast support X storage engine?
-The list of supported offline and online stores can be found [here](../reference/offline-stores/) and [here](../reference/online-stores/), respectively. The [roadmap](../roadmap.md) indicates the stores for which we are planning to add support. Finally, our Provider abstraction is built to be extensible, so you can plug in your own implementations of offline and online stores. Please see more details about custom providers [here](../how-to-guides/creating-a-custom-provider.md).
+The list of supported offline and online stores can be found [here](../reference/offline-stores/) and [here](../reference/online-stores/), respectively. The [roadmap](../roadmap.md) indicates the stores for which we are planning to add support. Finally, our Provider abstraction is built to be extensible, so you can plug in your own implementations of offline and online stores. Please see more details about customizing Feast [here](../how-to-guides/customizing-feast/).
### Does Feast support using different clouds for offline vs online stores?
Yes. Using a GCP or AWS provider in `feature_store.yaml` primarily sets default offline / online stores and configures where the remote registry file can live (Using the AWS provider also allows for deployment to AWS Lambda). You can override the offline and online stores to be in different clouds if you wish.
+### What is the difference between a data source and an offline store?
+
+The data source and the offline store are closely tied, but separate concepts.
+The offline store controls how Feast talks to a data store for historical feature retrieval, and the data source points to a specific table (or query) within a data store. Offline stores are infrastructure-level connectors to data stores like Snowflake.
+
+Additional differences:
+
+- Data sources may be specific to a project (e.g. feed ranking), but offline stores are agnostic and used across projects.
+- A Feast project may define several data sources that power different feature views, but a Feast project has a single offline store.
+- Feast users typically need to define data sources when using Feast, but only need to use/configure existing offline stores without creating new ones.
+
### How can I add a custom online store?
-Please follow the instructions [here](../how-to-guides/adding-support-for-a-new-online-store.md).
+Please follow the instructions [here](../how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md).
### Can the same storage engine be used for both the offline and online store?
@@ -105,10 +120,6 @@ Yes. There are two ways to use S3 in Feast:
* Using Redshift as a data source via Spectrum ([AWS tutorial](https://docs.aws.amazon.com/redshift/latest/dg/tutorial-nested-data-create-table.html)), and then continuing with the [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) guide. See a [presentation](https://youtu.be/pMFbRJ7AnBk?t=9463) we did on this at our apply() meetup.
* Using the `s3_endpoint_override` in a `FileSource` data source. This endpoint is more suitable for quick proof of concepts that won't necessarily scale for production use cases.
-### How can I use Spark with Feast?
-
-Feast supports ingestion via Spark (See ) does not support Spark natively. However, you can create a [custom provider](../how-to-guides/creating-a-custom-provider.md) that will support Spark, which can help with more scalable materialization and ingestion.
-
### Is Feast planning on supporting X functionality?
Please see the [roadmap](../roadmap.md).
@@ -119,7 +130,6 @@ Please see the [roadmap](../roadmap.md).
For more details on contributing to the Feast community, see [here](../community.md) and this [here](../project/contributing.md).
-
## Feast 0.9 (legacy)
### What is the difference between Feast 0.9 and Feast 0.10+?
@@ -130,7 +140,6 @@ Feast 0.10+ is much lighter weight and more extensible than Feast 0.9. It is des
Please see this [document](https://docs.google.com/document/d/1AOsr\_baczuARjCpmZgVd8mCqTF4AZ49OEyU4Cn-uTT0). If you have any questions or suggestions, feel free to leave a comment on the document!
-
### What are the plans for Feast Core, Feast Serving, and Feast Spark?
-Feast Core and Feast Serving were both part of Feast Java. We plan to support Feast Serving. We will not support Feast Core; instead we will support our object store based registry. We will not support Feast Spark. For more details on what we plan on supporting, please see the [roadmap](../roadmap.md).
\ No newline at end of file
+Feast Core and Feast Serving were both part of Feast Java. We plan to support Feast Serving. We will not support Feast Core; instead we will support our object store based registry. We will not support Feast Spark. For more details on what we plan on supporting, please see the [roadmap](../roadmap.md).
diff --git a/docs/getting-started/feast-workshop.md b/docs/getting-started/feast-workshop.md
index 8b6778c2d3..0d64845222 100644
--- a/docs/getting-started/feast-workshop.md
+++ b/docs/getting-started/feast-workshop.md
@@ -30,15 +30,15 @@ _See also:_ [_Feast quickstart_](https://docs.feast.dev/getting-started/quicksta
These are meant mostly to be done in order, with examples building on previous concepts.
-See https://github.com/feast-dev/feast-workshop
-
-| Time (min) | Description | Module |
-| :--------: | ----------------------------------------------------------------------- |-----------|
-| 30-45 | Setting up Feast projects & CI/CD + powering batch predictions | Module 0 |
-| 15-20 | Streaming ingestion & online feature retrieval with Kafka, Spark, Redis | Module 1 |
-| 10-15 | Real-time feature engineering with on demand transformations | Module 2 |
-| TBD | Feature server deployment (embed, as a service, AWS Lambda) | TBD |
-| TBD | Versioning features / models in Feast | TBD |
-| TBD | Data quality monitoring in Feast | TBD |
-| TBD | Batch transformations | TBD |
-| TBD | Stream transformations | TBD |
+See [https://github.com/feast-dev/feast-workshop](https://github.com/feast-dev/feast-workshop)
+
+| Time (min) | Description | Module |
+| :--------: | ----------------------------------------------------------------------- | -------- |
+| 30-45 | Setting up Feast projects & CI/CD + powering batch predictions | Module 0 |
+| 15-20 | Streaming ingestion & online feature retrieval with Kafka, Spark, Redis | Module 1 |
+| 10-15 | Real-time feature engineering with on demand transformations | Module 2 |
+| TBD | Feature server deployment (embed, as a service, AWS Lambda) | TBD |
+| TBD | Versioning features / models in Feast | TBD |
+| TBD | Data quality monitoring in Feast | TBD |
+| TBD | Batch transformations | TBD |
+| TBD | Stream transformations | TBD |
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
index 7bbcb78732..16bd28d5bf 100644
--- a/docs/getting-started/quickstart.md
+++ b/docs/getting-started/quickstart.md
@@ -13,15 +13,15 @@ You can run this tutorial in Google Colab or run it on your localhost, following
## Overview
-In this tutorial, we use feature stores to generate training data and power online model inference for a ride-sharing driver satisfaction prediction model. Feast solves several common issues in this flow:
+In this tutorial, we use feature stores to generate training data and power online model inference for a ride-sharing driver satisfaction prediction model. Feast solves several common issues in this flow:
1. **Training-serving skew and complex data joins:** Feature values often exist across multiple tables. Joining these datasets can be complicated, slow, and error-prone.
* Feast joins these tables with battle-tested logic that ensures _point-in-time_ correctness so future feature values do not leak to models.
* Feast alerts users to offline / online skew with data quality monitoring
-2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources.
+2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources.
* Feast manages deployment to a variety of online stores (e.g. DynamoDB, Redis, Google Cloud Datastore) and ensures necessary features are consistently _available_ and _freshly computed_ at inference time.
3. **Feature reusability and model versioning:** Different teams within an organization are often unable to reuse features across projects, resulting in duplicate feature creation logic. Models have data dependencies that need to be versioned, for example when running A/B tests on model versions.
- * Feast enables discovery of and collaboration on previously used features and enables versioning of sets of features (via _feature services_).
+ * Feast enables discovery of and collaboration on previously used features and enables versioning of sets of features (via _feature services_).
* Feast enables feature transformation so users can re-use transformation logic across online / offline usecases and across models.
## Step 1: Install Feast
@@ -40,7 +40,7 @@ pip install feast
## Step 2: Create a feature repository
-Bootstrap a new feature repository using `feast init` from the command line.
+Bootstrap a new feature repository using `feast init` from the command line.
{% tabs %}
{% tab title="Bash" %}
@@ -133,9 +133,9 @@ Valid values for `provider` in `feature_store.yaml` are:
* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis
* aws: use Redshift/Snowflake with DynamoDB/Redis
-Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. See [Third party integrations](../getting-started/third-party-integrations.md) for all supported datasources.
+Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. See [Third party integrations](third-party-integrations.md) for all supported datasources.
-A custom setup can also be made by following [adding a custom provider](../how-to-guides/creating-a-custom-provider.md).
+A custom setup can also be made by following [adding a custom provider](../how-to-guides/customizing-feast/creating-a-custom-provider.md).
### Inspecting the raw data
@@ -385,8 +385,6 @@ driver_stats_fs = FeatureService(
)
```
-{% tabs %}
-{% tab title="Python" %}
```python
from feast import FeatureStore
feature_store = FeatureStore('.') # Initialize the feature store
@@ -428,6 +426,6 @@ One of the ways to view this is with the `feast ui` command.
* Read the [Concepts](concepts/) page to understand the Feast data model.
* Read the [Architecture](architecture-and-components/) page.
-* Check out our [Tutorials](../tutorials/tutorials-overview.md) section for more examples on how to use Feast.
+* Check out our [Tutorials](../tutorials/tutorials-overview/) section for more examples on how to use Feast.
* Follow our [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) guide for a more in-depth tutorial on using Feast.
* Join other Feast users and contributors in [Slack](https://slack.feast.dev) and become part of the community!
diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md
index ef47a11029..8e6a600aa0 100644
--- a/docs/getting-started/third-party-integrations.md
+++ b/docs/getting-started/third-party-integrations.md
@@ -5,13 +5,13 @@ We integrate with a wide set of tools and technologies so you can make Feast wor
{% hint style="info" %}
Don't see your offline store or online store of choice here? Check out our guides to make a custom one!
-* [Adding a new offline store](../how-to-guides/adding-a-new-offline-store.md)
-* [Adding a new online store](../how-to-guides/adding-support-for-a-new-online-store.md)
+* [Adding a new offline store](../how-to-guides/customizing-feast/adding-a-new-offline-store.md)
+* [Adding a new online store](../how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md)
{% endhint %}
## Integrations
-See [Functionality and Roadmap](../../README.md#-functionality-and-roadmap)
+See [Functionality and Roadmap](../../#-functionality-and-roadmap)
## Standards
@@ -19,7 +19,7 @@ In order for a plugin integration to be highlighted, it must meet the following
1. The plugin must have tests. Ideally it would use the Feast universal tests (see this [guide](../how-to-guides/adding-or-reusing-tests.md) for an example), but custom tests are fine.
2. The plugin must have some basic documentation on how it should be used.
-3. The author must work with a maintainer to pass a basic code review (e.g. to ensure that the implementation roughly matches the core Feast implementations).
+3. The author must work with a maintainer to pass a basic code review (e.g. to ensure that the implementation roughly matches the core Feast implementations).
In order for a plugin integration to be merged into the main Feast repo, it must meet the following requirements:
diff --git a/docs/how-to-guides/adding-or-reusing-tests.md b/docs/how-to-guides/adding-or-reusing-tests.md
index 86c116442f..45b9aa26e0 100644
--- a/docs/how-to-guides/adding-or-reusing-tests.md
+++ b/docs/how-to-guides/adding-or-reusing-tests.md
@@ -6,111 +6,208 @@ This guide will go over:
1. how Feast tests are setup
2. how to extend the test suite to test new functionality
-3. how to use the existing test suite to test a new custom offline / online store.
+3. how to use the existing test suite to test a new custom offline / online store
## Test suite overview
-Let's inspect the test setup in `sdk/python/tests/integration`:
+Unit tests are contained in `sdk/python/tests/unit`.
+Integration tests are contained in `sdk/python/tests/integration`.
+Let's inspect the structure of `sdk/python/tests/integration`:
```bash
$ tree
-
.
├── e2e
-│ └── test_universal_e2e.py
+│ ├── test_go_feature_server.py
+│ ├── test_python_feature_server.py
+│ ├── test_universal_e2e.py
+│ ├── test_usage_e2e.py
+│ └── test_validation.py
├── feature_repos
+│ ├── integration_test_repo_config.py
│ ├── repo_configuration.py
│ └── universal
+│ ├── catalog
│ ├── data_source_creator.py
│ ├── data_sources
+│ │ ├── __init__.py
│ │ ├── bigquery.py
│ │ ├── file.py
-│ │ └── redshift.py
+│ │ ├── redshift.py
+│ │ └── snowflake.py
│ ├── entities.py
-│ └── feature_views.py
+│ ├── feature_views.py
+│ ├── online_store
+│ │ ├── __init__.py
+│ │ ├── datastore.py
+│ │ ├── dynamodb.py
+│ │ ├── hbase.py
+│ │ └── redis.py
+│ └── online_store_creator.py
+├── materialization
+│ └── test_lambda.py
├── offline_store
+│ ├── test_feature_logging.py
+│ ├── test_offline_write.py
+│ ├── test_push_features_to_offline_store.py
│ ├── test_s3_custom_endpoint.py
│ └── test_universal_historical_retrieval.py
├── online_store
-│ ├── test_e2e_local.py
-│ ├── test_feature_service_read.py
-│ ├── test_online_retrieval.py
+│ ├── test_push_features_to_online_store.py
│ └── test_universal_online.py
-├── registration
-│ ├── test_cli.py
-│ ├── test_cli_apply_duplicated_featureview_names.py
-│ ├── test_cli_chdir.py
-│ ├── test_feature_service_apply.py
-│ ├── test_feature_store.py
-│ ├── test_inference.py
-│ ├── test_registry.py
-│ ├── test_universal_odfv_feature_inference.py
-│ └── test_universal_types.py
-└── scaffolding
- ├── test_init.py
- ├── test_partial_apply.py
- ├── test_repo_config.py
- └── test_repo_operations.py
-
-8 directories, 27 files
-```
+└── registration
+ ├── test_feature_store.py
+ ├── test_inference.py
+ ├── test_registry.py
+ ├── test_universal_cli.py
+ ├── test_universal_odfv_feature_inference.py
+ └── test_universal_types.py
-`feature_repos` has setup files for most tests in the test suite and pytest fixtures for other tests. These fixtures parametrize on different offline stores, online stores, etc. and thus abstract away store specific implementations so tests don't need to rewrite e.g. uploading dataframes to a specific store for setup.
+```
-## Understanding an example test
+* `feature_repos` has setup files for most tests in the test suite.
+* `conftest.py` (in the parent directory) contains the most common [fixtures](https://docs.pytest.org/en/6.2.x/fixture.html), which are designed as an abstraction on top of specific offline/online stores, so tests do not need to be rewritten for different stores. Individual test files also contain more specific fixtures.
+* The tests are organized by which Feast component(s) they test.
+
+## Structure of the test suite
+
+### Universal feature repo
+
+The universal feature repo refers to a set of fixtures (e.g. `environment` and `universal_data_sources`) that can be parametrized to cover various combinations of offline stores, online stores, and providers.
+This allows tests to run against all these various combinations without requiring excess code.
+The universal feature repo is constructed by fixtures in `conftest.py` with help from the various files in `feature_repos`.
+
+### Integration vs. unit tests
+
+Tests in Feast are split into integration and unit tests.
+If a test requires external resources (e.g. cloud resources on GCP or AWS), it is an integration test.
+If a test can be run purely locally (where locally includes Docker resources), it is a unit test.
+
+* Integration tests test non-local Feast behavior. For example, tests that require reading data from BigQuery or materializing data to DynamoDB are integration tests. Integration tests also tend to involve more complex Feast functionality.
+* Unit tests test local Feast behavior. For example, tests that only require registering feature views are unit tests. Unit tests tend to only involve simple Feast functionality.
+
+### Main types of tests
+
+#### Integration tests
+
+1. E2E tests
+ * E2E tests test end-to-end functionality of Feast over the various codepaths (initialize a feature store, apply, and materialize).
+ * The main codepaths include:
+ * basic e2e tests for offline stores
+ * `test_universal_e2e.py`
+ * go feature server
+ * `test_go_feature_server.py`
+ * python http server
+ * `test_python_feature_server.py`
+ * usage tracking
+ * `test_usage_e2e.py`
+ * data quality monitoring feature validation
+ * `test_validation.py`
+2. Offline and Online Store Tests
+ * Offline and online store tests mainly test for the offline and online retrieval functionality.
+ * The various specific functionalities that are tested include:
+ * push API tests
+ * `test_push_features_to_offline_store.py`
+ * `test_push_features_to_online_store.py`
+ * `test_offline_write.py`
+ * historical retrieval tests
+ * `test_universal_historical_retrieval.py`
+ * online retrieval tests
+ * `test_universal_online.py`
+ * data quality monitoring feature logging tests
+ * `test_feature_logging.py`
+ * online store tests
+ * `test_universal_online.py`
+3. Registration Tests
+ * The registration folder contains all of the registry tests and some universal cli tests. This includes:
+ * CLI Apply and Materialize tests tested against on the universal test suite
+ * Data type inference tests
+ * Registry tests
+4. Miscellaneous Tests
+ * AWS Lambda Materialization Tests (Currently do not work)
+ * `test_lambda.py`
+
+#### Unit tests
+
+1. Registry Diff Tests
+   * These are tests for the infrastructure and registry diff functionality that Feast uses to determine if changes to the registry or infrastructure are needed.
+2. Local CLI Tests and Local Feast Tests
+ * These tests test all of the cli commands against the local file offline store.
+3. Infrastructure Unit Tests
+ * DynamoDB tests with dynamo mocked out
+ * Repository configuration tests
+ * Schema inference unit tests
+ * Key serialization tests
+ * Basic provider unit tests
+4. Feature Store Validation Tests
+   * These tests mainly contain class level validation like hashing tests, protobuf and class serialization, and error and warning handling.
+ * Data source unit tests
+ * Feature service unit tests
+ * Feature service, feature view, and feature validation tests
+ * Protobuf/json tests for Feast ValueTypes
+ * Serialization tests
+ * Type mapping
+ * Feast types
+ * Serialization tests due to this [issue](https://github.com/feast-dev/feast/issues/2345)
+ * Feast usage tracking unit tests
+
+#### Docstring tests
+
+Docstring tests are primarily smoke tests to make sure imports and setup functions can be executed without errors.
+
+## Understanding the test suite with an example test
+
+### Example test
Let's look at a sample test using the universal repo:
{% tabs %}
-{% tab title="Python" %}
+{% tab code="sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py" %}
```python
@pytest.mark.integration
-@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
+@pytest.mark.universal_offline_stores
+@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}")
def test_historical_features(environment, universal_data_sources, full_feature_names):
store = environment.feature_store
(entities, datasets, data_sources) = universal_data_sources
- feature_views = construct_universal_feature_views(data_sources)
- customer_df, driver_df, orders_df, global_df, entity_df = (
- datasets["customer"],
- datasets["driver"],
- datasets["orders"],
- datasets["global"],
- datasets["entity"],
- )
- # ... more test code
+ feature_views = construct_universal_feature_views(data_sources)
- customer_fv, driver_fv, driver_odfv, order_fv, global_fv = (
- feature_views["customer"],
- feature_views["driver"],
- feature_views["driver_odfv"],
- feature_views["order"],
- feature_views["global"],
- )
+ entity_df_with_request_data = datasets.entity_df.copy(deep=True)
+ entity_df_with_request_data["val_to_add"] = [
+ i for i in range(len(entity_df_with_request_data))
+ ]
+ entity_df_with_request_data["driver_age"] = [
+ i + 100 for i in range(len(entity_df_with_request_data))
+ ]
feature_service = FeatureService(
- "convrate_plus100",
+ name="convrate_plus100",
+ features=[feature_views.driver[["conv_rate"]], feature_views.driver_odfv],
+ )
+ feature_service_entity_mapping = FeatureService(
+ name="entity_mapping",
features=[
- feature_views["driver"][["conv_rate"]],
- feature_views["driver_odfv"]
+ feature_views.location.with_name("origin").with_join_key_map(
+ {"location_id": "origin_id"}
+ ),
+ feature_views.location.with_name("destination").with_join_key_map(
+ {"location_id": "destination_id"}
+ ),
],
)
- feast_objects = []
- feast_objects.extend(
+ store.apply(
[
- customer_fv,
- driver_fv,
- driver_odfv,
- order_fv,
- global_fv,
driver(),
customer(),
+ location(),
feature_service,
+ feature_service_entity_mapping,
+ *feature_views.values(),
]
)
- store.apply(feast_objects)
# ... more test code
job_from_df = store.get_historical_features(
@@ -122,48 +219,85 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
"customer_profile:avg_passenger_count",
"customer_profile:lifetime_trip_count",
"conv_rate_plus_100:conv_rate_plus_100",
+ "conv_rate_plus_100:conv_rate_plus_100_rounded",
"conv_rate_plus_100:conv_rate_plus_val_to_add",
"order:order_is_success",
"global_stats:num_rides",
"global_stats:avg_ride_length",
+ "field_mapping:feature_name",
],
full_feature_names=full_feature_names,
)
+
+ if job_from_df.supports_remote_storage_export():
+ files = job_from_df.to_remote_storage()
+ print(files)
+ assert len(files) > 0 # This test should be way more detailed
+
+ start_time = datetime.utcnow()
actual_df_from_df_entities = job_from_df.to_df()
# ... more test code
- assert_frame_equal(
- expected_df, actual_df_from_df_entities, check_dtype=False,
+ validate_dataframes(
+ expected_df,
+ table_from_df_entities,
+ keys=[event_timestamp, "order_id", "driver_id", "customer_id"],
)
# ... more test code
```
{% endtab %}
{% endtabs %}
-The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories. This by default pulls in a standard dataset with driver and customer entities, certain feature views, and feature values. By including the environment as a parameter, the test automatically parametrizes across other offline / online store combinations.
+* The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories and the `conftest.py` file. This by default pulls in a standard dataset with driver and customer entities (that we have pre-defined), certain feature views, and feature values.
+ * The `environment` fixture sets up a feature store, parametrized by the provider and the online/offline store. It allows the test to query against that feature store without needing to worry about the underlying implementation or any setup that may be involved in creating instances of these datastores.
+ * Each fixture creates a different integration test with its own `IntegrationTestRepoConfig` which is used by pytest to generate a unique test testing one of the different environments that require testing.
+
+* Feast tests also use a variety of markers:
+ * The `@pytest.mark.integration` marker is used to designate integration tests which will cause the test to be run when you call `make test-python-integration`.
+ * The `@pytest.mark.universal_offline_stores` marker will parametrize the test on all of the universal offline stores including file, redshift, bigquery and snowflake.
+ * The `full_feature_names` parametrization defines whether or not the test should reference features as their full feature name (fully qualified path) or just the feature name itself.
+
## Writing a new test or reusing existing tests
### To add a new test to an existing test file
-* Use the same function signatures as an existing test (e.g. use `environment` as an argument) to include the relevant test fixtures.
-* If possible, expand an individual test instead of writing a new test, due to the cost of standing up offline / online stores.
+* Use the same function signatures as an existing test (e.g. use `environment` and `universal_data_sources` as an argument) to include the relevant test fixtures.
+* If possible, expand an individual test instead of writing a new test, due to the cost of starting up offline / online stores.
+* Use the `universal_offline_stores` and `universal_online_store` markers to parametrize the test against different offline store and online store combinations. You can also designate specific online and offline stores to test by using the `only` parameter on the marker.
+```python
+@pytest.mark.universal_online_stores(only=["redis"])
+```
### To test a new offline / online store from a plugin repo
* Install Feast in editable mode with `pip install -e`.
* The core tests for offline / online store behavior are parametrized by the `FULL_REPO_CONFIGS` variable defined in `feature_repos/repo_configuration.py`. To overwrite this variable without modifying the Feast repo, create your own file that contains a `FULL_REPO_CONFIGS` (which will require adding a new `IntegrationTestRepoConfig` or two) and set the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file. Then the core offline / online store tests can be run with `make test-python-universal`.
* See the [custom offline store demo](https://github.com/feast-dev/feast-custom-offline-store-demo) and the [custom online store demo](https://github.com/feast-dev/feast-custom-online-store-demo) for examples.
+### What are some important things to keep in mind when adding a new offline / online store?
+
+#### Type mapping/Inference
+
+Many problems arise when implementing your data store's type conversion to interface with Feast datatypes.
+1. You will need to correctly update `inference.py` so that Feast can infer your datasource schemas
+2. You also need to update `type_map.py` so that Feast knows how to convert your datastore's types to Feast-recognized types in `feast/types.py`.
+
+#### Historical and online retrieval
+
+The most important functionality in Feast is historical and online retrieval. Most of the e2e and universal integration tests test this functionality in some way. Making sure this functionality works also indirectly asserts that reading and writing from your datastore works as intended.
+
+
### To include a new offline / online store in the main Feast repo
* Extend `data_source_creator.py` for your offline store.
-* In `repo_configuration.py` add a new`IntegrationTestRepoConfig` or two (depending on how many online stores you want to test).
+* In `repo_configuration.py` add a new `IntegrationTestRepoConfig` or two (depending on how many online stores you want to test).
+ * Generally, you should only need to test against sqlite. However, if you need to test against a production online store, then you can also test against Redis or dynamodb.
* Run the full test suite with `make test-python-integration.`
### Including a new offline / online store in the main Feast repo from external plugins with community maintainers.
-* This folder is for plugins that are officially maintained with community owners. Place the APIs in feast/infra/offline_stores/contrib/.
+* This folder is for plugins that are officially maintained with community owners. Place the APIs in `feast/infra/offline_stores/contrib/`.
* Extend `data_source_creator.py` for your offline store and implement the required APIs.
* In `contrib_repo_configuration.py` add a new `IntegrationTestRepoConfig` (depending on how many online stores you want to test).
* Run the test suite on the contrib test suite with `make test-python-contrib-universal`.
@@ -171,7 +305,7 @@ The key fixtures are the `environment` and `universal_data_sources` fixtures, wh
### To include a new online store
* In `repo_configuration.py` add a new config that maps to a serialized version of configuration you need in `feature_store.yaml` to setup the online store.
-* In `repo_configuration.py`, add new`IntegrationTestRepoConfig` for offline stores you want to test.
+* In `repo_configuration.py`, add new `IntegrationTestRepoConfig` for online stores you want to test.
* Run the full test suite with `make test-python-integration`
### To use custom data in a new test
@@ -193,11 +327,11 @@ def your_test(environment: Environment):
# ... run test
```
-### Running your own redis cluster for testing
+### Running your own Redis cluster for testing
-* Install redis on your computer. If you are a mac user, you should be able to `brew install redis`.
+* Install Redis on your computer. If you are a mac user, you should be able to `brew install redis`.
* Running `redis-server --help` and `redis-cli --help` should show corresponding help menus.
-* Run `cd scripts/create-cluster` and run `./create-cluster start` then `./create-cluster create` to start the server. You should see output that looks like this:
+* Run `./infra/scripts/redis-cluster.sh start` then `./infra/scripts/redis-cluster.sh create` to start the Redis cluster locally. You should see output that looks like this:
~~~~
Starting 6001
Starting 6002
@@ -206,6 +340,6 @@ Starting 6004
Starting 6005
Starting 6006
~~~~
-* You should be able to run the integration tests and have the redis cluster tests pass.
-* If you would like to run your own redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster.
-* To stop the cluster, run `./create-cluster stop` and then `./create-cluster clean`.
+* You should be able to run the integration tests and have the Redis cluster tests pass.
+* If you would like to run your own Redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster.
+* To stop the cluster, run `./infra/scripts/redis-cluster.sh stop` and then `./infra/scripts/redis-cluster.sh clean`.
diff --git a/docs/how-to-guides/customizing-feast/README.md b/docs/how-to-guides/customizing-feast/README.md
new file mode 100644
index 0000000000..91c04e2f35
--- /dev/null
+++ b/docs/how-to-guides/customizing-feast/README.md
@@ -0,0 +1,24 @@
+# Customizing Feast
+
+Feast is highly pluggable and configurable:
+
+* One can use existing plugins (offline store, online store, batch materialization engine, providers) and configure those using the built in options. See reference documentation for details.
+* The other way to customize Feast is to build your own custom components, and then point Feast to delegate to them.
+
+Below are some guides on how to add new custom components:
+
+{% content-ref url="adding-a-new-offline-store.md" %}
+[adding-a-new-offline-store.md](adding-a-new-offline-store.md)
+{% endcontent-ref %}
+
+{% content-ref url="adding-support-for-a-new-online-store.md" %}
+[adding-support-for-a-new-online-store.md](adding-support-for-a-new-online-store.md)
+{% endcontent-ref %}
+
+{% content-ref url="creating-a-custom-materialization-engine.md" %}
+[creating-a-custom-materialization-engine.md](creating-a-custom-materialization-engine.md)
+{% endcontent-ref %}
+
+{% content-ref url="creating-a-custom-provider.md" %}
+[creating-a-custom-provider.md](creating-a-custom-provider.md)
+{% endcontent-ref %}
diff --git a/docs/how-to-guides/adding-a-new-offline-store.md b/docs/how-to-guides/customizing-feast/adding-a-new-offline-store.md
similarity index 85%
rename from docs/how-to-guides/adding-a-new-offline-store.md
rename to docs/how-to-guides/customizing-feast/adding-a-new-offline-store.md
index c548538fce..91b23eaad5 100644
--- a/docs/how-to-guides/adding-a-new-offline-store.md
+++ b/docs/how-to-guides/customizing-feast/adding-a-new-offline-store.md
@@ -2,7 +2,7 @@
## Overview
-Feast makes adding support for a new offline store easy. Developers can simply implement the [OfflineStore](../../sdk/python/feast/infra/offline\_stores/offline\_store.py#L41) interface to add support for a new store (other than the existing stores like Parquet files, Redshift, and Bigquery).
+Feast makes adding support for a new offline store easy. Developers can simply implement the [OfflineStore](../../../sdk/python/feast/infra/offline\_stores/offline\_store.py#L41) interface to add support for a new store (other than the existing stores like Parquet files, Redshift, and Bigquery).
In this guide, we will show you how to extend the existing File offline store and use in a feature repo. While we will be implementing a specific store, this guide should be representative for adding support for any new offline store.
@@ -22,7 +22,7 @@ The process for using a custom offline store consists of 8 steps:
## 1. Defining an OfflineStore class
{% hint style="info" %}
- OfflineStore class names must end with the OfflineStore suffix!
+OfflineStore class names must end with the OfflineStore suffix!
{% endhint %}
### Contrib offline stores
@@ -31,23 +31,26 @@ New offline stores go in `sdk/python/feast/infra/offline_stores/contrib/`.
#### What is a contrib plugin?
-- Not guaranteed to implement all interface methods
-- Not guaranteed to be stable.
-- Should have warnings for users to indicate this is a contrib plugin that is not maintained by the maintainers.
+* Not guaranteed to implement all interface methods
+* Not guaranteed to be stable.
+* Should have warnings for users to indicate this is a contrib plugin that is not maintained by the maintainers.
#### How do I make a contrib plugin an "official" plugin?
+
To move an offline store plugin out of contrib, you need:
-- GitHub actions (i.e `make test-python-integration`) is setup to run all tests against the offline store and pass.
-- At least two contributors own the plugin (ideally tracked in our `OWNERS` / `CODEOWNERS` file).
+
+* GitHub actions (i.e. `make test-python-integration`) is set up to run all tests against the offline store and pass.
+* At least two contributors own the plugin (ideally tracked in our `OWNERS` / `CODEOWNERS` file).
#### Define the offline store class
-The OfflineStore class contains a couple of methods to read features from the offline store. Unlike the OnlineStore class, Feast does not manage any infrastructure for the offline store.
+
+The OfflineStore class contains a couple of methods to read features from the offline store. Unlike the OnlineStore class, Feast does not manage any infrastructure for the offline store.
To fully implement the interface for the offline store, you will need to implement these methods:
* `pull_latest_from_table_or_query` is invoked when running materialization (using the `feast materialize` or `feast materialize-incremental` commands, or the corresponding `FeatureStore.materialize()` method. This method pull data from the offline store, and the `FeatureStore` class takes care of writing this data into the online store.
* `get_historical_features` is invoked when reading values from the offline store using the `FeatureStore.get_historical_features()` method. Typically, this method is used to retrieve features when training ML models.
-* (optional) `offline_write_batch` is a method that supports directly pushing a pyarrow table to a feature view. Given a feature view with a specific schema, this function should write the pyarrow table to the batch source defined. More details about the push api can be found [here](docs/reference/data-sources/push.md). This method only needs implementation if you want to support the push api in your offline store.
+* (optional) `offline_write_batch` is a method that supports directly pushing a pyarrow table to a feature view. Given a feature view with a specific schema, this function should write the pyarrow table to the batch source defined. More details about the push api can be found [here](../../reference/data-sources/push.md). This method only needs implementation if you want to support the push api in your offline store.
* (optional) `pull_all_from_table_or_query` is a method that pulls all the data from an offline store from a specified start date to a specified end date. This method is only used for **SavedDatasets** as part of data quality monitoring validation.
* (optional) `write_logged_features` is a method that takes a pyarrow table or a path that points to a parquet file and writes the data to a defined source defined by `LoggingSource` and `LoggingConfig`. This method is only used internally for **SavedDatasets**.
@@ -140,29 +143,30 @@ To fully implement the interface for the offline store, you will need to impleme
)
# Implementation here.
pass
-
```
{% endcode %}
### 1.1 Type Mapping
Most offline stores will have to perform some custom mapping of offline store datatypes to feast value types.
-- The function to implement here are `source_datatype_to_feast_value_type` and `get_column_names_and_types` in your `DataSource` class.
+
+* The function to implement here are `source_datatype_to_feast_value_type` and `get_column_names_and_types` in your `DataSource` class.
* `source_datatype_to_feast_value_type` is used to convert your DataSource's datatypes to feast value types.
* `get_column_names_and_types` retrieves the column names and corresponding datasource types.
Add any helper functions for type conversion to `sdk/python/feast/type_map.py`.
-- Be sure to implement correct type mapping so that Feast can process your feature columns without casting incorrectly that can potentially cause loss of information or incorrect data.
+
+* Be sure to implement correct type mapping so that Feast can process your feature columns without casting incorrectly that can potentially cause loss of information or incorrect data.
## 2. Defining an OfflineStoreConfig class
Additional configuration may be needed to allow the OfflineStore to talk to the backing store. For example, Redshift needs configuration information like the connection information for the Redshift instance, credentials for connecting to the database, etc.
-To facilitate configuration, all OfflineStore implementations are **required** to also define a corresponding OfflineStoreConfig class in the same file. This OfflineStoreConfig class should inherit from the `FeastConfigBaseModel` class, which is defined [here](../../sdk/python/feast/repo\_config.py#L44).
+To facilitate configuration, all OfflineStore implementations are **required** to also define a corresponding OfflineStoreConfig class in the same file. This OfflineStoreConfig class should inherit from the `FeastConfigBaseModel` class, which is defined [here](../../../sdk/python/feast/repo\_config.py#L44).
The `FeastConfigBaseModel` is a [pydantic](https://pydantic-docs.helpmanual.io) class, which parses yaml configuration into python objects. Pydantic also allows the model classes to define validators for the config classes, to make sure that the config classes are correctly defined.
-This config class **must** container a `type` field, which contains the fully qualified class name of its corresponding OfflineStore class.
+This config class **must** contain a `type` field, which contains the fully qualified class name of its corresponding OfflineStore class.
Additionally, the name of the config class must be the same as the OfflineStore class, with the `Config` suffix.
@@ -195,7 +199,7 @@ online_store:
```
{% endcode %}
-This configuration information is available to the methods of the OfflineStore, via the `config: RepoConfig` parameter which is passed into the methods of the OfflineStore interface, specifically at the `config.offline_store` field of the `config` parameter. This fields in the `feature_store.yaml` should map directly to your `OfflineStoreConfig` class that is detailed above in Section 2.
+This configuration information is available to the methods of the OfflineStore, via the `config: RepoConfig` parameter which is passed into the methods of the OfflineStore interface, specifically at the `config.offline_store` field of the `config` parameter. These fields in the `feature_store.yaml` should map directly to your `OfflineStoreConfig` class that is detailed above in Section 2.
{% code title="feast_custom_offline_store/file.py" %}
```python
@@ -225,7 +229,7 @@ Custom offline stores may need to implement their own instances of the `Retrieva
The `RetrievalJob` interface exposes two methods - `to_df` and `to_arrow`. The expectation is for the retrieval job to be able to return the rows read from the offline store as a parquet DataFrame, or as an Arrow table respectively.
-Users who want to have their offline store support **scalable batch materialization** for online use cases (detailed in this [RFC](https://docs.google.com/document/d/1J7XdwwgQ9dY_uoV9zkRVGQjK9Sy43WISEW6D5V9qzGo/edit#heading=h.9gaqqtox9jg6)) will also need to implement `to_remote_storage` to distribute the reading and writing of offline store records to blob storage (such as S3). This may be used by a custom [Materialization Engine](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/materialization/batch_materialization_engine.py#L72) to parallelize the materialization of data by processing it in chunks. If this is not implemented, Feast will default to local materialization (pulling all records into memory to materialize).
+Users who want to have their offline store support **scalable batch materialization** for online use cases (detailed in this [RFC](https://docs.google.com/document/d/1J7XdwwgQ9dY\_uoV9zkRVGQjK9Sy43WISEW6D5V9qzGo/edit#heading=h.9gaqqtox9jg6)) will also need to implement `to_remote_storage` to distribute the reading and writing of offline store records to blob storage (such as S3). This may be used by a custom [Materialization Engine](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/materialization/batch\_materialization\_engine.py#L72) to parallelize the materialization of data by processing it in chunks. If this is not implemented, Feast will default to local materialization (pulling all records into memory to materialize).
{% code title="feast_custom_offline_store/file.py" %}
```python
@@ -258,7 +262,7 @@ class CustomFileRetrievalJob(RetrievalJob):
Before this offline store can be used as the batch source for a feature view in a feature repo, a subclass of the `DataSource` [base class](https://rtd.feast.dev/en/master/index.html?highlight=DataSource#feast.data\_source.DataSource) needs to be defined. This class is responsible for holding information needed by specific feature views to support reading historical values from the offline store. For example, a feature view using Redshift as the offline store may need to know which table contains historical feature values.
-The data source class should implement two methods - `from_proto`, and `to_proto`.
+The data source class should implement two methods - `from_proto`, and `to_proto`.
For custom offline stores that are not being implemented in the main feature repo, the `custom_options` field should be used to store any configuration needed by the data source. In this case, the implementer is responsible for serializing this configuration into bytes in the `to_proto` method and reading the value back from bytes in the `from_proto` method.
@@ -317,9 +321,9 @@ class CustomFileDataSource(FileSource):
```
{% endcode %}
-## 5. Using the custom offline store
+## 5. Using the custom offline store
-After implementing these classes, the custom offline store can be used by referencing it in a feature repo's `feature_store.yaml` file, specifically in the `offline_store` field. The value specified should be the fully qualified class name of the OfflineStore.
+After implementing these classes, the custom offline store can be used by referencing it in a feature repo's `feature_store.yaml` file, specifically in the `offline_store` field. The value specified should be the fully qualified class name of the OfflineStore.
As long as your OfflineStore class is available in your Python environment, it will be imported by Feast dynamically at runtime.
@@ -372,17 +376,17 @@ driver_hourly_stats_view = FeatureView(
Even if you have created the `OfflineStore` class in a separate repo, you can still test your implementation against the Feast test suite, as long as you have Feast as a submodule in your repo.
1. In order to test against the test suite, you need to create a custom `DataSourceCreator` that implement our testing infrastructure methods, `create_data_source` and optionally, `created_saved_dataset_destination`.
- * `create_data_source` should create a datasource based on the dataframe passed in. It may be implemented by uploading the contents of the dataframe into the offline store and returning a datasource object pointing to that location. See `BigQueryDataSourceCreator` for an implementation of a data source creator.
- * `created_saved_dataset_destination` is invoked when users need to save the dataset for use in data validation. This functionality is still in alpha and is **optional**.
+ * `create_data_source` should create a datasource based on the dataframe passed in. It may be implemented by uploading the contents of the dataframe into the offline store and returning a datasource object pointing to that location. See `BigQueryDataSourceCreator` for an implementation of a data source creator.
+ * `created_saved_dataset_destination` is invoked when users need to save the dataset for use in data validation. This functionality is still in alpha and is **optional**.
+2. Make sure that your offline store doesn't break any unit tests first by running:
-2. Make sure that your offline store doesn't break any unit tests first by running:
```
make test-python
```
+3. Next, set up your offline store to run the universal integration tests. These are integration tests specifically intended to test offline and online stores against Feast API functionality, to ensure that the Feast APIs work with your offline store.
-3. Next, set up your offline store to run the universal integration tests. These are integration tests specifically intended to test offline and online stores against Feast API functionality, to ensure that the Feast APIs works with your offline store.
- - Feast parametrizes integration tests using the `FULL_REPO_CONFIGS` variable defined in `sdk/python/tests/integration/feature_repos/repo_configuration.py` which stores different offline store classes for testing.
- - To overwrite the default configurations to use your own offline store, you can simply create your own file that contains a `FULL_REPO_CONFIGS` dictionary, and point Feast to that file by setting the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file. The module should add new `IntegrationTestRepoConfig` classes to the `AVAILABLE_OFFLINE_STORES` by defining an offline store that you would like Feast to test with.
+ * Feast parametrizes integration tests using the `FULL_REPO_CONFIGS` variable defined in `sdk/python/tests/integration/feature_repos/repo_configuration.py` which stores different offline store classes for testing.
+ * To overwrite the default configurations to use your own offline store, you can simply create your own file that contains a `FULL_REPO_CONFIGS` dictionary, and point Feast to that file by setting the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file. The module should add new `IntegrationTestRepoConfig` classes to the `AVAILABLE_OFFLINE_STORES` by defining an offline store that you would like Feast to test with.
A sample `FULL_REPO_CONFIGS_MODULE` looks something like this:
@@ -394,8 +398,7 @@ Even if you have created the `OfflineStore` class in a separate repo, you can st
AVAILABLE_OFFLINE_STORES = [("local", PostgreSQLDataSourceCreator)]
```
-
-4. You should swap out the `FULL_REPO_CONFIGS` environment variable and run the integration tests against your offline store. In the example repo, the file that overwrites `FULL_REPO_CONFIGS` is `feast_custom_offline_store/feast_tests.py`, so you would run:
+4. You should swap out the `FULL_REPO_CONFIGS` environment variable and run the integration tests against your offline store. In the example repo, the file that overwrites `FULL_REPO_CONFIGS` is `feast_custom_offline_store/feast_tests.py`, so you would run:
```bash
export FULL_REPO_CONFIGS_MODULE='feast_custom_offline_store.feast_tests'
@@ -403,20 +406,17 @@ Even if you have created the `OfflineStore` class in a separate repo, you can st
```
If the integration tests fail, this indicates that there is a mistake in the implementation of this offline store!
-
5. Remember to add your datasource to `repo_config.py` similar to how we added `spark`, `trino`, etc, to the dictionary `OFFLINE_STORE_CLASS_FOR_TYPE` and add the necessary configuration to `repo_configuration.py`. Namely, `AVAILABLE_OFFLINE_STORES` should load your repo configuration module.
### 7. Dependencies
-Add any dependencies for your offline store to our `sdk/python/setup.py` under a new `__REQUIRED` list with the packages and add it to the setup script so that if your offline store is needed, users can install the necessary python packages. These packages should be defined as extras so that they are not installed by users by default.
-You will need to regenerate our requirements files. To do this, create separate pyenv environments for python 3.8, 3.9, and 3.10. In each environment, run the following commands:
+Add any dependencies for your offline store to our `sdk/python/setup.py` under a new `__REQUIRED` list with the packages and add it to the setup script so that if your offline store is needed, users can install the necessary python packages. These packages should be defined as extras so that they are not installed by users by default. You will need to regenerate our requirements files. To do this, create separate pyenv environments for python 3.8, 3.9, and 3.10. In each environment, run the following commands:
```
export PYTHON=
make lock-python-ci-dependencies
```
-
### 8. Add Documentation
Remember to add documentation for your offline store.
@@ -425,12 +425,12 @@ Remember to add documentation for your offline store.
2. You should also add a reference in `docs/reference/data-sources/README.md` and `docs/SUMMARY.md` to these markdown files.
**NOTE**: Be sure to document the following things about your offline store:
-- How to create the datasource and most what configuration is needed in the `feature_store.yaml` file in order to create the datasource.
-- Make sure to flag that the datasource is in alpha development.
-- Add some documentation on what the data model is for the specific offline store for more clarity.
-- Finally, generate the python code docs by running:
+
+* How to create the datasource and what configuration is needed in the `feature_store.yaml` file in order to create the datasource.
+* Make sure to flag that the datasource is in alpha development.
+* Add some documentation on what the data model is for the specific offline store for more clarity.
+* Finally, generate the python code docs by running:
```bash
make build-sphinx
```
-
diff --git a/docs/how-to-guides/adding-support-for-a-new-online-store.md b/docs/how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md
similarity index 86%
rename from docs/how-to-guides/adding-support-for-a-new-online-store.md
rename to docs/how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md
index d1f5986f18..fe16347b73 100644
--- a/docs/how-to-guides/adding-support-for-a-new-online-store.md
+++ b/docs/how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md
@@ -2,13 +2,12 @@
## Overview
-Feast makes adding support for a new online store (database) easy. Developers can simply implement the [OnlineStore](../../sdk/python/feast/infra/online\_stores/online\_store.py#L26) interface to add support for a new store (other than the existing stores like Redis, DynamoDB, SQLite, and Datastore).
+Feast makes adding support for a new online store (database) easy. Developers can simply implement the [OnlineStore](../../../sdk/python/feast/infra/online\_stores/online\_store.py#L26) interface to add support for a new store (other than the existing stores like Redis, DynamoDB, SQLite, and Datastore).
In this guide, we will show you how to integrate with MySQL as an online store. While we will be implementing a specific store, this guide should be representative for adding support for any new online store.
The full working code for this guide can be found at [feast-dev/feast-custom-online-store-demo](https://github.com/feast-dev/feast-custom-online-store-demo).
-
The process of using a custom online store consists of 6 steps:
1. Defining the `OnlineStore` class.
@@ -21,7 +20,7 @@ The process of using a custom online store consists of 6 steps:
## 1. Defining an OnlineStore class
{% hint style="info" %}
- OnlineStore class names must end with the OnlineStore suffix!
+OnlineStore class names must end with the OnlineStore suffix!
{% endhint %}
### Contrib online stores
@@ -30,19 +29,21 @@ New online stores go in `sdk/python/feast/infra/online_stores/contrib/`.
#### What is a contrib plugin?
-- Not guaranteed to implement all interface methods
-- Not guaranteed to be stable.
-- Should have warnings for users to indicate this is a contrib plugin that is not maintained by the maintainers.
+* Not guaranteed to implement all interface methods
+* Not guaranteed to be stable.
+* Should have warnings for users to indicate this is a contrib plugin that is not maintained by the maintainers.
#### How do I make a contrib plugin an "official" plugin?
+
To move an online store plugin out of contrib, you need:
-- GitHub actions (i.e `make test-python-integration`) is setup to run all tests against the online store and pass.
-- At least two contributors own the plugin (ideally tracked in our `OWNERS` / `CODEOWNERS` file).
+
+* GitHub actions (i.e `make test-python-integration`) is setup to run all tests against the online store and pass.
+* At least two contributors own the plugin (ideally tracked in our `OWNERS` / `CODEOWNERS` file).
The OnlineStore class broadly contains two sets of methods
* One set deals with managing infrastructure that the online store needed for operations
-* One set deals with writing data into the store, and reading data from the store.
+* One set deals with writing data into the store, and reading data from the store.
### 1.1 Infrastructure Methods
@@ -50,11 +51,11 @@ There are two methods that deal with managing infrastructure for online stores,
* `update` is invoked when users run `feast apply` as a CLI command, or the `FeatureStore.apply()` sdk method.
-The `update` method should be used to perform any operations necessary before data can be written to or read from the store. The `update` method can be used to create MySQL tables in preparation for reads and writes to new feature views.
+The `update` method should be used to perform any operations necessary before data can be written to or read from the store. The `update` method can be used to create MySQL tables in preparation for reads and writes to new feature views.
* `teardown` is invoked when users run `feast teardown` or `FeatureStore.teardown()`.
-The `teardown` method should be used to perform any clean-up operations. `teardown` can be used to drop MySQL indices and tables corresponding to the feature views being deleted.
+The `teardown` method should be used to perform any clean-up operations. `teardown` can be used to drop MySQL indices and tables corresponding to the feature views being deleted.
{% code title="feast_custom_online_store/mysql.py" %}
```python
@@ -123,10 +124,10 @@ def teardown(
### 1.2 Read/Write Methods
-There are two methods that deal with writing data to and from the online stores.`online_write_batch `and `online_read`.
+There are two methods that deal with writing data to and from the online stores. `online_write_batch` and `online_read`.
-* `online_write_batch `is invoked when running materialization (using the `feast materialize` or `feast materialize-incremental` commands, or the corresponding `FeatureStore.materialize()` method.
-* `online_read `is invoked when reading values from the online store using the `FeatureStore.get_online_features()` method.
+* `online_write_batch` is invoked when running materialization (using the `feast materialize` or `feast materialize-incremental` commands, or the corresponding `FeatureStore.materialize()` method.
+* `online_read` is invoked when reading values from the online store using the `FeatureStore.get_online_features()` method.
{% code title="feast_custom_online_store/mysql.py" %}
```python
@@ -210,22 +211,24 @@ def online_read(
### 1.3 Type Mapping
Most online stores will have to perform some custom mapping of online store datatypes to feast value types.
-- The function to implement here are `source_datatype_to_feast_value_type` and `get_column_names_and_types` in your `DataSource` class.
+
+* The function to implement here are `source_datatype_to_feast_value_type` and `get_column_names_and_types` in your `DataSource` class.
* `source_datatype_to_feast_value_type` is used to convert your DataSource's datatypes to feast value types.
* `get_column_names_and_types` retrieves the column names and corresponding datasource types.
Add any helper functions for type conversion to `sdk/python/feast/type_map.py`.
-- Be sure to implement correct type mapping so that Feast can process your feature columns without casting incorrectly that can potentially cause loss of information or incorrect data.
+
+* Be sure to implement correct type mapping so that Feast can process your feature columns without casting incorrectly that can potentially cause loss of information or incorrect data.
## 2. Defining an OnlineStoreConfig class
Additional configuration may be needed to allow the OnlineStore to talk to the backing store. For example, MySQL may need configuration information like the host at which the MySQL instance is running, credentials for connecting to the database, etc.
-To facilitate configuration, all OnlineStore implementations are **required** to also define a corresponding OnlineStoreConfig class in the same file. This OnlineStoreConfig class should inherit from the `FeastConfigBaseModel` class, which is defined [here](../../sdk/python/feast/repo\_config.py#L44).
+To facilitate configuration, all OnlineStore implementations are **required** to also define a corresponding OnlineStoreConfig class in the same file. This OnlineStoreConfig class should inherit from the `FeastConfigBaseModel` class, which is defined [here](../../../sdk/python/feast/repo\_config.py#L44).
The `FeastConfigBaseModel` is a [pydantic](https://pydantic-docs.helpmanual.io) class, which parses yaml configuration into python objects. Pydantic also allows the model classes to define validators for the config classes, to make sure that the config classes are correctly defined.
-This config class **must** container a `type` field, which contains the fully qualified class name of its corresponding OnlineStore class.
+This config class **must** contain a `type` field, which contains the fully qualified class name of its corresponding OnlineStore class.
Additionally, the name of the config class must be the same as the OnlineStore class, with the `Config` suffix.
@@ -254,7 +257,7 @@ online_store:
```
{% endcode %}
-This configuration information is available to the methods of the OnlineStore, via the`config: RepoConfig` parameter which is passed into all the methods of the OnlineStore interface, specifically at the `config.online_store` field of the `config` parameter.
+This configuration information is available to the methods of the OnlineStore, via the `config: RepoConfig` parameter which is passed into all the methods of the OnlineStore interface, specifically at the `config.online_store` field of the `config` parameter.
{% code title="feast_custom_online_store/mysql.py" %}
```python
@@ -281,9 +284,9 @@ def online_write_batch(
```
{% endcode %}
-## 3. Using the custom online store
+## 3. Using the custom online store
-After implementing both these classes, the custom online store can be used by referencing it in a feature repo's `feature_store.yaml` file, specifically in the `online_store` field. The value specified should be the fully qualified class name of the OnlineStore.
+After implementing both these classes, the custom online store can be used by referencing it in a feature repo's `feature_store.yaml` file, specifically in the `online_store` field. The value specified should be the fully qualified class name of the OnlineStore.
As long as your OnlineStore class is available in your Python environment, it will be imported by Feast dynamically at runtime.
@@ -302,7 +305,7 @@ online_store:
```
{% endcode %}
-If additional configuration for the online store is **not **required, then we can omit the other fields and only specify the `type` of the online store class as the value for the `online_store`.
+If additional configuration for the online store is **not** required, then we can omit the other fields and only specify the `type` of the online store class as the value for the `online_store`.
{% code title="feature_repo/feature_store.yaml" %}
```yaml
@@ -319,15 +322,14 @@ online_store: feast_custom_online_store.mysql.MySQLOnlineStore
Even if you have created the `OnlineStore` class in a separate repo, you can still test your implementation against the Feast test suite, as long as you have Feast as a submodule in your repo.
-1. In the Feast submodule, we can run all the unit tests and make sure they pass:
+1. In the Feast submodule, we can run all the unit tests and make sure they pass:
+
```
make test-python
```
-
-
2. The universal tests, which are integration tests specifically intended to test offline and online stores, should be run against Feast to ensure that the Feast APIs works with your online store.
- - Feast parametrizes integration tests using the `FULL_REPO_CONFIGS` variable defined in `sdk/python/tests/integration/feature_repos/repo_configuration.py` which stores different online store classes for testing.
- - To overwrite these configurations, you can simply create your own file that contains a `FULL_REPO_CONFIGS` variable, and point Feast to that file by setting the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file.
+ * Feast parametrizes integration tests using the `FULL_REPO_CONFIGS` variable defined in `sdk/python/tests/integration/feature_repos/repo_configuration.py` which stores different online store classes for testing.
+ * To overwrite these configurations, you can simply create your own file that contains a `FULL_REPO_CONFIGS` variable, and point Feast to that file by setting the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file.
A sample `FULL_REPO_CONFIGS_MODULE` looks something like this:
@@ -341,10 +343,8 @@ AVAILABLE_ONLINE_STORES = {"postgres": (None, PostgreSQLDataSourceCreator)}
```
{% endcode %}
-
If you are planning to start the online store up locally(e.g spin up a local Redis Instance) for testing, then the dictionary entry should be something like:
-
```python
{
"sqlite": ({"type": "sqlite"}, None),
@@ -352,10 +352,8 @@ If you are planning to start the online store up locally(e.g spin up a local Red
}
```
-
If you are planning instead to use a Dockerized container to run your tests against your online store, you can define a `OnlineStoreCreator` and replace the `None` object above with your `OnlineStoreCreator` class.
-
If you create a containerized docker image for testing, developers who are trying to test with your online store will not have to spin up their own instance of the online store for testing. An example of an `OnlineStoreCreator` is shown below:
{% code title="sdk/python/tests/integration/feature_repos/universal/online_store/redis.py" %}
@@ -381,33 +379,33 @@ export FULL_REPO_CONFIGS_MODULE='feast_custom_online_store.feast_tests'
make test-python-universal
```
-- If there are some tests that fail, this indicates that there is a mistake in the implementation of this online store!
-
+* If there are some tests that fail, this indicates that there is a mistake in the implementation of this online store!
### 5. Add Dependencies
Add any dependencies for your online store to our `sdk/python/setup.py` under a new `_REQUIRED` list with the packages and add it to the setup script so that if your online store is needed, users can install the necessary python packages. These packages should be defined as extras so that they are not installed by users by default.
-- You will need to regenerate our requirements files. To do this, create separate pyenv environments for python 3.8, 3.9, and 3.10. In each environment, run the following commands:
+
+* You will need to regenerate our requirements files. To do this, create separate pyenv environments for python 3.8, 3.9, and 3.10. In each environment, run the following commands:
```
export PYTHON=
make lock-python-ci-dependencies
```
-
### 6. Add Documentation
Remember to add the documentation for your online store.
-1. Add a new markdown file to `docs/reference/online-stores/`.
+
+1. Add a new markdown file to `docs/reference/online-stores/`.
2. You should also add a reference in `docs/reference/online-stores/README.md` and `docs/SUMMARY.md`. Add a new markdown document to document your online store functionality similar to how the other online stores are documented.
**NOTE**:Be sure to document the following things about your online store:
-- Be sure to cover how to create the datasource and what configuration is needed in the `feature_store.yaml` file in order to create the datasource.
-- Make sure to flag that the online store is in alpha development.
-- Add some documentation on what the data model is for the specific online store for more clarity.
-- Finally, generate the python code docs by running:
+
+* Be sure to cover how to create the datasource and what configuration is needed in the `feature_store.yaml` file in order to create the datasource.
+* Make sure to flag that the online store is in alpha development.
+* Add some documentation on what the data model is for the specific online store for more clarity.
+* Finally, generate the python code docs by running:
```bash
make build-sphinx
```
-
diff --git a/docs/how-to-guides/creating-a-custom-materialization-engine.md b/docs/how-to-guides/customizing-feast/creating-a-custom-materialization-engine.md
similarity index 92%
rename from docs/how-to-guides/creating-a-custom-materialization-engine.md
rename to docs/how-to-guides/customizing-feast/creating-a-custom-materialization-engine.md
index 935ac3dc99..cca7bd3621 100644
--- a/docs/how-to-guides/creating-a-custom-materialization-engine.md
+++ b/docs/how-to-guides/customizing-feast/creating-a-custom-materialization-engine.md
@@ -1,4 +1,4 @@
-# Adding a custom materialization engine
+# Adding a custom batch materialization engine
### Overview
@@ -7,10 +7,10 @@ Feast batch materialization operations (`materialize` and `materialize-increment
Custom batch materialization engines allow Feast users to extend Feast to customize the materialization process. Examples include:
* Setting up custom materialization-specific infrastructure during `feast apply` (e.g. setting up Spark clusters or Lambda Functions)
-* Launching custom batch ingestion \(materialization\) jobs \(Spark, Beam, AWS Lambda\)
+* Launching custom batch ingestion (materialization) jobs (Spark, Beam, AWS Lambda)
* Tearing down custom materialization-specific infrastructure during `feast teardown` (e.g. tearing down Spark clusters, or deleting Lambda Functions)
-Feast comes with built-in materialization engines, e.g, `LocalMaterializationEngine`, and an experimental `LambdaMaterializationEngine`. However, users can develop their own materialization engines by creating a class that implements the contract in the [BatchMaterializationEngine class](https://github.com/feast-dev/feast/blob/6d7b38a39024b7301c499c20cf4e7aef6137c47c/sdk/python/feast/infra/materialization/batch_materialization_engine.py#L72).
+Feast comes with built-in materialization engines, e.g, `LocalMaterializationEngine`, and an experimental `LambdaMaterializationEngine`. However, users can develop their own materialization engines by creating a class that implements the contract in the [BatchMaterializationEngine class](https://github.com/feast-dev/feast/blob/6d7b38a39024b7301c499c20cf4e7aef6137c47c/sdk/python/feast/infra/materialization/batch\_materialization\_engine.py#L72).
### Guide
@@ -79,14 +79,13 @@ class MyCustomEngine(LocalMaterializationEngine):
)
for task in tasks
]
-
```
Notice how in the above engine we have only overwritten two of the methods on the `LocalMaterializatinEngine`, namely `update` and `materialize`. These two methods are convenient to replace if you are planning to launch custom batch jobs.
#### Step 2: Configuring Feast to use the engine
-Configure your [feature\_store.yaml](../reference/feature-repository/feature-store-yaml.md) file to point to your new engine class:
+Configure your [feature\_store.yaml](../../reference/feature-repository/feature-store-yaml.md) file to point to your new engine class:
```yaml
project: repo
@@ -99,7 +98,7 @@ offline_store:
type: file
```
-Notice how the `batch_engine` field above points to the module and class where your engine can be found.
+Notice how the `batch_engine` field above points to the module and class where your engine can be found.
#### Step 3: Using the engine
@@ -109,7 +108,7 @@ Now you should be able to use your engine by running a Feast command:
feast apply
```
-```text
+```
Registered entity driver_id
Registered feature view driver_hourly_stats
Deploying infrastructure for driver_hourly_stats
diff --git a/docs/how-to-guides/creating-a-custom-provider.md b/docs/how-to-guides/customizing-feast/creating-a-custom-provider.md
similarity index 94%
rename from docs/how-to-guides/creating-a-custom-provider.md
rename to docs/how-to-guides/customizing-feast/creating-a-custom-provider.md
index 40ec20ee6a..027ca20c39 100644
--- a/docs/how-to-guides/creating-a-custom-provider.md
+++ b/docs/how-to-guides/customizing-feast/creating-a-custom-provider.md
@@ -6,8 +6,8 @@ All Feast operations execute through a `provider`. Operations like materializing
Custom providers allow Feast users to extend Feast to execute any custom logic. Examples include:
-* Launching custom streaming ingestion jobs \(Spark, Beam\)
-* Launching custom batch ingestion \(materialization\) jobs \(Spark, Beam\)
+* Launching custom streaming ingestion jobs (Spark, Beam)
+* Launching custom batch ingestion (materialization) jobs (Spark, Beam)
* Adding custom validation to feature repositories during `feast apply`
* Adding custom infrastructure setup logic which runs during `feast apply`
* Extending Feast commands with in-house metrics, logging, or tracing
@@ -87,7 +87,7 @@ It is possible to overwrite all the methods on the provider class. In fact, it i
#### Step 2: Configuring Feast to use the provider
-Configure your [feature\_store.yaml](../reference/feature-repository/feature-store-yaml.md) file to point to your new provider class:
+Configure your [feature\_store.yaml](../../reference/feature-repository/feature-store-yaml.md) file to point to your new provider class:
```yaml
project: repo
@@ -100,7 +100,7 @@ offline_store:
type: file
```
-Notice how the `provider` field above points to the module and class where your provider can be found.
+Notice how the `provider` field above points to the module and class where your provider can be found.
#### Step 3: Using the provider
@@ -110,7 +110,7 @@ Now you should be able to use your provider by running a Feast command:
feast apply
```
-```text
+```
Registered entity driver_id
Registered feature view driver_hourly_stats
Deploying infrastructure for driver_hourly_stats
@@ -128,4 +128,3 @@ That's it. You should now have a fully functional custom provider!
### Next steps
Have a look at the [custom provider demo repository](https://github.com/feast-dev/feast-custom-provider-demo) for a fully functional example of a custom provider. Feel free to fork it when creating your own custom provider!
-
diff --git a/docs/how-to-guides/running-feast-in-production.md b/docs/how-to-guides/running-feast-in-production.md
index f03629ea4b..04166809a5 100644
--- a/docs/how-to-guides/running-feast-in-production.md
+++ b/docs/how-to-guides/running-feast-in-production.md
@@ -2,20 +2,16 @@
## Overview
-After learning about Feast concepts and playing with Feast locally, you're now ready to use Feast in production.
-This guide aims to help with the transition from a sandbox project to production-grade deployment in the cloud or on-premise.
+After learning about Feast concepts and playing with Feast locally, you're now ready to use Feast in production. This guide aims to help with the transition from a sandbox project to production-grade deployment in the cloud or on-premise.
Overview of typical production configuration is given below:

{% hint style="success" %}
-**Important note:** We're trying to keep Feast modular. With the exception of the core, most of the Feast blocks are loosely connected and can be used independently. Hence, you are free to build your own production configuration.
-For example, you might not have a stream source and, thus, no need to write features in real-time to an online store.
-Or you might not need to retrieve online features.
+**Important note:** Feast is highly customizable and modular. Most Feast blocks are loosely connected and can be used independently. Hence, you are free to build your own production configuration.
-Furthermore, there's no single "true" approach. As you will see in this guide, Feast usually provides several options for each problem.
-It's totally up to you to pick a path that's better suited to your needs.
+For example, you might not have a stream source and, thus, no need to write features in real-time to an online store. Or you might not need to retrieve online features. Feast also often provides multiple options to achieve the same goal. We discuss tradeoffs below.
{% endhint %}
In this guide we will show you how to:
@@ -28,9 +24,22 @@ In this guide we will show you how to:
## 1. Automatically deploying changes to your feature definitions
-The first step to setting up a deployment of Feast is to create a Git repository that contains your feature definitions. The recommended way to version and track your feature definitions is by committing them to a repository and tracking changes through commits.
+### Setting up a feature repository
-Most teams will need to have a feature store deployed to more than one environment. We have created an example repository \([Feast Repository Example](https://github.com/feast-dev/feast-ci-repo-example)\) which contains two Feast projects, one per environment.
+The first step to setting up a deployment of Feast is to create a Git repository that contains your feature definitions. The recommended way to version and track your feature definitions is by committing them to a repository and tracking changes through commits. If you recall, running `feast apply` commits feature definitions to a **registry**, which users can then read elsewhere.
+
+### Setting up CI/CD to automatically update the registry
+
+We typically recommend setting up CI/CD to automatically run `feast plan` and `feast apply` when pull requests are opened / merged.
+
+### Setting up multiple environments
+
+Most teams will need to have a feature store deployed to more than one environment. There are two common ways teams approach this:
+
+1. Have separate GitHub branches for each environment
+2. Have separate `feature_store.yaml` files that correspond to each environment
+
+For the second approach, we have created an example repository ([Feast Repository Example](https://github.com/feast-dev/feast-ci-repo-example)) which contains two Feast projects, one per environment.
The contents of this repository are shown below:
@@ -53,11 +62,11 @@ The repository contains three sub-folders:
* `staging/`: This folder contains the staging `feature_store.yaml` and Feast objects. Users that want to make changes to the Feast deployment in the staging environment will commit changes to this directory.
* `production/`: This folder contains the production `feature_store.yaml` and Feast objects. Typically users would first test changes in staging before copying the feature definitions into the production folder, before committing the changes.
-* `.github`: This folder is an example of a CI system that applies the changes in either the `staging` or `production` repositories using `feast apply`. This operation saves your feature definitions to a shared registry \(for example, on GCS\) and configures your infrastructure for serving features.
+* `.github`: This folder is an example of a CI system that applies the changes in either the `staging` or `production` repositories using `feast apply`. This operation saves your feature definitions to a shared registry (for example, on GCS) and configures your infrastructure for serving features.
The `feature_store.yaml` contains the following:
-```text
+```
project: staging
registry: gs://feast-ci-demo-registry/staging/registry.db
provider: gcp
@@ -69,8 +78,7 @@ Notice how the registry has been configured to use a Google Cloud Storage bucket
It is important to note that the CI system above must have access to create, modify, or remove infrastructure in your production environment. This is unlike clients of the feature store, who will only have read access.
{% endhint %}
-If your organization consists of many independent data science teams or a single group is working on several projects
-that could benefit from sharing features, entities, sources, and transformations, then we encourage you to utilize Python packages inside each environment:
+If your organization consists of many independent data science teams or a single group is working on several projects that could benefit from sharing features, entities, sources, and transformations, then we encourage you to utilize Python packages inside each environment:
```
└── production
@@ -89,16 +97,17 @@ that could benefit from sharing features, entities, sources, and transformations
└── feature_store.yaml
```
-In summary, once you have set up a Git based repository with CI that runs `feast apply` on changes, your infrastructure \(offline store, online store, and cloud environment\) will automatically be updated to support the loading of data into the feature store or retrieval of data.
+In summary, once you have set up a Git based repository with CI that runs `feast apply` on changes, your infrastructure (offline store, online store, and cloud environment) will automatically be updated to support the loading of data into the feature store or retrieval of data.
## 2. How to load data into your online store and keep it up to date
To keep your online store up to date, you need to run a job that loads feature data from your feature view sources into your online store. In Feast, this loading operation is called materialization.
### 2.1. Manual materializations
+
The simplest way to schedule materialization is to run an **incremental** materialization using the Feast CLI:
-```text
+```
feast materialize-incremental 2022-01-01T00:00:00
```
@@ -106,9 +115,9 @@ The above command will load all feature values from all feature view sources int
A timestamp is required to set the end date for materialization. If your source is fully up to date then the end date would be the current time. However, if you are querying a source where data is not yet available, then you do not want to set the timestamp to the current time. You would want to use a timestamp that ends at a date for which data is available. The next time `materialize-incremental` is run, Feast will load data that starts from the previous end date, so it is important to ensure that the materialization interval does not overlap with time periods for which data has not been made available. This is commonly the case when your source is an ETL pipeline that is scheduled on a daily basis.
-An alternative approach to incremental materialization \(where Feast tracks the intervals of data that need to be ingested\), is to call Feast directly from your scheduler like Airflow. In this case, Airflow is the system that tracks the intervals that have been ingested.
+An alternative approach to incremental materialization (where Feast tracks the intervals of data that need to be ingested), is to call Feast directly from your scheduler like Airflow. In this case, Airflow is the system that tracks the intervals that have been ingested.
-```text
+```
feast materialize -v driver_hourly_stats 2020-01-01T00:00:00 2020-01-02T00:00:00
```
@@ -118,14 +127,10 @@ The timestamps above should match the interval of data that has been computed by
### 2.2. Automate periodic materializations
-It is up to you which orchestration/scheduler to use to periodically run `$ feast materialize`.
-Feast keeps the history of materialization in its registry so that the choice could be as simple as a [unix cron util](https://en.wikipedia.org/wiki/Cron).
-Cron util should be sufficient when you have just a few materialization jobs (it's usually one materialization job per feature view) triggered infrequently.
-However, the amount of work can quickly outgrow the resources of a single machine. That happens because the materialization job needs to repackage all rows before writing them to an online store. That leads to high utilization of CPU and memory.
-In this case, you might want to use a job orchestrator to run multiple jobs in parallel using several workers.
-Kubernetes Jobs or Airflow are good choices for more comprehensive job orchestration.
+It is up to you which orchestration/scheduler to use to periodically run `$ feast materialize`. Feast keeps the history of materialization in its registry so that the choice could be as simple as a [unix cron util](https://en.wikipedia.org/wiki/Cron). Cron util should be sufficient when you have just a few materialization jobs (it's usually one materialization job per feature view) triggered infrequently. However, the amount of work can quickly outgrow the resources of a single machine. That happens because the materialization job needs to repackage all rows before writing them to an online store. That leads to high utilization of CPU and memory. In this case, you might want to use a job orchestrator to run multiple jobs in parallel using several workers. Kubernetes Jobs or Airflow are good choices for more comprehensive job orchestration.
If you are using Airflow as a scheduler, Feast can be invoked through the [BashOperator](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/bash.html) after the [Python SDK](https://pypi.org/project/feast/) has been installed into a virtual environment and your feature repo has been synced:
+
```python
materialize = BashOperator(
task_id='materialize',
@@ -134,8 +139,7 @@ materialize = BashOperator(
```
{% hint style="success" %}
-Important note: Airflow worker must have read and write permissions to the registry file on GS / S3
-since it pulls configuration and updates materialization history.
+Important note: Airflow worker must have read and write permissions to the registry file on GS / S3 since it pulls configuration and updates materialization history.
{% endhint %}
## 3. How to use Feast for model training
@@ -207,17 +211,14 @@ It is important to note that both the training pipeline and model serving servic
## 4. Retrieving online features for prediction
-Once you have successfully loaded (or in Feast terminology materialized) your data from batch sources into the online store, you can start consuming features for model inference.
-There are three approaches for that purpose sorted from the most simple one (in an operational sense) to the most performant (benchmarks to be published soon):
+Once you have successfully loaded (or in Feast terminology materialized) your data from batch sources into the online store, you can start consuming features for model inference. There are three approaches for that purpose sorted from the most simple one (in an operational sense) to the most performant (benchmarks to be published soon):
### 4.1. Use the Python SDK within an existing Python service
-This approach is the most convenient to keep your infrastructure as minimalistic as possible and avoid deploying extra services.
-The Feast Python SDK will connect directly to the online store (Redis, Datastore, etc), pull the feature data, and run transformations locally (if required).
-The obvious drawback is that your service must be written in Python to use the Feast Python SDK.
-A benefit of using a Python stack is that you can enjoy production-grade services with integrations with many existing data science tools.
+This approach is the most convenient to keep your infrastructure as minimalistic as possible and avoid deploying extra services. The Feast Python SDK will connect directly to the online store (Redis, Datastore, etc), pull the feature data, and run transformations locally (if required). The obvious drawback is that your service must be written in Python to use the Feast Python SDK. A benefit of using a Python stack is that you can enjoy production-grade services with integrations with many existing data science tools.
To integrate online retrieval into your service use the following code:
+
```python
from feast import FeatureStore
@@ -235,47 +236,41 @@ feature_vector = fs.get_online_features(
### 4.2. Consume features via HTTP API from Serverless Feature Server
-If you don't want to add the Feast Python SDK as a dependency, or your feature retrieval service is written in a non-Python language,
-Feast can deploy a simple feature server
-on serverless infrastructure (eg, AWS Lambda, Google Cloud Run) for you.
-This service will provide an HTTP API with JSON I/O, which can be easily used with any programming language.
+If you don't want to add the Feast Python SDK as a dependency, or your feature retrieval service is written in a non-Python language, Feast can deploy a simple feature server on serverless infrastructure (eg, AWS Lambda, Google Cloud Run) for you. This service will provide an HTTP API with JSON I/O, which can be easily used with any programming language.
-[Read more about this feature](../reference/alpha-aws-lambda-feature-server.md)
+[Read more about this feature](../reference/feature-servers/alpha-aws-lambda-feature-server.md)
### 4.3. Go feature server deployed on Kubernetes
-For users with very latency-sensitive and high QPS use-cases, Feast offers a high-performance [Go feature server](../reference/feature-servers/go-feature-server.md).
-It can use either HTTP or gRPC.
+For users with very latency-sensitive and high QPS use-cases, Feast offers a high-performance [Go feature server](../reference/feature-servers/go-feature-server.md). It can use either HTTP or gRPC.
The Go feature server can be deployed to a Kubernetes cluster via Helm charts in a few simple steps:
1. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) and [helm 3](https://helm.sh/)
2. Add the Feast Helm repository and download the latest charts:
+
```
helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com
helm repo update
```
-3. Run Helm Install
+
+3. Run Helm Install
+
```
helm install feast-release feast-charts/feast-feature-server \
--set global.registry.path=s3://feast/registries/prod \
--set global.project=
```
-This chart will deploy a single service.
-The service must have read access to the registry file on cloud storage.
-It will keep a copy of the registry in their memory and periodically refresh it, so expect some delays in update propagation in exchange for better performance.
-In order for the Go feature server to be enabled, you should set `go_feature_serving: True` in the `feature_store.yaml`.
+This chart will deploy a single service. The service must have read access to the registry file on cloud storage. It will keep a copy of the registry in its memory and periodically refresh it, so expect some delays in update propagation in exchange for better performance. In order for the Go feature server to be enabled, you should set `go_feature_serving: True` in the `feature_store.yaml`.
## 5. Ingesting features from a stream source
-Recently Feast added functionality for [stream ingestion](../reference/data-sources/push.md).
-Please note that this is still in an early phase and new incompatible changes may be introduced.
+Recently Feast added functionality for [stream ingestion](../reference/data-sources/push.md). Please note that this is still in an early phase and new incompatible changes may be introduced.
### 5.1. Using Python SDK in your Apache Spark / Beam pipeline
-The default option to write features from a stream is to add the Python SDK into your existing PySpark / Beam pipeline.
-Feast SDK provides writer implementation that can be called from `foreachBatch` stream writer in PySpark like this:
+The default option to write features from a stream is to add the Python SDK into your existing PySpark / Beam pipeline. Feast SDK provides writer implementation that can be called from `foreachBatch` stream writer in PySpark like this:
```python
store = FeatureStore(...)
@@ -289,21 +284,17 @@ streamingDF.writeStream.foreachBatch(feast_writer).start()
### 5.2. Push Service (Alpha)
-Alternatively, if you want to ingest features directly from a broker (eg, Kafka or Kinesis), you can use the "push service", which will write to an online store and/or offline store.
-This service will expose an HTTP API or when deployed on Serverless platforms like AWS Lambda or Google Cloud Run,
-this service can be directly connected to Kinesis or PubSub.
+Alternatively, if you want to ingest features directly from a broker (eg, Kafka or Kinesis), you can use the "push service", which will write to an online store and/or offline store. This service will expose an HTTP API, or, when deployed on serverless platforms like AWS Lambda or Google Cloud Run, it can be directly connected to Kinesis or PubSub.
-If you are using Kafka, [HTTP Sink](https://docs.confluent.io/kafka-connect-http/current/overview.html) could be utilized as a middleware.
-In this case, the "push service" can be deployed on Kubernetes or as a Serverless function.
+If you are using Kafka, [HTTP Sink](https://docs.confluent.io/kafka-connect-http/current/overview.html) could be utilized as a middleware. In this case, the "push service" can be deployed on Kubernetes or as a Serverless function.
## 6. Monitoring
Feast services can report their metrics to a StatsD-compatible collector. To activate this function, you'll need to provide a StatsD IP address and a port when deploying the helm chart (in future, this will be added to `feature_store.yaml`).
-We use an [InfluxDB-style extension](https://github.com/prometheus/statsd_exporter#tagging-extensions) for StatsD format to be able to send tags along with metrics. Keep that in mind while selecting the collector ([telegraph](https://www.influxdata.com/blog/getting-started-with-sending-statsd-metrics-to-telegraf-influxdb/#introducing-influx-statsd) will work for sure).
+We use an [InfluxDB-style extension](https://github.com/prometheus/statsd_exporter#tagging-extensions) for StatsD format to be able to send tags along with metrics. Keep that in mind while selecting the collector ([telegraph](https://www.influxdata.com/blog/getting-started-with-sending-statsd-metrics-to-telegraf-influxdb/#introducing-influx-statsd) will work for sure).
-We chose StatsD since it's a de-facto standard with various implementations (eg, [1](https://github.com/prometheus/statsd_exporter), [2](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/statsd/README.md))
-and metrics can be easily exported to Prometheus, InfluxDB, AWS CloudWatch, etc.
+We chose StatsD since it's a de-facto standard with various implementations (eg, [1](https://github.com/prometheus/statsd_exporter), [2](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/statsd/README.md)) and metrics can be easily exported to Prometheus, InfluxDB, AWS CloudWatch, etc.
## 7. Using environment variables in your yaml configuration
@@ -329,7 +320,8 @@ online_store:
connection_string: ${REDIS_CONNECTION_STRING:"0.0.0.0:6379"}
```
----
+***
+
## Summary
Summarizing it all together we want to show several options of architecture that will be most frequently used in production:
@@ -345,21 +337,19 @@ Summarizing it all together we want to show several options of architecture that

-
-### Option #2 *(still in development)*
+### Option #2 _(still in development)_
Same as Option #1, except:
-* Push service is deployed as AWS Lambda / Google Cloud Run and is configured as a sink for Kinesis or PubSub to ingest features directly from a stream broker.
-Lambda / Cloud Run is being managed by Feast SDK (from CI environment)
+
+* Push service is deployed as AWS Lambda / Google Cloud Run and is configured as a sink for Kinesis or PubSub to ingest features directly from a stream broker. Lambda / Cloud Run is being managed by Feast SDK (from CI environment)
* Materialization jobs are managed inside Kubernetes via Kubernetes Job (currently not managed by Helm)

-
-### Option #3 *(still in development)*
+### Option #3 _(still in development)_
Same as Option #2, except:
+
* Push service is deployed on Kubernetes cluster and exposes an HTTP API that can be used as a sink for Kafka (via kafka-http connector) or accessed directly.

-
diff --git a/docs/project/development-guide.md b/docs/project/development-guide.md
index 58e29a5ca7..5aae0628f6 100644
--- a/docs/project/development-guide.md
+++ b/docs/project/development-guide.md
@@ -8,6 +8,7 @@ This guide is targeted at developers looking to contribute to Feast:
* [Making a Pull Request](development-guide.md#making-a-pull-request)
* [Feast Data Storage Format](development-guide.md#feast-data-storage-format)
* [Feast Protobuf API](development-guide.md#feast-protobuf-api)
+* [Maintainer Guide](./maintainers.md)
> Learn How the Feast [Contributing Process](contributing.md) works.
diff --git a/docs/project/maintainers.md b/docs/project/maintainers.md
new file mode 100644
index 0000000000..ff77c3dfc4
--- /dev/null
+++ b/docs/project/maintainers.md
@@ -0,0 +1,59 @@
+# Setting up your environment
+> Please see the [Development Guide](https://docs.feast.dev/project/development-guide) for project level development instructions and [Contributing Guide](https://github.com/feast-dev/feast/blob/master/CONTRIBUTING.md) for specific details on how to set up your development environment and contribute to Feast.
+
+# Maintainers Development
+> In most scenarios, your code changes or the areas of Feast that you are actively maintaining will only touch parts of the code (e.g., one offline store/online store).
+
+## Forked Repo Best Practices
+1. You should set up your fork so that you can make pull requests against your own master branch.
+ - This prevents unnecessary integration tests and other GitHub Actions that are irrelevant to your code changes from being run every time you would like to make a code change.
+ - **NOTE**: Most workflows are enabled by default so manually [disable workflows](https://docs.github.com/en/actions/managing-workflow-runs/disabling-and-enabling-a-workflow) that are not needed.
+2. When you are ready to merge changes into the official feast branch, make a pull request with the main feast branch and request a review from other maintainers.
+ - Since your code changes should only touch tests that are relevant to your functionality, other tests should pass as well.
+
+**NOTE**: Remember to frequently sync your fork's master branch with `feast-dev/feast:master`.
+
+## Github Actions Workflow on Fork
+- **Recommended**: The github actions workflows that should be enabled on the fork are as follows:
+ - `unit-tests`
+ - Runs all of the unit tests that should always pass.
+ - `linter`
+ - Lints your pr for styling or complexity issues using mypy, isort, and flake.
+ - `fork-pr-integration-tests-[provider]`
+ - Run all of the integration tests to test Feast functionality on your fork for a specific provider.
+ - The `.github/workflows` folder has examples of common workflows (`aws`, `gcp`, and `snowflake`).
+ 1. Move the `fork_pr_integration_tests_[provider].yml` from `.github/fork_workflows` to `.github/workflows`.
+ 2. Edit `fork_pr_integration_tests_[provider].yml` (more details below) to only run the integration tests that are relevant to your area of interest.
+ 3. Push the workflow to your branch and it should automatically be added to the actions on your fork.
+ - `build_wheels`
+ - Release verification workflow to use for [release](docs/project/release-process.md).
+
+## Integration Test Workflow Changes
+Fork-specific integration tests are run by the `fork_pr_integration_tests_[provider].yml` yaml workflow files.
+
+1. Under the `integration-test-python` job, replace `your github repo` with your feast github repo name.
+2. If your offline store/online store needs special setup, add it to the job similar to how gcp is setup.
+
+ ```yaml
+ - name: Set up gcloud SDK
+ uses: google-github-actions/setup-gcloud@v0
+ with:
+ project_id: ${{ secrets.GCP_PROJECT_ID }}
+ service_account_key: ${{ secrets.GCP_SA_KEY }}
+ export_default_credentials: true
+ ```
+
+3. Add any environment variables that you need to your github [secrets](https://github.com/Azure/actions-workflow-samples/blob/master/assets/create-secrets-for-GitHub-workflows.md).
+ - For specific GitHub secrets that you will need to test the already supported datastores (e.g., AWS, BigQuery, Snowflake, etc.), refer to this [guide](https://github.com/feast-dev/feast/blob/master/CONTRIBUTING.md) under the `Integration Tests` section.
+ - Access these by setting environment variables as `secrets.SECRET_NAME`.
+4. To limit pytest in your github workflow to test only your specific tests, leverage the `-k` option for pytest.
+
+ ```bash
+ pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread -k "BigQuery and not dynamo and not Redshift"
+ ```
+
+ - Each test in Feast is parametrized by its offline and online store so we can filter out tests by name. The above command chooses only tests with BigQuery that do not use Dynamo or Redshift.
+
+5. Every time a pull request or a change to a pull request is made, the integration tests, the local integration tests, the unit tests, and the linter should run.
+
+> Sample fork setups can be found here: [snowflake](https://github.com/kevjumba/feast/pull/30) and [bigquery](https://github.com/kevjumba/feast/pull/31).
diff --git a/docs/project/release-process.md b/docs/project/release-process.md
index e9f3295d91..7fb9c2a560 100644
--- a/docs/project/release-process.md
+++ b/docs/project/release-process.md
@@ -22,31 +22,33 @@ For Feast maintainers, these are the concrete steps for making a new release.
### Release for Python and Java SDK
1. Generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) or retrieve your saved personal access token.
- - The personal access token should have all of the permissions under the `repo` checkbox.
+ * The personal access token should have all of the permissions under the `repo` checkbox.
2. Access the `Actions` tab on the main `feast-dev/feast` repo and find the `release` action.
3. Look for the header `This workflow has a workflow_dispatch event trigger` again and click `Run Workflow` on the right.
+ * If you are making a minor or major release, you should run it off of the master branch.
+ * If you are making a patch release, run it off of the corresponding minor release branch.
4. Try the dry run first with your personal access token. If this succeeds, uncheck `Dry Run` and run the release workflow.
5. All of the jobs should succeed besides the UI job which needs to be released separately. Ping a maintainer on Slack to run the UI release manually.
6. Try to install the feast release in your local environment and test out the `feast init` -> `feast apply` workflow to verify as a sanity check that the release worked correctly.
### (for minor releases) Post-release steps
1. Create a new branch based on master (i.e. v0.22-branch) and push to the main Feast repo. This will be where cherry-picks go for future patch releases and where documentation will point.
-2. Write a summary of the release in the GitHub release
+2. Write a summary of the release in the GitHub release
1. By default, Semantic Release will pull in messages from commits (features vs fixes, etc). But this is hard to digest still, so it helps to have a high level overview.
### Update documentation
-In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U029405HFEU) in Slack for access):
+In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U029405HFEU) in Slack for access):
1. Create a new space within the Feast collection
-2. Go to the overflow menu on the top -> Synchronize with Git
+2. Go to the overflow menu on the top -> Synchronize with Git
1. Specify GitHub as the provider
-
+

2. Configure to point to the new release branch

3. Publish the new page for this branch as part of the collection
-
+

4. Go back to the main Feast collection and go to the overflow menu -> "Customize collection"
diff --git a/docs/reference/alpha-on-demand-feature-view.md b/docs/reference/alpha-on-demand-feature-view.md
index eb8c4f6291..01b47d13dc 100644
--- a/docs/reference/alpha-on-demand-feature-view.md
+++ b/docs/reference/alpha-on-demand-feature-view.md
@@ -1,23 +1,30 @@
-# \[Alpha\] On demand feature view
+# \[Alpha] On demand feature view
**Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases.
-{% hint style="info" %}
-To enable this feature, run **`feast alpha enable on_demand_transforms`**
-{% endhint %}
-
## Overview
-On demand feature views allows users to use existing features and request time data \(features only available at request time\) to transform and create new features. Users define python transformation logic which is executed in both historical retrieval and online retrieval paths.
+On demand feature views allow data scientists to use existing features and request time data (features only available at request time) to transform and create new features. Users define Python transformation logic which is executed in both historical retrieval and online retrieval paths.
+
+Currently, these transformations are executed locally. This is fine for online serving, but does not scale well offline.
-Currently, these transformations are executed locally. Future milestones include building a Feature Transformation Server for executing transformations at higher scale.
+### Why use on demand feature views?
+
+This enables data scientists to easily impact the online feature retrieval path. For example, a data scientist could
+
+1. Call `get_historical_features` to generate a training dataframe
+2. Iterate in notebook on feature engineering in Pandas
+3. Copy transformation logic into on demand feature views and commit to a dev branch of the feature repository
+4. Verify with `get_historical_features` (on a small dataset) that the transformation gives expected output over historical data
+5. Verify with `get_online_features` on dev branch that the transformation correctly outputs online features
+6. Submit a pull request to the staging / prod branches which impact production traffic
## CLI
There are new CLI commands:
* `feast on-demand-feature-views list` lists all registered on demand feature view after `feast apply` is run
-* `feast on-demand-feature-views describe [NAME]` describes the definition of an on demand feature view
+* `feast on-demand-feature-views describe [NAME]` describes the definition of an on demand feature view
## Example
@@ -63,7 +70,7 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
### **Feature retrieval**
{% hint style="info" %}
-The on demand feature view's name is the function name \(i.e. `transformed_conv_rate`\).
+The on demand feature view's name is the function name (i.e. `transformed_conv_rate`).
{% endhint %}
And then to retrieve historical or online features, we can call this in a feature service or reference individual features:
@@ -80,4 +87,3 @@ training_df = store.get_historical_features(
],
).to_df()
```
-
diff --git a/docs/reference/alpha-web-ui.md b/docs/reference/alpha-web-ui.md
index 182f9fb13d..7d21a3d45d 100644
--- a/docs/reference/alpha-web-ui.md
+++ b/docs/reference/alpha-web-ui.md
@@ -1,14 +1,15 @@
-# \[Alpha\] Feast Web UI
+# \[Beta] Web UI
-**Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases.
+**Warning**: This is an _experimental_ feature. To our knowledge, this is stable, but there are still rough edges in the experience. Contributions are welcome!
## Overview
The Feast Web UI allows users to explore their feature repository through a Web UI. It includes functionality such as:
-- Browsing Feast objects (feature views, entities, data sources, feature services, and saved datasets) and their relationships
-- Searching and filtering for Feast objects by tags
-
+* Browsing Feast objects (feature views, entities, data sources, feature services, and saved datasets) and their relationships
+* Searching and filtering for Feast objects by tags
+
+
## Usage
@@ -19,6 +20,7 @@ There are several ways to use the Feast Web UI.
The easiest way to get started is to run the `feast ui` command within a feature repository:
Output of `feast ui --help`:
+
```bash
Usage: feast ui [OPTIONS]
@@ -84,6 +86,7 @@ When you start the React app, it will look for `project-list.json` to find a lis
```
Then start the React App
+
```bash
yarn start
```
@@ -92,7 +95,7 @@ yarn start
The advantage of importing Feast UI as a module is in the ease of customization. The `` component exposes a `feastUIConfigs` prop thorough which you can customize the UI. Currently it supports a few parameters.
-##### Fetching the Project List
+**Fetching the Project List**
You can use `projectListPromise` to provide a promise that overrides where the Feast UI fetches the project list from.
@@ -110,7 +113,7 @@ You can use `projectListPromise` to provide a promise that overrides where the F
/>
```
-##### Custom Tabs
+**Custom Tabs**
You can add custom tabs for any of the core Feast objects through the `tabsRegistry`.
@@ -133,4 +136,3 @@ const tabsRegistry = {
```
Examples of custom tabs can be found in the `ui/custom-tabs` folder.
-
diff --git a/docs/reference/data-sources/README.md b/docs/reference/data-sources/README.md
index b4fbc98b46..6ab2e4b083 100644
--- a/docs/reference/data-sources/README.md
+++ b/docs/reference/data-sources/README.md
@@ -1,6 +1,6 @@
# Data sources
-Please see [Data Source](../../getting-started/concepts/feature-view.md#data-source) for an explanation of data sources.
+Please see [Data Source](../../getting-started/concepts/data-ingestion.md) for an explanation of data sources.
{% content-ref url="file.md" %}
[file.md](file.md)
@@ -37,3 +37,7 @@ Please see [Data Source](../../getting-started/concepts/feature-view.md#data-sou
{% content-ref url="postgres.md" %}
[postgres.md]([postgres].md)
{% endcontent-ref %}
+
+{% content-ref url="trino.md" %}
+[trino.md](trino.md)
+{% endcontent-ref %}
diff --git a/docs/reference/data-sources/bigquery.md b/docs/reference/data-sources/bigquery.md
index 47eb9b1bf6..b64ea4b7c2 100644
--- a/docs/reference/data-sources/bigquery.md
+++ b/docs/reference/data-sources/bigquery.md
@@ -1,15 +1,14 @@
-# BigQuery
+# BigQuery source
## Description
-BigQuery data sources allow for the retrieval of historical feature values from BigQuery for building training datasets as well as materializing features into an online store.
-
-* Either a table reference or a SQL query can be provided.
-* No performance guarantees can be provided over SQL query-based sources. Please use table references where possible.
+BigQuery data sources are BigQuery tables or views.
+These can be specified either by a table reference or a SQL query.
+However, no performance guarantees can be provided for SQL query-based sources, so table references are recommended.
## Examples
-Using a table reference
+Using a table reference:
```python
from feast import BigQuerySource
@@ -19,7 +18,7 @@ my_bigquery_source = BigQuerySource(
)
```
-Using a query
+Using a query:
```python
from feast import BigQuerySource
@@ -30,5 +29,4 @@ BigQuerySource(
)
```
-Configuration options are available [here](https://rtd.feast.dev/en/latest/index.html#feast.data_source.BigQuerySource).
-
+The full set of configuration options is available [here](https://rtd.feast.dev/en/latest/index.html#feast.infra.offline_stores.bigquery_source.BigQuerySource).
diff --git a/docs/reference/data-sources/file.md b/docs/reference/data-sources/file.md
index 12e6529840..838ea2f972 100644
--- a/docs/reference/data-sources/file.md
+++ b/docs/reference/data-sources/file.md
@@ -1,8 +1,9 @@
-# File
+# File source
## Description
-File data sources allow for the retrieval of historical feature values from files on disk for building training datasets, as well as for materializing features into an online store.
+File data sources are files on disk or on S3.
+Currently only Parquet files are supported.
{% hint style="warning" %}
FileSource is meant for development purposes only and is not optimized for production use.
@@ -20,5 +21,4 @@ parquet_file_source = FileSource(
)
```
-Configuration options are available [here](https://rtd.feast.dev/en/latest/index.html#feast.data_source.FileSource).
-
+The full set of configuration options is available [here](https://rtd.feast.dev/en/latest/index.html#feast.infra.offline_stores.file_source.FileSource).
diff --git a/docs/reference/data-sources/postgres.md b/docs/reference/data-sources/postgres.md
index 759cb50bbd..09046b7b07 100644
--- a/docs/reference/data-sources/postgres.md
+++ b/docs/reference/data-sources/postgres.md
@@ -1,15 +1,18 @@
-# PostgreSQL
+# PostgreSQL source (contrib)
## Description
-**NOTE**: The Postgres plugin is a contrib plugin. This means it may not be fully stable.
+PostgreSQL data sources are PostgreSQL tables or views.
+These can be specified either by a table reference or a SQL query.
+## Disclaimer
-The PostgreSQL data source allows for the retrieval of historical feature values from a PostgreSQL database for building training datasets as well as materializing features into an online store.
+The PostgreSQL data source does not achieve full test coverage.
+Please do not assume complete stability.
## Examples
-Defining a Postgres source
+Defining a Postgres source:
```python
from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import (
@@ -23,3 +26,5 @@ driver_stats_source = PostgreSQLSource(
created_timestamp_column="created",
)
```
+
+The full set of configuration options is available [here](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source.PostgreSQLSource).
diff --git a/docs/reference/data-sources/redshift.md b/docs/reference/data-sources/redshift.md
index 7f50c64d02..8ecbfc3edc 100644
--- a/docs/reference/data-sources/redshift.md
+++ b/docs/reference/data-sources/redshift.md
@@ -1,15 +1,14 @@
-# Redshift
+# Redshift source
## Description
-Redshift data sources allow for the retrieval of historical feature values from Redshift for building training datasets as well as materializing features into an online store.
-
-* Either a table name or a SQL query can be provided.
-* No performance guarantees can be provided over SQL query-based sources. Please use table references where possible.
+Redshift data sources are Redshift tables or views.
+These can be specified either by a table reference or a SQL query.
+However, no performance guarantees can be provided for SQL query-based sources, so table references are recommended.
## Examples
-Using a table name
+Using a table name:
```python
from feast import RedshiftSource
@@ -19,7 +18,7 @@ my_redshift_source = RedshiftSource(
)
```
-Using a query
+Using a query:
```python
from feast import RedshiftSource
@@ -30,5 +29,4 @@ my_redshift_source = RedshiftSource(
)
```
-Configuration options are available [here](https://rtd.feast.dev/en/master/feast.html?#feast.RedshiftSource).
-
+The full set of configuration options is available [here](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.redshift_source.RedshiftSource).
diff --git a/docs/reference/data-sources/snowflake.md b/docs/reference/data-sources/snowflake.md
index 0f5304b6cd..ff3a4f8a87 100644
--- a/docs/reference/data-sources/snowflake.md
+++ b/docs/reference/data-sources/snowflake.md
@@ -1,14 +1,13 @@
-# Snowflake
+# Snowflake source
## Description
-Snowflake data sources allow for the retrieval of historical feature values from Snowflake for building training datasets as well as materializing features into an online store.
-
-* Either a table reference or a SQL query can be provided.
+Snowflake data sources are Snowflake tables or views.
+These can be specified either by a table reference or a SQL query.
## Examples
-Using a table reference
+Using a table reference:
```python
from feast import SnowflakeSource
@@ -20,7 +19,7 @@ my_snowflake_source = SnowflakeSource(
)
```
-Using a query
+Using a query:
```python
from feast import SnowflakeSource
@@ -38,7 +37,9 @@ my_snowflake_source = SnowflakeSource(
)
```
-One thing to remember is how Snowflake handles table and column name conventions.
-You can read more about quote identifiers [here](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html)
+{% hint style="warning" %}
+Be careful about how Snowflake handles table and column name conventions.
+In particular, you can read more about quote identifiers [here](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html).
+{% endhint %}
-Configuration options are available [here](https://rtd.feast.dev/en/latest/index.html#feast.data_source.SnowflakeSource).
+The full set of configuration options is available [here](https://rtd.feast.dev/en/latest/index.html#feast.infra.offline_stores.snowflake_source.SnowflakeSource).
diff --git a/docs/reference/data-sources/spark.md b/docs/reference/data-sources/spark.md
index 266a401a51..6e923d7952 100644
--- a/docs/reference/data-sources/spark.md
+++ b/docs/reference/data-sources/spark.md
@@ -1,16 +1,17 @@
-# Spark (contrib)
+# Spark source (contrib)
## Description
-**NOTE**: Spark data source api is currently in alpha development and the API is not completely stable. The API may change or update in the future.
+Spark data sources are tables or files that can be loaded from some Spark store (e.g. Hive or in-memory). They can also be specified by a SQL query.
-The spark data source API allows for the retrieval of historical feature values from file/database sources for building training datasets as well as materializing features into an online store.
+## Disclaimer
-* Either a table name, a SQL query, or a file path can be provided.
+The Spark data source does not achieve full test coverage.
+Please do not assume complete stability.
## Examples
-Using a table reference from SparkSession(for example, either in memory or a Hive Metastore)
+Using a table reference from SparkSession (for example, either in-memory or a Hive Metastore):
```python
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
@@ -22,7 +23,7 @@ my_spark_source = SparkSource(
)
```
-Using a query
+Using a query:
```python
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
@@ -35,7 +36,7 @@ my_spark_source = SparkSource(
)
```
-Using a file reference
+Using a file reference:
```python
from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
@@ -49,3 +50,5 @@ my_spark_source = SparkSource(
created_timestamp_column="created",
)
```
+
+The full set of configuration options is available [here](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.contrib.spark_offline_store.spark_source.SparkSource).
diff --git a/docs/reference/data-sources/trino.md b/docs/reference/data-sources/trino.md
new file mode 100644
index 0000000000..7b722c9a30
--- /dev/null
+++ b/docs/reference/data-sources/trino.md
@@ -0,0 +1,29 @@
+# Trino source (contrib)
+
+## Description
+
+Trino data sources are Trino tables or views.
+These can be specified either by a table reference or a SQL query.
+
+## Disclaimer
+
+The Trino data source does not achieve full test coverage.
+Please do not assume complete stability.
+
+## Examples
+
+Defining a Trino source:
+
+```python
+from feast.infra.offline_stores.contrib.trino_offline_store.trino_source import (
+ TrinoSource,
+)
+
+driver_hourly_stats = TrinoSource(
+ event_timestamp_column="event_timestamp",
+ table_ref="feast.driver_stats",
+ created_timestamp_column="created",
+)
+```
+
+The full set of configuration options is available [here](https://rtd.feast.dev/en/master/#trino-source).
diff --git a/docs/reference/feature-servers/README.md b/docs/reference/feature-servers/README.md
index 301cea372c..f9a40104c3 100644
--- a/docs/reference/feature-servers/README.md
+++ b/docs/reference/feature-servers/README.md
@@ -2,4 +2,14 @@
Feast users can choose to retrieve features from a feature server, as opposed to through the Python SDK.
-{% page-ref page="python-feature-server.md" %}
+{% content-ref url="python-feature-server.md" %}
+[python-feature-server.md](python-feature-server.md)
+{% endcontent-ref %}
+
+{% content-ref url="go-feature-server.md" %}
+[go-feature-server.md](go-feature-server.md)
+{% endcontent-ref %}
+
+{% content-ref url="alpha-aws-lambda-feature-server.md" %}
+[alpha-aws-lambda-feature-server.md](alpha-aws-lambda-feature-server.md)
+{% endcontent-ref %}
\ No newline at end of file
diff --git a/docs/reference/alpha-aws-lambda-feature-server.md b/docs/reference/feature-servers/alpha-aws-lambda-feature-server.md
similarity index 66%
rename from docs/reference/alpha-aws-lambda-feature-server.md
rename to docs/reference/feature-servers/alpha-aws-lambda-feature-server.md
index eadcf40bb4..caf5542bdc 100644
--- a/docs/reference/alpha-aws-lambda-feature-server.md
+++ b/docs/reference/feature-servers/alpha-aws-lambda-feature-server.md
@@ -1,20 +1,16 @@
-# \[Alpha\] AWS Lambda feature server
+# \[Alpha] AWS Lambda feature server
**Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases.
-{% hint style="info" %}
-To enable this feature, run **`feast alpha enable aws_lambda_feature_server`**
-{% endhint %}
-
## Overview
-The AWS Lambda feature server is an HTTP endpoint that serves features with JSON I/O, deployed as a Docker image through AWS Lambda and AWS API Gateway. This enables users to get features from Feast using any programming language that can make HTTP requests. A [local feature server](feature-servers/python-feature-server.md) is also available. A remote feature server on GCP Cloud Run is currently being developed.
+The AWS Lambda feature server is an HTTP endpoint that serves features with JSON I/O, deployed as a Docker image through AWS Lambda and AWS API Gateway. This enables users to get features from Feast using any programming language that can make HTTP requests. A [local feature server](python-feature-server.md) is also available. A remote feature server on GCP Cloud Run is currently being developed.
## Deployment
The AWS Lambda feature server is only available to projects using the `AwsProvider` with registries on S3. It is disabled by default. To enable it, `feature_store.yaml` must be modified; specifically, the `enable` flag must be on and an `execution_role_name` must be specified. For example, after running `feast init -t aws`, changing the registry to be on S3, and enabling the feature server, the contents of `feature_store.yaml` should look similar to the following:
-```text
+```
project: dev
registry: s3://feast/registries/dev
provider: aws
@@ -27,9 +23,6 @@ offline_store:
database: feast
s3_staging_location: s3://feast/redshift/tests/staging_location
iam_role: arn:aws:iam::{aws_account}:role/redshift_s3_access_role
-flags:
- alpha_features: true
- aws_lambda_feature_server: true
feature_server:
enabled: True
execution_role_name: arn:aws:iam::{aws_account}:role/lambda_execution_role
@@ -41,12 +34,12 @@ If enabled, the feature server will be deployed during `feast apply`. After it i
Feast requires the following permissions in order to deploy and teardown AWS Lambda feature server:
-| Permissions | Resources |
-| ----------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------- |
-| lambda:CreateFunction
lambda:GetFunction
lambda:DeleteFunction
lambda:AddPermission
lambda:UpdateFunctionConfiguration
| arn:aws:lambda:\:\:function:feast-\* |
-| ecr:CreateRepository
ecr:DescribeRepositories
ecr:DeleteRepository
ecr:PutImage
ecr:DescribeImages
ecr:BatchDeleteImage
ecr:CompleteLayerUpload
ecr:UploadLayerPart
ecr:InitiateLayerUpload
ecr:BatchCheckLayerAvailability
ecr:GetDownloadUrlForLayer
ecr:GetRepositoryPolicy
ecr:SetRepositoryPolicy
ecr:GetAuthorizationToken
| \* |
-| iam:PassRole
| arn:aws:iam::\:role/ |
-| apigateway:*
| arn:aws:apigateway:*::/apis/*/routes/*/routeresponses
arn:aws:apigateway:*::/apis/*/routes/*/routeresponses/*
arn:aws:apigateway:*::/apis/*/routes/*
arn:aws:apigateway:*::/apis/*/routes
arn:aws:apigateway:*::/apis/*/integrations
arn:aws:apigateway:*::/apis/*/stages/*/routesettings/*
arn:aws:apigateway:*::/apis/*
arn:aws:apigateway:*::/apis
|
+| Permissions | Resources |
+| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| lambda:CreateFunction
lambda:GetFunction
lambda:DeleteFunction
lambda:AddPermission
lambda:UpdateFunctionConfiguration
| arn:aws:lambda:\:\:function:feast-\* |
+| ecr:CreateRepository
ecr:DescribeRepositories
ecr:DeleteRepository
ecr:PutImage
ecr:DescribeImages
ecr:BatchDeleteImage
ecr:CompleteLayerUpload
ecr:UploadLayerPart
ecr:InitiateLayerUpload
ecr:BatchCheckLayerAvailability
ecr:GetDownloadUrlForLayer
ecr:GetRepositoryPolicy
ecr:SetRepositoryPolicy
ecr:GetAuthorizationToken
| \* |
+| iam:PassRole
| arn:aws:iam::\:role/ |
+| apigateway:*
| arn:aws:apigateway:*::/apis/*/routes/*/routeresponses
arn:aws:apigateway:*::/apis/*/routes/*/routeresponses/*
arn:aws:apigateway:*::/apis/*/routes/*
arn:aws:apigateway:*::/apis/*/routes
arn:aws:apigateway:*::/apis/*/integrations
arn:aws:apigateway:*::/apis/*/stages/*/routesettings/*
arn:aws:apigateway:*::/apis/*
arn:aws:apigateway:*::/apis
|
The following inline policy can be used to grant Feast the necessary permissions:
@@ -202,4 +195,3 @@ $ curl -X POST \
]
}
```
-
diff --git a/docs/reference/feature-servers/python-feature-server.md b/docs/reference/feature-servers/python-feature-server.md
index 2646c28ef4..7588435576 100644
--- a/docs/reference/feature-servers/python-feature-server.md
+++ b/docs/reference/feature-servers/python-feature-server.md
@@ -10,13 +10,14 @@ There is a CLI command that starts the server: `feast serve`. By default, Feast
## Deploying as a service
-One can deploy a feature server by building a docker image that bundles in the project's `feature_store.yaml`. See this [helm chart](https://github.com/feast-dev/feast/blob/master/infra/charts/feast-python-server) for an example.
+One can deploy a feature server by building a docker image that bundles in the project's `feature_store.yaml`. See this [helm chart](https://github.com/feast-dev/feast/blob/master/infra/charts/feast-python-server) for an example on how to run Feast on Kubernetes.
-A [remote feature server](../alpha-aws-lambda-feature-server.md) on AWS Lambda is also available.
+A [remote feature server](alpha-aws-lambda-feature-server.md) on AWS Lambda is also available.
## Example
### Initializing a feature server
+
Here's an example of how to start the Python feature server with a local feature repo:
```bash
@@ -49,6 +50,7 @@ INFO: Uvicorn running on http://127.0.0.1:6566 (Press CTRL+C to quit)
```
### Retrieving features
+
After the server starts, we can execute cURL commands from another terminal tab:
```bash
@@ -140,7 +142,7 @@ $ curl -X POST \
It's also possible to specify a feature service name instead of the list of features:
-```text
+```
curl -X POST \
"http://localhost:6566/get-online-features" \
-d '{
@@ -152,10 +154,12 @@ curl -X POST \
```
### Pushing features to the online and offline stores
+
The Python feature server also exposes an endpoint for [push sources](../../data-sources/push.md). This endpoint allows you to push data to the online and/or offline store.
-The request definition for pushmode is a string parameter `to` where the options are: ["online", "offline", "online_and_offline"]. Note that timestamps need to be strings.
-```text
+The request definition for pushmode is a string parameter `to` where the options are: \["online", "offline", "online\_and\_offline"]. Note that timestamps need to be strings.
+
+```
curl -X POST "http://localhost:6566/push" -d '{
"push_source_name": "driver_hourly_stats_push_source",
"df": {
@@ -171,6 +175,7 @@ curl -X POST "http://localhost:6566/push" -d '{
```
or equivalently from Python:
+
```python
import json
import requests
diff --git a/docs/reference/offline-stores/README.md b/docs/reference/offline-stores/README.md
index 57d7f35dea..08a28f9e7e 100644
--- a/docs/reference/offline-stores/README.md
+++ b/docs/reference/offline-stores/README.md
@@ -1,16 +1,37 @@
# Offline stores
-Please see [Offline Store](../../getting-started/architecture-and-components/offline-store.md) for an explanation of offline stores.
+Please see [Offline Store](../../getting-started/architecture-and-components/offline-store.md) for a conceptual explanation of offline stores.
-{% page-ref page="file.md" %}
+## Reference
-{% page-ref page="snowflake.md" %}
+{% content-ref url="overview.md" %}
+[overview.md](overview.md)
+{% endcontent-ref %}
-{% page-ref page="bigquery.md" %}
+{% content-ref url="file.md" %}
+[file.md](file.md)
+{% endcontent-ref %}
-{% page-ref page="redshift.md" %}
+{% content-ref url="snowflake.md" %}
+[snowflake.md](snowflake.md)
+{% endcontent-ref %}
-{% page-ref page="spark.md" %}
+{% content-ref url="bigquery.md" %}
+[bigquery.md](bigquery.md)
+{% endcontent-ref %}
-{% page-ref page="postgres.md" %}
+{% content-ref url="redshift.md" %}
+[redshift.md](redshift.md)
+{% endcontent-ref %}
+{% content-ref url="spark.md" %}
+[spark.md](spark.md)
+{% endcontent-ref %}
+
+{% content-ref url="postgres.md" %}
+[postgres.md](postgres.md)
+{% endcontent-ref %}
+
+{% content-ref url="trino.md" %}
+[trino.md](trino.md)
+{% endcontent-ref %}
diff --git a/docs/reference/offline-stores/bigquery.md b/docs/reference/offline-stores/bigquery.md
index 255c587d6b..0e286d78c4 100644
--- a/docs/reference/offline-stores/bigquery.md
+++ b/docs/reference/offline-stores/bigquery.md
@@ -1,13 +1,11 @@
-# BigQuery
+# BigQuery offline store
## Description
The BigQuery offline store provides support for reading [BigQuerySources](../data-sources/bigquery.md).
-* BigQuery tables and views are allowed as sources.
* All joins happen within BigQuery.
-* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to BigQuery in order to complete join operations.
-* A [BigQueryRetrievalJob](https://github.com/feast-dev/feast/blob/c50a36ec1ad5b8d81c6f773c23204db7c7a7d218/sdk/python/feast/infra/offline_stores/bigquery.py#L210) is returned when calling `get_historical_features()`.
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be uploaded to BigQuery as a table (marked for expiration) in order to complete join operations.
## Example
@@ -22,4 +20,38 @@ offline_store:
```
{% endcode %}
-Configuration options are available [here](https://rtd.feast.dev/en/latest/#feast.repo_config.BigQueryOfflineStoreConfig).
+The full set of configuration options is available in [BigQueryOfflineStoreConfig](https://rtd.feast.dev/en/latest/index.html#feast.infra.offline_stores.bigquery.BigQueryOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the BigQuery offline store.
+
+| | BigQuery |
+| :----------------------------------------------------------------- | :------- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | yes |
+| `write_logged_features` (persist logged features to offline store) | yes |
+
+Below is a matrix indicating which functionality is supported by `BigQueryRetrievalJob`.
+
+| | BigQuery |
+| ----------------------------------------------------- | -------- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | yes |
+| export to data lake (S3, GCS, etc.) | no |
+| export to data warehouse | yes |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data* | partial |
+
+*See [GitHub issue](https://github.com/feast-dev/feast/issues/2530) for details on proposed solutions for enabling the BigQuery offline store to understand tables that use `_PARTITIONTIME` as the partition column.
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/offline-stores/file.md b/docs/reference/offline-stores/file.md
index 42ac821691..4b76d9af90 100644
--- a/docs/reference/offline-stores/file.md
+++ b/docs/reference/offline-stores/file.md
@@ -1,11 +1,13 @@
-# File
+# File offline store
## Description
-The File offline store provides support for reading [FileSources](../data-sources/file.md).
+The file offline store provides support for reading [FileSources](../data-sources/file.md).
+It uses Dask as the compute engine.
-* Only Parquet files are currently supported.
-* All data is downloaded and joined using Python and may not scale to production workloads.
+{% hint style="warning" %}
+All data is downloaded and joined using Python and therefore may not scale to production workloads.
+{% endhint %}
## Example
@@ -19,4 +21,36 @@ offline_store:
```
{% endcode %}
-Configuration options are available [here](https://rtd.feast.dev/en/latest/#feast.repo_config.FileOfflineStoreConfig).
+The full set of configuration options is available in [FileOfflineStoreConfig](https://rtd.feast.dev/en/latest/#feast.infra.offline_stores.file.FileOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the file offline store.
+
+| | File |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | yes |
+| `write_logged_features` (persist logged features to offline store) | yes |
+
+Below is a matrix indicating which functionality is supported by `FileRetrievalJob`.
+
+| | File |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | no |
+| export to data lake (S3, GCS, etc.) | no |
+| export to data warehouse | no |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/offline-stores/overview.md b/docs/reference/offline-stores/overview.md
new file mode 100644
index 0000000000..10f99813ba
--- /dev/null
+++ b/docs/reference/offline-stores/overview.md
@@ -0,0 +1,58 @@
+# Overview
+
+## Functionality
+
+Here are the methods exposed by the `OfflineStore` interface, along with the core functionality supported by the method:
+* `get_historical_features`: point-in-time correct join to retrieve historical features
+* `pull_latest_from_table_or_query`: retrieve latest feature values for materialization into the online store
+* `pull_all_from_table_or_query`: retrieve a saved dataset
+* `offline_write_batch`: persist dataframes to the offline store, primarily for push sources
+* `write_logged_features`: persist logged features to the offline store, for feature logging
+
+The first three of these methods all return a `RetrievalJob` specific to an offline store, such as a `SnowflakeRetrievalJob`. Here is a list of functionality supported by `RetrievalJob`s:
+* export to dataframe
+* export to arrow table
+* export to arrow batches (to handle large datasets in memory)
+* export to SQL
+* export to data lake (S3, GCS, etc.)
+* export to data warehouse
+* export as Spark dataframe
+* local execution of Python-based on-demand transforms
+* remote execution of Python-based on-demand transforms
+* persist results in the offline store
+* preview the query plan before execution (`RetrievalJob`s are lazily executed)
+* read partitioned data
+
+## Functionality Matrix
+
+There are currently four core offline store implementations: `FileOfflineStore`, `BigQueryOfflineStore`, `SnowflakeOfflineStore`, and `RedshiftOfflineStore`.
+There are several additional implementations contributed by the Feast community (`PostgreSQLOfflineStore`, `SparkOfflineStore`, and `TrinoOfflineStore`), which are not guaranteed to be stable or to match the functionality of the core implementations.
+Details for each specific offline store, such as how to configure it in a `feature_store.yaml`, can be found [here](README.md).
+
+Below is a matrix indicating which offline stores support which methods.
+
+| | File | BigQuery | Snowflake | Redshift | Postgres | Spark | Trino |
+| :-------------------------------- | :-- | :-- | :-- | :-- | :-- | :-- | :-- |
+| `get_historical_features` | yes | yes | yes | yes | yes | yes | yes |
+| `pull_latest_from_table_or_query` | yes | yes | yes | yes | yes | yes | yes |
+| `pull_all_from_table_or_query` | yes | yes | yes | yes | yes | yes | yes |
+| `offline_write_batch` | yes | yes | yes | yes | no | no | no |
+| `write_logged_features` | yes | yes | yes | yes | no | no | no |
+
+
+Below is a matrix indicating which `RetrievalJob`s support what functionality.
+
+| | File | BigQuery | Snowflake | Redshift | Postgres | Spark | Trino |
+| --------------------------------- | --- | --- | --- | --- | --- | --- | --- |
+| export to dataframe | yes | yes | yes | yes | yes | yes | yes |
+| export to arrow table | yes | yes | yes | yes | yes | yes | yes |
+| export to arrow batches | no | no | no | yes | no | no | no |
+| export to SQL | no | yes | no | yes | yes | no | yes |
+| export to data lake (S3, GCS, etc.) | no | no | yes | no | yes | no | no |
+| export to data warehouse | no | yes | yes | yes | yes | no | no |
+| export as Spark dataframe | no | no | no | no | no | yes | no |
+| local execution of Python-based on-demand transforms | yes | yes | yes | yes | yes | no | yes |
+| remote execution of Python-based on-demand transforms | no | no | no | no | no | no | no |
+| persist results in the offline store | yes | yes | yes | yes | yes | yes | no |
+| preview the query plan before execution | yes | yes | yes | yes | yes | yes | yes |
+| read partitioned data | yes | yes | yes | yes | yes | yes | yes |
diff --git a/docs/reference/offline-stores/postgres.md b/docs/reference/offline-stores/postgres.md
index 9bd472673a..506666fc37 100644
--- a/docs/reference/offline-stores/postgres.md
+++ b/docs/reference/offline-stores/postgres.md
@@ -1,20 +1,14 @@
-# PostgreSQL (contrib)
+# PostgreSQL offline store (contrib)
## Description
-The PostgreSQL offline store is an offline store that provides support for reading [PostgreSQL](../data-sources/postgres.md) data sources.
+The PostgreSQL offline store provides support for reading [PostgreSQLSources](../data-sources/postgres.md).
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be uploaded to Postgres as a table in order to complete join operations.
+## Disclaimer
-**DISCLAIMER**: This PostgreSQL offline store still does not achieve full test coverage.
-
-* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be converted to a Spark dataframe and processed as a temporary view.
-* A `PostgreSQLRetrievalJob` is returned when calling `get_historical_features()`.
- * This allows you to call
- * `to_df` to retrieve the pandas dataframe.
- * `to_arrow` to retrieve the dataframe as a PyArrow table.
- * `to_sql` to get the SQL query used to pull the features.
-
-* sslmode, sslkey_path, sslcert_path, and sslrootcert_path are optional
+The PostgreSQL offline store does not achieve full test coverage.
+Please do not assume complete stability.
## Example
@@ -39,3 +33,38 @@ online_store:
path: data/online_store.db
```
{% endcode %}
+
+Note that `sslmode`, `sslkey_path`, `sslcert_path`, and `sslrootcert_path` are optional parameters.
+The full set of configuration options is available in [PostgreSQLOfflineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.contrib.postgres_offline_store.postgres.PostgreSQLOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the PostgreSQL offline store.
+
+| | Postgres |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | no |
+| `write_logged_features` (persist logged features to offline store) | no |
+
+Below is a matrix indicating which functionality is supported by `PostgreSQLRetrievalJob`.
+
+| | Postgres |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | yes |
+| export to data lake (S3, GCS, etc.) | yes |
+| export to data warehouse | yes |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/offline-stores/redshift.md b/docs/reference/offline-stores/redshift.md
index 73148730c5..2cdf49bdb9 100644
--- a/docs/reference/offline-stores/redshift.md
+++ b/docs/reference/offline-stores/redshift.md
@@ -1,13 +1,11 @@
-# Redshift
+# Redshift offline store
## Description
The Redshift offline store provides support for reading [RedshiftSources](../data-sources/redshift.md).
-* Redshift tables and views are allowed as sources.
* All joins happen within Redshift.
-* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to Redshift in order to complete join operations.
-* A [RedshiftRetrievalJob](https://github.com/feast-dev/feast/blob/bf557bcb72c7878a16dccb48443bbbe9dc3efa49/sdk/python/feast/infra/offline_stores/redshift.py#L161) is returned when calling `get_historical_features()`.
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be uploaded to Redshift temporarily in order to complete join operations.
## Example
@@ -27,7 +25,39 @@ offline_store:
```
{% endcode %}
-Configuration options are available [here](https://github.com/feast-dev/feast/blob/bf557bcb72c7878a16dccb48443bbbe9dc3efa49/sdk/python/feast/infra/offline_stores/redshift.py#L22).
+The full set of configuration options is available in [RedshiftOfflineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.redshift.RedshiftOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the Redshift offline store.
+
+| | Redshift |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | yes |
+| `write_logged_features` (persist logged features to offline store) | yes |
+
+Below is a matrix indicating which functionality is supported by `RedshiftRetrievalJob`.
+
+| | Redshift |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | yes |
+| export to SQL | yes |
+| export to data lake (S3, GCS, etc.) | no |
+| export to data warehouse | yes |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
## Permissions
diff --git a/docs/reference/offline-stores/snowflake.md b/docs/reference/offline-stores/snowflake.md
index e2afaef90d..b3b58fe786 100644
--- a/docs/reference/offline-stores/snowflake.md
+++ b/docs/reference/offline-stores/snowflake.md
@@ -1,17 +1,10 @@
-# Snowflake
+# Snowflake offline store
## Description
The [Snowflake](https://trial.snowflake.com) offline store provides support for reading [SnowflakeSources](../data-sources/snowflake.md).
-
-* Snowflake tables and views are allowed as sources.
* All joins happen within Snowflake.
-* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to Snowflake in order to complete join operations.
-* A `SnowflakeRetrievalJob` is returned when calling `get_historical_features()`.
- * This allows you to call
- * `to_snowflake` to save the dataset into Snowflake
- * `to_sql` to get the SQL query that would execute on `to_df`
- * `to_arrow_chunks` to get the result in batches ([Snowflake python connector docs](https://docs.snowflake.com/en/user-guide/python-connector-api.html#get_result_batches))
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be uploaded to Snowflake as a temporary table in order to complete join operations.
## Example
@@ -31,4 +24,36 @@ offline_store:
```
{% endcode %}
-Configuration options are available in [SnowflakeOfflineStoreConfig](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/offline_stores/snowflake.py#L56).
+The full set of configuration options is available in [SnowflakeOfflineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.snowflake.SnowflakeOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the Snowflake offline store.
+
+| | Snowflake |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | yes |
+| `write_logged_features` (persist logged features to offline store) | yes |
+
+Below is a matrix indicating which functionality is supported by `SnowflakeRetrievalJob`.
+
+| | Snowflake |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | no |
+| export to data lake (S3, GCS, etc.) | yes |
+| export to data warehouse | yes |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/offline-stores/spark.md b/docs/reference/offline-stores/spark.md
index 7eec8d7b73..f1ef1300bd 100644
--- a/docs/reference/offline-stores/spark.md
+++ b/docs/reference/offline-stores/spark.md
@@ -1,20 +1,15 @@
-# Spark (contrib)
+# Spark offline store (contrib)
## Description
-The Spark offline store is an offline store currently in alpha development that provides support for reading [SparkSources](../data-sources/spark.md).
+The Spark offline store provides support for reading [SparkSources](../data-sources/spark.md).
-## Disclaimer
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be converted to a Spark dataframe and processed as a temporary view.
-This Spark offline store still does not achieve full test coverage and continues to fail some integration tests when integrating with the feast universal test suite. Please do NOT assume complete stability of the API.
+## Disclaimer
-* Spark tables and views are allowed as sources that are loaded in from some Spark store(e.g in Hive or in memory).
-* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be converted to a Spark dataframe and processed as a temporary view.
-* A `SparkRetrievalJob` is returned when calling `get_historical_features()`.
- * This allows you to call
- * `to_df` to retrieve the pandas dataframe.
- * `to_arrow` to retrieve the dataframe as a pyarrow Table.
- * `to_spark_df` to retrieve the dataframe the spark.
+The Spark offline store does not achieve full test coverage.
+Please do not assume complete stability.
## Example
@@ -36,3 +31,37 @@ online_store:
path: data/online_store.db
```
{% endcode %}
+
+The full set of configuration options is available in [SparkOfflineStoreConfig](https://rtd.feast.dev/en/master/#feast.infra.offline_stores.contrib.spark_offline_store.spark.SparkOfflineStoreConfig).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the Spark offline store.
+
+| | Spark |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | no |
+| `write_logged_features` (persist logged features to offline store) | no |
+
+Below is a matrix indicating which functionality is supported by `SparkRetrievalJob`.
+
+| | Spark |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | no |
+| export to data lake (S3, GCS, etc.) | no |
+| export to data warehouse | no |
+| export as Spark dataframe | yes |
+| local execution of Python-based on-demand transforms | no |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | yes |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/offline-stores/trino.md b/docs/reference/offline-stores/trino.md
new file mode 100644
index 0000000000..8cc604248f
--- /dev/null
+++ b/docs/reference/offline-stores/trino.md
@@ -0,0 +1,64 @@
+# Trino offline store (contrib)
+
+## Description
+
+The Trino offline store provides support for reading [TrinoSources](../data-sources/trino.md).
+* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframe will be uploaded to Trino as a table in order to complete join operations.
+
+## Disclaimer
+
+The Trino offline store does not achieve full test coverage.
+Please do not assume complete stability.
+
+## Example
+
+{% code title="feature_store.yaml" %}
+```yaml
+project: feature_repo
+registry: data/registry.db
+provider: local
+offline_store:
+ type: feast_trino.trino.TrinoOfflineStore
+ host: localhost
+ port: 8080
+ catalog: memory
+ connector:
+ type: memory
+online_store:
+ path: data/online_store.db
+```
+{% endcode %}
+
+The full set of configuration options is available in [TrinoOfflineStoreConfig](https://rtd.feast.dev/en/master/#trino-offline-store).
+
+## Functionality Matrix
+
+The set of functionality supported by offline stores is described in detail [here](overview.md#functionality).
+Below is a matrix indicating which functionality is supported by the Trino offline store.
+
+| | Trino |
+| :-------------------------------- | :-- |
+| `get_historical_features` (point-in-time correct join) | yes |
+| `pull_latest_from_table_or_query` (retrieve latest feature values) | yes |
+| `pull_all_from_table_or_query` (retrieve a saved dataset) | yes |
+| `offline_write_batch` (persist dataframes to offline store) | no |
+| `write_logged_features` (persist logged features to offline store) | no |
+
+Below is a matrix indicating which functionality is supported by `TrinoRetrievalJob`.
+
+| | Trino |
+| --------------------------------- | --- |
+| export to dataframe | yes |
+| export to arrow table | yes |
+| export to arrow batches | no |
+| export to SQL | yes |
+| export to data lake (S3, GCS, etc.) | no |
+| export to data warehouse | no |
+| export as Spark dataframe | no |
+| local execution of Python-based on-demand transforms | yes |
+| remote execution of Python-based on-demand transforms | no |
+| persist results in the offline store | no |
+| preview the query plan before execution | yes |
+| read partitioned data | yes |
+
+To compare this set of functionality against other offline stores, please see the full [functionality matrix](overview.md#functionality-matrix).
diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md
index 5eb566af3c..8367e2ce74 100644
--- a/docs/reference/online-stores/README.md
+++ b/docs/reference/online-stores/README.md
@@ -2,14 +2,26 @@
Please see [Online Store](../../getting-started/architecture-and-components/online-store.md) for an explanation of online stores.
-{% page-ref page="sqlite.md" %}
+{% content-ref url="sqlite.md" %}
+[sqlite.md](sqlite.md)
+{% endcontent-ref %}
-{% page-ref page="snowflake.md" %}
+{% content-ref url="snowflake.md" %}
+[snowflake.md](snowflake.md)
+{% endcontent-ref %}
-{% page-ref page="redis.md" %}
+{% content-ref url="redis.md" %}
+[redis.md](redis.md)
+{% endcontent-ref %}
-{% page-ref page="datastore.md" %}
+{% content-ref url="datastore.md" %}
+[datastore.md](datastore.md)
+{% endcontent-ref %}
-{% page-ref page="dynamodb.md" %}
+{% content-ref url="dynamodb.md" %}
+[dynamodb.md](dynamodb.md)
+{% endcontent-ref %}
-{% page-ref page="postgres.md" %}
+{% content-ref url="postgres.md" %}
+[postgres.md](postgres.md)
+{% endcontent-ref %}
diff --git a/docs/reference/online-stores/datastore.md b/docs/reference/online-stores/datastore.md
index 012d497f30..ed1425abb6 100644
--- a/docs/reference/online-stores/datastore.md
+++ b/docs/reference/online-stores/datastore.md
@@ -1,4 +1,4 @@
-# Datastore
+# Datastore online store
## Description
diff --git a/docs/reference/online-stores/dynamodb.md b/docs/reference/online-stores/dynamodb.md
index 2af7e422d6..f9f8b4339d 100644
--- a/docs/reference/online-stores/dynamodb.md
+++ b/docs/reference/online-stores/dynamodb.md
@@ -1,4 +1,4 @@
-# DynamoDB
+# DynamoDB online store
## Description
diff --git a/docs/reference/online-stores/postgres.md b/docs/reference/online-stores/postgres.md
index 7d24079da9..4f51dff617 100644
--- a/docs/reference/online-stores/postgres.md
+++ b/docs/reference/online-stores/postgres.md
@@ -1,4 +1,4 @@
-# PostgreSQL (contrib)
+# PostgreSQL online store (contrib)
## Description
diff --git a/docs/reference/online-stores/redis.md b/docs/reference/online-stores/redis.md
index ce1de2ad54..4388ccfa0a 100644
--- a/docs/reference/online-stores/redis.md
+++ b/docs/reference/online-stores/redis.md
@@ -1,4 +1,4 @@
-# Redis
+# Redis online store
## Description
diff --git a/docs/reference/online-stores/snowflake.md b/docs/reference/online-stores/snowflake.md
index ccf3d526da..bf975fa7ea 100644
--- a/docs/reference/online-stores/snowflake.md
+++ b/docs/reference/online-stores/snowflake.md
@@ -1,4 +1,4 @@
-# Snowflake
+# Snowflake online store
## Description
diff --git a/docs/reference/online-stores/sqlite.md b/docs/reference/online-stores/sqlite.md
index fd11e3439c..668e6024e3 100644
--- a/docs/reference/online-stores/sqlite.md
+++ b/docs/reference/online-stores/sqlite.md
@@ -1,4 +1,4 @@
-# SQLite
+# SQLite online store
## Description
diff --git a/docs/tutorials/driver-ranking-with-feast.md b/docs/tutorials/driver-ranking-with-feast.md
deleted file mode 100644
index 4ad34cd9c0..0000000000
--- a/docs/tutorials/driver-ranking-with-feast.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-description: >-
- Making a prediction using a linear regression model is a common use case in
- ML. This model predicts if a driver will complete a trip based on features
- ingested into Feast.
----
-
-# Driver ranking
-
-In this example, you'll learn how to use some of the key functionality in Feast. The tutorial runs in both local mode and on the Google Cloud Platform \(GCP\). For GCP, you must have access to a GCP project already, including read and write permissions to BigQuery.
-
-## [Driver Ranking Example](https://github.com/feast-dev/feast-driver-ranking-tutorial)
-
-This tutorial guides you on how to use Feast with [Scikit-learn](https://scikit-learn.org/stable/). You will learn how to:
-
-* Train a model locally \(on your laptop\) using data from [BigQuery](https://cloud.google.com/bigquery/)
-* Test the model for online inference using [SQLite](https://www.sqlite.org/index.html) \(for fast iteration\)
-* Test the model for online inference using [Firestore](https://firebase.google.com/products/firestore) \(for production use\)
-
-Try it and let us know what you think!
-
-| [ Run in Google Colab ](https://colab.research.google.com/github/feast-dev/feast-driver-ranking-tutorial/blob/master/notebooks/Driver_Ranking_Tutorial.ipynb) | [ View Source in Github](https://github.com/feast-dev/feast-driver-ranking-tutorial/blob/master/notebooks/Driver_Ranking_Tutorial.ipynb) |
-| :--- | :--- |
-
-
diff --git a/docs/tutorials/tutorials-overview.md b/docs/tutorials/tutorials-overview.md
deleted file mode 100644
index 9432783a69..0000000000
--- a/docs/tutorials/tutorials-overview.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Overview
-
-These Feast tutorials showcase how to use Feast to simplify end to end model training / serving.
-
-{% page-ref page="fraud-detection.md" %}
-
-{% page-ref page="driver-ranking-with-feast.md" %}
-
-{% page-ref page="real-time-credit-scoring-on-aws.md" %}
-
-{% page-ref page="driver-stats-on-snowflake.md" %}
-
-{% page-ref page="validating-historical-features.md" %}
-
-{% page-ref page="using-scalable-registry.md" %}
diff --git a/docs/tutorials/tutorials-overview/README.md b/docs/tutorials/tutorials-overview/README.md
new file mode 100644
index 0000000000..76cb2bea6b
--- /dev/null
+++ b/docs/tutorials/tutorials-overview/README.md
@@ -0,0 +1,19 @@
+# Sample use-case tutorials
+
+These Feast tutorials showcase how to use Feast to simplify end to end model training / serving.
+
+{% content-ref url="driver-ranking-with-feast.md" %}
+[driver-ranking-with-feast.md](driver-ranking-with-feast.md)
+{% endcontent-ref %}
+
+{% content-ref url="fraud-detection.md" %}
+[fraud-detection.md](fraud-detection.md)
+{% endcontent-ref %}
+
+{% content-ref url="real-time-credit-scoring-on-aws.md" %}
+[real-time-credit-scoring-on-aws.md](real-time-credit-scoring-on-aws.md)
+{% endcontent-ref %}
+
+{% content-ref url="driver-stats-on-snowflake.md" %}
+[driver-stats-on-snowflake.md](driver-stats-on-snowflake.md)
+{% endcontent-ref %}
diff --git a/docs/tutorials/tutorials-overview/driver-ranking-with-feast.md b/docs/tutorials/tutorials-overview/driver-ranking-with-feast.md
new file mode 100644
index 0000000000..54f3035319
--- /dev/null
+++ b/docs/tutorials/tutorials-overview/driver-ranking-with-feast.md
@@ -0,0 +1,23 @@
+---
+description: >-
+ Making a prediction using a linear regression model is a common use case in
+ ML. This model predicts if a driver will complete a trip based on features
+ ingested into Feast.
+---
+
+# Driver ranking
+
+In this example, you'll learn how to use some of the key functionality in Feast. The tutorial runs in both local mode and on the Google Cloud Platform (GCP). For GCP, you must have access to a GCP project already, including read and write permissions to BigQuery.
+
+## [Driver Ranking Example](https://github.com/feast-dev/feast-driver-ranking-tutorial)
+
+This tutorial guides you on how to use Feast with [Scikit-learn](https://scikit-learn.org/stable/). You will learn how to:
+
+* Train a model locally (on your laptop) using data from [BigQuery](https://cloud.google.com/bigquery/)
+* Test the model for online inference using [SQLite](https://www.sqlite.org/index.html) (for fast iteration)
+* Test the model for online inference using [Firestore](https://firebase.google.com/products/firestore) (for production use)
+
+Try it and let us know what you think!
+
+| [ Run in Google Colab](https://colab.research.google.com/github/feast-dev/feast-driver-ranking-tutorial/blob/master/notebooks/Driver\_Ranking\_Tutorial.ipynb) | [ View Source in Github](https://github.com/feast-dev/feast-driver-ranking-tutorial/blob/master/notebooks/Driver\_Ranking\_Tutorial.ipynb) |
+| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
diff --git a/docs/tutorials/driver-stats-on-snowflake.md b/docs/tutorials/tutorials-overview/driver-stats-on-snowflake.md
similarity index 100%
rename from docs/tutorials/driver-stats-on-snowflake.md
rename to docs/tutorials/tutorials-overview/driver-stats-on-snowflake.md
diff --git a/docs/tutorials/fraud-detection.md b/docs/tutorials/tutorials-overview/fraud-detection.md
similarity index 51%
rename from docs/tutorials/fraud-detection.md
rename to docs/tutorials/tutorials-overview/fraud-detection.md
index 7bdfde760e..30564d0b0c 100644
--- a/docs/tutorials/fraud-detection.md
+++ b/docs/tutorials/tutorials-overview/fraud-detection.md
@@ -17,13 +17,9 @@ Our end-to-end example will perform the following workflows:
* Building point-in-time correct training datasets from feature data and training a model
* Making online predictions from feature data
-Here's a high-level picture of our system architecture on Google Cloud Platform \(GCP\):
-
-
-
-
-
-|  [Run in Google Colab](https://colab.research.google.com/github/feast-dev/feast-fraud-tutorial/blob/master/notebooks/Fraud_Detection_Tutorial.ipynb) | [ View Source on Github](https://github.com/feast-dev/feast-fraud-tutorial/blob/main/notebooks/Fraud_Detection_Tutorial.ipynb) |
-| :--- | :--- |
+Here's a high-level picture of our system architecture on Google Cloud Platform (GCP):
+
+|  [Run in Google Colab](https://colab.research.google.com/github/feast-dev/feast-fraud-tutorial/blob/master/notebooks/Fraud\_Detection\_Tutorial.ipynb) | [ View Source on Github](https://github.com/feast-dev/feast-fraud-tutorial/blob/main/notebooks/Fraud\_Detection\_Tutorial.ipynb) |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
diff --git a/docs/tutorials/real-time-credit-scoring-on-aws.md b/docs/tutorials/tutorials-overview/real-time-credit-scoring-on-aws.md
similarity index 74%
rename from docs/tutorials/real-time-credit-scoring-on-aws.md
rename to docs/tutorials/tutorials-overview/real-time-credit-scoring-on-aws.md
index 43f8c98133..6268aba1f1 100644
--- a/docs/tutorials/real-time-credit-scoring-on-aws.md
+++ b/docs/tutorials/tutorials-overview/real-time-credit-scoring-on-aws.md
@@ -10,20 +10,18 @@ When individuals apply for loans from banks and other credit providers, the deci
In this example, we will demonstrate how a real-time credit scoring system can be built using Feast and Scikit-Learn on AWS, using feature data from S3.
-This real-time system accepts a loan request from a customer and responds within 100ms with a decision on whether their loan has been approved or rejected.
+This real-time system accepts a loan request from a customer and responds within 100ms with a decision on whether their loan has been approved or rejected.
## [Real-time Credit Scoring Example](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial)
This end-to-end tutorial will take you through the following steps:
-* Deploying S3 with Parquet as your primary data source, containing both [loan features](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial/blob/22fc6c7272ef033e7ba0afc64ffaa6f6f8fc0277/data/loan_table_sample.csv) and [zip code features](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial/blob/22fc6c7272ef033e7ba0afc64ffaa6f6f8fc0277/data/zipcode_table_sample.csv)
+* Deploying S3 with Parquet as your primary data source, containing both [loan features](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial/blob/22fc6c7272ef033e7ba0afc64ffaa6f6f8fc0277/data/loan\_table\_sample.csv) and [zip code features](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial/blob/22fc6c7272ef033e7ba0afc64ffaa6f6f8fc0277/data/zipcode\_table\_sample.csv)
* Deploying Redshift as the interface Feast uses to build training datasets
* Registering your features with Feast and configuring DynamoDB for online serving
* Building a training dataset with Feast to train your credit scoring model
* Loading feature values from S3 into DynamoDB
* Making online predictions with your credit scoring model using features from DynamoDB
-| [ View Source on Github](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial) |
-| :--- |
-
-
+| [ View Source on Github](https://github.com/feast-dev/real-time-credit-scoring-on-aws-tutorial) |
+| ---------------------------------------------------------------------------------------------------------------------------------------------- |
diff --git a/docs/tutorials/using-scalable-registry.md b/docs/tutorials/using-scalable-registry.md
index 51fa50ff33..0ee02674b1 100644
--- a/docs/tutorials/using-scalable-registry.md
+++ b/docs/tutorials/using-scalable-registry.md
@@ -13,6 +13,11 @@ However, there's inherent limitations with a file-based registry, since changing
An alternative to the file-based registry is the [SQLRegistry](https://rtd.feast.dev/en/latest/feast.infra.registry_stores.html#feast.infra.registry_stores.sql.SqlRegistry) which ships with Feast. This implementation stores the registry in a relational database, and allows for changes to individual objects atomically.
Under the hood, the SQL Registry implementation uses [SQLAlchemy](https://docs.sqlalchemy.org/en/14/) to abstract over the different databases. Consequently, any [database supported](https://docs.sqlalchemy.org/en/14/core/engines.html#supported-databases) by SQLAlchemy can be used by the SQL Registry.
+The following databases are supported and tested out of the box:
+- PostgreSQL
+- MySQL
+- Sqlite
+
Feast can use the SQL Registry via a config change in the feature_store.yaml file. An example of how to configure this would be:
```yaml
diff --git a/examples/java-demo/README.md b/examples/java-demo/README.md
index b908bb7625..2b1d7f75a5 100644
--- a/examples/java-demo/README.md
+++ b/examples/java-demo/README.md
@@ -39,9 +39,6 @@ For this tutorial, we setup Feast with Redis, using the Feast CLI to register an
connection_string: localhost:6379,password=[YOUR PASSWORD]
offline_store:
type: file
- flags:
- alpha_features: true
- on_demand_transforms: true
```
4. Run `feast apply` to apply your local features to the remote registry
5. Materialize features to the online store:
diff --git a/examples/java-demo/feature_repo/application-override.yaml b/examples/java-demo/feature_repo/application-override.yaml
index dbdeda4c04..5a43d886dc 100644
--- a/examples/java-demo/feature_repo/application-override.yaml
+++ b/examples/java-demo/feature_repo/application-override.yaml
@@ -10,6 +10,7 @@ feature-server:
host: my-redis-master
port: 6379
password: [YOUR PASSWORD]
+ entityKeySerializationVersion: 2
global:
registry:
path: gs://[YOUR BUCKET]/demo-repo/registry.db
diff --git a/examples/java-demo/feature_repo/driver_repo.py b/examples/java-demo/feature_repo/driver_repo.py
index e17a5d9cf8..4c4d45c456 100644
--- a/examples/java-demo/feature_repo/driver_repo.py
+++ b/examples/java-demo/feature_repo/driver_repo.py
@@ -1,13 +1,11 @@
import pandas as pd
from feast.data_source import RequestSource
-from feast.field import Field
from feast.on_demand_feature_view import on_demand_feature_view
-from feast.request_feature_view import RequestFeatureView
from feast.types import Float32, Float64, Int64, String
from google.protobuf.duration_pb2 import Duration
from feast.field import Field
-from feast import Entity, Feature, BatchFeatureView, FileSource
+from feast import Entity, FileSource, FeatureView
driver_hourly_stats = FileSource(
path="data/driver_stats_with_string.parquet",
@@ -15,10 +13,10 @@
created_timestamp_column="created",
)
driver = Entity(name="driver_id", description="driver id",)
-driver_hourly_stats_view = BatchFeatureView(
+driver_hourly_stats_view = FeatureView(
name="driver_hourly_stats",
- entities=["driver_id"],
- ttl=Duration(seconds=86400000),
+ entities=[driver],
+ ttl=timedelta(days=365),
schema=[
Field(name="conv_rate", dtype=Float32),
Field(name="acc_rate", dtype=Float32),
@@ -40,6 +38,7 @@
],
)
+
# Define an on demand feature view which can generate new features based on
# existing feature views and RequestSource features
@on_demand_feature_view(
@@ -58,14 +57,3 @@ def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
df["conv_rate_plus_val2"] = inputs["conv_rate"] + inputs["val_to_add_2"]
return df
-
-# Define request feature view
-driver_age_request_fv = RequestFeatureView(
- name="driver_age",
- request_data_source=RequestSource(
- name="driver_age",
- schema=[
- Field(name="driver_age", dtype=Int64),
- ],
- ),
-)
diff --git a/examples/java-demo/feature_repo/feature_store.yaml b/examples/java-demo/feature_repo/feature_store.yaml
index 91c65b512a..cfb0a89e8d 100644
--- a/examples/java-demo/feature_repo/feature_store.yaml
+++ b/examples/java-demo/feature_repo/feature_store.yaml
@@ -6,6 +6,4 @@ online_store:
connection_string: localhost:6379,password=[YOUR PASSWORD]
offline_store:
type: file
-flags:
- alpha_features: true
- on_demand_transforms: true
+entity_key_serialization_version: 2
\ No newline at end of file
diff --git a/examples/java-demo/feature_repo/test_python_fetch.py b/examples/java-demo/feature_repo/test_python_fetch.py
new file mode 100644
index 0000000000..5e2781e150
--- /dev/null
+++ b/examples/java-demo/feature_repo/test_python_fetch.py
@@ -0,0 +1,26 @@
+from feast import FeatureStore
+
+
+def run_demo():
+ store = FeatureStore(repo_path=".")
+
+ print("\n--- Online features ---")
+ features = store.get_online_features(
+ features=[
+ "driver_hourly_stats:conv_rate",
+ ],
+ entity_rows=[
+ {
+ "driver_id": 1001,
+ },
+ {
+ "driver_id": 1002,
+ }
+ ],
+ ).to_dict()
+ for key, value in sorted(features.items()):
+ print(key, " : ", value)
+
+
+if __name__ == "__main__":
+ run_demo()
diff --git a/infra/charts/feast-feature-server/Chart.yaml b/infra/charts/feast-feature-server/Chart.yaml
index 6c1afc9540..aca9574b0c 100644
--- a/infra/charts/feast-feature-server/Chart.yaml
+++ b/infra/charts/feast-feature-server/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
name: feast-feature-server
description: Feast Feature Server in Go or Python
type: application
-version: 0.22.0
+version: 0.23.1
keywords:
- machine learning
- big data
diff --git a/infra/charts/feast-feature-server/README.md b/infra/charts/feast-feature-server/README.md
index a55451e788..e83f75b39d 100644
--- a/infra/charts/feast-feature-server/README.md
+++ b/infra/charts/feast-feature-server/README.md
@@ -1,6 +1,6 @@
# feast-feature-server
- 
+ 
Feast Feature Server in Go or Python
@@ -12,28 +12,28 @@ Feast Feature Server in Go or Python
## Values
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| affinity | object | `{}` | |
-| fullnameOverride | string | `""` | |
-| image.pullPolicy | string | `"IfNotPresent"` | |
-| image.repository | string | `""` | |
-| image.tag | string | `""` | |
-| imagePullSecrets | list | `[]` | |
-| livenessProbe.initialDelaySeconds | int | `30` | |
-| livenessProbe.periodSeconds | int | `30` | |
-| nameOverride | string | `""` | |
-| nodeSelector | object | `{}` | |
-| podAnnotations | object | `{}` | |
-| podSecurityContext | object | `{}` | |
-| readinessProbe.initialDelaySeconds | int | `20` | |
-| readinessProbe.periodSeconds | int | `10` | |
-| replicaCount | int | `1` | |
-| resources | object | `{}` | |
-| securityContext | object | `{}` | |
-| service.port | int | `80` | |
-| service.type | string | `"ClusterIP"` | |
-| tolerations | list | `[]` | |
+| Key | Type | Default | Description |
+| ---------------------------------- | ------ | ---------------- | ----------- |
+| affinity | object | `{}` | |
+| fullnameOverride | string | `""` | |
+| image.pullPolicy | string | `"IfNotPresent"` | |
+| image.repository | string | `""` | |
+| image.tag | string | `""` | |
+| imagePullSecrets | list | `[]` | |
+| livenessProbe.initialDelaySeconds | int | `30` | |
+| livenessProbe.periodSeconds | int | `30` | |
+| nameOverride | string | `""` | |
+| nodeSelector | object | `{}` | |
+| podAnnotations | object | `{}` | |
+| podSecurityContext | object | `{}` | |
+| readinessProbe.initialDelaySeconds | int | `20` | |
+| readinessProbe.periodSeconds | int | `10` | |
+| replicaCount | int | `1` | |
+| resources | object | `{}` | |
+| securityContext | object | `{}` | |
+| service.port | int | `80` | |
+| service.type | string | `"ClusterIP"` | |
+| tolerations | list | `[]` | |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
diff --git a/infra/charts/feast-python-server/Chart.yaml b/infra/charts/feast-python-server/Chart.yaml
index 6ab82b7a65..b8b229fd59 100644
--- a/infra/charts/feast-python-server/Chart.yaml
+++ b/infra/charts/feast-python-server/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
name: feast-python-server
description: Feast Feature Server in Python
type: application
-version: 0.23.0
+version: 0.23.1
keywords:
- machine learning
- big data
diff --git a/infra/charts/feast-python-server/README.md b/infra/charts/feast-python-server/README.md
index e3da9b1d29..5f25b7d8fe 100644
--- a/infra/charts/feast-python-server/README.md
+++ b/infra/charts/feast-python-server/README.md
@@ -1,6 +1,6 @@
# feast-python-server
- 
+ 
Feast Feature Server in Python
diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml
index f4e33de7f3..e7bc00d8a6 100644
--- a/infra/charts/feast/Chart.yaml
+++ b/infra/charts/feast/Chart.yaml
@@ -1,7 +1,7 @@
apiVersion: v1
description: Feature store for machine learning
name: feast
-version: 0.23.0
+version: 0.23.1
keywords:
- machine learning
- big data
diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md
index f71dcf6124..b7741c4b45 100644
--- a/infra/charts/feast/README.md
+++ b/infra/charts/feast/README.md
@@ -8,7 +8,7 @@ This repo contains Helm charts for Feast components that are being installed on
## Chart: Feast
-Feature store for machine learning Current chart version is `0.23.0`
+Feature store for machine learning Current chart version is `0.23.1`
## Installation
@@ -55,8 +55,8 @@ For more details, please see: https://docs.feast.dev/how-to-guides/running-feast
| Repository | Name | Version |
|------------|------|---------|
| https://charts.helm.sh/stable | redis | 10.5.6 |
-| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.23.0 |
-| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.23.0 |
+| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.23.1 |
+| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.23.1 |
## Values
diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml
index ee08b0b0f8..42b366e0e7 100644
--- a/infra/charts/feast/charts/feature-server/Chart.yaml
+++ b/infra/charts/feast/charts/feature-server/Chart.yaml
@@ -1,8 +1,8 @@
apiVersion: v1
description: "Feast Feature Server: Online feature serving service for Feast"
name: feature-server
-version: 0.23.0
-appVersion: v0.23.0
+version: 0.23.1
+appVersion: v0.23.1
keywords:
- machine learning
- big data
diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md
index 4717cfff3a..84ed8abd25 100644
--- a/infra/charts/feast/charts/feature-server/README.md
+++ b/infra/charts/feast/charts/feature-server/README.md
@@ -1,6 +1,6 @@
# feature-server
- 
+ 
Feast Feature Server: Online feature serving service for Feast
@@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast
| envOverrides | object | `{}` | Extra environment variables to set |
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository |
-| image.tag | string | `"0.23.0"` | Image tag |
+| image.tag | string | `"0.23.1"` | Image tag |
| ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress |
| ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth |
| ingress.grpc.class | string | `"nginx"` | Which ingress controller to use |
diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml
index 011ce9dc33..f23c77dd1a 100644
--- a/infra/charts/feast/charts/feature-server/values.yaml
+++ b/infra/charts/feast/charts/feature-server/values.yaml
@@ -5,7 +5,7 @@ image:
# image.repository -- Docker image for Feature Server repository
repository: feastdev/feature-server-java
# image.tag -- Image tag
- tag: 0.23.0
+ tag: 0.23.1
# image.pullPolicy -- Image pull policy
pullPolicy: IfNotPresent
diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml
index 07055730c5..834ce7fe56 100644
--- a/infra/charts/feast/charts/transformation-service/Chart.yaml
+++ b/infra/charts/feast/charts/transformation-service/Chart.yaml
@@ -1,8 +1,8 @@
apiVersion: v1
description: "Transformation service: to compute on-demand features"
name: transformation-service
-version: 0.23.0
-appVersion: v0.23.0
+version: 0.23.1
+appVersion: v0.23.1
keywords:
- machine learning
- big data
diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md
index 9bc7a1e5d6..84525fc0d6 100644
--- a/infra/charts/feast/charts/transformation-service/README.md
+++ b/infra/charts/feast/charts/transformation-service/README.md
@@ -1,6 +1,6 @@
# transformation-service
- 
+ 
Transformation service: to compute on-demand features
@@ -13,7 +13,7 @@ Transformation service: to compute on-demand features
| envOverrides | object | `{}` | Extra environment variables to set |
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository |
-| image.tag | string | `"0.23.0"` | Image tag |
+| image.tag | string | `"0.23.1"` | Image tag |
| nodeSelector | object | `{}` | Node labels for pod assignment |
| podLabels | object | `{}` | Labels to be added to Feast Serving pods |
| replicaCount | int | `1` | Number of pods that will be created |
diff --git a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml
index 555e93a306..c003b87cc2 100644
--- a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml
+++ b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml
@@ -2,7 +2,4 @@ registry:
path: {{ .Values.global.registry.path }}
cache_ttl_seconds: {{ .Values.global.registry.cache_ttl_seconds }}
provider: local
-project: {{ .Values.global.project }}
-flags:
- on_demand_transforms: true
- alpha_features: true
\ No newline at end of file
+project: {{ .Values.global.project }}
\ No newline at end of file
diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml
index c1e506a476..53841df813 100644
--- a/infra/charts/feast/charts/transformation-service/values.yaml
+++ b/infra/charts/feast/charts/transformation-service/values.yaml
@@ -5,7 +5,7 @@ image:
# image.repository -- Docker image for Transformation Server repository
repository: feastdev/feature-transformation-server
# image.tag -- Image tag
- tag: 0.23.0
+ tag: 0.23.1
# image.pullPolicy -- Image pull policy
pullPolicy: IfNotPresent
diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml
index c88fb7a4fa..8d24fcd9e6 100644
--- a/infra/charts/feast/requirements.yaml
+++ b/infra/charts/feast/requirements.yaml
@@ -1,12 +1,12 @@
dependencies:
- name: feature-server
alias: feature-server
- version: 0.23.0
+ version: 0.23.1
condition: feature-server.enabled
repository: https://feast-helm-charts.storage.googleapis.com
- name: transformation-service
alias: transformation-service
- version: 0.23.0
+ version: 0.23.1
condition: transformation-service.enabled
repository: https://feast-helm-charts.storage.googleapis.com
- name: redis
diff --git a/infra/scripts/helm/push-helm-charts.sh b/infra/scripts/helm/push-helm-charts.sh
index 08753adb3c..1c32ee985b 100755
--- a/infra/scripts/helm/push-helm-charts.sh
+++ b/infra/scripts/helm/push-helm-charts.sh
@@ -17,7 +17,9 @@ helm repo add feast-helm-chart-repo $bucket
cd infra/charts
helm package feast
helm package feast-python-server
+helm package feast-feature-server
helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force
helm gcs push --public feast-python-server-${1}.tgz feast-helm-chart-repo --force
+helm gcs push --public feast-feature-server-${1}.tgz feast-helm-chart-repo --force
rm -f ./*.tgz
\ No newline at end of file
diff --git a/infra/scripts/helm/validate-helm-chart-versions.sh b/infra/scripts/helm/validate-helm-chart-versions.sh
index 0ba75bd744..aac79d9315 100755
--- a/infra/scripts/helm/validate-helm-chart-versions.sh
+++ b/infra/scripts/helm/validate-helm-chart-versions.sh
@@ -3,7 +3,7 @@
set -e
# Amount of file locations that need to be bumped in unison when versions increment
-UNIQUE_VERSIONS_COUNT=18
+UNIQUE_VERSIONS_COUNT=20
if [ $# -ne 1 ]; then
echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0"
diff --git a/infra/scripts/publish-java-sdk.sh b/infra/scripts/publish-java-sdk.sh
index 68174db17a..0e8b62478f 100755
--- a/infra/scripts/publish-java-sdk.sh
+++ b/infra/scripts/publish-java-sdk.sh
@@ -69,4 +69,4 @@ gpg --import --batch --yes $GPG_KEY_IMPORT_DIR/private-key
echo "============================================================"
echo "Deploying Java SDK with revision: $REVISION"
echo "============================================================"
-mvn -f java/pom.xml --projects .,datatypes,sdk -Drevision=$REVISION --batch-mode clean deploy
+mvn -f java/pom.xml --projects .,datatypes,serving-client -Drevision=$REVISION --batch-mode clean deploy
diff --git a/infra/scripts/create-cluster.sh b/infra/scripts/redis-cluster.sh
similarity index 100%
rename from infra/scripts/create-cluster.sh
rename to infra/scripts/redis-cluster.sh
diff --git a/infra/scripts/release/files_to_bump.txt b/infra/scripts/release/files_to_bump.txt
index a1e2d29623..6a558e04f0 100644
--- a/infra/scripts/release/files_to_bump.txt
+++ b/infra/scripts/release/files_to_bump.txt
@@ -9,4 +9,7 @@ infra/charts/feast/charts/feature-server/values.yaml 8
infra/charts/feast/README.md 11 58 59
infra/charts/feast-python-server/Chart.yaml 5
infra/charts/feast-python-server/README.md 3
-java/pom.xml 41
+infra/charts/feast-feature-server/Chart.yaml 5
+infra/charts/feast-feature-server/README.md 3
+java/pom.xml 38
+ui/package.json 3
diff --git a/infra/templates/README.md.jinja2 b/infra/templates/README.md.jinja2
index 6a8ebdbab7..e59a364d81 100644
--- a/infra/templates/README.md.jinja2
+++ b/infra/templates/README.md.jinja2
@@ -21,7 +21,7 @@ Feast (**Fea**ture **St**ore) is an open source feature store for machine learni
Feast allows ML platform teams to:
-* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (for serving pre-computed features online).
+* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (to serve pre-computed features online).
* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensure that future feature values do not leak to models during training.
* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another.
diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md
index f6c789d984..74549034b9 100644
--- a/java/CONTRIBUTING.md
+++ b/java/CONTRIBUTING.md
@@ -2,17 +2,40 @@
> The higher level [Development Guide](https://docs.feast.dev/v/master/project/development-guide)
> gives contributing to Feast codebase as a whole.
-### Overview
+## Overview
This guide is targeted at developers looking to contribute to Feast components in
the feast-java Repository:
- [Feast Serving](#feast-serving)
-- [Feast Java Client](#feast-java-client)
+- [Feast Serving Client](#feast-serving-client)
> Don't see the Feast component that you want to contribute to here?
> Check out the [Development Guide](https://docs.feast.dev/v/master/project/development-guide)
> to learn how Feast components are distributed over multiple repositories.
-#### Common Setup
+### Repository structure
+There are four key top level packages:
+- `serving`: Feast Serving (a gRPC service to serve features)
+- `serving-client`: Feast Serving Client (a thin Java client to communicate with Feast serving via gRPC )
+- `datatypes`: A symlink to the overall project protos. These include the core serving gRPC protos, proto representations of all objects in the Feast registry.
+- `coverage`: Generates JaCoCo coverage reports
+
+#### Feast Serving
+> **Note:** there are references to metrics collection in the code. These are unused and exist for legacy reasons (from when this used Spring Boot), but remain in the code until published to StatsD / Prometheus Pushgateway.
+
+The primary entrypoint into the Feast Serving server is `ServingGuiceApplication`, which connects to the rest of the packages:
+- `connectors`: Contains online store connectors (e.g. Redis)
+- `exception`: Contains user-facing exceptions thrown by Feast Serving
+- `registry`: Logic to parse a Feast file-based registry (in GCS, S3, or local) into the `Registry` proto object, and automatically re-sync the registry.
+- `service`: Core logic that exposes and backs the serving APIs. This includes communication with a feature transformation server to execute on demand transformations
+ - The root code in this package creates the main entrypoint (`ServingServiceV2`) which is injected into `OnlineServingGrpcServiceV2` in `grpc/` implement the gRPC service.
+ - `config`: Guice modules to power the server and config
+ - Includes server config / guice modules in `ServerModule`
+ - Maps overall Feast Serving user configuration from Java to YAML in `ApplicationPropertiesModule` and `ApplicationProperties`
+ - `controller`: server controllers (right now, only a gRPC health check)
+ - `grpc`: Implementation of the gRPC serving service
+ - `interceptors`: gRPC interceptors (currently used to produce metrics around each gRPC request)
+
+### Common Setup
Common Environment Setup for all feast-java Feast components:
Ensure following development tools are installed:
@@ -20,7 +43,7 @@ Ensure following development tools are installed:
- Maven 3.6
- `make`
-#### Code Style
+### Code Style
Feast's Java codebase conforms to the [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html).
Automatically format the code to conform the style guide by:
@@ -33,7 +56,7 @@ mvn spotless:apply
> If you're using IntelliJ, you can import these [code style settings](https://github.com/google/styleguide/blob/gh-pages/intellij-java-google-style.xml)
> if you'd like to use the IDE's reformat function.
-#### Project Makefile
+### Project Makefile
The Project Makefile provides useful shorthands for common development tasks:
@@ -42,18 +65,18 @@ Run all Unit tests:
make test-java
```
-Run all Integration tests:
+Run all Integration tests (note: this also runs GCS + S3 based tests which should fail):
```
make test-java-integration
```
-Building Docker images for Feast Core & Feast Serving:
+Building Docker images for Feast Serving:
```
make build-docker REGISTRY=gcr.io/kf-feast VERSION=develop
```
-#### IDE Setup
+### IDE Setup
If you're using IntelliJ, some additional steps may be needed to make sure IntelliJ autocomplete works as expected.
Specifically, proto-generated code is not indexed by IntelliJ. To fix this, navigate to the following window in IntelliJ:
`Project Structure > Modules > datatypes-java`, and mark the following folders as `Source` directorys:
@@ -64,12 +87,12 @@ Specifically, proto-generated code is not indexed by IntelliJ. To fix this, navi
## Feast Serving
See instructions [here](serving/README.md) for developing.
-## Feast Java Client
+## Feast Serving Client
### Environment Setup
-Setting up your development environment for Feast Java SDK:
+Setting up your development environment:
1. Complete the feast-java [Common Setup](#common-setup)
-> Feast Java Client is a Java Client for retrieving Features from a running Feast Serving instance.
+> Feast Serving Client is a Serving Client for retrieving Features from a running Feast Serving instance.
> See the [Feast Serving Section](#feast-serving) section for how to get a Feast Serving instance running.
### Building
diff --git a/java/README.md b/java/README.md
index 8c3d93628e..53573a6fed 100644
--- a/java/README.md
+++ b/java/README.md
@@ -3,8 +3,8 @@
### Overview
This repository contains the following Feast components.
-* Feast Serving: A service used to serve the latest feature values to models.
-* Feast Java SDK: A client used to retrieve features from Feast Serving.
+* Feast Serving: A gRPC service used to serve the latest feature values to models.
+* Feast Serving Client: A client used to retrieve features from Feast Serving.
### Architecture
@@ -16,6 +16,7 @@ Guides on Contributing:
- [Contribution Process for Feast](https://docs.feast.dev/v/master/project/contributing)
- [Development Guide for Feast](https://docs.feast.dev/v/master/project/development-guide)
- [Development Guide for feast-java (this repository)](CONTRIBUTING.md)
+ - **Note**: includes installing without using Helm
### Installing using Helm
Please see the Helm charts in [infra/charts/feast](../infra/charts/feast).
diff --git a/java/common/pom.xml b/java/common/pom.xml
deleted file mode 100644
index 6b580880f1..0000000000
--- a/java/common/pom.xml
+++ /dev/null
@@ -1,162 +0,0 @@
-
-
-
- 4.0.0
-
-
- feast-parent
- dev.feast
- ${revision}
-
-
- Feast Common
- Feast common module with functionality that can be reused
- feast-common
-
-
-
- dev.feast
- feast-datatypes
- ${project.version}
- compile
-
-
- com.google.protobuf
- protobuf-java-util
- ${protobuf.version}
-
-
-
- org.apache.commons
- commons-lang3
- 3.6
-
-
-
-
- org.projectlombok
- lombok
- ${lombok.version}
-
-
- com.google.auto.value
- auto-value-annotations
- ${auto.value.version}
-
-
-
-
- com.google.code.gson
- gson
- ${gson.version}
-
-
- io.gsonfire
- gson-fire
- ${gson.fire.version}
-
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.12.6.1
-
-
- com.fasterxml.jackson.datatype
- jackson-datatype-jsr310
- ${jackson.version}
-
-
-
-
- org.slf4j
- slf4j-api
-
-
- org.fluentd
- fluent-logger
- 0.3.1
-
-
-
- javax.xml.bind
- jaxb-api
-
-
- javax.validation
- validation-api
-
-
-
-
- com.google.code.findbugs
- jsr305
- 3.0.2
-
-
-
-
- org.hamcrest
- hamcrest-library
- test
- ${hamcrest.version}
-
-
-
- junit
- junit
- 4.13.2
-
-
- org.mockito
- mockito-core
- ${mockito.version}
- test
-
-
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
-
-
-
-
- org.jacoco
- jacoco-maven-plugin
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 3.0.0-M4
-
- -Xms2048m -Xmx2048m -Djdk.net.URLClassPath.disableClassPathURLCheck=true
-
-
-
- org.sonatype.plugins
- nexus-staging-maven-plugin
-
- true
-
-
-
-
-
diff --git a/java/common/src/main/java/feast/common/logging/AuditLogger.java b/java/common/src/main/java/feast/common/logging/AuditLogger.java
deleted file mode 100644
index f3538a794b..0000000000
--- a/java/common/src/main/java/feast/common/logging/AuditLogger.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-import com.google.protobuf.util.JsonFormat;
-import feast.common.logging.config.LoggingProperties;
-import feast.common.logging.config.LoggingProperties.AuditLogProperties;
-import feast.common.logging.entry.*;
-import feast.common.logging.entry.LogResource.ResourceType;
-import java.net.InetAddress;
-import java.net.UnknownHostException;
-import java.util.HashMap;
-import java.util.Map;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.lang3.StringUtils;
-import org.fluentd.logger.FluentLogger;
-import org.slf4j.Marker;
-import org.slf4j.MarkerFactory;
-import org.slf4j.event.Level;
-
-@Slf4j
-public class AuditLogger {
- private static final String FLUENTD_DESTINATION = "fluentd";
- private static final Marker AUDIT_MARKER = MarkerFactory.getMarker("AUDIT_MARK");
- private static FluentLogger fluentLogger;
- private static AuditLogProperties properties;
- private static String artifact;
- private static String version;
-
- public AuditLogger(LoggingProperties loggingProperties, String artifact, String version) {
- // Spring runs this constructor when creating the AuditLogger bean,
- // which allows us to populate the AuditLogger class with dependencies.
- // This allows us to use the dependencies in the AuditLogger's static methods
- AuditLogger.properties = loggingProperties.getAudit();
- AuditLogger.artifact = artifact;
- AuditLogger.version = version;
- if (AuditLogger.properties.getMessageLogging() != null
- && AuditLogger.properties.getMessageLogging().isEnabled()) {
- AuditLogger.fluentLogger =
- FluentLogger.getLogger(
- "feast",
- AuditLogger.properties.getMessageLogging().getFluentdHost(),
- AuditLogger.properties.getMessageLogging().getFluentdPort());
- }
- }
-
- /**
- * Log the handling of a Protobuf message by a service call.
- *
- * @param level log level
- * @param entryBuilder with all fields set except instance.
- */
- public static void logMessage(Level level, MessageAuditLogEntry.Builder entryBuilder) {
- log(level, entryBuilder.setComponent(artifact).setVersion(version).build());
- }
-
- /**
- * Log an action being taken on a specific resource
- *
- * @param level describing the severity of the log.
- * @param action name of the action being taken on specific resource.
- * @param resourceType the type of resource being logged.
- * @param resourceId resource specific identifier identifing the instance of the resource.
- */
- public static void logAction(
- Level level, String action, ResourceType resourceType, String resourceId) {
- log(
- level,
- ActionAuditLogEntry.of(
- artifact, version, LogResource.of(resourceType, resourceId), action));
- }
-
- /**
- * Log a transition in state/status in a specific resource.
- *
- * @param level describing the severity of the log.
- * @param status name of end status which the resource transition to.
- * @param resourceType the type of resource being logged.
- * @param resourceId resource specific identifier identifing the instance of the resource.
- */
- public static void logTransition(
- Level level, String status, ResourceType resourceType, String resourceId) {
- log(
- level,
- TransitionAuditLogEntry.of(
- artifact, version, LogResource.of(resourceType, resourceId), status));
- }
-
- /**
- * Log given {@link AuditLogEntry} at the given logging {@link Level} to the Audit log.
- *
- * @param level describing the severity of the log.
- * @param entry the {@link AuditLogEntry} to push to the audit log.
- */
- private static void log(Level level, AuditLogEntry entry) {
- // Check if audit logging is of this specific log entry enabled.
- if (!properties.isEnabled()) {
- return;
- }
-
- // Either forward log to logging layer or log to console
- String destination = properties.getMessageLogging().getDestination();
- if (destination.equals(FLUENTD_DESTINATION)) {
- if (entry.getKind() == AuditLogEntryKind.MESSAGE) {
- Map fluentdLogs = new HashMap<>();
- MessageAuditLogEntry messageAuditLogEntry = (MessageAuditLogEntry) entry;
- String releaseName;
-
- try {
- releaseName =
- StringUtils.defaultIfEmpty(
- System.getenv("RELEASE_NAME"), InetAddress.getLocalHost().getHostAddress());
- } catch (UnknownHostException e) {
- releaseName = StringUtils.defaultIfEmpty(System.getenv("RELEASE_NAME"), "");
- }
-
- fluentdLogs.put("id", messageAuditLogEntry.getId());
- fluentdLogs.put("identity", messageAuditLogEntry.getIdentity());
- fluentdLogs.put("service", messageAuditLogEntry.getService());
- fluentdLogs.put("status_code", messageAuditLogEntry.getStatusCode());
- fluentdLogs.put("method", messageAuditLogEntry.getMethod());
- fluentdLogs.put("release_name", releaseName);
- try {
- fluentdLogs.put("request", JsonFormat.printer().print(messageAuditLogEntry.getRequest()));
- fluentdLogs.put(
- "response", JsonFormat.printer().print(messageAuditLogEntry.getResponse()));
- } catch (InvalidProtocolBufferException e) {
- }
- fluentLogger.log("fluentd", fluentdLogs);
- }
- } else {
- // Log event to audit log through enabled formats
- String entryJSON = entry.toJSON();
- switch (level) {
- case TRACE:
- log.trace(AUDIT_MARKER, entryJSON);
- break;
- case DEBUG:
- log.debug(AUDIT_MARKER, entryJSON);
- break;
- case INFO:
- log.info(AUDIT_MARKER, entryJSON);
- break;
- case WARN:
- log.warn(AUDIT_MARKER, entryJSON);
- break;
- case ERROR:
- log.error(AUDIT_MARKER, entryJSON);
- break;
- }
- }
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/config/LoggingProperties.java b/java/common/src/main/java/feast/common/logging/config/LoggingProperties.java
deleted file mode 100644
index 06e62f71af..0000000000
--- a/java/common/src/main/java/feast/common/logging/config/LoggingProperties.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2019 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.config;
-
-import feast.common.validators.OneOfStrings;
-import javax.validation.constraints.NotNull;
-import lombok.Getter;
-import lombok.Setter;
-
-@Getter
-@Setter
-public class LoggingProperties {
- @NotNull private AuditLogProperties audit;
-
- @Getter
- @Setter
- public static class AuditLogProperties {
- // Whether to enable/disable audit logging entirely.
- private boolean enabled;
-
- private MessageLogging messageLogging;
-
- @Getter
- @Setter
- public static class MessageLogging {
- // Whether to enable/disable message level (ie request/response) audit logging.
- private boolean enabled;
-
- // Whether to log to console or fluentd
- @OneOfStrings({"console", "fluentd"})
- private String destination;
-
- // fluentD service host for external (request/response) logging.
- private String fluentdHost;
-
- // fluentD service port for external (request/response) logging.
- private Integer fluentdPort;
- }
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/ActionAuditLogEntry.java b/java/common/src/main/java/feast/common/logging/entry/ActionAuditLogEntry.java
deleted file mode 100644
index 4fdeaee32a..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/ActionAuditLogEntry.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import com.google.auto.value.AutoValue;
-
-/** ActionAuditLogEntry records an action being taken on a specific resource */
-@AutoValue
-public abstract class ActionAuditLogEntry extends AuditLogEntry {
- /** @return The name of the action taken on the resource. */
- public abstract String getAction();
-
- /** @return The target resource of which the action was taken on. */
- public abstract LogResource getResource();
-
- /**
- * Create an {@link AuditLogEntry} that records an action being taken on a specific resource.
- *
- * @param component The name of th Feast component producing this {@link AuditLogEntry}.
- * @param version The version of Feast producing this {@link AuditLogEntry}.
- * @param resource The target resource of which the action was taken on.
- * @param action The name of the action being taken on the given resource.
- * @return log entry that records an action being taken on a specific resource
- */
- public static ActionAuditLogEntry of(
- String component, String version, LogResource resource, String action) {
- return new AutoValue_ActionAuditLogEntry(
- component, version, AuditLogEntryKind.ACTION, action, resource);
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/AuditLogEntry.java b/java/common/src/main/java/feast/common/logging/entry/AuditLogEntry.java
deleted file mode 100644
index 8148c474b0..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/AuditLogEntry.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2019 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import com.google.gson.Gson;
-
-/**
- * AuditLogEntry represents a single audit Log Entry. Audit log entry can converted into string with
- * {{@link #toString()} for human readable representation. Or structured JSON with {{@link
- * #toJSON()} for a machine parsable representation.
- */
-public abstract class AuditLogEntry {
- /** Declare Log Type to allow external Logging systems to filter out {@link AuditLogEntry} */
- public final String logType = "FeastAuditLogEntry";
-
- public final String application = "Feast";
-
- /**
- * The name of the Feast component producing this {@link AuditLogEntry}
- *
- * @return the component
- */
- public abstract String getComponent();
-
- /**
- * The version of Feast producing this {@link AuditLogEntry}
- *
- * @return version
- */
- public abstract String getVersion();
-
- public abstract AuditLogEntryKind getKind();
-
- /**
- * Return a structured JSON representation of this {@link AuditLogEntry}
- *
- * @return structured JSON representation
- */
- public String toJSON() {
- Gson gson = new Gson();
- return gson.toJson(this);
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/AuditLogEntryKind.java b/java/common/src/main/java/feast/common/logging/entry/AuditLogEntryKind.java
deleted file mode 100644
index d673f6bdb3..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/AuditLogEntryKind.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2019 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-/** AuditLogEntryKind lists the various kinds of {@link AuditLogEntry} */
-public enum AuditLogEntryKind {
- MESSAGE,
- ACTION,
- TRANSITION,
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/LogResource.java b/java/common/src/main/java/feast/common/logging/entry/LogResource.java
deleted file mode 100644
index 1d0345a404..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/LogResource.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2019 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import com.google.auto.value.AutoValue;
-
-@AutoValue
-/**
- * LogResource is used in {@link AuditLogEntry} to reference a specific resource as the subject of
- * the log
- */
-public abstract class LogResource {
- public enum ResourceType {
- JOB,
- FEATURE_TABLE
- }
-
- public abstract ResourceType getType();
-
- public abstract String getId();
-
- public static LogResource of(ResourceType type, String id) {
- return new AutoValue_LogResource(type, id);
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/MessageAuditLogEntry.java b/java/common/src/main/java/feast/common/logging/entry/MessageAuditLogEntry.java
deleted file mode 100644
index 8ad428a3a3..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/MessageAuditLogEntry.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import com.google.auto.value.AutoValue;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import com.google.gson.JsonParser;
-import com.google.gson.JsonSerializer;
-import com.google.protobuf.Empty;
-import com.google.protobuf.InvalidProtocolBufferException;
-import com.google.protobuf.Message;
-import com.google.protobuf.util.JsonFormat;
-import io.grpc.Status.Code;
-import java.util.UUID;
-
-/** MessageAuditLogEntry records the handling of a Protobuf message by a service call. */
-@AutoValue
-public abstract class MessageAuditLogEntry extends AuditLogEntry {
- /** @return Id used to identify the service call that the log entry is recording */
- public abstract UUID getId();
-
- /** @return The name of the service that was used to handle the service call. */
- public abstract String getService();
-
- /** @return The name of the method that was used to handle the service call. */
- public abstract String getMethod();
-
- /**
- * @return The request Protobuf {@link Message} that was passed to the Service in the service
- * call.
- */
- public abstract Message getRequest();
-
- /**
- * @return The response Protobuf {@link Message} that was passed to the Service in the service
- * call. May be an {@link Empty} protobuf no request could be collected due to an error.
- */
- public abstract Message getResponse();
-
- /**
- * @return The authenticated identity that was assumed during the handling of the service call.
- * For example, the user id or email that identifies the user making the call. Empty if the
- * service call is not authenticated.
- */
- public abstract String getIdentity();
-
- /** @return The result status code of the service call. */
- public abstract Code getStatusCode();
-
- @AutoValue.Builder
- public abstract static class Builder {
- public abstract Builder setId(UUID id);
-
- public abstract Builder setComponent(String component);
-
- public abstract Builder setVersion(String component);
-
- public abstract Builder setKind(AuditLogEntryKind kind);
-
- public abstract Builder setService(String name);
-
- public abstract Builder setMethod(String name);
-
- public abstract Builder setRequest(Message request);
-
- public abstract Builder setResponse(Message response);
-
- public abstract Builder setIdentity(String identity);
-
- public abstract Builder setStatusCode(Code statusCode);
-
- public abstract MessageAuditLogEntry build();
- }
-
- public static MessageAuditLogEntry.Builder newBuilder() {
- return new AutoValue_MessageAuditLogEntry.Builder()
- .setKind(AuditLogEntryKind.MESSAGE)
- .setId(UUID.randomUUID());
- }
-
- @Override
- public String toJSON() {
- // GSON requires custom typeadapter (serializer) to convert Protobuf messages to JSON properly
- Gson gson =
- new GsonBuilder()
- .registerTypeAdapter(
- Message.class,
- (JsonSerializer)
- (message, type, context) -> {
- try {
- String messageJSON = JsonFormat.printer().print(message);
- return new JsonParser().parse(messageJSON);
- } catch (InvalidProtocolBufferException e) {
-
- throw new RuntimeException(
- "Unexpected exception converting Protobuf to JSON", e);
- }
- })
- .create();
- return gson.toJson(this);
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/entry/TransitionAuditLogEntry.java b/java/common/src/main/java/feast/common/logging/entry/TransitionAuditLogEntry.java
deleted file mode 100644
index 224f10e0b5..0000000000
--- a/java/common/src/main/java/feast/common/logging/entry/TransitionAuditLogEntry.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import com.google.auto.value.AutoValue;
-
-/** TransitionAuditLogEntry records a transition in state/status in a specific resource. */
-@AutoValue
-public abstract class TransitionAuditLogEntry extends AuditLogEntry {
- /** @return The resource which the state/status transition occured. */
- public abstract LogResource getResource();
-
- /** @return The end status with the resource transition to. */
- public abstract String getStatus();
-
- /**
- * Construct a new {@link AuditLogEntry} to record a transition in state/status in a specific
- * resource.
- *
- * @param component The name of th Feast component producing this {@link AuditLogEntry}.
- * @param version The version of Feast producing this {@link AuditLogEntry}.
- * @param resource the resource which the transtion occured
- * @param status the end status which the resource transitioned to.
- * @return log entry to record a transition in state/status in a specific resource
- */
- public static TransitionAuditLogEntry of(
- String component, String version, LogResource resource, String status) {
- return new AutoValue_TransitionAuditLogEntry(
- component, version, AuditLogEntryKind.TRANSITION, resource, status);
- }
-}
diff --git a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java b/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java
deleted file mode 100644
index e34fefd115..0000000000
--- a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2019 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.interceptors;
-
-import com.google.protobuf.Empty;
-import com.google.protobuf.Message;
-import feast.common.logging.AuditLogger;
-import feast.common.logging.config.LoggingProperties;
-import feast.common.logging.entry.MessageAuditLogEntry;
-import io.grpc.ForwardingServerCall.SimpleForwardingServerCall;
-import io.grpc.ForwardingServerCallListener.SimpleForwardingServerCallListener;
-import io.grpc.Metadata;
-import io.grpc.ServerCall;
-import io.grpc.ServerCall.Listener;
-import io.grpc.ServerCallHandler;
-import io.grpc.ServerInterceptor;
-import io.grpc.Status;
-import org.slf4j.event.Level;
-
-/**
- * GrpcMessageInterceptor intercepts a GRPC calls to log handling of GRPC messages to the Audit Log.
- * Intercepts the incoming and outgoing messages logs them to the audit log, together with method
- * name and assumed authenticated identity (if authentication is enabled). NOTE:
- * GrpcMessageInterceptor assumes that all service calls are unary (ie single request/response).
- */
-public class GrpcMessageInterceptor implements ServerInterceptor {
- private final LoggingProperties loggingProperties;
-
- /**
- * Construct GrpcMessageIntercetor.
- *
- * @param loggingProperties properties used to configure logging interceptor.
- */
- public GrpcMessageInterceptor(LoggingProperties loggingProperties) {
- this.loggingProperties = loggingProperties;
- }
-
- @Override
- public Listener interceptCall(
- ServerCall call, Metadata headers, ServerCallHandler next) {
- // Disable the message logging interceptor entirely if message logging is disabled.
- if (!loggingProperties.getAudit().getMessageLogging().isEnabled()) {
- return next.startCall(call, headers);
- }
-
- MessageAuditLogEntry.Builder entryBuilder = MessageAuditLogEntry.newBuilder();
- // default response/request message to empty proto in log entry.
- // request could be empty when the client closes the connection before sending a request
- // message.
- // response could be unset when the service encounters an error when processsing the service
- // call.
- entryBuilder.setRequest(Empty.newBuilder().build());
- entryBuilder.setResponse(Empty.newBuilder().build());
-
- // Unpack service & method name from call
- // full method name is in format ./
- String fullMethodName = call.getMethodDescriptor().getFullMethodName();
- entryBuilder.setService(
- fullMethodName.substring(fullMethodName.lastIndexOf(".") + 1, fullMethodName.indexOf("/")));
- entryBuilder.setMethod(fullMethodName.substring(fullMethodName.indexOf("/") + 1));
-
- // Attempt Extract current authenticated identity.
- entryBuilder.setIdentity("");
-
- // Register forwarding call to intercept outgoing response and log to audit log
- call =
- new SimpleForwardingServerCall<>(call) {
- @Override
- public void sendMessage(RespT message) {
- // 2. Track the response & Log entry to audit logger
- super.sendMessage(message);
- entryBuilder.setResponse((Message) message);
- }
-
- @Override
- public void close(Status status, Metadata trailers) {
- super.close(status, trailers);
- // 3. Log the message log entry to the audit log
- Level logLevel = (status.isOk()) ? Level.INFO : Level.ERROR;
- entryBuilder.setStatusCode(status.getCode());
- AuditLogger.logMessage(logLevel, entryBuilder);
- }
- };
-
- ServerCall.Listener listener = next.startCall(call, headers);
- return new SimpleForwardingServerCallListener<>(listener) {
- @Override
- // Register listener to intercept incoming request messages and log to audit log
- public void onMessage(ReqT message) {
- super.onMessage(message);
- // 1. Track the request.
- entryBuilder.setRequest((Message) message);
- }
- };
- }
-}
diff --git a/java/common/src/main/java/feast/common/validators/OneOfStringValidator.java b/java/common/src/main/java/feast/common/validators/OneOfStringValidator.java
deleted file mode 100644
index 924953a2c4..0000000000
--- a/java/common/src/main/java/feast/common/validators/OneOfStringValidator.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.validators;
-
-import java.util.Arrays;
-import javax.validation.ConstraintValidator;
-import javax.validation.ConstraintValidatorContext;
-
-/** Validates whether a string value is found within a collection. */
-public class OneOfStringValidator implements ConstraintValidator {
-
- /** Values that are permitted for a specific instance of this validator */
- String[] allowedValues;
-
- /**
- * Initialize the OneOfStringValidator with a collection of allowed String values.
- *
- * @param constraintAnnotation constraint annotation
- */
- @Override
- public void initialize(OneOfStrings constraintAnnotation) {
- allowedValues = constraintAnnotation.value();
- }
-
- /**
- * Validates whether a string value is found within the collection defined in the annotation.
- *
- * @param value String value that should be validated
- * @param context Provides contextual data and operation when applying a given constraint
- * validator
- * @return Boolean value indicating whether the string is found within the allowed values.
- */
- @Override
- public boolean isValid(String value, ConstraintValidatorContext context) {
- return Arrays.asList(allowedValues).contains(value);
- }
-}
diff --git a/java/common/src/main/java/feast/common/validators/OneOfStrings.java b/java/common/src/main/java/feast/common/validators/OneOfStrings.java
deleted file mode 100644
index b236f6f1af..0000000000
--- a/java/common/src/main/java/feast/common/validators/OneOfStrings.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.validators;
-
-import java.lang.annotation.*;
-import javax.validation.Constraint;
-import javax.validation.Payload;
-
-/**
- * Annotation for String "one of" validation. Allows for the definition of a collection through an
- * annotation. The collection is used to test values defined in the object.
- */
-@Target({
- ElementType.METHOD,
- ElementType.FIELD,
- ElementType.ANNOTATION_TYPE,
- ElementType.CONSTRUCTOR,
- ElementType.PARAMETER
-})
-@Retention(RetentionPolicy.RUNTIME)
-@Documented
-@Constraint(validatedBy = OneOfStringValidator.class)
-public @interface OneOfStrings {
- /** @return Default error message that is returned if the incorrect value is set */
- String message() default "Field value must be one of the following: {value}";
-
- /** @return Allows for the specification of validation groups to which this constraint belongs. */
- Class>[] groups() default {};
-
- /**
- * @return An attribute payload that can be used to assign custom payload objects to a constraint.
- */
- Class extends Payload>[] payload() default {};
-
- /** @return Default value that is returned if no allowed values are configured */
- String[] value() default {};
-}
diff --git a/java/common/src/main/resources/log4j2.xml b/java/common/src/main/resources/log4j2.xml
deleted file mode 100644
index c75c2db13c..0000000000
--- a/java/common/src/main/resources/log4j2.xml
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-
-
-
-
- %d{yyyy-MM-dd HH:mm:ss.SSS} %5p ${hostName} --- [%15.15t] %-40.40c{1.} : %m%n%ex
-
-
- {"time":"%d{yyyy-MM-dd'T'HH:mm:ssXXX}","hostname":"${hostName}","severity":"%p","message":%m}%n%ex
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java b/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java
deleted file mode 100644
index bc3dcbcf74..0000000000
--- a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- * Copyright 2018-2020 The Feast Authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package feast.common.logging.entry;
-
-import static org.hamcrest.MatcherAssert.assertThat;
-import static org.hamcrest.Matchers.equalTo;
-
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import com.google.protobuf.Timestamp;
-import feast.common.logging.entry.LogResource.ResourceType;
-import feast.proto.serving.ServingAPIProto;
-import feast.proto.serving.ServingAPIProto.FeatureReferenceV2;
-import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2;
-import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse;
-import feast.proto.types.ValueProto.Value;
-import io.grpc.Status;
-import java.util.Arrays;
-import java.util.List;
-import org.junit.Test;
-
-public class AuditLogEntryTest {
- public List getTestAuditLogs() {
- GetOnlineFeaturesRequestV2 requestSpec =
- GetOnlineFeaturesRequestV2.newBuilder()
- .addAllFeatures(
- Arrays.asList(
- FeatureReferenceV2.newBuilder()
- .setFeatureViewName("featuretable_1")
- .setFeatureName("feature1")
- .build(),
- FeatureReferenceV2.newBuilder()
- .setFeatureViewName("featuretable_1")
- .setFeatureName("feature2")
- .build()))
- .build();
-
- GetOnlineFeaturesResponse responseSpec =
- GetOnlineFeaturesResponse.newBuilder()
- .setMetadata(
- ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder()
- .setFeatureNames(
- ServingAPIProto.FeatureList.newBuilder()
- .addAllVal(
- Arrays.asList(
- "featuretable_1:feature_1", "featuretable_1:feature2"))))
- .addAllResults(
- Arrays.asList(
- GetOnlineFeaturesResponse.FeatureVector.newBuilder()
- .addValues(Value.newBuilder().setInt32Val(32).build())
- .addStatuses(ServingAPIProto.FieldStatus.PRESENT)
- .addEventTimestamps(Timestamp.newBuilder().build())
- .build(),
- GetOnlineFeaturesResponse.FeatureVector.newBuilder()
- .addValues(Value.newBuilder().setInt32Val(64).build())
- .addStatuses(ServingAPIProto.FieldStatus.PRESENT)
- .addEventTimestamps(Timestamp.newBuilder().build())
- .build()))
- .build();
-
- return Arrays.asList(
- MessageAuditLogEntry.newBuilder()
- .setComponent("feast-serving")
- .setVersion("0.9")
- .setService("ServingService")
- .setMethod("getOnlineFeatures")
- .setRequest(requestSpec)
- .setResponse(responseSpec)
- .setStatusCode(Status.OK.getCode())
- .setIdentity("adam@no.such.email")
- .build(),
- ActionAuditLogEntry.of(
- "core", "0.9", LogResource.of(ResourceType.JOB, "kafka-to-redis"), "CREATE"),
- TransitionAuditLogEntry.of(
- "core", "0.9", LogResource.of(ResourceType.FEATURE_TABLE, "featuretable_1"), "READY"));
- }
-
- @Test
- public void shouldReturnJSONRepresentationOfAuditLog() {
- for (AuditLogEntry auditLog : getTestAuditLogs()) {
- // Check that auditLog's toJSON() returns valid JSON
- String logJSON = auditLog.toJSON();
- System.out.println(logJSON);
- JsonParser parser = new JsonParser();
-
- // check basic fields are present in JSON representation.
- JsonObject logObject = parser.parse(logJSON).getAsJsonObject();
- assertThat(logObject.getAsJsonPrimitive("logType").getAsString(), equalTo(auditLog.logType));
- assertThat(
- logObject.getAsJsonPrimitive("kind").getAsString(), equalTo(auditLog.getKind().name()));
- }
- }
-}
diff --git a/java/docs/coverage/pom.xml b/java/coverage/pom.xml
similarity index 85%
rename from java/docs/coverage/pom.xml
rename to java/coverage/pom.xml
index f6e08909ee..a604135c79 100644
--- a/java/docs/coverage/pom.xml
+++ b/java/coverage/pom.xml
@@ -30,7 +30,7 @@
dev.feast
feast-parent
${revision}
- ../..
+ ..
Feast Coverage Java
@@ -41,18 +41,6 @@
-
- dev.feast
- feast-storage-api
- ${project.version}
-
-
-
- dev.feast
- feast-storage-connector-redis
- ${project.version}
-
-
dev.feast
feast-serving
diff --git a/java/infra/docker/feature-server/Dockerfile b/java/infra/docker/feature-server/Dockerfile
index dbd8c91472..5cd0e6e37b 100644
--- a/java/infra/docker/feature-server/Dockerfile
+++ b/java/infra/docker/feature-server/Dockerfile
@@ -8,13 +8,9 @@ WORKDIR /build
COPY java/pom.xml .
COPY java/datatypes/pom.xml datatypes/pom.xml
-COPY java/common/pom.xml common/pom.xml
COPY java/serving/pom.xml serving/pom.xml
-COPY java/storage/api/pom.xml storage/api/pom.xml
-COPY java/storage/connectors/pom.xml storage/connectors/pom.xml
-COPY java/storage/connectors/redis/pom.xml storage/connectors/redis/pom.xml
-COPY java/sdk/pom.xml sdk/pom.xml
-COPY java/docs/coverage/pom.xml docs/coverage/pom.xml
+COPY java/serving-client/pom.xml serving-client/pom.xml
+COPY java/coverage/pom.xml coverage/pom.xml
# Setting Maven repository .m2 directory relative to /build folder gives the
# user to optionally use cached repository when building the image by copying
diff --git a/java/pom.xml b/java/pom.xml
index 0bf92ee244..cac85cdd38 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -29,16 +29,13 @@
datatypes
- storage/api
- storage/connectors
serving
- sdk
- docs/coverage
- common
+ serving-client
+ coverage
- 0.23.0
+ 0.23.1
https://github.com/feast-dev/feast
UTF-8
@@ -91,6 +88,7 @@
*/
]]>
+
${maven.multiModuleProjectDirectory}
false
diff --git a/java/sdk/pom.xml b/java/serving-client/pom.xml
similarity index 97%
rename from java/sdk/pom.xml
rename to java/serving-client/pom.xml
index 5896214b27..7b8838a009 100644
--- a/java/sdk/pom.xml
+++ b/java/serving-client/pom.xml
@@ -4,8 +4,8 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4.0.0
- Feast SDK for Java
- SDK for registering, storing, and retrieving features
+ Feast Serving Client
+ Client for retrieving features from a Feast feature server
feast-serving-client
diff --git a/java/sdk/src/main/java/dev/feast/FeastClient.java b/java/serving-client/src/main/java/dev/feast/FeastClient.java
similarity index 100%
rename from java/sdk/src/main/java/dev/feast/FeastClient.java
rename to java/serving-client/src/main/java/dev/feast/FeastClient.java
diff --git a/java/sdk/src/main/java/dev/feast/RequestUtil.java b/java/serving-client/src/main/java/dev/feast/RequestUtil.java
similarity index 100%
rename from java/sdk/src/main/java/dev/feast/RequestUtil.java
rename to java/serving-client/src/main/java/dev/feast/RequestUtil.java
diff --git a/java/sdk/src/main/java/dev/feast/Row.java b/java/serving-client/src/main/java/dev/feast/Row.java
similarity index 100%
rename from java/sdk/src/main/java/dev/feast/Row.java
rename to java/serving-client/src/main/java/dev/feast/Row.java
diff --git a/java/sdk/src/main/java/dev/feast/SecurityConfig.java b/java/serving-client/src/main/java/dev/feast/SecurityConfig.java
similarity index 100%
rename from java/sdk/src/main/java/dev/feast/SecurityConfig.java
rename to java/serving-client/src/main/java/dev/feast/SecurityConfig.java
diff --git a/java/sdk/src/test/java/dev/feast/FeastClientTest.java b/java/serving-client/src/test/java/dev/feast/FeastClientTest.java
similarity index 100%
rename from java/sdk/src/test/java/dev/feast/FeastClientTest.java
rename to java/serving-client/src/test/java/dev/feast/FeastClientTest.java
diff --git a/java/sdk/src/test/java/dev/feast/RequestUtilTest.java b/java/serving-client/src/test/java/dev/feast/RequestUtilTest.java
similarity index 100%
rename from java/sdk/src/test/java/dev/feast/RequestUtilTest.java
rename to java/serving-client/src/test/java/dev/feast/RequestUtilTest.java
diff --git a/java/serving/README.md b/java/serving/README.md
index 5ac7194924..a0d87563a9 100644
--- a/java/serving/README.md
+++ b/java/serving/README.md
@@ -41,7 +41,7 @@ From the Feast GitHub root, run:
java \
-Xms1g \
-Xmx4g \
- -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \
+ -jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \
classpath:/application.yml,file:./application-override.yaml
```
5. Now you have a Feast Serving gRPC service running on port 6566 locally!
@@ -124,7 +124,7 @@ You can debug this like any other Java executable. Swap the java command above w
-Xrunjdwp:transport=dt_socket,address=5005,server=y,suspend=y \
-Xms1g \
-Xmx4g \
- -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \
+ -jar java/serving/target/feast-serving-[YOUR VERSION]-jar-with-dependencies.jar \
classpath:/application.yml,file:./application-override.yaml
```
Now you can attach e.g. a Remote debugger in IntelliJ to port 5005 to debug / make breakpoints.
diff --git a/java/serving/pom.xml b/java/serving/pom.xml
index f173cdd5fe..e597775f9b 100644
--- a/java/serving/pom.xml
+++ b/java/serving/pom.xml
@@ -92,24 +92,6 @@
${project.version}
-
- dev.feast
- feast-common
- ${project.version}
-
-
-
- dev.feast
- feast-storage-api
- ${project.version}
-
-
-
- dev.feast
- feast-storage-connector-redis
- ${project.version}
-
-
com.google.inject
guice
@@ -345,6 +327,16 @@
2.7.4
test
+
+ io.lettuce
+ lettuce-core
+ 6.0.2.RELEASE
+
+
+ org.apache.commons
+ commons-lang3
+ 3.10
+
diff --git a/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java b/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java
index 664d6dd4ec..d91af8abb1 100644
--- a/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java
+++ b/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java
@@ -18,7 +18,7 @@
import com.google.inject.Guice;
import com.google.inject.Injector;
-import feast.serving.config.*;
+import feast.serving.service.config.*;
import io.grpc.Server;
import java.io.IOException;
@@ -32,9 +32,9 @@ public static void main(String[] args) throws InterruptedException, IOException
final Injector i =
Guice.createInjector(
- new ServingServiceConfigV2(),
- new RegistryConfig(),
- new InstrumentationConfig(),
+ new ServingServiceV2Module(),
+ new RegistryConfigModule(),
+ new InstrumentationConfigModule(),
new ServerModule(),
new ApplicationPropertiesModule(args));
diff --git a/java/storage/api/src/main/java/feast/storage/api/retriever/Feature.java b/java/serving/src/main/java/feast/serving/connectors/Feature.java
similarity index 94%
rename from java/storage/api/src/main/java/feast/storage/api/retriever/Feature.java
rename to java/serving/src/main/java/feast/serving/connectors/Feature.java
index 92ae1f31fb..af96a90866 100644
--- a/java/storage/api/src/main/java/feast/storage/api/retriever/Feature.java
+++ b/java/serving/src/main/java/feast/serving/connectors/Feature.java
@@ -14,18 +14,19 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.api.retriever;
+package feast.serving.connectors;
import com.google.protobuf.Timestamp;
import feast.proto.serving.ServingAPIProto.FeatureReferenceV2;
import feast.proto.types.ValueProto;
import feast.proto.types.ValueProto.Value;
import java.util.HashMap;
+import java.util.Map;
public interface Feature {
- HashMap TYPE_TO_VAL_CASE =
- new HashMap() {
+ Map TYPE_TO_VAL_CASE =
+ new HashMap<>() {
{
put(ValueProto.ValueType.Enum.BYTES, ValueProto.Value.ValCase.BYTES_VAL);
put(ValueProto.ValueType.Enum.STRING, ValueProto.Value.ValCase.STRING_VAL);
diff --git a/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java b/java/serving/src/main/java/feast/serving/connectors/OnlineRetriever.java
similarity index 96%
rename from java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java
rename to java/serving/src/main/java/feast/serving/connectors/OnlineRetriever.java
index fde8ba7396..79c062814b 100644
--- a/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java
+++ b/java/serving/src/main/java/feast/serving/connectors/OnlineRetriever.java
@@ -14,14 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.api.retriever;
+package feast.serving.connectors;
import feast.proto.serving.ServingAPIProto;
import feast.proto.types.ValueProto;
import java.util.List;
import java.util.Map;
-public interface OnlineRetrieverV2 {
+public interface OnlineRetriever {
/**
* Get online features for the given entity rows using data retrieved from the Feature references
* specified in FeatureTable request.
diff --git a/java/storage/api/src/main/java/feast/storage/api/retriever/ProtoFeature.java b/java/serving/src/main/java/feast/serving/connectors/ProtoFeature.java
similarity index 98%
rename from java/storage/api/src/main/java/feast/storage/api/retriever/ProtoFeature.java
rename to java/serving/src/main/java/feast/serving/connectors/ProtoFeature.java
index 09f6b75f49..9820898d00 100644
--- a/java/storage/api/src/main/java/feast/storage/api/retriever/ProtoFeature.java
+++ b/java/serving/src/main/java/feast/serving/connectors/ProtoFeature.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.api.retriever;
+package feast.serving.connectors;
import com.google.protobuf.Timestamp;
import feast.proto.serving.ServingAPIProto;
diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java b/java/serving/src/main/java/feast/serving/connectors/redis/common/RedisHashDecoder.java
similarity index 96%
rename from java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java
rename to java/serving/src/main/java/feast/serving/connectors/redis/common/RedisHashDecoder.java
index 78b64fd141..9f5c94924d 100644
--- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java
+++ b/java/serving/src/main/java/feast/serving/connectors/redis/common/RedisHashDecoder.java
@@ -14,15 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.connectors.redis.common;
+package feast.serving.connectors.redis.common;
import com.google.common.hash.Hashing;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Timestamp;
import feast.proto.serving.ServingAPIProto;
import feast.proto.types.ValueProto;
-import feast.storage.api.retriever.Feature;
-import feast.storage.api.retriever.ProtoFeature;
+import feast.serving.connectors.Feature;
+import feast.serving.connectors.ProtoFeature;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.*;
diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java b/java/serving/src/main/java/feast/serving/connectors/redis/common/RedisKeyGenerator.java
similarity index 97%
rename from java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java
rename to java/serving/src/main/java/feast/serving/connectors/redis/common/RedisKeyGenerator.java
index 389ca0abfd..defb337a82 100644
--- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java
+++ b/java/serving/src/main/java/feast/serving/connectors/redis/common/RedisKeyGenerator.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.connectors.redis.common;
+package feast.serving.connectors.redis.common;
import feast.proto.serving.ServingAPIProto;
import feast.proto.storage.RedisProto;
diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializer.java b/java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializer.java
similarity index 94%
rename from java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializer.java
rename to java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializer.java
index 6220dd29d4..d25f0da4f9 100644
--- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializer.java
+++ b/java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializer.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.connectors.redis.retriever;
+package feast.serving.connectors.redis.retriever;
import feast.proto.storage.RedisProto;
diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java b/java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializerV2.java
similarity index 96%
rename from java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java
rename to java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializerV2.java
index f99e5cbdb1..672f4d7c31 100644
--- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java
+++ b/java/serving/src/main/java/feast/serving/connectors/redis/retriever/EntityKeySerializerV2.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package feast.storage.connectors.redis.retriever;
+package feast.serving.connectors.redis.retriever;
import com.google.protobuf.ProtocolStringList;
import feast.proto.storage.RedisProto;
@@ -87,14 +87,15 @@ public byte[] serialize(RedisProto.RedisKeyV2 entityKey) {
break;
case INT64_VAL:
buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT64.getNumber()));
- buffer.addAll(encodeInteger(Integer.BYTES));
/* This is super dumb - but in https://github.com/feast-dev/feast/blob/dcae1606f53028ce5413567fb8b66f92cfef0f8e/sdk/python/feast/infra/key_encoding_utils.py#L9
we use `struct.pack(" tracerOptional;
+ private final OnlineRetriever retriever;
private final RegistryRepository registryRepository;
private final OnlineTransformationService onlineTransformationService;
private final String project;
@@ -56,16 +62,16 @@ public class OnlineServingServiceV2 implements ServingServiceV2 {
ValueProto.Value.newBuilder().setStringVal(DUMMY_ENTITY_VAL).build();
public OnlineServingServiceV2(
- OnlineRetrieverV2 retriever,
- Tracer tracer,
+ OnlineRetriever retriever,
RegistryRepository registryRepository,
OnlineTransformationService onlineTransformationService,
- String project) {
+ String project,
+ Optional tracerOptional) {
this.retriever = retriever;
- this.tracer = tracer;
this.registryRepository = registryRepository;
this.onlineTransformationService = onlineTransformationService;
this.project = project;
+ this.tracerOptional = tracerOptional;
}
/** {@inheritDoc} */
@@ -107,20 +113,21 @@ public ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures(
List