diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index c405c1f0840..2f2d0d2f5e3 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -2,7 +2,7 @@ name: Bug report about: Create a report to help us improve title: '' -labels: '' +labels: 'kind/bug, priority/p2' assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index bbcbbe7d615..d73d6444812 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -2,7 +2,7 @@ name: Feature request about: Suggest an idea for this project title: '' -labels: '' +labels: 'kind/feature' assignees: '' --- diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 42f0383832a..3cdddba8479 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -125,6 +125,11 @@ jobs: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest -n 8 --cov=./ --cov-report=xml --verbose --color=yes sdk/python/tests --integration --durations=5 - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 @@ -140,6 +145,11 @@ jobs: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-lambda-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ 
secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest --verbose --color=yes sdk/python/tests --integration --benchmark --benchmark-autosave --benchmark-save-data --durations=5 - name: Upload Benchmark Artifact to S3 run: aws s3 cp --recursive .benchmarks s3://feast-ci-pytest-benchmarks diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 8a910f943c6..e04b78ec320 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -151,6 +151,11 @@ jobs: FEAST_SERVER_DOCKER_IMAGE_TAG: ${{ needs.build-docker-image.outputs.DOCKER_IMAGE_TAG }} FEAST_USAGE: "False" IS_TEST: "True" + SNOWFLAKE_CI_DEPLOYMENT: ${{ secrets.SNOWFLAKE_CI_DEPLOYMENT }} + SNOWFLAKE_CI_USER: ${{ secrets.SNOWFLAKE_CI_USER }} + SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} + SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} + SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} run: pytest -n 8 --cov=./ --cov-report=xml --verbose --color=yes sdk/python/tests --integration --durations=5 - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5ff1139acba..8dd29aeb588 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -144,5 +144,60 @@ jobs: python3 setup.py sdist bdist_wheel python3 -m twine upload --verbose dist/* - # TODO(adchia): publish java sdk once maven repo is updated - # See https://github.com/feast-dev/feast-java/blob/master/.github/workflows/release.yml#L104 \ No newline at end of file + publish-python-sdk-no-telemetry: + runs-on: ubuntu-latest + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + container: python:3.7 + steps: + - uses: actions/checkout@v2 + - name: Install pip-tools + run: pip install pip-tools + - name: Install dependencies + run: make install-python-ci-dependencies PYTHON=3.7 + - name: Publish Python Package + 
run: | + cd sdk/python + sed -i 's/DEFAULT_FEAST_USAGE_VALUE = "True"/DEFAULT_FEAST_USAGE_VALUE = "False"/g' feast/constants.py + sed -i 's/NAME = "feast"/NAME = "feast-no-telemetry"/g' setup.py + python3 -m pip install --user --upgrade setuptools wheel twine + python3 setup.py sdist bdist_wheel + python3 -m twine upload --verbose dist/* + + publish-java-sdk: + container: maven:3.6-jdk-11 + runs-on: ubuntu-latest + needs: get-version + steps: + - uses: actions/checkout@v2 + with: + submodules: 'true' + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: '11' + java-package: jdk + architecture: x64 + - uses: actions/setup-python@v2 + with: + python-version: '3.7' + architecture: 'x64' + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- + - name: Publish java sdk + env: + VERSION_WITHOUT_PREFIX: ${{ needs.get-version.outputs.version_without_prefix }} + GPG_PUBLIC_KEY: ${{ secrets.GPG_PUBLIC_KEY }} + GPG_PRIVATE_KEY: ${{ secrets.GPG_PRIVATE_KEY }} + MAVEN_SETTINGS: ${{ secrets.MAVEN_SETTINGS }} + run: | + echo -n "$GPG_PUBLIC_KEY" > /root/public-key + echo -n "$GPG_PRIVATE_KEY" > /root/private-key + mkdir -p /root/.m2/ + echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml + infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root diff --git a/.prow.yaml b/.prow.yaml index b03a71a475a..4c8372cc7c8 100644 --- a/.prow.yaml +++ b/.prow.yaml @@ -1,102 +1,4 @@ -presubmits: -- name: test-core-and-ingestion - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-core-ingestion.sh"] - resources: - requests: - cpu: "2000m" - memory: "1536Mi" - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-core-and-ingestion-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: 
["infra/scripts/test-java-core-ingestion.sh"] - resources: - requests: - cpu: "2000m" - memory: "1536Mi" - branches: - - ^v0\.(3|4)-branch$ - -- name: test-serving - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-serving.sh"] - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-serving-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-java-serving.sh"] - branches: - - ^v0\.(3|4)-branch$ - -- name: test-java-sdk - decorate: true - spec: - containers: - - image: maven:3.6-jdk-11 - command: ["infra/scripts/test-java-sdk.sh"] - skip_branches: - - ^v0\.(3|4)-branch$ - -- name: test-java-sdk-java-8 - decorate: true - always_run: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: ["infra/scripts/test-java-sdk.sh"] - branches: - - ^v0\.(3|4)-branch$ - -- name: test-golang-sdk - decorate: true - spec: - containers: - - image: golang:1.13 - command: ["infra/scripts/test-golang-sdk.sh"] - postsubmits: -- name: publish-python-sdk - decorate: true - spec: - containers: - - image: python:3 - command: - - sh - - -c - - | - make package-protos && make compile-protos-python && infra/scripts/publish-python-sdk.sh \ - --directory-path sdk/python --repository pypi - volumeMounts: - - name: pypirc - mountPath: /root/.pypirc - subPath: .pypirc - readOnly: true - volumes: - - name: pypirc - secret: - secretName: pypirc - branches: - # Filter on tags with semantic versioning, prefixed with "v" - # https://github.com/semver/semver/issues/232 - - ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - - name: publish-java-sdk decorate: true spec: @@ -128,31 +30,3 @@ postsubmits: branches: # Filter on tags with semantic versioning, prefixed with "v". 
- ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - -- name: publish-java-8-sdk - decorate: true - spec: - containers: - - image: maven:3.6-jdk-8 - command: - - bash - - -c - - infra/scripts/publish-java-sdk.sh --revision ${PULL_BASE_REF:1} - volumeMounts: - - name: gpg-keys - mountPath: /etc/gpg - readOnly: true - - name: maven-settings - mountPath: /root/.m2/settings.xml - subPath: settings.xml - readOnly: true - volumes: - - name: gpg-keys - secret: - secretName: gpg-keys - - name: maven-settings - secret: - secretName: maven-settings - branches: - # Filter on tags with semantic versioning, prefixed with "v". v0.3 and v0.4 only. - - ^v0\.(3|4)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ diff --git a/CHANGELOG.md b/CHANGELOG.md index 53514c5ad0d..bc0368cca25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,83 @@ # Changelog +## [v0.18.0](https://github.com/feast-dev/feast/tree/v0.18.0) (2022-02-05) + +[Full Changelog](https://github.com/feast-dev/feast/compare/v0.17.0...v0.18.0) + +**Implemented enhancements:** + +- Tutorial on validation of historical features [\#2277](https://github.com/feast-dev/feast/pull/2277) ([pyalex](https://github.com/pyalex)) +- Feast plan clean up [\#2256](https://github.com/feast-dev/feast/pull/2256) ([felixwang9817](https://github.com/felixwang9817)) +- Return `UNIX\_TIMESTAMP` as Python `datetime` [\#2244](https://github.com/feast-dev/feast/pull/2244) ([judahrand](https://github.com/judahrand)) +- Validating historical features against reference dataset with "great expectations" profiler [\#2243](https://github.com/feast-dev/feast/pull/2243) ([pyalex](https://github.com/pyalex)) +- Implement feature\_store.\_apply\_diffs to handle registry and infra diffs [\#2238](https://github.com/feast-dev/feast/pull/2238) 
([felixwang9817](https://github.com/felixwang9817)) +- Compare Python objects instead of proto objects [\#2227](https://github.com/feast-dev/feast/pull/2227) ([felixwang9817](https://github.com/felixwang9817)) +- Modify feature\_store.plan to produce an InfraDiff [\#2211](https://github.com/feast-dev/feast/pull/2211) ([felixwang9817](https://github.com/felixwang9817)) +- Implement diff\_infra\_protos method for feast plan [\#2204](https://github.com/feast-dev/feast/pull/2204) ([felixwang9817](https://github.com/felixwang9817)) +- Persisting results of historical retrieval [\#2197](https://github.com/feast-dev/feast/pull/2197) ([pyalex](https://github.com/pyalex)) +- Merge feast-snowflake plugin into main repo with documentation [\#2193](https://github.com/feast-dev/feast/pull/2193) ([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- Add InfraDiff class for feast plan [\#2190](https://github.com/feast-dev/feast/pull/2190) ([felixwang9817](https://github.com/felixwang9817)) +- Use FeatureViewProjection instead of FeatureView in ODFV [\#2186](https://github.com/feast-dev/feast/pull/2186) ([judahrand](https://github.com/judahrand)) + +**Fixed bugs:** + +- Set `created\_timestamp` and `last\_updated\_timestamp` fields [\#2266](https://github.com/feast-dev/feast/pull/2266) ([judahrand](https://github.com/judahrand)) +- Use `datetime.utcnow\(\)` to avoid timezone issues [\#2265](https://github.com/feast-dev/feast/pull/2265) ([judahrand](https://github.com/judahrand)) +- Fix Redis key serialization in java feature server [\#2264](https://github.com/feast-dev/feast/pull/2264) ([pyalex](https://github.com/pyalex)) +- modify registry.db s3 object initialization to work in S3 subdirectory with Java Feast Server [\#2259](https://github.com/feast-dev/feast/pull/2259) ([NalinGHub](https://github.com/NalinGHub)) +- Add snowflake environment variables to allow testing on snowflake infra [\#2258](https://github.com/feast-dev/feast/pull/2258) 
([sfc-gh-madkins](https://github.com/sfc-gh-madkins)) +- Correct inconsistent dependency [\#2255](https://github.com/feast-dev/feast/pull/2255) ([judahrand](https://github.com/judahrand)) +- Fix for historical field mappings [\#2252](https://github.com/feast-dev/feast/pull/2252) ([michelle-rascati-sp](https://github.com/michelle-rascati-sp)) +- Add backticks to left\_table\_query\_string [\#2250](https://github.com/feast-dev/feast/pull/2250) ([dmille](https://github.com/dmille)) +- Fix inference of BigQuery ARRAY types. [\#2245](https://github.com/feast-dev/feast/pull/2245) ([judahrand](https://github.com/judahrand)) +- Fix Redshift data creator [\#2242](https://github.com/feast-dev/feast/pull/2242) ([felixwang9817](https://github.com/felixwang9817)) +- Delete entity key from Redis only when all attached feature views are gone [\#2240](https://github.com/feast-dev/feast/pull/2240) ([pyalex](https://github.com/pyalex)) +- Tests for transformation service integration in java feature server [\#2236](https://github.com/feast-dev/feast/pull/2236) ([pyalex](https://github.com/pyalex)) +- Feature server helm chart produces invalid YAML [\#2234](https://github.com/feast-dev/feast/pull/2234) ([pyalex](https://github.com/pyalex)) +- Docker build fails for java feature server [\#2230](https://github.com/feast-dev/feast/pull/2230) ([pyalex](https://github.com/pyalex)) +- Fix ValueType.UNIX\_TIMESTAMP conversions [\#2219](https://github.com/feast-dev/feast/pull/2219) ([judahrand](https://github.com/judahrand)) +- Add on demand feature views deletion [\#2203](https://github.com/feast-dev/feast/pull/2203) ([corentinmarek](https://github.com/corentinmarek)) +- Compare only specs in integration tests [\#2200](https://github.com/feast-dev/feast/pull/2200) ([felixwang9817](https://github.com/felixwang9817)) +- Bump log4j-core from 2.17.0 to 2.17.1 in /java [\#2189](https://github.com/feast-dev/feast/pull/2189) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Support 
multiple application properties files \(incl from classpath\) [\#2187](https://github.com/feast-dev/feast/pull/2187) ([pyalex](https://github.com/pyalex)) +- Avoid requesting features from OnlineStore twice [\#2185](https://github.com/feast-dev/feast/pull/2185) ([judahrand](https://github.com/judahrand)) +- Speed up Datastore deletes by batch deletions with multithreading [\#2182](https://github.com/feast-dev/feast/pull/2182) ([ptoman-pa](https://github.com/ptoman-pa)) +- Fixes large payload runtime exception in Datastore \(issue 1633\) [\#2181](https://github.com/feast-dev/feast/pull/2181) ([ptoman-pa](https://github.com/ptoman-pa)) + +**Merged pull requests:** + +- Add link to community plugin for Spark offline store [\#2279](https://github.com/feast-dev/feast/pull/2279) ([adchia](https://github.com/adchia)) +- Fix broken links on documentation [\#2278](https://github.com/feast-dev/feast/pull/2278) ([adchia](https://github.com/adchia)) +- Publish alternative python package with FEAST\_USAGE=False by default [\#2275](https://github.com/feast-dev/feast/pull/2275) ([pyalex](https://github.com/pyalex)) +- Unify all helm charts versions [\#2274](https://github.com/feast-dev/feast/pull/2274) ([pyalex](https://github.com/pyalex)) +- Fix / update helm chart workflows to push the feast python server [\#2273](https://github.com/feast-dev/feast/pull/2273) ([adchia](https://github.com/adchia)) +- Update Feast Serving documentation with ways to run and debug locally [\#2272](https://github.com/feast-dev/feast/pull/2272) ([adchia](https://github.com/adchia)) +- Fix Snowflake docs [\#2270](https://github.com/feast-dev/feast/pull/2270) ([felixwang9817](https://github.com/felixwang9817)) +- Update local-feature-server.md [\#2269](https://github.com/feast-dev/feast/pull/2269) ([tsotnet](https://github.com/tsotnet)) +- Update docs to include Snowflake/DQM and removing unused docs from old versions of Feast [\#2268](https://github.com/feast-dev/feast/pull/2268) 
([adchia](https://github.com/adchia)) +- Graduate Python feature server [\#2263](https://github.com/feast-dev/feast/pull/2263) ([felixwang9817](https://github.com/felixwang9817)) +- Fix benchmark tests at HEAD by passing in Snowflake secrets [\#2262](https://github.com/feast-dev/feast/pull/2262) ([adchia](https://github.com/adchia)) +- Refactor `pa\_to\_feast\_value\_type` [\#2246](https://github.com/feast-dev/feast/pull/2246) ([judahrand](https://github.com/judahrand)) +- Allow using pandas.StringDtype to support on-demand features with STRING type [\#2229](https://github.com/feast-dev/feast/pull/2229) ([pyalex](https://github.com/pyalex)) +- Bump jackson-databind from 2.10.1 to 2.10.5.1 in /java/common [\#2228](https://github.com/feast-dev/feast/pull/2228) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Split apply total parse repo [\#2226](https://github.com/feast-dev/feast/pull/2226) ([mickey-liu](https://github.com/mickey-liu)) +- Publish renamed java packages to maven central \(via Sonatype\) [\#2225](https://github.com/feast-dev/feast/pull/2225) ([pyalex](https://github.com/pyalex)) +- Make online store nullable [\#2224](https://github.com/feast-dev/feast/pull/2224) ([mirayyuce](https://github.com/mirayyuce)) +- Optimize `\_populate\_result\_rows\_from\_feature\_view` [\#2223](https://github.com/feast-dev/feast/pull/2223) ([judahrand](https://github.com/judahrand)) +- Update to newer `redis-py` [\#2221](https://github.com/feast-dev/feast/pull/2221) ([judahrand](https://github.com/judahrand)) +- Adding a local feature server test [\#2217](https://github.com/feast-dev/feast/pull/2217) ([adchia](https://github.com/adchia)) +- replace GetOnlineFeaturesResponse with GetOnlineFeaturesResponseV2 in… [\#2214](https://github.com/feast-dev/feast/pull/2214) ([tsotnet](https://github.com/tsotnet)) +- Updates to click==8.\* [\#2210](https://github.com/feast-dev/feast/pull/2210) ([diogommartins](https://github.com/diogommartins)) +- Bump protobuf-java from 
3.12.2 to 3.16.1 in /java [\#2208](https://github.com/feast-dev/feast/pull/2208) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add default priority for bug reports [\#2207](https://github.com/feast-dev/feast/pull/2207) ([adchia](https://github.com/adchia)) +- Modify issue templates to automatically attach labels [\#2205](https://github.com/feast-dev/feast/pull/2205) ([adchia](https://github.com/adchia)) +- Python FeatureServer optimization [\#2202](https://github.com/feast-dev/feast/pull/2202) ([judahrand](https://github.com/judahrand)) +- Refactor all importer logic to belong in feast.importer [\#2199](https://github.com/feast-dev/feast/pull/2199) ([felixwang9817](https://github.com/felixwang9817)) +- Refactor `OnlineResponse.to\_dict\(\)` [\#2196](https://github.com/feast-dev/feast/pull/2196) ([judahrand](https://github.com/judahrand)) +- \[Java feature server\] Converge ServingService API to make Python and Java feature servers consistent [\#2166](https://github.com/feast-dev/feast/pull/2166) ([pyalex](https://github.com/pyalex)) +- Add a unit test for the tag\_proto\_objects method [\#2163](https://github.com/feast-dev/feast/pull/2163) ([achals](https://github.com/achals)) + + ## [v0.17.0](https://github.com/feast-dev/feast/tree/v0.17.0) (2021-12-31) [Full Changelog](https://github.com/feast-dev/feast/compare/v0.16.1...v0.17.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6918d7f1de9..bef64577f91 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,8 +5,8 @@ This guide is targeted at developers looking to contribute to Feast components in the main Feast repository: - [Feast Python SDK / CLI](#feast-python-sdk-%2F-cli) +- [Feast Java Serving](#feast-java-serving) - [Feast Go Client](#feast-go-client) -- [Feast Terraform](#feast-terraform) ## Making a pull request @@ -50,7 +50,7 @@ Setting up your development environment for Feast Python SDK / CLI: 3. 
_Recommended:_ Create a virtual environment to isolate development dependencies to be installed ```sh # create & activate a virtual environment -python -v venv venv/ +python -m venv venv/ source venv/bin/activate ``` @@ -117,6 +117,9 @@ AWS Then run `make test-python-integration`. Note that for GCP / AWS, this will create new temporary tables / datasets. +## Feast Java Serving +See [Java contributing guide](java/CONTRIBUTING.md) + ## Feast Go Client :warning: Feast Go Client will move to its own standalone repository in the future. @@ -152,14 +155,4 @@ go vet Unit tests for the Feast Go Client can be run as follows: ```sh go test -``` - -## Feast on Kubernetes -:warning: Feast Terraform will move to its own standalone repository in the future. - -See the deployment guide of the respective cloud providers for how to work with these deployments: -- [Helm Deployment on Kubernetes](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm) -- [Terraform Deployment on Amazon EKS](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform) -- [Terraform Deployment on Azure AKS](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform) -- [Terraform Deployment on Google Cloud GKE](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform) -- [Kustomize Deployment on IBM Cloud IKS or OpenShift](https://docs.feast.dev/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize) +``` \ No newline at end of file diff --git a/README.md b/README.md index 6ef49896d4c..de972225dd2 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,9 @@ Feast is an open source feature store for machine learning. Feast is the fastest Please see our [documentation](https://docs.feast.dev/) for more information about the project. 
## 📐 Architecture - +![](docs/assets/feast-marchitecture.png) -The above architecture is the minimal Feast deployment. Want to run the full Feast on GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-gcp-aws). +The above architecture is the minimal Feast deployment. Want to run the full Feast on Snowflake/GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws). ## 🐣 Getting Started @@ -134,26 +134,29 @@ The list below contains the functionality that contributors are planning to deve * We welcome contribution to all items in the roadmap! * Want to influence our roadmap and prioritization? Submit your feedback to [this form](https://docs.google.com/forms/d/e/1FAIpQLSfa1nRQ0sKz-JEFnMMCi4Jseag\_yDssO\_3nV9qMfxfrkil-wA/viewform). * Want to speak to a Feast contributor? We are more than happy to jump on a call. Please schedule a time using [Calendly](https://calendly.com/d/x2ry-g5bb/meet-with-feast-team). + * **Data Sources** + * [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] Kafka source (with [push support into the online store](reference/alpha-stream-ingestion.md)) - * [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) + * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) * [ ] HTTP source * **Offline Stores** + * [x] 
[Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) - * [x] [Snowflake (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) - * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) * **Online Stores** * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) @@ -188,7 +191,7 @@ The list below contains the functionality that contributors are planning to deve * [ ] Delete API * [ ] Feature Logging (for training) * **Data Quality Management (See [RFC](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98/edit))** - * [ ] Data profiling and validation (Great Expectations) (Planned for Q1 2022) + * [x] Data profiling and validation (Great Expectations) * [ ] Metric production * [ ] Training-serving skew detection * [ ] Drift detection @@ -196,10 +199,10 @@ The list below contains the functionality that contributors are planning to deve * [x] Python SDK for browsing feature registry * [x] CLI for browsing feature registry * [x] Model-centric feature tracking (feature services) + * [x] Amundsen integration (see [Feast 
extractor](https://github.com/amundsen-io/amundsen/blob/main/databuilder/databuilder/extractor/feast_extractor.py)) * [ ] REST API for browsing feature registry * [ ] Feast Web UI * [ ] Feature versioning - * [ ] Amundsen integration ## 🎓 Important Resources @@ -207,7 +210,7 @@ The list below contains the functionality that contributors are planning to deve Please refer to the official documentation at [Documentation](https://docs.feast.dev/) * [Quickstart](https://docs.feast.dev/getting-started/quickstart) * [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) - * [Running Feast with GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) + * [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) * [Change Log](https://github.com/feast-dev/feast/blob/master/CHANGELOG.md) * [Slack (#Feast)](https://slack.feast.dev/) diff --git a/docs/README.md b/docs/README.md index 1a76adbde3d..f8b9af3c32f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,7 +4,7 @@ Feast (**Fea**ture **St**ore) is an operational data system for managing and serving machine learning features to models in production. Feast is able to serve feature data to models from a low-latency online store (for real-time prediction) or from an offline store (for scale-out batch scoring or model training). -![](.gitbook/assets/feast-marchitecture-211014.png) +![](assets/feast-marchitecture.png) ## Problems Feast Solves @@ -30,7 +30,7 @@ Feast addresses this problem by introducing feature reuse through a centralized **Feature discovery:** We also aim for Feast to include a first-class user interface for exploring and discovering entities and features. -**‌Feature validation:** We additionally aim for Feast to improve support for statistics generation of feature data and subsequent validation of these statistics. Current support is limited. 
+**Feature validation:** We additionally aim for Feast to improve support for statistics generation of feature data and subsequent validation of these statistics. Current support is limited. ## What Feast is not @@ -52,6 +52,6 @@ Explore the following resources to get started with Feast: * [Concepts](getting-started/concepts/) describes all important Feast API concepts * [Architecture](getting-started/architecture-and-components/) describes Feast's overall architecture. * [Tutorials](tutorials/tutorials-overview.md) shows full examples of using Feast in machine learning applications. -* [Running Feast with GCP/AWS](how-to-guides/feast-gcp-aws/) provides a more in-depth guide to using Feast. +* [Running Feast with Snowflake/GCP/AWS](how-to-guides/feast-snowflake-gcp-aws/) provides a more in-depth guide to using Feast. * [Reference](reference/feast-cli-commands.md) contains detailed API and design documents. * [Contributing](project/contributing.md) contains resources for anyone who wants to contribute to Feast. 
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 987a432ac9a..439742af9f5 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,10 +16,11 @@ * [Feature service](getting-started/concepts/feature-service.md) * [Feature retrieval](getting-started/concepts/feature-retrieval.md) * [Point-in-time joins](getting-started/concepts/point-in-time-joins.md) + * [Dataset](getting-started/concepts/dataset.md) * [Architecture](getting-started/architecture-and-components/README.md) * [Overview](getting-started/architecture-and-components/overview.md) * [Feature repository](getting-started/architecture-and-components/feature-repository.md) - * [Registry](getting-started/architecture-and-components/untitled.md) + * [Registry](getting-started/architecture-and-components/registry.md) * [Offline store](getting-started/architecture-and-components/offline-store.md) * [Online store](getting-started/architecture-and-components/online-store.md) * [Provider](getting-started/architecture-and-components/provider.md) @@ -32,16 +33,18 @@ * [Driver ranking](tutorials/driver-ranking-with-feast.md) * [Fraud detection on GCP](tutorials/fraud-detection.md) * [Real-time credit scoring on AWS](tutorials/real-time-credit-scoring-on-aws.md) +* [Driver stats on Snowflake](tutorials/driver-stats-on-snowflake.md) +* [Validating historical features with Great Expectations](tutorials/validating-historical-features.md) ## How-to Guides -* [Running Feast with GCP/AWS](how-to-guides/feast-gcp-aws/README.md) - * [Install Feast](how-to-guides/feast-gcp-aws/install-feast.md) - * [Create a feature repository](how-to-guides/feast-gcp-aws/create-a-feature-repository.md) - * [Deploy a feature store](how-to-guides/feast-gcp-aws/deploy-a-feature-store.md) - * [Build a training dataset](how-to-guides/feast-gcp-aws/build-a-training-dataset.md) - * [Load data into the online store](how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md) - * [Read features from the online 
store](how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md) +* [Running Feast with Snowflake/GCP/AWS](how-to-guides/feast-snowflake-gcp-aws/README.md) + * [Install Feast](how-to-guides/feast-snowflake-gcp-aws/install-feast.md) + * [Create a feature repository](how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md) + * [Deploy a feature store](how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md) + * [Build a training dataset](how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md) + * [Load data into the online store](how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md) + * [Read features from the online store](how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md) * [Running Feast in production](how-to-guides/running-feast-in-production.md) * [Upgrading from Feast 0.9](https://docs.google.com/document/u/1/d/1AOsr\_baczuARjCpmZgVd8mCqTF4AZ49OEyU4Cn-uTT0/edit) * [Adding a custom provider](how-to-guides/creating-a-custom-provider.md) @@ -53,10 +56,12 @@ * [Data sources](reference/data-sources/README.md) * [File](reference/data-sources/file.md) + * [Snowflake](reference/data-sources/snowflake.md) * [BigQuery](reference/data-sources/bigquery.md) * [Redshift](reference/data-sources/redshift.md) * [Offline stores](reference/offline-stores/README.md) * [File](reference/offline-stores/file.md) + * [Snowflake](reference/offline-stores/snowflake.md) * [BigQuery](reference/offline-stores/bigquery.md) * [Redshift](reference/offline-stores/redshift.md) * [Online stores](reference/online-stores/README.md) @@ -71,9 +76,11 @@ * [Feature repository](reference/feature-repository/README.md) * [feature\_store.yaml](reference/feature-repository/feature-store-yaml.md) * [.feastignore](reference/feature-repository/feast-ignore.md) +* [Feature servers](reference/feature-servers/README.md) + * [Local feature server](reference/feature-servers/local-feature-server.md) +* [\[Alpha\] Data quality 
monitoring](reference/dqm.md) * [\[Alpha\] On demand feature view](reference/alpha-on-demand-feature-view.md) * [\[Alpha\] Stream ingestion](reference/alpha-stream-ingestion.md) -* [\[Alpha\] Local feature server](reference/feature-server.md) * [\[Alpha\] AWS Lambda feature server](reference/alpha-aws-lambda-feature-server.md) * [Feast CLI reference](reference/feast-cli-commands.md) * [Python API reference](http://rtd.feast.dev) diff --git a/docs/advanced/audit-logging.md b/docs/advanced/audit-logging.md deleted file mode 100644 index 1870a687bd4..00000000000 --- a/docs/advanced/audit-logging.md +++ /dev/null @@ -1,132 +0,0 @@ -# Audit Logging - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Introduction - -Feast provides audit logging functionality in order to debug problems and to trace the lineage of events. - -## Audit Log Types - -Audit Logs produced by Feast come in three favors: - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Logs service calls that can be used to track Feast request handling. Currently only gRPC request/response is supported. Enabling Message Audit Logs can be resource intensive and significantly increase latency, as such is not recommended on Online Serving. | -| Transition Audit Log | Logs transitions in status in resources managed by Feast \(ie an Ingestion Job becoming RUNNING\). | -| Action Audit Log | Logs actions performed on a specific resource managed by Feast \(ie an Ingestion Job is aborted\). 
| - -## Configuration - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Enabled when both `feast.logging.audit.enabled` and `feast.logging.audit.messageLogging.enabled` is set to `true` | -| Transition Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | -| Action Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | - -## JSON Format - -Audit Logs produced by Feast are written to the console similar to normal logs but in a structured, machine parsable JSON. Example of a Message Audit Log JSON entry produced: - -```text -{ - "message": { - "logType": "FeastAuditLogEntry", - "kind": "MESSAGE", - "statusCode": "OK", - "request": { - "filter": { - "project": "dummy", - } - }, - "application": "Feast", - "response": {}, - "method": "ListFeatureTables", - "identity": "105960238928959148073", - "service": "CoreService", - "component": "feast-core", - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "version": "0.10.0-SNAPSHOT" - }, - "hostname": "feast.core" - "timestamp": "2020-10-20T04:45:24Z", - "severity": "INFO", -} -``` - -## Log Entry Schema - -Fields common to all Audit Log Types: - -| Field | Description | -| :--- | :--- | -| `logType` | Log Type. Always set to `FeastAuditLogEntry`. Useful for filtering out Feast audit logs. | -| `application` | Application. Always set to `Feast`. | -| `component` | Feast Component producing the Audit Log. Set to `feast-core` for Feast Core and `feast-serving` for Feast Serving. Use to filtering out Audit Logs by component. | -| `version` | Version of Feast producing this Audit Log. Use to filtering out Audit Logs by version. | - -Fields in Message Audit Log Type - -| Field | Description | -| :--- | :--- | -| `id` | Generated UUID that uniquely identifies the service call. | -| `service` | Name of the Service that handled the service call. | -| `method` | Name of the Method that handled the service call. 
Useful for filtering Audit Logs by method \(ie `ApplyFeatureTable` calls\) | -| `request` | Full request submitted by client in the service call as JSON. | -| `response` | Full response returned to client by the service after handling the service call as JSON. | -| `identity` | Identity of the client making the service call as an user Id. Only set when Authentication is enabled. | -| `statusCode` | The status code returned by the service handling the service call \(ie `OK` if service call handled without error\). | - -Fields in Action Audit Log Type - -| Field | Description | -| :--- | :--- | -| `action` | Name of the action taken on the resource. | -| `resource.type` | Type of resource of which the action was taken on \(i.e `FeatureTable`\) | -| resource.id | Identifier specifying the specific resource of which the action was taken on. | - -Fields in Transition Audit Log Type - -| Field | Description | -| :--- | :--- | -| `status` | The new status that the resource transitioned to | -| `resource.type` | Type of resource of which the transition occurred \(i.e `FeatureTable`\) | -| `resource.id` | Identifier specifying the specific resource of which the transition occurred. | - -## Log Forwarder - -Feast currently only supports forwarding Request/Response \(Message Audit Log Type\) logs to an external fluentD service with `feast.**` Fluentd tag. 
- -### Request/Response Log Example - -```text -{ - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "service": "CoreService" - "status_code": "OK", - "identity": "105960238928959148073", - "method": "ListProjects", - "request": {}, - "response": { - "projects": [ - "default", "project1", "project2" - ] - } - "release_name": 506.457.14.512 -} -``` - -### Configuration - -The Fluentd Log Forwarder configured with the with the following configuration options in `application.yml`: - -| Settings | Description | -| :--- | :--- | -| `feast.logging.audit.messageLogging.destination` | `fluentd` | -| `feast.logging.audit.messageLogging.fluentdHost` | `localhost` | -| `feast.logging.audit.messageLogging.fluentdPort` | `24224` | - -When using Fluentd as the Log forwarder, a Feast `release_name` can be logged instead of the IP address \(eg. IP of Kubernetes pod deployment\), by setting an environment variable `RELEASE_NAME` when deploying Feast. - diff --git a/docs/advanced/metrics.md b/docs/advanced/metrics.md deleted file mode 100644 index 5ea69f883f7..00000000000 --- a/docs/advanced/metrics.md +++ /dev/null @@ -1,59 +0,0 @@ -# Metrics - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Overview - -Feast Components export metrics that can provide insight into Feast behavior: - -* [Feast Ingestion Jobs can be configured to push metrics into StatsD](metrics.md#pushing-ingestion-metrics-to-statsd) -* [Prometheus can be configured to scrape metrics from Feast Core and Serving.](metrics.md#exporting-feast-metrics-to-prometheus) - -See the [Metrics Reference ](../reference/metrics-reference.md)for documentation on metrics are exported by Feast. - -{% hint style="info" %} -Feast Job Controller currently does not export any metrics on its own. However its `application.yml` is used to configure metrics export for ingestion jobs. 
-{% endhint %} - -## Pushing Ingestion Metrics to StatsD - -### **Feast Ingestion Job** - -Feast Ingestion Job can be configured to push Ingestion metrics to a StatsD instance. Metrics export to StatsD for Ingestion Job is configured in Job Controller's `application.yml` under `feast.jobs.metrics` - -```yaml - feast: - jobs: - metrics: - # Enables Statd metrics export if true. - enabled: true - type: statsd - # Host and port of the StatsD instance to export to. - host: localhost - port: 9125 -``` - -{% hint style="info" %} -If you need Ingestion Metrics in Prometheus or some other metrics backend, use a metrics forwarder to forward Ingestion Metrics from StatsD to the metrics backend of choice. \(ie Use [`prometheus-statsd-exporter`](https://github.com/prometheus/statsd_exporter) to forward metrics to Prometheus\). -{% endhint %} - -## Exporting Feast Metrics to Prometheus - -### **Feast Core and Serving** - -Feast Core and Serving exports metrics to a Prometheus instance via Prometheus scraping its `/metrics` endpoint. Metrics export to Prometheus for Core and Serving can be configured via their corresponding `application.yml` - -```yaml -server: - # Configures the port where metrics are exposed via /metrics for Prometheus to scrape. - port: 8081 -``` - -[Direct Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) to scrape directly from Core and Serving's `/metrics` endpoint. - -## Further Reading - -See the [Metrics Reference ](../reference/metrics-reference.md)for documentation on metrics are exported by Feast. - diff --git a/docs/advanced/security.md b/docs/advanced/security.md deleted file mode 100644 index 769260074f5..00000000000 --- a/docs/advanced/security.md +++ /dev/null @@ -1,480 +0,0 @@ ---- -description: 'Secure Feast with SSL/TLS, Authentication and Authorization.' ---- - -# Security - -{% hint style="warning" %} -This page applies to Feast 0.7. 
The content may be out of date for Feast 0.8+ -{% endhint %} - -### Overview - -![Overview of Feast's Security Methods.](../.gitbook/assets/untitled-25-1-%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29%20%281%29.jpg) - -Feast supports the following security methods: - -* [SSL/TLS on messaging between Feast Core, Feast Online Serving and Feast SDKs.](security.md#2-ssl-tls) -* [Authentication to Feast Core and Serving based on Open ID Connect ID tokens.](security.md#3-authentication) -* [Authorization based on project membership and delegating authorization grants to external Authorization Server.](security.md#4-authorization) - -[Important considerations when integrating Authentication/Authorization](security.md#5-authentication-and-authorization). - -### **SSL/TLS** - -Feast supports SSL/TLS encrypted inter-service communication among Feast Core, Feast Online Serving, and Feast SDKs. - -#### Configuring SSL/TLS on Feast Core and Feast Serving - -The following properties configure SSL/TLS. These properties are located in their corresponding `application.yml`files: - -| Configuration Property | Description | -| :--- | :--- | -| `grpc.server.security.enabled` | Enables SSL/TLS functionality if `true` | -| `grpc.server.security.certificateChain` | Provide the path to certificate chain. | -| `grpc.server.security.privateKey` | Provide the to private key. 
| - -> Read more on enabling SSL/TLS in the[ gRPC starter docs.](https://yidongnan.github.io/grpc-spring-boot-starter/en/server/security.html#enable-transport-layer-security) - -#### Configuring SSL/TLS on Python SDK/CLI - -To enable SSL/TLS in the [Feast Python SDK](https://api.docs.feast.dev/python/#feast.client.Client) or [Feast CLI](../getting-started/connect-to-feast/feast-cli.md), set the config options via `feast config`: - -| Configuration Option | Description | -| :--- | :--- | -| `core_enable_ssl` | Enables SSL/TLS functionality on connections to Feast core if `true` | -| `serving_enable_ssl` | Enables SSL/TLS functionality on connections to Feast Online Serving if `true` | -| `core_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Core Service's identity. If omitted, uses system certificates. | -| `serving_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Serving Service's identity. If omitted, uses system certificates. | - -{% hint style="info" %} -The Python SDK automatically uses SSL/TLS when connecting to Feast Core and Feast Online Serving via port 443. -{% endhint %} - -#### Configuring SSL/TLS on Go SDK - -Configure SSL/TLS on the [Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) by passing configuration via `SecurityConfig`: - -```go -cli, err := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - EnableTLS: true, - TLSCertPath: "/path/to/cert.pem", -})Option -``` - -| Config Option | Description | -| :--- | :--- | -| `EnableTLS` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `TLSCertPath` | Optional. Provides the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. 
| - -#### Configuring SSL/TLS on **Java** SDK - -Configure SSL/TLS on the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) by passing configuration via `SecurityConfig`: - -```java -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - .setTLSEnabled(true) - .setCertificatePath(Optional.of("/path/to/cert.pem")) - .build()); -``` - -| Config Option | Description | -| :--- | :--- | -| `setTLSEnabled()` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `setCertificatesPath()` | Optional. Set the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -### **Authentication** - -{% hint style="warning" %} -To prevent man in the middle attacks, we recommend that SSL/TLS be implemented prior to authentication. -{% endhint %} - -Authentication can be implemented to identify and validate client requests to Feast Core and Feast Online Serving. Currently, Feast uses[ ](https://auth0.com/docs/protocols/openid-connect-protocol)[Open ID Connect \(OIDC\)](https://auth0.com/docs/protocols/openid-connect-protocol) ID tokens \(i.e. [Google Open ID Connect](https://developers.google.com/identity/protocols/oauth2/openid-connect)\) to authenticate client requests. - -#### Configuring Authentication in Feast Core and Feast Online Serving - -Authentication can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` files: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authentication.enabled` | Enables Authentication functionality if `true` | -| `feast.security.authentication.provider` | Authentication Provider type. Currently only supports `jwt` | -| `feast.security.authentication.option.jwkEndpointURI` | HTTPS URL used by Feast to retrieved the [JWK](https://tools.ietf.org/html/rfc7517) used to verify OIDC ID tokens. 
| - -{% hint style="info" %} -`jwkEndpointURI`is set to retrieve Google's OIDC JWK by default, allowing OIDC ID tokens issued by Google to be used for authentication. -{% endhint %} - -Behind the scenes, Feast Core and Feast Online Serving authenticate by: - -* Extracting the OIDC ID token `TOKEN`from gRPC metadata submitted with request: - -```text -('authorization', 'Bearer: TOKEN') -``` - -* Validates token's authenticity using the JWK retrieved from the `jwkEndpointURI` - -#### **Authenticating Serving with Feast Core** - -Feast Online Serving communicates with Feast Core during normal operation. When both authentication and authorization are enabled on Feast Core, Feast Online Serving is forced to authenticate its requests to Feast Core. Otherwise, Feast Online Serving produces an Authentication failure error when connecting to Feast Core. - - Properties used to configure Serving authentication via `application.yml`: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.core-authentication.enabled` | Requires Feast Online Serving to authenticate when communicating with Feast Core. | -| `feast.core-authentication.provider` | Selects provider Feast Online Serving uses to retrieve credentials then used to authenticate requests to Feast Core. Valid providers are `google` and `oauth`. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically extracts the credential from the credential JSON file. - -* Set [`GOOGLE_APPLICATION_CREDENTIALS` environment variable](https://cloud.google.com/docs/authentication/getting-started#setting_the_environment_variable) to the path of the credential in the JSON file. -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential. 
OAuth requires the following options to be set at `feast.security.core-authentication.options.`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Configuration PropertyDescription
oauth_url - Target URL receiving the client-credentials request.
grant_type - OAuth grant type. Set as client_credentials -
client_id - Client Id used in the client-credentials request.
client_secret - Client secret used in the client-credentials request.
audience - -

Target audience of the credential. Set to host URL of Feast Core.

-

(i.e. https://localhost if Feast Core listens on localhost).

-
jwkEndpointURI - HTTPS URL used to retrieve a JWK that can be used to decode the credential.
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Python SDK/CLI** - -Configure the [Feast Python SDK](https://api.docs.feast.dev/python/) and [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) to use authentication via `feast config`: - -```python -$ feast config set enable_auth true -``` - -| Configuration Option | Description | -| :--- | :--- | -| `enable_auth` | Enables authentication functionality if set to `true`. | -| `auth_provider` | Use an authentication provider to obtain a credential for authentication. Currently supports `google` and `oauth`. | -| `auth_token` | Manually specify a static token for use in authentication. Overrules `auth_provider` if both are set. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically finds and uses Google Credentials to authenticate requests: - -* Google Provider automatically uses established credentials for authenticating requests if you are already authenticated with the `gcloud` CLI via: - -```text -$ gcloud auth application-default login -``` - -* Alternatively Google Provider can be configured to use the credentials in the JSON file via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\): - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests. The OAuth provider requires the following config options to be set via `feast config`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Configuration PropertyDescription
oauth_token_request_url - Target URL receiving the client-credentials request.
oauth_grant_type - OAuth grant type. Set as client_credentials -
oauth_client_id - Client Id used in the client-credentials request.
oauth_client_secret - Client secret used in the client-credentials request.
oauth_audience - -

Target audience of the credential. Set to host URL of target Service.

-

(https://localhost if Service listens on localhost).

-
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Go SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by specifying the credential via `SecurityConfig`: - -```go -// error handling omitted. -// Use Google Credential as provider. -cred, _ := feast.NewGoogleCredential("localhost:6566") -cli, _ := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - // Specify the credential to provide tokens for Feast Authentication. - Credential: cred, -}) -``` - -{% tabs %} -{% tab title="Google Credential" %} -Google Credential uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```go -cred, _ := feast.NewGoogleCredential("localhost:6566") -``` - -> Target audience of the credential should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuth Credential" %} -OAuth Credential makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuth Credential with parameters: - -```go -cred := feast.NewOAuthCredential("localhost:6566", "client_id", "secret", "https://oauth.endpoint/auth") -``` - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescription
audience - -

Target audience of the credential. Set to host URL of target Service.

-

( https://localhost if Service listens on localhost).

-
clientId - Client Id used in the client-credentials request.
clientSecret - Client secret used in the client-credentials request.
endpointURL - Target URL to make the client-credentials request to.
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Java SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by setting credentials via `SecurityConfig`: - -```java -// Use GoogleAuthCredential as provider. -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); - -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - // Specify the credentials to provide tokens for Feast Authentication. - .setCredentials(Optional.of(creds)) - .build()); -``` - -{% tabs %} -{% tab title="GoogleAuthCredentials" %} -GoogleAuthCredentials uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```java -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); -``` - -> Target audience of the credentials should be set to host URL of target Service. 
\(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuthCredentials" %} -OAuthCredentials makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuthCredentials with parameters: - -```java -CallCredentials credentials = new OAuthCredentials(Map.of( - "audience": "localhost:6566", - "grant_type", "client_credentials", - "client_id", "some_id", - "client_id", "secret", - "oauth_url", "https://oauth.endpoint/auth", - "jwkEndpointURI", "https://jwk.endpoint/jwk")); -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescription
audience - -

Target audience of the credential. Set to host URL of target Service.

-

( https://localhost if Service listens on localhost).

-
grant_type - OAuth grant type. Set as client_credentials -
client_id - Client Id used in the client-credentials request.
client_secret - Client secret used in the client-credentials request.
oauth_url - Target URL to make the client-credentials request to obtain credential.
jwkEndpointURI - HTTPS URL used to retrieve a JWK that can be used to decode the credential.
-{% endtab %} -{% endtabs %} - -### Authorization - -{% hint style="info" %} -Authorization requires that authentication be configured to obtain a user identity for use in authorizing requests. -{% endhint %} - -Authorization provides access control to FeatureTables and/or Features based on project membership. Users who are members of a project are authorized to: - -* Create and/or Update a Feature Table in the Project. -* Retrieve Feature Values for Features in that Project. - -#### **Authorization API/Server** - -![Feast Authorization Flow](../.gitbook/assets/rsz_untitled23%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29.jpg) - -Feast delegates Authorization grants to an external Authorization Server that implements the [Authorization Open API specification](https://github.com/feast-dev/feast/blob/master/common/src/main/resources/api.yaml). - -* Feast checks whether a user is authorized to make a request by making a `checkAccessRequest` to the Authorization Server. -* The Authorization Server should return a `AuthorizationResult` with whether the user is allowed to make the request. - -Authorization can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authorization.enabled` | Enables authorization functionality if `true`. | -| `feast.security.authorization.provider` | Authentication Provider type. Currently only supports `http` | -| `feast.security.authorization.option.authorizationUrl` | URL endpoint of Authorization Server to make check access requests to. | -| `feast.security.authorization.option.subjectClaim` | Optional. Name of the claim of the to extract from the ID Token to include in the check access request as Subject. 
| - -{% hint style="info" %} -This example of the [Authorization Server with Keto](https://github.com/feast-dev/feast-keto-auth-server) can be used as a reference implementation for implementing an Authorization Server that Feast supports. -{% endhint %} - -### **Authentication & Authorization** - -When using Authentication & Authorization, consider: - -* Enabling Authentication without Authorization makes authentication **optional**. You can still send unauthenticated requests. -* Enabling Authorization forces all requests to be authenticated. Requests that are not authenticated are **dropped.** - - - diff --git a/docs/advanced/troubleshooting.md b/docs/advanced/troubleshooting.md deleted file mode 100644 index 1060466d300..00000000000 --- a/docs/advanced/troubleshooting.md +++ /dev/null @@ -1,136 +0,0 @@ -# Troubleshooting - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -If at any point in time you cannot resolve a problem, please see the [Community](../community.md) section for reaching out to the Feast community. - -### How can I verify that all services are operational? - -#### Docker Compose - -The containers should be in an `up` state: - -```text -docker ps -``` - -#### Google Kubernetes Engine - -All services should either be in a `RUNNING` state or `COMPLETED`state: - -```text -kubectl get pods -``` - -### How can I verify that I can connect to all services? - -First locate the the host and port of the Feast Services. 
- -#### **Docker Compose \(from inside the docker network\)** - -You will probably need to connect using the hostnames of services and standard Feast ports: - -```bash -export FEAST_CORE_URL=core:6565 -export FEAST_ONLINE_SERVING_URL=online_serving:6566 -export FEAST_HISTORICAL_SERVING_URL=historical_serving:6567 -export FEAST_JOBCONTROLLER_URL=jobcontroller:6570 -``` - -#### **Docker Compose \(from outside the docker network\)** - -You will probably need to connect using `localhost` and standard ports: - -```bash -export FEAST_CORE_URL=localhost:6565 -export FEAST_ONLINE_SERVING_URL=localhost:6566 -export FEAST_HISTORICAL_SERVING_URL=localhost:6567 -export FEAST_JOBCONTROLLER_URL=localhost:6570 -``` - -#### **Google Kubernetes Engine \(GKE\)** - -You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: - -```bash -export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_HISTORICAL_SERVING_URL=${FEAST_IP}:32092 -``` - -`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). - -#### Testing Connectivity From Feast Services: - -Use `grpc_cli` to test connetivity by listing the gRPC methods exposed by Feast services: - -```bash -grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService -``` - -```bash -grpc_cli ls ${FEAST_JOBCONTROLLER_URL} feast.core.JobControllerService -``` - -```bash -grpc_cli ls ${FEAST_HISTORICAL_SERVING_URL} feast.serving.ServingService -``` - -```bash -grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService -``` - -### How can I print logs from the Feast Services? 
- -Feast will typically have three services that you need to monitor if something goes wrong. - -* Feast Core -* Feast Job Controller -* Feast Serving \(Online\) -* Feast Serving \(Batch\) - -In order to print the logs from these services, please run the commands below. - -#### Docker Compose - -Use `docker-compose logs` to obtain Feast component logs: - -```text - docker logs -f feast_core_1 -``` - -```text - docker logs -f feast_jobcontroller_1 -``` - -```text -docker logs -f feast_historical_serving_1 -``` - -```text -docker logs -f feast_online_serving_1 -``` - -#### Google Kubernetes Engine - -Use `kubectl logs` to obtain Feast component logs: - -```text -kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-jobcontroller | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') -``` - diff --git a/docs/advanced/upgrading.md b/docs/advanced/upgrading.md deleted file mode 100644 index 3c7b95d5441..00000000000 --- a/docs/advanced/upgrading.md +++ /dev/null @@ -1,113 +0,0 @@ -# Upgrading Feast - -### Migration from v0.6 to v0.7 - -#### Feast Core Validation changes - -In v0.7, Feast Core no longer accepts starting with number \(0-9\) and using dash in names for: - -* Project -* Feature Set -* Entities -* Features - -Migrate all project, feature sets, entities, feature names: - -* with ‘-’ by recreating them with '-' replace with '\_' -* recreate any names with a number \(0-9\) as the first letter to one without. - -Feast now prevents feature sets from being applied if no store is subscribed to that Feature Set. - -* Ensure that a store is configured to subscribe to the Feature Set before applying the Feature Set. 
- -#### Feast Core's Job Coordinator is now Feast Job Controller - -In v0.7, Feast Core's Job Coordinator has been decoupled from Feast Core and runs as a separate Feast Job Controller application. See its [Configuration reference](../reference/configuration-reference.md#2-feast-core-serving-and-job-controller) for how to configure Feast Job Controller. - -**Ingestion Job API** - -In v0.7, the following changes are made to the Ingestion Job API: - -* Changed List Ingestion Job API to return list of `FeatureSetReference` instead of list of FeatureSet in response. -* Moved `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` calls from `CoreService` to `JobControllerService`. -* Python SDK/CLI: Added new [Job Controller client ](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py)and `jobcontroller_url` config option. - -Users of the Ingestion Job API via gRPC should migrate by: - -* Add new client to connect to Job Controller endpoint to call `JobControllerService` and call `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` from new client. -* Migrate code to accept feature references instead of feature sets returned in `ListIngestionJobs` response. - -Users of Ingestion Job via Python SDK \(ie `feast ingest-jobs list` or `client.stop_ingest_job()` etc.\) should migrate by: - -* `ingest_job()`methods only: Create a new separate [Job Controller client](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py) to connect to the job controller and call `ingest_job()` methods using the new client. -* Configure the Feast Job Controller endpoint url via `jobcontroller_url` config option. 
- -#### Configuration Properties Changes - -* Rename `feast.jobs.consolidate-jobs-per-source property` to `feast.jobs.controller.consolidate-jobs-per-sources` -* Rename`feast.security.authorization.options.subjectClaim` to `feast.security.authentication.options.subjectClaim` -* Rename `feast.logging.audit.messageLoggingEnabled` to `feast.audit.messageLogging.enabled` - -### Migration from v0.5 to v0.6 - -#### Database schema - -In Release 0.6 we introduced [Flyway](https://flywaydb.org/) to handle schema migrations in PostgreSQL. Flyway is integrated into `core` and for now on all migrations will be run automatically on `core` start. It uses table `flyway_schema_history` in the same database \(also created automatically\) to keep track of already applied migrations. So no specific maintenance should be needed. - -If you already have existing deployment of feast 0.5 - Flyway will detect existing tables and omit first baseline migration. - -After `core` started you should have `flyway_schema_history` look like this - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | << Flyway Baseline >> | << Flyway Baseline >> | - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... | 1537500232 -``` - -In this release next major schema changes were done: - -* Source is not shared between FeatureSets anymore. It's changed to 1:1 relation - - and source's primary key is now auto-incremented number. - -* Due to generalization of Source `sources.topics` & `sources.bootstrap_servers` columns were deprecated. - - They will be replaced with `sources.config`. Data migration handled by code when respected Source is used. - - `topics` and `bootstrap_servers` will be deleted in the next release. 
- -* Job \(table `jobs`\) is no longer connected to `Source` \(table `sources`\) since it uses consolidated source for optimization purposes. - - All data required by Job would be embedded in its table. - -New Models \(tables\): - -* feature\_statistics - -Minor changes: - -* FeatureSet has new column version \(see [proto](https://github.com/feast-dev/feast/blob/master/protos/feast/core/FeatureSet.proto) for details\) -* Connecting table `jobs_feature_sets` in many-to-many relation between jobs & feature sets - - has now `version` and `delivery_status`. - -### Migration from v0.4 to v0.6 - -#### Database - -For all versions earlier than 0.5 seamless migration is not feasible due to earlier breaking changes and creation of new database will be required. - -Since database will be empty - first \(baseline\) migration would be applied: - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | Baseline | V1__Baseline.sql | 1091472110 - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... 
| 1537500232 -``` - diff --git a/docs/architecture.md b/docs/architecture.md deleted file mode 100644 index a2dc5cd6a8a..00000000000 --- a/docs/architecture.md +++ /dev/null @@ -1,2 +0,0 @@ -# Architecture - diff --git a/docs/architecture.png b/docs/architecture.png deleted file mode 100644 index 6d56a623605..00000000000 Binary files a/docs/architecture.png and /dev/null differ diff --git a/docs/assets/arch.png b/docs/assets/arch.png deleted file mode 100644 index bc655b60f32..00000000000 Binary files a/docs/assets/arch.png and /dev/null differ diff --git a/docs/assets/feast-components-overview.png b/docs/assets/feast-components-overview.png deleted file mode 100644 index 1f69bb7ed8e..00000000000 Binary files a/docs/assets/feast-components-overview.png and /dev/null differ diff --git a/docs/assets/feast-marchitecture.png b/docs/assets/feast-marchitecture.png new file mode 100644 index 00000000000..0a7b044b098 Binary files /dev/null and b/docs/assets/feast-marchitecture.png differ diff --git a/docs/assets/statistics-sources (1).png b/docs/assets/statistics-sources (1).png deleted file mode 100644 index 02be233968d..00000000000 Binary files a/docs/assets/statistics-sources (1).png and /dev/null differ diff --git a/docs/assets/statistics-sources (2).png b/docs/assets/statistics-sources (2).png deleted file mode 100644 index 02be233968d..00000000000 Binary files a/docs/assets/statistics-sources (2).png and /dev/null differ diff --git a/docs/assets/statistics-sources (3).png b/docs/assets/statistics-sources (3).png deleted file mode 100644 index 02be233968d..00000000000 Binary files a/docs/assets/statistics-sources (3).png and /dev/null differ diff --git a/docs/assets/statistics-sources (4).png b/docs/assets/statistics-sources (4).png deleted file mode 100644 index 02be233968d..00000000000 Binary files a/docs/assets/statistics-sources (4).png and /dev/null differ diff --git a/docs/assets/statistics-sources.png b/docs/assets/statistics-sources.png deleted file mode 
100644 index 02be233968d..00000000000 Binary files a/docs/assets/statistics-sources.png and /dev/null differ diff --git a/docs/build-a-training-dataset.md b/docs/build-a-training-dataset.md deleted file mode 100644 index eff44fdf9c3..00000000000 --- a/docs/build-a-training-dataset.md +++ /dev/null @@ -1,2 +0,0 @@ -# Build a training dataset - diff --git a/docs/create-a-feature-repository.md b/docs/create-a-feature-repository.md deleted file mode 100644 index 5f781f0651d..00000000000 --- a/docs/create-a-feature-repository.md +++ /dev/null @@ -1,2 +0,0 @@ -# Create a feature repository - diff --git a/docs/deploy-a-feature-store.md b/docs/deploy-a-feature-store.md deleted file mode 100644 index 0447b0ffbfe..00000000000 --- a/docs/deploy-a-feature-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Deploy a feature store - diff --git a/docs/docs/.gitbook/assets/basic-architecture-diagram.svg b/docs/docs/.gitbook/assets/basic-architecture-diagram.svg deleted file mode 100644 index b707f490461..00000000000 --- a/docs/docs/.gitbook/assets/basic-architecture-diagram.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg b/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg deleted file mode 100644 index 7f30963ec78..00000000000 --- a/docs/docs/.gitbook/assets/feast-docs-overview-diagram-2.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/entities.md b/docs/entities.md deleted file mode 100644 index dadeac1cac3..00000000000 --- a/docs/entities.md +++ /dev/null @@ -1,2 +0,0 @@ -# Entities - diff --git a/docs/feast-on-kubernetes/advanced-1/README.md b/docs/feast-on-kubernetes/advanced-1/README.md deleted file mode 100644 index 0fb91367c25..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Advanced - diff --git a/docs/feast-on-kubernetes/advanced-1/audit-logging.md b/docs/feast-on-kubernetes/advanced-1/audit-logging.md 
deleted file mode 100644 index 1870a687bd4..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/audit-logging.md +++ /dev/null @@ -1,132 +0,0 @@ -# Audit Logging - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Introduction - -Feast provides audit logging functionality in order to debug problems and to trace the lineage of events. - -## Audit Log Types - -Audit Logs produced by Feast come in three flavors: - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Logs service calls that can be used to track Feast request handling. Currently only gRPC request/response is supported. Enabling Message Audit Logs can be resource intensive and significantly increase latency, as such is not recommended on Online Serving. | -| Transition Audit Log | Logs transitions in status in resources managed by Feast \(ie an Ingestion Job becoming RUNNING\). | -| Action Audit Log | Logs actions performed on a specific resource managed by Feast \(ie an Ingestion Job is aborted\). | - -## Configuration - -| Audit Log Type | Description | -| :--- | :--- | -| Message Audit Log | Enabled when both `feast.logging.audit.enabled` and `feast.logging.audit.messageLogging.enabled` are set to `true` | -| Transition Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | -| Action Audit Log | Enabled when `feast.logging.audit.enabled` is set to `true` | - -## JSON Format - -Audit Logs produced by Feast are written to the console similar to normal logs but in a structured, machine parsable JSON. 
Example of a Message Audit Log JSON entry produced: - -```text -{ - "message": { - "logType": "FeastAuditLogEntry", - "kind": "MESSAGE", - "statusCode": "OK", - "request": { - "filter": { - "project": "dummy" - } - }, - "application": "Feast", - "response": {}, - "method": "ListFeatureTables", - "identity": "105960238928959148073", - "service": "CoreService", - "component": "feast-core", - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "version": "0.10.0-SNAPSHOT" - }, - "hostname": "feast.core", - "timestamp": "2020-10-20T04:45:24Z", - "severity": "INFO" -} -``` - -## Log Entry Schema - -Fields common to all Audit Log Types: - -| Field | Description | -| :--- | :--- | -| `logType` | Log Type. Always set to `FeastAuditLogEntry`. Useful for filtering out Feast audit logs. | -| `application` | Application. Always set to `Feast`. | -| `component` | Feast Component producing the Audit Log. Set to `feast-core` for Feast Core and `feast-serving` for Feast Serving. Use to filter out Audit Logs by component. | -| `version` | Version of Feast producing this Audit Log. Use to filter out Audit Logs by version. | - -Fields in Message Audit Log Type - -| Field | Description | -| :--- | :--- | -| `id` | Generated UUID that uniquely identifies the service call. | -| `service` | Name of the Service that handled the service call. | -| `method` | Name of the Method that handled the service call. Useful for filtering Audit Logs by method \(ie `ApplyFeatureTable` calls\) | -| `request` | Full request submitted by client in the service call as JSON. | -| `response` | Full response returned to client by the service after handling the service call as JSON. | -| `identity` | Identity of the client making the service call as a user Id. Only set when Authentication is enabled. | -| `statusCode` | The status code returned by the service handling the service call \(ie `OK` if service call handled without error\). 
| - -Fields in Action Audit Log Type - -| Field | Description | -| :--- | :--- | -| `action` | Name of the action taken on the resource. | -| `resource.type` | Type of resource of which the action was taken on \(i.e `FeatureTable`\) | -| `resource.id` | Identifier specifying the specific resource of which the action was taken on. | - -Fields in Transition Audit Log Type - -| Field | Description | -| :--- | :--- | -| `status` | The new status that the resource transitioned to | -| `resource.type` | Type of resource of which the transition occurred \(i.e `FeatureTable`\) | -| `resource.id` | Identifier specifying the specific resource of which the transition occurred. | - -## Log Forwarder - -Feast currently only supports forwarding Request/Response \(Message Audit Log Type\) logs to an external fluentD service with `feast.**` Fluentd tag. - -### Request/Response Log Example - -```text -{ - "id": "45329ea9-0d48-46c5-b659-4604f6193711", - "service": "CoreService", - "status_code": "OK", - "identity": "105960238928959148073", - "method": "ListProjects", - "request": {}, - "response": { - "projects": [ - "default", "project1", "project2" - ] - }, - "release_name": "506.457.14.512" -} -``` - -### Configuration - -The Fluentd Log Forwarder is configured with the following configuration options in `application.yml`: - -| Settings | Description | -| :--- | :--- | -| `feast.logging.audit.messageLogging.destination` | `fluentd` | -| `feast.logging.audit.messageLogging.fluentdHost` | `localhost` | -| `feast.logging.audit.messageLogging.fluentdPort` | `24224` | - -When using Fluentd as the Log forwarder, a Feast `release_name` can be logged instead of the IP address \(eg. IP of Kubernetes pod deployment\), by setting an environment variable `RELEASE_NAME` when deploying Feast. 
- diff --git a/docs/feast-on-kubernetes/advanced-1/metrics.md b/docs/feast-on-kubernetes/advanced-1/metrics.md deleted file mode 100644 index 43f7b973b67..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/metrics.md +++ /dev/null @@ -1,59 +0,0 @@ -# Metrics - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -## Overview - -Feast Components export metrics that can provide insight into Feast behavior: - -* [Feast Ingestion Jobs can be configured to push metrics into StatsD](metrics.md#pushing-ingestion-metrics-to-statsd) -* [Prometheus can be configured to scrape metrics from Feast Core and Serving.](metrics.md#exporting-feast-metrics-to-prometheus) - -See the [Metrics Reference ](../reference-1/metrics-reference.md)for documentation on metrics are exported by Feast. - -{% hint style="info" %} -Feast Job Controller currently does not export any metrics on its own. However its `application.yml` is used to configure metrics export for ingestion jobs. -{% endhint %} - -## Pushing Ingestion Metrics to StatsD - -### **Feast Ingestion Job** - -Feast Ingestion Job can be configured to push Ingestion metrics to a StatsD instance. Metrics export to StatsD for Ingestion Job is configured in Job Controller's `application.yml` under `feast.jobs.metrics` - -```yaml - feast: - jobs: - metrics: - # Enables Statd metrics export if true. - enabled: true - type: statsd - # Host and port of the StatsD instance to export to. - host: localhost - port: 9125 -``` - -{% hint style="info" %} -If you need Ingestion Metrics in Prometheus or some other metrics backend, use a metrics forwarder to forward Ingestion Metrics from StatsD to the metrics backend of choice. \(ie Use [`prometheus-statsd-exporter`](https://github.com/prometheus/statsd_exporter) to forward metrics to Prometheus\). 
-{% endhint %} - -## Exporting Feast Metrics to Prometheus - -### **Feast Core and Serving** - -Feast Core and Serving exports metrics to a Prometheus instance via Prometheus scraping its `/metrics` endpoint. Metrics export to Prometheus for Core and Serving can be configured via their corresponding `application.yml` - -```yaml -server: - # Configures the port where metrics are exposed via /metrics for Prometheus to scrape. - port: 8081 -``` - -[Direct Prometheus](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config) to scrape directly from Core and Serving's `/metrics` endpoint. - -## Further Reading - -See the [Metrics Reference ](../reference-1/metrics-reference.md)for documentation on metrics are exported by Feast. - diff --git a/docs/feast-on-kubernetes/advanced-1/security.md b/docs/feast-on-kubernetes/advanced-1/security.md deleted file mode 100644 index b6e42afd73e..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/security.md +++ /dev/null @@ -1,480 +0,0 @@ ---- -description: 'Secure Feast with SSL/TLS, Authentication and Authorization.' ---- - -# Security - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -### Overview - -![Overview of Feast's Security Methods.](../../.gitbook/assets/untitled-25-1-%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29%20%284%29.jpg) - -Feast supports the following security methods: - -* [SSL/TLS on messaging between Feast Core, Feast Online Serving and Feast SDKs.](security.md#2-ssl-tls) -* [Authentication to Feast Core and Serving based on Open ID Connect ID tokens.](security.md#3-authentication) -* [Authorization based on project membership and delegating authorization grants to external Authorization Server.](security.md#4-authorization) - -[Important considerations when integrating Authentication/Authorization](security.md#5-authentication-and-authorization). 
- -### **SSL/TLS** - -Feast supports SSL/TLS encrypted inter-service communication among Feast Core, Feast Online Serving, and Feast SDKs. - -#### Configuring SSL/TLS on Feast Core and Feast Serving - -The following properties configure SSL/TLS. These properties are located in their corresponding `application.yml`files: - -| Configuration Property | Description | -| :--- | :--- | -| `grpc.server.security.enabled` | Enables SSL/TLS functionality if `true` | -| `grpc.server.security.certificateChain` | Provide the path to certificate chain. | -| `grpc.server.security.privateKey` | Provide the to private key. | - -> Read more on enabling SSL/TLS in the[ gRPC starter docs.](https://yidongnan.github.io/grpc-spring-boot-starter/en/server/security.html#enable-transport-layer-security) - -#### Configuring SSL/TLS on Python SDK/CLI - -To enable SSL/TLS in the [Feast Python SDK](https://api.docs.feast.dev/python/#feast.client.Client) or [Feast CLI](../getting-started/connect-to-feast/feast-cli.md), set the config options via `feast config`: - -| Configuration Option | Description | -| :--- | :--- | -| `core_enable_ssl` | Enables SSL/TLS functionality on connections to Feast core if `true` | -| `serving_enable_ssl` | Enables SSL/TLS functionality on connections to Feast Online Serving if `true` | -| `core_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Core Service's identity. If omitted, uses system certificates. | -| `serving_server_ssl_cert` | Optional. Specifies the path of the root certificate used to verify Serving Service's identity. If omitted, uses system certificates. | - -{% hint style="info" %} -The Python SDK automatically uses SSL/TLS when connecting to Feast Core and Feast Online Serving via port 443. 
-{% endhint %} - -#### Configuring SSL/TLS on Go SDK - -Configure SSL/TLS on the [Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) by passing configuration via `SecurityConfig`: - -```go -cli, err := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - EnableTLS: true, - TLSCertPath: "/path/to/cert.pem", -})Option -``` - -| Config Option | Description | -| :--- | :--- | -| `EnableTLS` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `TLSCertPath` | Optional. Provides the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -#### Configuring SSL/TLS on **Java** SDK - -Configure SSL/TLS on the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) by passing configuration via `SecurityConfig`: - -```java -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - .setTLSEnabled(true) - .setCertificatePath(Optional.of("/path/to/cert.pem")) - .build()); -``` - -| Config Option | Description | -| :--- | :--- | -| `setTLSEnabled()` | Enables SSL/TLS functionality when connecting to Feast if `true` | -| `setCertificatesPath()` | Optional. Set the path of the root certificate used to verify Feast Service's identity. If omitted, uses system certificates. | - -### **Authentication** - -{% hint style="warning" %} -To prevent man in the middle attacks, we recommend that SSL/TLS be implemented prior to authentication. -{% endhint %} - -Authentication can be implemented to identify and validate client requests to Feast Core and Feast Online Serving. Currently, Feast uses[ ](https://auth0.com/docs/protocols/openid-connect-protocol)[Open ID Connect \(OIDC\)](https://auth0.com/docs/protocols/openid-connect-protocol) ID tokens \(i.e. [Google Open ID Connect](https://developers.google.com/identity/protocols/oauth2/openid-connect)\) to authenticate client requests. 
- -#### Configuring Authentication in Feast Core and Feast Online Serving - -Authentication can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` files: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authentication.enabled` | Enables Authentication functionality if `true` | -| `feast.security.authentication.provider` | Authentication Provider type. Currently only supports `jwt` | -| `feast.security.authentication.option.jwkEndpointURI` | HTTPS URL used by Feast to retrieved the [JWK](https://tools.ietf.org/html/rfc7517) used to verify OIDC ID tokens. | - -{% hint style="info" %} -`jwkEndpointURI`is set to retrieve Google's OIDC JWK by default, allowing OIDC ID tokens issued by Google to be used for authentication. -{% endhint %} - -Behind the scenes, Feast Core and Feast Online Serving authenticate by: - -* Extracting the OIDC ID token `TOKEN`from gRPC metadata submitted with request: - -```text -('authorization', 'Bearer: TOKEN') -``` - -* Validates token's authenticity using the JWK retrieved from the `jwkEndpointURI` - -#### **Authenticating Serving with Feast Core** - -Feast Online Serving communicates with Feast Core during normal operation. When both authentication and authorization are enabled on Feast Core, Feast Online Serving is forced to authenticate its requests to Feast Core. Otherwise, Feast Online Serving produces an Authentication failure error when connecting to Feast Core. - - Properties used to configure Serving authentication via `application.yml`: - -| Configuration Property | Description | -| :--- | :--- | -| `feast.core-authentication.enabled` | Requires Feast Online Serving to authenticate when communicating with Feast Core. | -| `feast.core-authentication.provider` | Selects provider Feast Online Serving uses to retrieve credentials then used to authenticate requests to Feast Core. Valid providers are `google` and `oauth`. 
| - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically extracts the credential from the credential JSON file. - -* Set [`GOOGLE_APPLICATION_CREDENTIALS` environment variable](https://cloud.google.com/docs/authentication/getting-started#setting_the_environment_variable) to the path of the credential in the JSON file. -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential. OAuth requires the following options to be set at `feast.security.core-authentication.options.`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Configuration PropertyDescription
oauth_url - Target URL receiving the client-credentials request.
grant_type - OAuth grant type. Set as client_credentials -
client_id - Client Id used in the client-credentials request.
client_secret - Client secret used in the client-credentials request.
audience - -

Target audience of the credential. Set to host URL of Feast Core.

-

(i.e. https://localhost if Feast Core listens on localhost).

-
jwkEndpointURI - HTTPS URL used to retrieve a JWK that can be used to decode the credential.
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Python SDK/CLI** - -Configure the [Feast Python SDK](https://api.docs.feast.dev/python/) and [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) to use authentication via `feast config`: - -```python -$ feast config set enable_auth true -``` - -| Configuration Option | Description | -| :--- | :--- | -| `enable_auth` | Enables authentication functionality if set to `true`. | -| `auth_provider` | Use an authentication provider to obtain a credential for authentication. Currently supports `google` and `oauth`. | -| `auth_token` | Manually specify a static token for use in authentication. Overrules `auth_provider` if both are set. | - -{% tabs %} -{% tab title="Google Provider" %} -Google Provider automatically finds and uses Google Credentials to authenticate requests: - -* Google Provider automatically uses established credentials for authenticating requests if you are already authenticated with the `gcloud` CLI via: - -```text -$ gcloud auth application-default login -``` - -* Alternatively Google Provider can be configured to use the credentials in the JSON file via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\): - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` -{% endtab %} - -{% tab title="OAuth Provider" %} -OAuth Provider makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests. The OAuth provider requires the following config options to be set via `feast config`: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Configuration PropertyDescription
oauth_token_request_url - Target URL receiving the client-credentials request.
oauth_grant_type - OAuth grant type. Set as client_credentials -
oauth_client_id - Client Id used in the client-credentials request.
oauth_client_secret - Client secret used in the client-credentials request.
oauth_audience - -

Target audience of the credential. Set to host URL of target Service.

-

(https://localhost if Service listens on localhost).

-
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Go SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by specifying the credential via `SecurityConfig`: - -```go -// error handling omitted. -// Use Google Credential as provider. -cred, _ := feast.NewGoogleCredential("localhost:6566") -cli, _ := feast.NewSecureGrpcClient("localhost", 6566, feast.SecurityConfig{ - // Specify the credential to provide tokens for Feast Authentication. - Credential: cred, -}) -``` - -{% tabs %} -{% tab title="Google Credential" %} -Google Credential uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud Authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```go -cred, _ := feast.NewGoogleCredential("localhost:6566") -``` - -> Target audience of the credential should be set to host URL of target Service. \(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuth Credential" %} -OAuth Credential makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuth Credential with parameters: - -```go -cred := feast.NewOAuthCredential("localhost:6566", "client_id", "secret", "https://oauth.endpoint/auth") -``` - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescription
audience - -

Target audience of the credential. Set to host URL of target Service.

-

( https://localhost if Service listens on localhost).

-
clientId - Client Id used in the client-credentials request.
clientSecret - Client secret used in the client-credentials request.
endpointURL - Target URL to make the client-credentials request to.
-{% endtab %} -{% endtabs %} - -#### **Enabling Authentication in Java SDK** - -Configure the [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to use authentication by setting credentials via `SecurityConfig`: - -```java -// Use GoogleAuthCredential as provider. -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); - -FeastClient client = FeastClient.createSecure("localhost", 6566, - SecurityConfig.newBuilder() - // Specify the credentials to provide tokens for Feast Authentication. - .setCredentials(Optional.of(creds)) - .build()); -``` - -{% tabs %} -{% tab title="GoogleAuthCredentials" %} -GoogleAuthCredentials uses Service Account credentials JSON file set via`GOOGLE_APPLICATION_CREDENTIALS` environmental variable \([Google Cloud authentication documentation](https://cloud.google.com/docs/authentication/getting-started)\) to obtain tokens for Authenticating Feast requests: - -* Exporting `GOOGLE_APPLICATION_CREDENTIALS` - -```bash -$ export GOOGLE_APPLICATION_CREDENTIALS="path/to/key.json" -``` - -* Create a Google Credential with target audience. - -```java -CallCredentials credentials = new GoogleAuthCredentials( - Map.of("audience", "localhost:6566")); -``` - -> Target audience of the credentials should be set to host URL of target Service. 
\(ie `https://localhost` if Service listens on `localhost`\): -{% endtab %} - -{% tab title="OAuthCredentials" %} -OAuthCredentials makes an OAuth [client credentials](https://auth0.com/docs/flows/call-your-api-using-the-client-credentials-flow) request to obtain the credential/token used to authenticate Feast requests: - -* Create OAuthCredentials with parameters: - -```java -CallCredentials credentials = new OAuthCredentials(Map.of( - "audience": "localhost:6566", - "grant_type", "client_credentials", - "client_id", "some_id", - "client_id", "secret", - "oauth_url", "https://oauth.endpoint/auth", - "jwkEndpointURI", "https://jwk.endpoint/jwk")); -``` - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ParameterDescription
audience - -

Target audience of the credential. Set to host URL of target Service.

-

( https://localhost if Service listens on localhost).

-
grant_type - OAuth grant type. Set as client_credentials -
client_id - Client Id used in the client-credentials request.
client_secret - Client secret used in the client-credentials request.
oauth_url - Target URL to make the client-credentials request to obtain credential.
jwkEndpointURI - HTTPS URL used to retrieve a JWK that can be used to decode the credential.
-{% endtab %} -{% endtabs %} - -### Authorization - -{% hint style="info" %} -Authorization requires that authentication be configured to obtain a user identity for use in authorizing requests. -{% endhint %} - -Authorization provides access control to FeatureTables and/or Features based on project membership. Users who are members of a project are authorized to: - -* Create and/or Update a Feature Table in the Project. -* Retrieve Feature Values for Features in that Project. - -#### **Authorization API/Server** - -![Feast Authorization Flow](../../.gitbook/assets/rsz_untitled23%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29.jpg) - -Feast delegates Authorization grants to an external Authorization Server that implements the [Authorization Open API specification](https://github.com/feast-dev/feast/blob/master/common/src/main/resources/api.yaml). - -* Feast checks whether a user is authorized to make a request by making a `checkAccessRequest` to the Authorization Server. -* The Authorization Server should return a `AuthorizationResult` with whether the user is allowed to make the request. - -Authorization can be configured for Feast Core and Feast Online Serving via properties in their corresponding `application.yml` - -| Configuration Property | Description | -| :--- | :--- | -| `feast.security.authorization.enabled` | Enables authorization functionality if `true`. | -| `feast.security.authorization.provider` | Authentication Provider type. Currently only supports `http` | -| `feast.security.authorization.option.authorizationUrl` | URL endpoint of Authorization Server to make check access requests to. | -| `feast.security.authorization.option.subjectClaim` | Optional. Name of the claim of the to extract from the ID Token to include in the check access request as Subject. 
| - -{% hint style="info" %} -This example of the [Authorization Server with Keto](https://github.com/feast-dev/feast-keto-auth-server) can be used as a reference implementation for implementing an Authorization Server that Feast supports. -{% endhint %} - -### **Authentication & Authorization** - -When using Authentication & Authorization, consider: - -* Enabling Authentication without Authorization makes authentication **optional**. You can still send unauthenticated requests. -* Enabling Authorization forces all requests to be authenticated. Requests that are not authenticated are **dropped.** - - - diff --git a/docs/feast-on-kubernetes/advanced-1/troubleshooting.md b/docs/feast-on-kubernetes/advanced-1/troubleshooting.md deleted file mode 100644 index 7b0224abe31..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/troubleshooting.md +++ /dev/null @@ -1,136 +0,0 @@ -# Troubleshooting - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -If at any point in time you cannot resolve a problem, please see the [Community](../../community.md) section for reaching out to the Feast community. - -### How can I verify that all services are operational? - -#### Docker Compose - -The containers should be in an `up` state: - -```text -docker ps -``` - -#### Google Kubernetes Engine - -All services should either be in a `RUNNING` state or `COMPLETED`state: - -```text -kubectl get pods -``` - -### How can I verify that I can connect to all services? - -First locate the the host and port of the Feast Services. 
- -#### **Docker Compose \(from inside the docker network\)** - -You will probably need to connect using the hostnames of services and standard Feast ports: - -```bash -export FEAST_CORE_URL=core:6565 -export FEAST_ONLINE_SERVING_URL=online_serving:6566 -export FEAST_HISTORICAL_SERVING_URL=historical_serving:6567 -export FEAST_JOBCONTROLLER_URL=jobcontroller:6570 -``` - -#### **Docker Compose \(from outside the docker network\)** - -You will probably need to connect using `localhost` and standard ports: - -```bash -export FEAST_CORE_URL=localhost:6565 -export FEAST_ONLINE_SERVING_URL=localhost:6566 -export FEAST_HISTORICAL_SERVING_URL=localhost:6567 -export FEAST_JOBCONTROLLER_URL=localhost:6570 -``` - -#### **Google Kubernetes Engine \(GKE\)** - -You will need to find the external IP of one of the nodes as well as the NodePorts. Please make sure that your firewall is open for these ports: - -```bash -export FEAST_IP=$(kubectl describe nodes | grep ExternalIP | awk '{print $2}' | head -n 1) -export FEAST_CORE_URL=${FEAST_IP}:32090 -export FEAST_ONLINE_SERVING_URL=${FEAST_IP}:32091 -export FEAST_HISTORICAL_SERVING_URL=${FEAST_IP}:32092 -``` - -`netcat`, `telnet`, or even `curl` can be used to test whether all services are available and ports are open, but `grpc_cli` is the most powerful. It can be installed from [here](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md). - -#### Testing Connectivity From Feast Services: - -Use `grpc_cli` to test connetivity by listing the gRPC methods exposed by Feast services: - -```bash -grpc_cli ls ${FEAST_CORE_URL} feast.core.CoreService -``` - -```bash -grpc_cli ls ${FEAST_JOBCONTROLLER_URL} feast.core.JobControllerService -``` - -```bash -grpc_cli ls ${FEAST_HISTORICAL_SERVING_URL} feast.serving.ServingService -``` - -```bash -grpc_cli ls ${FEAST_ONLINE_SERVING_URL} feast.serving.ServingService -``` - -### How can I print logs from the Feast Services? 
- -Feast will typically have three services that you need to monitor if something goes wrong. - -* Feast Core -* Feast Job Controller -* Feast Serving \(Online\) -* Feast Serving \(Batch\) - -In order to print the logs from these services, please run the commands below. - -#### Docker Compose - -Use `docker-compose logs` to obtain Feast component logs: - -```text - docker logs -f feast_core_1 -``` - -```text - docker logs -f feast_jobcontroller_1 -``` - -```text -docker logs -f feast_historical_serving_1 -``` - -```text -docker logs -f feast_online_serving_1 -``` - -#### Google Kubernetes Engine - -Use `kubectl logs` to obtain Feast component logs: - -```text -kubectl logs $(kubectl get pods | grep feast-core | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-jobcontroller | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-batch | awk '{print $1}') -``` - -```text -kubectl logs $(kubectl get pods | grep feast-serving-online | awk '{print $1}') -``` - diff --git a/docs/feast-on-kubernetes/advanced-1/upgrading.md b/docs/feast-on-kubernetes/advanced-1/upgrading.md deleted file mode 100644 index 7e61d3518b1..00000000000 --- a/docs/feast-on-kubernetes/advanced-1/upgrading.md +++ /dev/null @@ -1,113 +0,0 @@ -# Upgrading Feast - -### Migration from v0.6 to v0.7 - -#### Feast Core Validation changes - -In v0.7, Feast Core no longer accepts starting with number \(0-9\) and using dash in names for: - -* Project -* Feature Set -* Entities -* Features - -Migrate all project, feature sets, entities, feature names: - -* with ‘-’ by recreating them with '-' replace with '\_' -* recreate any names with a number \(0-9\) as the first letter to one without. - -Feast now prevents feature sets from being applied if no store is subscribed to that Feature Set. - -* Ensure that a store is configured to subscribe to the Feature Set before applying the Feature Set. 
- -#### Feast Core's Job Coordinator is now Feast Job Controller - -In v0.7, Feast Core's Job Coordinator has been decoupled from Feast Core and runs as a separate Feast Job Controller application. See its [Configuration reference](../reference-1/configuration-reference.md#2-feast-core-serving-and-job-controller) for how to configure Feast Job Controller. - -**Ingestion Job API** - -In v0.7, the following changes are made to the Ingestion Job API: - -* Changed List Ingestion Job API to return list of `FeatureSetReference` instead of list of FeatureSet in response. -* Moved `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` calls from `CoreService` to `JobControllerService`. -* Python SDK/CLI: Added new [Job Controller client ](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py)and `jobcontroller_url` config option. - -Users of the Ingestion Job API via gRPC should migrate by: - -* Add new client to connect to Job Controller endpoint to call `JobControllerService` and call `ListIngestionJobs`, `StopIngestionJob`, `RestartIngestionJob` from new client. -* Migrate code to accept feature references instead of feature sets returned in `ListIngestionJobs` response. - -Users of Ingestion Job via Python SDK \(ie `feast ingest-jobs list` or `client.stop_ingest_job()` etc.\) should migrate by: - -* `ingest_job()`methods only: Create a new separate [Job Controller client](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/contrib/job_controller/client.py) to connect to the job controller and call `ingest_job()` methods using the new client. -* Configure the Feast Job Controller endpoint url via `jobcontroller_url` config option. 
- -#### Configuration Properties Changes - -* Rename `feast.jobs.consolidate-jobs-per-source property` to `feast.jobs.controller.consolidate-jobs-per-sources` -* Rename`feast.security.authorization.options.subjectClaim` to `feast.security.authentication.options.subjectClaim` -* Rename `feast.logging.audit.messageLoggingEnabled` to `feast.audit.messageLogging.enabled` - -### Migration from v0.5 to v0.6 - -#### Database schema - -In Release 0.6 we introduced [Flyway](https://flywaydb.org/) to handle schema migrations in PostgreSQL. Flyway is integrated into `core` and for now on all migrations will be run automatically on `core` start. It uses table `flyway_schema_history` in the same database \(also created automatically\) to keep track of already applied migrations. So no specific maintenance should be needed. - -If you already have existing deployment of feast 0.5 - Flyway will detect existing tables and omit first baseline migration. - -After `core` started you should have `flyway_schema_history` look like this - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | << Flyway Baseline >> | << Flyway Baseline >> | - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... | 1537500232 -``` - -In this release next major schema changes were done: - -* Source is not shared between FeatureSets anymore. It's changed to 1:1 relation - - and source's primary key is now auto-incremented number. - -* Due to generalization of Source `sources.topics` & `sources.bootstrap_servers` columns were deprecated. - - They will be replaced with `sources.config`. Data migration handled by code when respected Source is used. - - `topics` and `bootstrap_servers` will be deleted in the next release. 
- -* Job \(table `jobs`\) is no longer connected to `Source` \(table `sources`\) since it uses consolidated source for optimization purposes. - - All data required by Job would be embedded in its table. - -New Models \(tables\): - -* feature\_statistics - -Minor changes: - -* FeatureSet has new column version \(see [proto](https://github.com/feast-dev/feast/blob/master/protos/feast/core/FeatureSet.proto) for details\) -* Connecting table `jobs_feature_sets` in many-to-many relation between jobs & feature sets - - has now `version` and `delivery_status`. - -### Migration from v0.4 to v0.6 - -#### Database - -For all versions earlier than 0.5 seamless migration is not feasible due to earlier breaking changes and creation of new database will be required. - -Since database will be empty - first \(baseline\) migration would be applied: - -```text ->> select version, description, script, checksum from flyway_schema_history - -version | description | script | checksum ---------+-----------------------------------------+-----------------------------------------+------------ - 1 | Baseline | V1__Baseline.sql | 1091472110 - 2 | RELEASE 0.6 Generalizing Source AND ... | V2__RELEASE_0.6_Generalizing_Source_... | 1537500232 -``` - diff --git a/docs/feast-on-kubernetes/concepts/README.md b/docs/feast-on-kubernetes/concepts/README.md deleted file mode 100644 index e834417d3fa..00000000000 --- a/docs/feast-on-kubernetes/concepts/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Concepts - diff --git a/docs/feast-on-kubernetes/concepts/architecture.md b/docs/feast-on-kubernetes/concepts/architecture.md deleted file mode 100644 index f4cf23eb956..00000000000 --- a/docs/feast-on-kubernetes/concepts/architecture.md +++ /dev/null @@ -1,51 +0,0 @@ -# Architecture - -![](../../.gitbook/assets/image%20%286%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%283%29%20%282%29%20%281%29%20%282%29.png) - -## Sequence description - -1. 
**Log Raw Events:** Production backend applications are configured to emit internal state changes as events to a stream. -2. **Create Stream Features:** Stream processing systems like Flink, Spark, and Beam are used to transform and refine events and to produce features that are logged back to the stream. -3. **Log Streaming Features:** Both raw and refined events are logged into a data lake or batch storage location. -4. **Create Batch Features:** ELT/ETL systems like Spark and SQL are used to transform data in the batch store. -5. **Define and Ingest Features:** The Feast user defines [feature tables](feature-tables.md) based on the features available in batch and streaming sources and publish these definitions to Feast Core. -6. **Poll Feature Definitions:** The Feast Job Service polls for new or changed feature definitions. -7. **Start Ingestion Jobs:** Every new feature table definition results in a new ingestion job being provisioned \(see limitations\). -8. **Batch Ingestion:** Batch ingestion jobs are short-lived jobs that load data from batch sources into either an offline or online store \(see limitations\). -9. **Stream Ingestion:** Streaming ingestion jobs are long-lived jobs that load data from stream sources into online stores. A stream source and batch source on a feature table must have the same features/fields. -10. **Model Training:** A model training pipeline is launched. It uses the Feast Python SDK to retrieve a training dataset and trains a model. -11. **Get Historical Features:** Feast exports a point-in-time correct training dataset based on the list of features and entity DataFrame provided by the model training pipeline. -12. **Deploy Model:** The trained model binary \(and list of features\) are deployed into a model serving system. -13. **Get Prediction:** A backend system makes a request for a prediction from the model serving service. -14. 
**Retrieve Online Features:** The model serving service makes a request to the Feast Online Serving service for online features using a Feast SDK. -15. **Return Prediction:** The model serving service makes a prediction using the returned features and returns the outcome. - -{% hint style="warning" %} -Limitations - -* Only Redis is supported for online storage. -* Batch ingestion jobs must be triggered from your own scheduler like Airflow. Streaming ingestion jobs are automatically launched by the Feast Job Service. -{% endhint %} - -## Components: - -A complete Feast deployment contains the following components: - -* **Feast Core:** Acts as the central registry for feature and entity definitions in Feast. -* **Feast Job Service:** Manages data processing jobs that load data from sources into stores, and jobs that export training datasets. -* **Feast Serving:** Provides low-latency access to feature values in an online store. -* **Feast Python SDK CLI:** The primary user facing SDK. Used to: - * Manage feature definitions with Feast Core. - * Launch jobs through the Feast Job Service. - * Retrieve training datasets. - * Retrieve online features. -* **Online Store:** The online store is a database that stores only the latest feature values for each entity. The online store can be populated by either batch ingestion jobs \(in the case the user has no streaming source\), or can be populated by a streaming ingestion job from a streaming source. Feast Online Serving looks up feature values from the online store. -* **Offline Store:** The offline store persists batch data that has been ingested into Feast. This data is used for producing training datasets. -* **Feast Spark SDK:** A Spark specific Feast SDK. Allows teams to use Spark for loading features into an online store and for building training datasets over offline sources. 
- -Please see the [configuration reference](../reference-1/configuration-reference.md#overview) for more details on configuring these components. - -{% hint style="info" %} -Java and Go Clients are also available for online feature retrieval. See [API Reference](../reference-1/api/). -{% endhint %} - diff --git a/docs/feast-on-kubernetes/concepts/entities.md b/docs/feast-on-kubernetes/concepts/entities.md deleted file mode 100644 index e8134cf1425..00000000000 --- a/docs/feast-on-kubernetes/concepts/entities.md +++ /dev/null @@ -1,64 +0,0 @@ -# Entities - -## Overview - -An entity is any domain object that can be modeled and about which information can be stored. Entities are usually recognizable concepts, either concrete or abstract, such as persons, places, things, or events. - -Examples of entities in the context of ride-hailing and food delivery: `customer`, `order`, `driver`, `restaurant`, `dish`, `area`. - -Entities are important in the context of feature stores since features are always properties of a specific entity. For example, we could have a feature `total_trips_24h` for driver `D011234` with a feature value of `11`. - -Feast uses entities in the following way: - -* Entities serve as the keys used to look up features for producing training datasets and online feature values. -* Entities serve as a natural grouping of features in a feature table. A feature table must belong to an entity \(which could be a composite entity\) - -## Structure of an Entity - -When creating an entity specification, consider the following fields: - -* **Name**: Name of the entity -* **Description**: Description of the entity -* **Value Type**: Value type of the entity. Feast will attempt to coerce entity columns in your data sources into this type. 
-* **Labels**: Labels are maps that allow users to attach their own metadata to entities - -A valid entity specification is shown below: - -```python -customer = Entity( - name="customer_id", - description="Customer id for ride customer", - value_type=ValueType.INT64, - labels={} -) -``` - -## Working with an Entity - -### Creating an Entity: - -```python -# Create a customer entity -customer_entity = Entity(name="customer_id", description="ID of car customer") -client.apply(customer_entity) -``` - -### Updating an Entity: - -```python -# Update a customer entity -customer_entity = client.get_entity("customer_id") -customer_entity.description = "ID of bike customer" -client.apply(customer_entity) -``` - -Permitted changes include: - -* The entity's description and labels - -The following changes are not permitted: - -* Project -* Name of an entity -* Type - diff --git a/docs/feast-on-kubernetes/concepts/feature-tables.md b/docs/feast-on-kubernetes/concepts/feature-tables.md deleted file mode 100644 index 5b5c0efc56d..00000000000 --- a/docs/feast-on-kubernetes/concepts/feature-tables.md +++ /dev/null @@ -1,122 +0,0 @@ -# Feature Tables - -## Overview - -Feature tables are both a schema and a logical means of grouping features, data [sources](sources.md), and other related metadata. - -Feature tables serve the following purposes: - -* Feature tables are a means for defining the location and properties of data [sources](sources.md). -* Feature tables are used to create within Feast a database-level structure for the storage of feature values. -* The data sources described within feature tables allow Feast to find and ingest feature data into stores within Feast. -* Feature tables ensure data is efficiently stored during [ingestion](../user-guide/define-and-ingest-features.md) by providing a grouping mechanism of features values that occur on the same event timestamp. - -{% hint style="info" %} -Feast does not yet apply feature transformations. 
Transformations are currently expected to happen before data is ingested into Feast. The data sources described within feature tables should reference feature values in their already transformed form. -{% endhint %} - -### Features - -A feature is an individual measurable property observed on an entity. For example the amount of transactions \(feature\) a customer \(entity\) has completed. Features are used for both model training and scoring \(batch, online\). - -Features are defined as part of feature tables. Since Feast does not apply transformations, a feature is basically a schema that only contains a name and a type: - -```python -avg_daily_ride = Feature("average_daily_rides", ValueType.FLOAT) -``` - -Visit [FeatureSpec](https://api.docs.feast.dev/grpc/feast.core.pb.html#FeatureSpecV2) for the complete feature specification API. - -## Structure of a Feature Table - -Feature tables contain the following fields: - -* **Name:** Name of feature table. This name must be unique within a project. -* **Entities:** List of [entities](entities.md) to associate with the features defined in this feature table. Entities are used as lookup keys when retrieving features from a feature table. -* **Features:** List of features within a feature table. -* **Labels:** Labels are arbitrary key-value properties that can be defined by users. -* **Max age:** Max age affect the retrieval of features from a feature table. Age is measured as the duration of time between the event timestamp of a feature and the lookup time on an [entity key]() used to retrieve the feature. Feature values outside max age will be returned as unset values. Max age allows for eviction of keys from online stores and limits the amount of historical scanning required for historical feature values during retrieval. -* **Batch Source:** The batch data source from which Feast will ingest feature values into stores. 
This can either be used to back-fill stores before switching over to a streaming source, or it can be used as the primary source of data for a feature table. Visit [Sources](sources.md) to learn more about batch sources. -* **Stream Source:** The streaming data source from which you can ingest streaming feature values into Feast. Streaming sources must be paired with a batch source containing the same feature values. A streaming source is only used to populate online stores. The batch equivalent source that is paired with a streaming source is used during the generation of historical feature datasets. Visit [Sources](sources.md) to learn more about stream sources. - -Here is a ride-hailing example of a valid feature table specification: - -{% tabs %} -{% tab title="driver\_trips\_feature\_table.py" %} -```python -from feast import BigQuerySource, FeatureTable, Feature, ValueType -from google.protobuf.duration_pb2 import Duration - -driver_ft = FeatureTable( - name="driver_trips", - entities=["driver_id"], - features=[ - Feature("average_daily_rides", ValueType.FLOAT), - Feature("rating", ValueType.FLOAT) - ], - max_age=Duration(seconds=3600), - labels={ - "team": "driver_matching" - }, - batch_source=BigQuerySource( - table_ref="gcp_project:bq_dataset.bq_table", - event_timestamp_column="datetime", - created_timestamp_column="timestamp", - field_mapping={ - "rating": "driver_rating" - } - ) -) -``` -{% endtab %} -{% endtabs %} - -By default, Feast assumes that features specified in the feature-table specification corresponds one-to-one to the fields found in the sources. All features defined in a feature table should be available in the defined sources. - -Field mappings can be used to map features defined in Feast to fields as they occur in data sources. - -In the example feature-specification table above, we use field mappings to ensure the feature named `rating` in the batch source is mapped to the field named `driver_rating`. 
- -## Working with a Feature Table - -#### Creating a Feature Table - -```python -driver_ft = FeatureTable(...) -client.apply(driver_ft) -``` - -#### Updating a Feature Table - -```python -driver_ft = FeatureTable() - -client.apply(driver_ft) - -driver_ft.labels = {"team": "marketplace"} - -client.apply(driver_ft) -``` - -#### Feast currently supports the following changes to feature tables: - -* Adding new features. -* Removing features. -* Updating source, max age, and labels. - -{% hint style="warning" %} -Deleted features are archived, rather than removed completely. Importantly, new features cannot use the names of these deleted features. -{% endhint %} - -#### Feast currently does not support the following changes to feature tables: - -* Changes to the project or name of a feature table. -* Changes to entities related to a feature table. -* Changes to names and types of existing features. - -#### Deleting a Feature Table - -{% hint style="danger" %} -Feast currently does not support the deletion of feature tables. -{% endhint %} - diff --git a/docs/feast-on-kubernetes/concepts/overview.md b/docs/feast-on-kubernetes/concepts/overview.md deleted file mode 100644 index 461510984b3..00000000000 --- a/docs/feast-on-kubernetes/concepts/overview.md +++ /dev/null @@ -1,21 +0,0 @@ -# Overview - -### Concepts - -[Entities](entities.md) are objects in an organization like customers, transactions, and drivers, products, etc. - -[Sources](sources.md) are external sources of data where feature data can be found. - -[Feature Tables](feature-tables.md) are objects that define logical groupings of features, data sources, and other related metadata. 
- -### Concept Hierarchy - -![](../../.gitbook/assets/image%20%284%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29.png) - -Feast contains the following core concepts: - -* **Projects:** Serve as a top level namespace for all Feast resources. Each project is a completely independent environment in Feast. Users can only work in a single project at a time. -* **Entities:** Entities are the objects in an organization on which features occur. They map to your business domain \(users, products, transactions, locations\). -* **Feature Tables:** Defines a group of features that occur on a specific entity. -* **Features:** Individual feature within a feature table. - diff --git a/docs/feast-on-kubernetes/concepts/sources.md b/docs/feast-on-kubernetes/concepts/sources.md deleted file mode 100644 index 65595d94a99..00000000000 --- a/docs/feast-on-kubernetes/concepts/sources.md +++ /dev/null @@ -1,90 +0,0 @@ -# Sources - -### Overview - -Sources are descriptions of external feature data and are registered to Feast as part of [feature tables](feature-tables.md). Once registered, Feast can ingest feature data from these sources into stores. - -Currently, Feast supports the following source types: - -#### Batch Source - -* File \(as in Spark\): Parquet \(only\). -* BigQuery - -#### Stream Source - -* Kafka -* Kinesis - -The following encodings are supported on streams - -* Avro -* Protobuf - -### Structure of a Source - -For both batch and stream sources, the following configurations are necessary: - -* **Event timestamp column**: Name of column containing timestamp when event data occurred. Used during point-in-time join of feature values to [entity timestamps](). -* **Created timestamp column**: Name of column containing timestamp when data is created. Used to deduplicate data when multiple copies of the same [entity key]() is ingested. 
- -Example data source specifications: - -{% tabs %} -{% tab title="batch\_sources.py" %} -```python -from feast import FileSource -from feast.data_format import ParquetFormat - -batch_file_source = FileSource( - file_format=ParquetFormat(), - file_url="file:///feast/customer.parquet", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` -{% endtab %} - -{% tab title="stream\_sources.py" %} -```python -from feast import KafkaSource -from feast.data_format import ProtoFormat - -stream_kafka_source = KafkaSource( - bootstrap_servers="localhost:9094", - message_format=ProtoFormat(class_path="class.path"), - topic="driver_trips", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` -{% endtab %} -{% endtabs %} - -The [Feast Python API documentation](https://api.docs.feast.dev/python/) provides more information about options to specify for the above sources. - -### Working with a Source - -#### Creating a Source - -Sources are defined as part of [feature tables](feature-tables.md): - -```python -batch_bigquery_source = BigQuerySource( - table_ref="gcp_project:bq_dataset.bq_table", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) - -stream_kinesis_source = KinesisSource( - bootstrap_servers="localhost:9094", - record_format=ProtoFormat(class_path="class.path"), - region="us-east-1", - stream_name="driver_trips", - event_timestamp_column="event_timestamp", - created_timestamp_column="created_timestamp", -) -``` - -Feast ensures that the source complies with the schema of the feature table. These specified data sources can then be included inside a feature table specification and registered to Feast Core. 
- diff --git a/docs/feast-on-kubernetes/concepts/stores.md b/docs/feast-on-kubernetes/concepts/stores.md deleted file mode 100644 index 59deac0a6a6..00000000000 --- a/docs/feast-on-kubernetes/concepts/stores.md +++ /dev/null @@ -1,20 +0,0 @@ -# Stores - -In Feast, a store is a database that is populated with feature data that will ultimately be served to models. - -## Offline \(Historical\) Store - -The offline store maintains historical copies of feature values. These features are grouped and stored in feature tables. During retrieval of historical data, features are queries from these feature tables in order to produce training datasets. - -## Online Store - -The online store maintains only the latest values for a specific feature. - -* Feature values are stored based on their [entity keys]() -* Feast currently supports Redis as an online store. -* Online stores are meant for very high throughput writes from ingestion jobs and very low latency access to features during online serving. - -{% hint style="info" %} -Feast only supports a single online store in production -{% endhint %} - diff --git a/docs/feast-on-kubernetes/getting-started/README.md b/docs/feast-on-kubernetes/getting-started/README.md deleted file mode 100644 index b9423182feb..00000000000 --- a/docs/feast-on-kubernetes/getting-started/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Getting started - -{% hint style="danger" %} -Feast on Kubernetes is only supported using Feast 0.9 \(and below\). We are working to add support for Feast on Kubernetes with the latest release of Feast. Please see our [roadmap](../../roadmap.md) for more details. 
-{% endhint %} - -### Install Feast - -If you would like to deploy a new installation of Feast, click on [Install Feast](install-feast/) - -{% page-ref page="install-feast/" %} - -### Connect to Feast - -If you would like to connect to an existing Feast deployment, click on [Connect to Feast](connect-to-feast/) - -{% page-ref page="connect-to-feast/" %} - -### Learn Feast - -If you would like to learn more about Feast, click on [Learn Feast](learn-feast.md) - -{% page-ref page="learn-feast.md" %} - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md deleted file mode 100644 index 4333359f902..00000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Connect to Feast - -### Feast Python SDK - -The Feast Python SDK is used as a library to interact with a Feast deployment. - -* Define, register, and manage entities and features -* Ingest data into Feast -* Build and retrieve training datasets -* Retrieve online features - -{% page-ref page="python-sdk.md" %} - -### Feast CLI - -The Feast CLI is a command line implementation of the Feast Python SDK. 
- -* Define, register, and manage entities and features from the terminal -* Ingest data into Feast -* Manage ingestion jobs - -{% page-ref page="feast-cli.md" %} - -### Online Serving Clients - -The following clients can be used to retrieve online feature values: - -* [Feast Python SDK](https://api.docs.feast.dev/python/) -* [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) -* [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md deleted file mode 100644 index 47471b84717..00000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/feast-cli.md +++ /dev/null @@ -1,37 +0,0 @@ -# Feast CLI - -Install the Feast CLI using pip: - -```bash -pip install feast==0.9.* -``` - -Configure the CLI to connect to your Feast Core deployment: - -```text -feast config set core_url your.feast.deployment -``` - -{% hint style="info" %} -By default, all configuration is stored in `~/.feast/config` -{% endhint %} - -The CLI is a wrapper around the [Feast Python SDK](python-sdk.md): - -```aspnet -$ feast - -Usage: feast [OPTIONS] COMMAND [ARGS]... - -Options: - --help Show this message and exit. 
- -Commands: - config View and edit Feast properties - entities Create and manage entities - feature-tables Create and manage feature tables - jobs Create and manage jobs - projects Create and manage projects - version Displays version and connectivity information -``` - diff --git a/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md b/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md deleted file mode 100644 index 3e7c86880e5..00000000000 --- a/docs/feast-on-kubernetes/getting-started/connect-to-feast/python-sdk.md +++ /dev/null @@ -1,20 +0,0 @@ -# Python SDK - -Install the [Feast Python SDK](https://api.docs.feast.dev/python/) using pip: - -```bash -pip install feast==0.9.* -``` - -Connect to an existing Feast Core deployment: - -```python -from feast import Client - -# Connect to an existing Feast Core deployment -client = Client(core_url='feast.example.com:6565') - -# Ensure that your client is connected by printing out some feature tables -client.list_feature_tables() -``` - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/README.md b/docs/feast-on-kubernetes/getting-started/install-feast/README.md deleted file mode 100644 index 0b77ab431a0..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Install Feast - -A production deployment of Feast is deployed using Kubernetes. - -## Kubernetes \(with Helm\) - -This guide installs Feast into an existing Kubernetes cluster using Helm. The installation is not specific to any cloud platform or environment, but requires Kubernetes and Helm. - -{% page-ref page="kubernetes-with-helm.md" %} - -## Amazon EKS \(with Terraform\) - -This guide installs Feast into an AWS environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. 
- -{% page-ref page="kubernetes-amazon-eks-with-terraform.md" %} - -## Azure AKS \(with Helm\) - -This guide installs Feast into an Azure AKS environment with Helm. - -{% page-ref page="kubernetes-azure-aks-with-helm.md" %} - -## Azure AKS \(with Terraform\) - -This guide installs Feast into an Azure environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -{% page-ref page="kubernetes-azure-aks-with-terraform.md" %} - -## Google Cloud GKE \(with Terraform\) - -This guide installs Feast into a Google Cloud environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -{% page-ref page="google-cloud-gke-with-terraform.md" %} - -## IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(using Kustomize\) - -This guide installs Feast into an existing [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) using Kustomize. - -{% page-ref page="ibm-cloud-iks-with-kustomize.md" %} - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md deleted file mode 100644 index a3252cf0bbb..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/google-cloud-gke-with-terraform.md +++ /dev/null @@ -1,52 +0,0 @@ -# Google Cloud GKE \(with Terraform\) - -### Overview - -This guide installs Feast on GKE using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/gcp). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your GCP account. 
The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* GKE cluster -* Feast services running on GKE -* Google Memorystore \(Redis\) as online store -* Dataproc cluster -* Kafka running on GKE, exposed to the dataproc cluster via internal load balancer - -### 1. Requirements - -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) -* GCP [authentication](https://cloud.google.com/docs/authentication) and sufficient [privilege](https://cloud.google.com/iam/docs/understanding-roles) to create the resources listed above. - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/gcp`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. Sample configurations are provided below: - -{% code title="my\_feast.tfvars" %} -```typescript -gcp_project_name = "kf-feast" -name_prefix = "feast-0-8" -region = "asia-east1" -gke_machine_type = "n1-standard-2" -network = "default" -subnetwork = "default" -dataproc_staging_bucket = "feast-dataproc" -``` -{% endcode %} - -### 3. 
Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/gcp -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - - - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md b/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md deleted file mode 100644 index 0abca57b6de..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md +++ /dev/null @@ -1,193 +0,0 @@ -# IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(with Kustomize\) - -## Overview - -This guide installs Feast on an existing IBM Cloud Kubernetes cluster or Red Hat OpenShift on IBM Cloud , and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Kafka \(Optional\) -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Prerequisites - -1. [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) -2. Install [Kubectl](https://cloud.ibm.com/docs/containers?topic=containers-cs_cli_install#kubectl) that matches the major.minor versions of your IKS or Install the [OpenShift CLI](https://cloud.ibm.com/docs/openshift?topic=openshift-openshift-cli#cli_oc) that matches your local operating system and OpenShift cluster version. -3. Install [Helm 3](https://helm.sh/) -4. Install [Kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) - -## 2. Preparation - -### IBM Cloud Block Storage Setup \(IKS only\) - -:warning: If you have Red Hat OpenShift Cluster on IBM Cloud skip to this [section](ibm-cloud-iks-with-kustomize.md#Security-Context-Constraint-Setup). 
- -By default, IBM Cloud Kubernetes cluster uses [IBM Cloud File Storage](https://www.ibm.com/cloud/file-storage) based on NFS as the default storage class, and non-root users do not have write permission on the volume mount path for NFS-backed storage. Some common container images in Feast, such as Redis, Postgres, and Kafka specify a non-root user to access the mount path in the images. When containers are deployed using these images, the containers fail to start due to insufficient permissions of the non-root user creating folders on the mount path. - -[IBM Cloud Block Storage](https://www.ibm.com/cloud/block-storage) allows for the creation of raw storage volumes and provides faster performance without the permission restriction of NFS-backed storage - -Therefore, to deploy Feast we need to set up [IBM Cloud Block Storage](https://cloud.ibm.com/docs/containers?topic=containers-block_storage#install_block) as the default storage class so that you can have all the functionalities working and get the best experience from Feast. - -1. [Follow the instructions](https://helm.sh/docs/intro/install/) to install the Helm version 3 client on your local machine. -2. Add the IBM Cloud Helm chart repository to the cluster where you want to use the IBM Cloud Block Storage plug-in. - - ```text - helm repo add iks-charts https://icr.io/helm/iks-charts - helm repo update - ``` - -3. Install the IBM Cloud Block Storage plug-in. When you install the plug-in, pre-defined block storage classes are added to your cluster. - - ```text - helm install v2.0.2 iks-charts/ibmcloud-block-storage-plugin -n kube-system - ``` - - Example output: - - ```text - NAME: v2.0.2 - LAST DEPLOYED: Fri Feb 5 12:29:50 2021 - NAMESPACE: kube-system - STATUS: deployed - REVISION: 1 - NOTES: - Thank you for installing: ibmcloud-block-storage-plugin. Your release is named: v2.0.2 - ... - ``` - -4. Verify that all block storage plugin pods are in a "Running" state. 
- - ```text - kubectl get pods -n kube-system | grep ibmcloud-block-storage - ``` - -5. Verify that the storage classes for Block Storage were added to your cluster. - - ```text - kubectl get storageclasses | grep ibmc-block - ``` - -6. Set the Block Storage as the default storageclass. - - ```text - kubectl patch storageclass ibmc-block-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' - kubectl patch storageclass ibmc-file-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' - - # Check the default storageclass is block storage - kubectl get storageclass | grep \(default\) - ``` - - Example output: - - ```text - ibmc-block-gold (default) ibm.io/ibmc-block 65s - ``` - - **Security Context Constraint Setup \(OpenShift only\)** - -By default, in OpenShift, all pods or containers will use the [Restricted SCC](https://docs.openshift.com/container-platform/4.6/authentication/managing-security-context-constraints.html) which limits the UIDs pods can run with, causing the Feast installation to fail. To overcome this, you can allow Feast pods to run with any UID by executing the following: - -```text -oc adm policy add-scc-to-user anyuid -z default,kf-feast-kafka -n feast -``` - -## 3. Installation - -Install Feast using kustomize. The pods may take a few minutes to initialize. - -```bash -git clone https://github.com/kubeflow/manifests -cd manifests/contrib/feast/ -kustomize build feast/base | kubectl apply -n feast -f - -``` - -### Optional: Enable Feast Jupyter and Kafka - -You may optionally enable the Feast Jupyter component which contains code examples to demonstrate Feast. Some examples require Kafka to stream real time features to the Feast online serving. 
To enable, edit the following properties in the `values.yaml` under the `manifests/contrib/feast` folder: - -```text -kafka.enabled: true -feast-jupyter.enabled: true -``` - -Then regenerate the resource manifests and deploy: - -```text -make feast/base -kustomize build feast/base | kubectl apply -n feast -f - -``` - -## 4. Use Feast Jupyter Notebook Server to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -n feast -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Uninstall Feast - -```text -kustomize build feast/base | kubectl delete -n feast -f - -``` - -## 6. Troubleshooting - -When running the minimal\_ride\_hailing\_example Jupyter Notebook example the following errors may occur: - -1. When running `job = client.get_historical_features(...)`: - - ```text - KeyError: 'historical_feature_output_location' - ``` - - or - - ```text - KeyError: 'spark_staging_location' - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "file:///home/jovyan/historical_feature_output" - os.environ["FEAST_SPARK_STAGING_LOCATION"] = "file:///home/jovyan/test_data" - ``` - -2. When running `job.get_status()` - - ```text - - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master" - ``` - -3. 
When running `job = client.start_stream_to_online_ingestion(...)` - - ```text - org.apache.kafka.vendor.common.KafkaException: Failed to construct kafka consumer - ``` - - Add the following environment variable: - - ```text - os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka:9092" - ``` - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md deleted file mode 100644 index d03d7fb863e..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md +++ /dev/null @@ -1,68 +0,0 @@ -# Amazon EKS \(with Terraform\) - -### Overview - -This guide installs Feast on AWS using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/aws). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your AWS account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Amazon EKS \(3x r3.large nodes\) -* Kafka managed by Amazon MSK \(2x kafka.t3.small nodes\) -* Postgres database for Feast metadata, using serverless Aurora \(min capacity: 2\) -* Redis cluster, using Amazon Elasticache \(1x cache.t2.micro\) -* Amazon EMR cluster to run Spark \(3x spot m4.xlarge\) -* Staging S3 bucket to store temporary data - -![](../../../.gitbook/assets/feast-on-aws-3-%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%283%29.png) - -### 1. 
Requirements - -* Create an AWS account and [configure credentials locally](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/aws`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and an AWS region: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "my-feast" -region = "us-east-1" -``` -{% endcode %} - -### 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/aws -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -Starting may take a minute. A kubectl configuration file is also created in this directory, and the file's name will start with `kubeconfig_` and end with a random suffix. - -### 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. Replace `kubeconfig_XXXXXXX` below with the kubeconfig file name Terraform generates for you. - -```bash -KUBECONFIG=kubeconfig_XXXXXXX kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. 
- diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md deleted file mode 100644 index 39dcdbd7003..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-helm.md +++ /dev/null @@ -1,139 +0,0 @@ -# Azure AKS \(with Helm\) - -## Overview - -This guide installs Feast on Azure Kubernetes cluster \(known as AKS\), and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Spark -* Kafka -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -2. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -3. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Create an AKS cluster with Azure CLI. The detailed steps can be found [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough), and a high-level walk through includes: - -```bash -az group create --name myResourceGroup --location eastus -az acr create --resource-group myResourceGroup --name feast-AKS-ACR --sku Basic -az aks create -g myResourceGroup -n feast-AKS --location eastus --attach-acr feast-AKS-ACR --generate-ssh-keys - -az aks install-cli -az aks get-credentials --resource-group myResourceGroup --name feast-AKS -``` - -Add the Feast Helm repository and download the latest charts: - -```bash -helm version # make sure you have the latest Helm installed -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. 
- -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Feast installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Spark operator installation - -Follow the documentation [to install Spark operator on Kubernetes ](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator), and Feast documentation to [configure Spark roles](../../reference-1/feast-and-spark.md) - -```bash -helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator -helm install my-release spark-operator/spark-operator --set serviceAccounts.spark.name=spark --set image.tag=v1beta2-1.1.2-2.4.5 -``` - -and ensure the service account used by Feast has permissions to manage Spark Application resources. This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < -rules: -- apiGroups: ["sparkoperator.k8s.io"] - resources: ["sparkapplications"] - verbs: ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: use-spark-operator - namespace: -roleRef: - kind: Role - name: use-spark-operator - apiGroup: rbac.authorization.k8s.io -subjects: - - kind: ServiceAccount - name: default -EOF -``` - -## 5. 
Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 6. Environment variables - -If you are running the [Minimal Ride Hailing Example](https://github.com/feast-dev/feast/blob/master/examples/minimal/minimal_ride_hailing.ipynb), you may want to make sure the following environment variables are correctly set: - -```text -demo_data_location = "wasbs://@.blob.core.windows.net/" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_NAME"] = "" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY"] = -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "wasbs://@.blob.core.windows.net/out/" -os.environ["FEAST_SPARK_STAGING_LOCATION"] = "wasbs://@.blob.core.windows.net/artifacts/" -os.environ["FEAST_SPARK_LAUNCHER"] = "k8s" -os.environ["FEAST_SPARK_K8S_NAMESPACE"] = "default" -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT"] = "parquet" -os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master.default.svc.cluster.local" -os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka.default.svc.cluster.local:9092" -``` - -## 7. 
Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference-1/configuration-reference.md) -* [Feast and Spark](../../reference-1/feast-and-spark.md) - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md deleted file mode 100644 index 71dd15908de..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md +++ /dev/null @@ -1,63 +0,0 @@ -# Azure AKS \(with Terraform\) - -## Overview - -This guide installs Feast on Azure using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/azure). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your Azure account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Azure AKS -* Kafka managed by HDInsight -* Postgres database for Feast metadata, running as a pod on AKS -* Redis cluster, using Azure Cache for Redis -* [spark-on-k8s-operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator) to run Spark -* Staging Azure blob storage container to store temporary data - -## 1. 
Requirements - -* Create an Azure account and [configure credentials locally](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -* Install [Terraform](https://www.terraform.io/) \(tested with 0.13.5\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.4.2\) - -## 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/azure`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and `resource_group`: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "feast" -resource_group = "Feast" # pre-existing resource group -``` -{% endcode %} - -## 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/azure -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -## 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. - -```bash -kubectl port-forward $(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. 
- diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md b/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md deleted file mode 100644 index 032554d1208..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/kubernetes-with-helm.md +++ /dev/null @@ -1,69 +0,0 @@ -# Kubernetes \(with Helm\) - -## Overview - -This guide installs Feast on an existing Kubernetes cluster, and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -2. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Add the Feast Helm repository and download the latest charts: - -```text -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. 
- -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference-1/configuration-reference.md) -* [Feast and Spark](../../reference-1/feast-and-spark.md) - diff --git a/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md b/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md deleted file mode 100644 index b5e50d193c9..00000000000 --- a/docs/feast-on-kubernetes/getting-started/install-feast/quickstart.md +++ /dev/null @@ -1,91 +0,0 @@ -# Docker Compose - -{% hint style="success" %} -This guide is meant for exploratory purposes only. It allows users to run Feast locally using Docker Compose instead of Kubernetes. The goal of this guide is for users to be able to quickly try out the full Feast stack without needing to deploy to Kubernetes. It is not meant for production use. -{% endhint %} - -## Overview - -This guide shows you how to deploy Feast using [Docker Compose](https://docs.docker.com/get-started/). Docker Compose allows you to explore the functionality provided by Feast while requiring only minimal infrastructure. - -This guide includes the following containerized components: - -* [A complete Feast deployment](../../concepts/architecture.md) - * Feast Core with Postgres - * Feast Online Serving with Redis. - * Feast Job Service -* A Jupyter Notebook Server with built in Feast example\(s\). For demo purposes only. -* A Kafka cluster for testing streaming ingestion. For demo purposes only. 
- -## Get Feast - -Clone the latest stable version of Feast from the [Feast repository](https://github.com/feast-dev/feast/): - -```text -git clone https://github.com/feast-dev/feast.git -cd feast/infra/docker-compose -``` - -Create a new configuration file: - -```text -cp .env.sample .env -``` - -## Start Feast - -Start Feast with Docker Compose: - -```text -docker-compose pull && docker-compose up -d -``` - -Wait until all all containers are in a running state: - -```text -docker-compose ps -``` - -## Try our example\(s\) - -You can now connect to the bundled Jupyter Notebook Server running at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## Troubleshooting - -### Open ports - -Please ensure that the following ports are available on your host machine: - -* `6565` -* `6566` -* `8888` -* `9094` -* `5432` - -If a port conflict cannot be resolved, you can modify the port mappings in the provided [docker-compose.yml](https://github.com/feast-dev/feast/tree/master/infra/docker-compose) file to use different ports on the host. - -### Containers are restarting or unavailable - -If some of the containers continue to restart, or you are unable to access a service, inspect the logs using the following command: - -```javascript -docker-compose logs -f -t -``` - -If you are unable to resolve the problem, visit [GitHub](https://github.com/feast-dev/feast/issues) to create an issue. - -## Configuration - -The Feast Docker Compose setup can be configured by modifying properties in your `.env` file. - -### Accessing Google Cloud Storage \(GCP\) - -To access Google Cloud Storage as a data source, the Docker Compose installation requires access to a GCP service account. - -* Create a new [service account](https://cloud.google.com/iam/docs/creating-managing-service-accounts) and save a JSON key. -* Grant the service account access to your bucket\(s\). 
-* Copy the service account to the path you have configured in `.env` under `GCP_SERVICE_ACCOUNT`. -* Restart your Docker Compose setup of Feast. - diff --git a/docs/feast-on-kubernetes/getting-started/learn-feast.md b/docs/feast-on-kubernetes/getting-started/learn-feast.md deleted file mode 100644 index 983799ca9b9..00000000000 --- a/docs/feast-on-kubernetes/getting-started/learn-feast.md +++ /dev/null @@ -1,15 +0,0 @@ -# Learn Feast - -Explore the following resources to learn more about Feast: - -* [Concepts](../../) describes all important Feast API concepts. -* [User guide](../user-guide/define-and-ingest-features.md) provides guidance on completing Feast workflows. -* [Examples](https://github.com/feast-dev/feast/tree/master/examples) contains Jupyter notebooks that you can run on your Feast deployment. -* [Advanced](../advanced-1/troubleshooting.md) contains information about both advanced and operational aspects of Feast. -* [Reference](../reference-1/api/) contains detailed API and design documents for advanced users. -* [Contributing](../../contributing/contributing.md) contains resources for anyone who wants to contribute to Feast. - -{% hint style="info" %} -The best way to learn Feast is to use it. Jump over to our [Quickstart](install-feast/quickstart.md) guide to have one of our examples running in no time at all! 
-{% endhint %} - diff --git a/docs/feast-on-kubernetes/reference-1/README.md b/docs/feast-on-kubernetes/reference-1/README.md deleted file mode 100644 index 02577ad8e3a..00000000000 --- a/docs/feast-on-kubernetes/reference-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Reference - diff --git a/docs/feast-on-kubernetes/reference-1/api/README.md b/docs/feast-on-kubernetes/reference-1/api/README.md deleted file mode 100644 index cd75f5bf88f..00000000000 --- a/docs/feast-on-kubernetes/reference-1/api/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast/core/coreservice.pb.html): This is the gRPC API used by Feast Core. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast/serving/servingservice.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast/types/value.pb): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. The SDK is used to manage feature sets, features, jobs, projects, and entities. It can also be used to retrieve training datasets or online features from Feast Serving. 
- -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/feast-on-kubernetes/reference-1/configuration-reference.md b/docs/feast-on-kubernetes/reference-1/configuration-reference.md deleted file mode 100644 index 6f9a97dabfd..00000000000 --- a/docs/feast-on-kubernetes/reference-1/configuration-reference.md +++ /dev/null @@ -1,132 +0,0 @@ -# Configuration Reference - -## Overview - -This reference describes how to configure Feast components: - -* [Feast Core and Feast Online Serving](configuration-reference.md#2-feast-core-serving-and-job-controller) -* [Feast CLI and Feast Python SDK](configuration-reference.md#3-feast-cli-and-feast-python-sdk) -* [Feast Go and Feast Java SDK](configuration-reference.md#4-feast-java-and-go-sdk) - -## 1. Feast Core and Feast Online Serving - -Available configuration properties for Feast Core and Feast Online Serving can be referenced from the corresponding `application.yml` of each component: - -| Component | Configuration Reference | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -Configuration properties for Feast Core and Feast Online Serving are defined depending on Feast is deployed: - -* [Docker Compose deployment](configuration-reference.md#docker-compose-deployment) - Feast is deployed with Docker Compose. -* [Kubernetes deployment](configuration-reference.md#kubernetes-deployment) - Feast is deployed with Kubernetes. 
-* [Direct Configuration](configuration-reference.md#direct-configuration) - Feast is built and run from source code. - -## Docker Compose Deployment - -For each Feast component deployed using Docker Compose, configuration properties from `application.yml` can be set at: - -| Component | Configuration Path | -| :--- | :--- | -| Core | `infra/docker-compose/core/core.yml` | -| Online Serving | `infra/docker-compose/serving/online-serving.yml` | - -## Kubernetes Deployment - -The Kubernetes Feast Deployment is configured using `values.yaml` in the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast: - -```yaml -# values.yaml -feast-core: - enabled: true # whether to deploy the feast-core subchart to deploy Feast Core. - # feast-core subchart specific config. - gcpServiceAccount: - enabled: true - # .... -``` - -A reference of the sub-chart-specific configuration can found in its `values.yml`: - -* [feast-core](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-core) -* [feast-serving](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-serving) - -Configuration properties can be set via `application-override.yaml` for each component in `values.yaml`: - -```yaml -# values.yaml -feast-core: - # .... - application-override.yaml: - # application.yml config properties for Feast Core. - # ... -``` - -Visit the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast to learn more about configuration. 
- -## Direct Configuration - -If Feast is built and running from source, configuration properties can be set directly in the Feast component's `application.yml`: - -| Component | Configuration Path | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -## 2. Feast CLI and Feast Python SDK - -Configuration options for both the [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) and [Feast Python SDK](https://api.docs.feast.dev/python/) can be defined in the following locations, in order of precedence: - -**1. Command line arguments or initialized arguments:** Passing parameters to the Feast CLI or instantiating the Feast Client object with specific parameters will take precedence above other parameters. - -```bash -# Set option as command line arguments. -feast config set core_url "localhost:6565" -``` - -```python -# Pass options as initialized arguments. -client = Client( - core_url="localhost:6565", - project="default" -) -``` - -**2. Environmental variables:** Environmental variables can be set to provide configuration options. They must be prefixed with `FEAST_`. For example `FEAST_CORE_URL`. - -```bash -FEAST_CORE_URL=my_feast:6565 FEAST_PROJECT=default feast projects list -``` - -**3. Configuration file:** Options with the lowest precedence are configured in the Feast configuration file. Feast looks for or creates this configuration file in `~/.feast/config` if it does not already exist. All options must be defined in the `[general]` section of this file. 
- -```text -[general] -project = default -core_url = localhost:6565 -``` - -Visit the [available configuration parameters](https://api.docs.feast.dev/python/#module-feast.constants) for Feast Python SDK and Feast CLI to learn more. - -## 3. Feast Java and Go SDK - -The [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) and [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) are configured via arguments passed when instantiating the respective Clients: - -### Go SDK - -```go -// configure serving host and port. -cli := feast.NewGrpcClient("localhost", 6566) -``` - -Visit the[ Feast Go SDK API reference](https://godoc.org/github.com/feast-dev/feast/sdk/go) to learn more about available configuration parameters. - -### Java SDK - -```java -// configure serving host and port. -client = FeastClient.create(servingHost, servingPort); -``` - -Visit the [Feast Java SDK API reference](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to learn more about available configuration parameters. - diff --git a/docs/feast-on-kubernetes/reference-1/feast-and-spark.md b/docs/feast-on-kubernetes/reference-1/feast-and-spark.md deleted file mode 100644 index be05f177aeb..00000000000 --- a/docs/feast-on-kubernetes/reference-1/feast-and-spark.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -description: Configuring Feast to use Spark for ingestion. ---- - -# Feast and Spark - -Feast relies on Spark to ingest data from the offline store to the online store, streaming ingestion, and running queries to retrieve historical data from the offline store. Feast supports several Spark deployment options. - -## Option 1. 
Use Kubernetes Operator for Apache Spark - -To install the Spark on K8s Operator - -```bash -helm repo add spark-operator \ - https://googlecloudplatform.github.io/spark-on-k8s-operator - -helm install my-release spark-operator/spark-operator \ - --set serviceAccounts.spark.name=spark -``` - -Currently Feast is tested using `v1beta2-1.1.2-2.4.5`version of the operator image. To configure Feast to use it, set the following options in Feast config: - -| Feast Setting | Value | -| :--- | :--- | -| `SPARK_LAUNCHER` | `"k8s"` | -| `SPARK_STAGING_LOCATION` | S3/GCS/Azure Blob Storage URL to use as a staging location, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/artifacts/` | -| `HISTORICAL_FEATURE_OUTPUT_LOCATION` | S3/GCS/Azure Blob Storage URL used to store results of historical retrieval queries, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/out/` | -| `SPARK_K8S_NAMESPACE` | Only needs to be set if you are customizing the spark-on-k8s-operator. The name of the Kubernetes namespace to run Spark jobs in. This should match the value of `sparkJobNamespace` set on spark-on-k8s-operator Helm chart. Typically this is also the namespace Feast itself will run in. | -| `SPARK_K8S_JOB_TEMPLATE_PATH` | Only needs to be set if you are customizing the Spark job template. Local file path with the template of the SparkApplication resource. No prefix required. Ex.: `/home/jovyan/work/sparkapp-template.yaml`. An example template is [here](https://github.com/feast-dev/feast/blob/4059a21dc4eba9cd27b2d5b0fabe476c07a8b3bd/sdk/python/feast/pyspark/launchers/k8s/k8s_utils.py#L280-L317) and the spec is defined in the [k8s-operator User Guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md). | - -Lastly, make sure that the service account used by Feast has permissions to manage Spark Application resources. 
This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < - - - Limitation - Motivation - - - - - Features names and entity names cannot overlap in feature table definitions - Features and entities become columns in historical stores which may cause - conflicts - - - -

The following field names are reserved in feature tables

-
    -
  • event_timestamp -
  • -
  • datetime -
  • -
  • created_timestamp -
  • -
  • ingestion_id -
  • -
  • job_id -
  • -
- - These keywords are used for column names when persisting metadata in historical - stores - - - - -### Ingestion - -| Limitation | Motivation | -| :--- | :--- | -| Once data has been ingested into Feast, there is currently no way to delete the data without manually going to the database and deleting it. However, during retrieval only the latest rows will be returned for a specific key \(`event_timestamp`, `entity`\) based on its `created_timestamp`. | This functionality simply doesn't exist yet as a Feast API | - -### Storage - -| Limitation | Motivation | -| :--- | :--- | -| Feast does not support offline storage in Feast 0.8 | As part of our re-architecture of Feast, we moved from GCP to cloud-agnostic deployments. Developing offline storage support that is available in all cloud environments is a pending action. | - diff --git a/docs/feast-on-kubernetes/reference-1/metrics-reference.md b/docs/feast-on-kubernetes/reference-1/metrics-reference.md deleted file mode 100644 index 78f94bc3901..00000000000 --- a/docs/feast-on-kubernetes/reference-1/metrics-reference.md +++ /dev/null @@ -1,178 +0,0 @@ -# Metrics Reference - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -Reference of the metrics that each Feast component exports: - -* [Feast Core](metrics-reference.md#feast-core) -* [Feast Serving](metrics-reference.md#feast-serving) -* [Feast Ingestion Job](metrics-reference.md#feast-ingestion-job) - -For how to configure Feast to export Metrics, see the [Metrics user guide.](../advanced-1/metrics.md) - -## Feast Core - -**Exported Metrics** - -Feast Core exports the following metrics: - -| Metrics | Description | Tags | -| :--- | :--- | :--- | -| `feast_core_request_latency_seconds` | Feast Core's latency in serving Requests in Seconds. | `service`, `method`, `status_code` | -| `feast_core_feature_set_total` | No. of Feature Sets registered with Feast Core. 
| None | -| `feast_core_store_total` | No. of Stores registered with Feast Core. | None | -| `feast_core_max_memory_bytes` | Max amount of memory the Java virtual machine will attempt to use. | None | -| `feast_core_total_memory_bytes` | Total amount of memory in the Java virtual machine | None | -| `feast_core_free_memory_bytes` | Total amount of free memory in the Java virtual machine. | None | -| `feast_core_gc_collection_seconds` | Time spent in a given JVM garbage collector in seconds. | None | - -**Metric Tags** - -Exported Feast Core metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `service` | Name of the Service that request is made to. Should be set to `CoreService` | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | - -## Feast Serving - -**Exported Metrics** - -Feast Serving exports the following metrics: - -| Metric | Description | Tags | -| :--- | :--- | :--- | -| `feast_serving_request_latency_seconds` | Feast Serving's latency in serving Requests in Seconds. | `method` | -| `feast_serving_request_feature_count` | No. of requests retrieving a Feature from Feast Serving. | `project`, `feature_name` | -| `feast_serving_not_found_feature_count` | No. of requests retrieving a Feature has resulted in a [`NOT_FOUND` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_stale_feature_count` | No. of requests retrieving a Feature resulted in a [`OUTSIDE_MAX_AGE` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_grpc_request_count` | Total gRPC requests served. 
| `method` | - -**Metric Tags** - -Exported Feast Serving metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | -| `project` | Name of the project that the FeatureSet of the Feature retrieved belongs to. | -| `feature_name` | Name of the Feature being retrieved. | - -## Feast Ingestion Job - -Feast Ingestion computes both metrics an statistics on [data ingestion.](../user-guide/define-and-ingest-features.md) Make sure you familar with data ingestion concepts before proceeding. - -**Metrics Namespace** - -Metrics are computed at two stages of the Feature Row's/Feature Value's life cycle when being processed by the Ingestion Job: - -* `Inflight`- Prior to writing data to stores, but after successful validation of data. -* `WriteToStoreSucess`- After a successful store write. - -Metrics processed by each staged will be tagged with `metrics_namespace` to the stage where the metric was computed. - -**Metrics Bucketing** - -Metrics with a `{BUCKET}` are computed on a 60 second window/bucket. Suffix with the following to select the bucket to use: - -* `min` - minimum value. -* `max` - maximum value. -* `mean`- mean value. -* `percentile_90`- 90 percentile. -* `percentile_95`- 95 percentile. -* `percentile_99`- 99 percentile. - -**Exported Metrics** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MetricDescriptionTags
feast_ingestion_feature_row_lag_ms_{BUCKET} - Lag time in milliseconds between succeeding ingested Feature Rows. -

feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_lag_ms_{BUCKET} - Lag time in milliseconds between succeeding ingested values for each Feature. -

feast_store, feast_project_name,feast_featureSet_name,

-

feast_feature_name,

-

ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_{BUCKET} - Last value feature for each Feature.feast_store, feature_project_name, feast_feature_name,feast_featureSet_name, ingest_job_name, metrics_namepace -
feast_ingestion_feature_row_ingested_count - No. of Ingested Feature Rows -

feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_missing_count - No. of times a ingested Feature values did not provide a value for the - Feature. -

feast_store, feast_project_name,feast_featureSet_name,

-

feast_feature_name,

-

ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_deadletter_row_count - No. of Feature Rows that that the Ingestion Job did not successfully write - to store.feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name -
- -**Metric Tags** - -Exported Feast Ingestion Job metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `feast_store` | Name of the target store the Ingestion Job is writing to. | -| `feast_project_name` | Name of the project that the ingested FeatureSet belongs to. | -| `feast_featureSet_name` | Name of the Feature Set being ingested. | -| `feast_feature_name` | Name of the Feature being ingested. | -| `ingestion_job_name` | Name of the Ingestion Job performing data ingestion. Typically this is set to the Id of the Ingestion Job. | -| `metrics_namespace` | Stage where metrics where computed. Either `Inflight` or `WriteToStoreSuccess` | - diff --git a/docs/feast-on-kubernetes/tutorials-1/README.md b/docs/feast-on-kubernetes/tutorials-1/README.md deleted file mode 100644 index 84ce15b7886..00000000000 --- a/docs/feast-on-kubernetes/tutorials-1/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Tutorials - diff --git a/docs/feast-on-kubernetes/user-guide/README.md b/docs/feast-on-kubernetes/user-guide/README.md deleted file mode 100644 index be02a733729..00000000000 --- a/docs/feast-on-kubernetes/user-guide/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# User guide - diff --git a/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md b/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md deleted file mode 100644 index 5a7e7288ec9..00000000000 --- a/docs/feast-on-kubernetes/user-guide/define-and-ingest-features.md +++ /dev/null @@ -1,52 +0,0 @@ -# Define and ingest features - -In order to retrieve features for both training and serving, Feast requires data being ingested into its offline and online stores. - -Users are expected to already have either a batch or stream source with data stored in it, ready to be ingested into Feast. Once a feature table \(with the corresponding sources\) has been registered with Feast, it is possible to load data from this source into stores. 
- -The following depicts an example ingestion flow from a data source to the online store. - -## Batch Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Initialize date ranges -today = datetime.now() -yesterday = today - timedelta(1) - -# Launches a short-lived job that ingests data over the provided date range. -client.start_offline_to_online_ingestion( - driver_ft, yesterday, today -) -``` - -## Stream Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Launches a long running streaming ingestion job -client.start_stream_to_online_ingestion(driver_ft) -``` - -## Batch Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - -## Stream Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - diff --git a/docs/feast-on-kubernetes/user-guide/getting-online-features.md b/docs/feast-on-kubernetes/user-guide/getting-online-features.md deleted file mode 100644 index c16dc08a013..00000000000 --- a/docs/feast-on-kubernetes/user-guide/getting-online-features.md +++ /dev/null @@ -1,54 +0,0 @@ -# Getting online features - -Feast provides an API through which online feature values can be retrieved. This allows teams to look up feature values at low latency in production during model serving, in order to make online predictions. - -{% hint style="info" %} -Online stores only maintain the current state of features, i.e latest feature values. No historical data is stored or served. 
-{% endhint %} - -```python -from feast import Client - -online_client = Client( - core_url="localhost:6565", - serving_url="localhost:6566", -) - -entity_rows = [ - {"driver_id": 1001}, - {"driver_id": 1002}, -] - -# Features in format -feature_refs = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", -] - -response = online_client.get_online_features( - feature_refs=feature_refs, # Contains only feature references - entity_rows=entity_rows, # Contains only entities (driver ids) -) - -# Print features in dictionary format -response_dict = response.to_dict() -print(response_dict) -``` - -The online store must be populated through [ingestion jobs](define-and-ingest-features.md#batch-source-to-online-store) prior to being used for online serving. - -Feast Serving provides a [gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) that is backed by [Redis](https://redis.io/). We have native clients in [Python](https://api.docs.feast.dev/python/), [Go](https://godoc.org/github.com/gojek/feast/sdk/go), and [Java](https://javadoc.io/doc/dev.feast). - -### Online Field Statuses - -Feast also returns status codes when retrieving features from the Feast Serving API. These status code give useful insight into the quality of data being served. - -| Status | Meaning | -| :--- | :--- | -| NOT\_FOUND | The feature value was not found in the online store. This might mean that no feature value was ingested for this feature. | -| NULL\_VALUE | A entity key was successfully found but no feature values had been set. This status code should not occur during normal operation. | -| OUTSIDE\_MAX\_AGE | The age of the feature row in the online store \(in terms of its event timestamp\) has exceeded the maximum age defined within the feature table. | -| PRESENT | The feature values have been found and are within the maximum age. | -| UNKNOWN | Indicates a system failure. 
| - diff --git a/docs/feast-on-kubernetes/user-guide/getting-training-features.md b/docs/feast-on-kubernetes/user-guide/getting-training-features.md deleted file mode 100644 index e0f52a8cd96..00000000000 --- a/docs/feast-on-kubernetes/user-guide/getting-training-features.md +++ /dev/null @@ -1,72 +0,0 @@ -# Getting training features - -Feast provides a historical retrieval interface for exporting feature data in order to train machine learning models. Essentially, users are able to enrich their data with features from any feature tables. - -### Retrieving historical features - -Below is an example of the process required to produce a training dataset: - -```python -# Feature references with target feature -features = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", - "driver_trips:rating:trip_completed", -] - -# Define entity source -entity_source = FileSource( - "event_timestamp", - ParquetFormat(), - "gs://some-bucket/customer" -) - -# Retrieve historical dataset from Feast. -historical_feature_retrieval_job = client.get_historical_features( - features=features, - entity_rows=entity_source -) - -output_file_uri = historical_feature_retrieval_job.get_output_file_uri() -``` - -#### 1. Define feature references - -[Feature references]() define the specific features that will be retrieved from Feast. These features can come from multiple feature tables. The only requirement is that the feature tables that make up the feature references have the same entity \(or composite entity\). - -**2. Define an entity dataframe** - -Feast needs to join feature values onto specific entities at specific points in time. Thus, it is necessary to provide an [entity dataframe]() as part of the `get_historical_features` method. In the example above we are defining an entity source. This source is an external file that provides Feast with the entity dataframe. - -**3. 
Launch historical retrieval job** - -Once the feature references and an entity source are defined, it is possible to call `get_historical_features()`. This method launches a job that extracts features from the sources defined in the provided feature tables, joins them onto the provided entity source, and returns a reference to the training dataset that is produced. - -Please see the [Feast SDK](https://api.docs.feast.dev/python) for more details. - -### Point-in-time Joins - -Feast always joins features onto entity data in a point-in-time correct way. The process can be described through an example. - -In the example below there are two tables \(or dataframes\): - -* The dataframe on the left is the [entity dataframe]() that contains timestamps, entities, and the target variable \(trip\_completed\). This dataframe is provided to Feast through an entity source. -* The dataframe on the right contains driver features. This dataframe is represented in Feast through a feature table and its accompanying data source\(s\). - -The user would like to have the driver features joined onto the entity dataframe to produce a training dataset that contains both the target \(trip\_completed\) and features \(average\_daily\_rides, maximum\_daily\_rides, rating\). This dataset will then be used to train their model. - -![](../../.gitbook/assets/point_in_time_join%20%281%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%282%29.png) - -Feast is able to intelligently join feature data with different timestamps to a single entity dataframe. It does this through a point-in-time join as follows: - -1. Feast loads the entity dataframe and all feature tables \(driver dataframe\) into the same location. This can either be a database or in memory. -2. For each [entity row]() in the [entity dataframe](getting-online-features.md), Feast tries to find feature values in each feature table to join to it. 
Feast extracts the timestamp and entity key of each row in the entity dataframe and scans backward through the feature table until it finds a matching entity key. -3. If the event timestamp of the matching entity key within the driver feature table is within the maximum age configured for the feature table, then the features at that entity key are joined onto the entity dataframe. If the event timestamp is outside of the maximum age, then only null values are returned. -4. If multiple entity keys are found with the same event timestamp, then they are deduplicated by the created timestamp, with newer values taking precedence. -5. Feast repeats this joining process for all feature tables and returns the resulting dataset. - -{% hint style="info" %} -Point-in-time correct joins attempts to prevent the occurrence of feature leakage by trying to recreate the state of the world at a single point in time, instead of joining features based on exact timestamps only. -{% endhint %} - diff --git a/docs/feast-on-kubernetes/user-guide/overview.md b/docs/feast-on-kubernetes/user-guide/overview.md deleted file mode 100644 index 5f367924794..00000000000 --- a/docs/feast-on-kubernetes/user-guide/overview.md +++ /dev/null @@ -1,32 +0,0 @@ -# Overview - -### Using Feast - -Feast development happens through three key workflows: - -1. [Define and load feature data into Feast](define-and-ingest-features.md) -2. [Retrieve historical features for training models](getting-training-features.md) -3. [Retrieve online features for serving models](getting-online-features.md) - -### Defining feature tables and ingesting data into Feast - -Feature creators model the data within their organization into Feast through the definition of [feature tables](../concepts/feature-tables.md) that contain [data sources](../concepts/sources.md). Feature tables are both a schema and a means of identifying data sources for features, and allow Feast to know how to interpret your data, and where to find it. 
- -After registering a feature table with Feast, users can trigger an ingestion from their data source into Feast. This loads feature values from an upstream data source into Feast stores through ingestion jobs. - -Visit [feature tables](../concepts/feature-tables.md#overview) to learn more about them. - -{% page-ref page="define-and-ingest-features.md" %} - -### Retrieving historical features for training - -In order to generate a training dataset it is necessary to provide both an [entity dataframe ]()and feature references through the[ Feast SDK](https://api.docs.feast.dev/python/) to retrieve historical features. For historical serving, Feast requires that you provide the entities and timestamps for the corresponding feature data. Feast produces a point-in-time correct dataset using the requested features. These features can be requested from an unlimited number of feature sets. - -{% page-ref page="getting-training-features.md" %} - -### Retrieving online features for online serving - -Online retrieval uses feature references through the [Feast Online Serving API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) to retrieve online features. Online serving allows for very low latency requests to feature data at very high throughput. 
- -{% page-ref page="getting-online-features.md" %} - diff --git a/docs/feature-views.md b/docs/feature-views.md deleted file mode 100644 index 235b8288353..00000000000 --- a/docs/feature-views.md +++ /dev/null @@ -1,2 +0,0 @@ -# Feature Views - diff --git a/docs/getting-started/architecture-and-components/untitled.md b/docs/getting-started/architecture-and-components/registry.md similarity index 100% rename from docs/getting-started/architecture-and-components/untitled.md rename to docs/getting-started/architecture-and-components/registry.md diff --git a/docs/getting-started/concepts/README.md b/docs/getting-started/concepts/README.md index 99ff5861867..7ad0115a72b 100644 --- a/docs/getting-started/concepts/README.md +++ b/docs/getting-started/concepts/README.md @@ -14,3 +14,4 @@ {% page-ref page="point-in-time-joins.md" %} +{% page-ref page="dataset.md" %} diff --git a/docs/getting-started/concepts/dataset.md b/docs/getting-started/concepts/dataset.md new file mode 100644 index 00000000000..59f71689050 --- /dev/null +++ b/docs/getting-started/concepts/dataset.md @@ -0,0 +1,50 @@ +# Dataset + +Feast datasets allow for conveniently saving dataframes that include both features and entities to be subsequently used for data analysis and model training. +[Data Quality Monitoring](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98) was the primary motivation for creating dataset concept. + +Dataset's metadata is stored in the Feast registry and raw data (features, entities, additional input keys and timestamp) is stored in the [offline store](../architecture-and-components/offline-store.md). + +Dataset can be created from: +1. Results of historical retrieval +2. [planned] Logging request (including input for [on demand transformation](../../reference/alpha-on-demand-feature-view.md)) and response during feature serving +3. 
[planned] Logging features during writing to online store (from batch source or stream) + + +### Creating Saved Dataset from Historical Retrieval + +To create a saved dataset from historical features for later retrieval or analysis, a user needs to call `get_historical_features` method first and then pass the returned retrieval job to `create_saved_dataset` method. +`create_saved_dataset` will trigger provided retrieval job (by calling `.persist()` on it) to store the data using specified `storage`. +Storage type must be the same as globally configured offline store (eg, it's impossible to persist data to Redshift with BigQuery source). +`create_saved_dataset` will also create SavedDataset object with all related metadata and will write it to the registry. + +```python +from feast import FeatureStore +from feast.infra.offline_stores.bigquery_source import SavedDatasetBigQueryStorage + +store = FeatureStore() + +historical_job = store.get_historical_features( + features=["driver:avg_trip"], + entity_df=..., +) + +dataset = store.create_saved_dataset( + from_=historical_job, + name='my_training_dataset', + storage=SavedDatasetBigQueryStorage(table_ref='..my_training_dataset'), + tags={'author': 'oleksii'} +) + +dataset.to_df() +``` + +Saved dataset can be later retrieved using `get_saved_dataset` method: +```python +dataset = store.get_saved_dataset('my_training_dataset') +dataset.to_df() +``` + +--- + +Check out our [tutorial on validating historical features](../../tutorials/validating-historical-features.md) to see how this concept can be applied in real-world use case. 
\ No newline at end of file diff --git a/docs/getting-started/connect-to-feast/README.md b/docs/getting-started/connect-to-feast/README.md deleted file mode 100644 index 4333359f902..00000000000 --- a/docs/getting-started/connect-to-feast/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Connect to Feast - -### Feast Python SDK - -The Feast Python SDK is used as a library to interact with a Feast deployment. - -* Define, register, and manage entities and features -* Ingest data into Feast -* Build and retrieve training datasets -* Retrieve online features - -{% page-ref page="python-sdk.md" %} - -### Feast CLI - -The Feast CLI is a command line implementation of the Feast Python SDK. - -* Define, register, and manage entities and features from the terminal -* Ingest data into Feast -* Manage ingestion jobs - -{% page-ref page="feast-cli.md" %} - -### Online Serving Clients - -The following clients can be used to retrieve online feature values: - -* [Feast Python SDK](https://api.docs.feast.dev/python/) -* [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) -* [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk) - diff --git a/docs/getting-started/connect-to-feast/feast-cli.md b/docs/getting-started/connect-to-feast/feast-cli.md deleted file mode 100644 index d15414f3604..00000000000 --- a/docs/getting-started/connect-to-feast/feast-cli.md +++ /dev/null @@ -1,37 +0,0 @@ -# Feast CLI - -Install the Feast CLI using pip: - -```bash -pip install feast -``` - -Configure the CLI to connect to your Feast Core deployment: - -```text -feast config set core_url your.feast.deployment -``` - -{% hint style="info" %} -By default, all configuration is stored in `~/.feast/config` -{% endhint %} - -The CLI is a wrapper around the [Feast Python SDK](python-sdk.md): - -```aspnet -$ feast - -Usage: feast [OPTIONS] COMMAND [ARGS]... - -Options: - --help Show this message and exit. 
- -Commands: - config View and edit Feast properties - entities Create and manage entities - feature-tables Create and manage feature tables - jobs Create and manage jobs - projects Create and manage projects - version Displays version and connectivity information -``` - diff --git a/docs/getting-started/connect-to-feast/python-sdk.md b/docs/getting-started/connect-to-feast/python-sdk.md deleted file mode 100644 index bf31bd38491..00000000000 --- a/docs/getting-started/connect-to-feast/python-sdk.md +++ /dev/null @@ -1,20 +0,0 @@ -# Python SDK - -Install the [Feast Python SDK](https://api.docs.feast.dev/python/) using pip: - -```bash -pip install feast -``` - -Connect to an existing Feast Core deployment: - -```python -from feast import Client - -# Connect to an existing Feast Core deployment -client = Client(core_url='feast.example.com:6565') - -# Ensure that your client is connected by printing out some feature tables -client.list_feature_tables() -``` - diff --git a/docs/getting-started/install-feast/README.md b/docs/getting-started/install-feast/README.md deleted file mode 100644 index 6c1dd80134c..00000000000 --- a/docs/getting-started/install-feast/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Install Feast - -{% hint style="success" %} -_Would you prefer a lighter-weight, pip-install, no-Kubernetes deployment of Feast?_ The Feast maintainers are currently building a new deployment experience for Feast. If you have thoughts on Feast's deployment, [chat with the maintainers](https://calendly.com/d/gc29-y88c/feast-chat-w-willem-and-jay) to learn more and provide feedback. -{% endhint %} - -A production deployment of Feast is deployed using Kubernetes. - -## Kubernetes \(with Helm\) - -This guide installs Feast into an existing Kubernetes cluster using Helm. The installation is not specific to any cloud platform or environment, but requires Kubernetes and Helm. 
- -## Amazon EKS \(with Terraform\) - -This guide installs Feast into an AWS environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## Azure AKS \(with Helm\) - -This guide installs Feast into an Azure AKS environment with Helm. - -## Azure AKS \(with Terraform\) - -This guide installs Feast into an Azure environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## Google Cloud GKE \(with Terraform\) - -This guide installs Feast into a Google Cloud environment using Terraform. The Terraform script is opinionated and intended to allow you to start quickly. - -## IBM Cloud Kubernetes Service \(IKS\) and Red Hat OpenShift \(using Kustomize\) - -This guide installs Feast into an existing [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) using Kustomize. - -{% page-ref page="ibm-cloud-iks-with-kustomize.md" %} diff --git a/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md b/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md deleted file mode 100644 index a3252cf0bbb..00000000000 --- a/docs/getting-started/install-feast/google-cloud-gke-with-terraform.md +++ /dev/null @@ -1,52 +0,0 @@ -# Google Cloud GKE \(with Terraform\) - -### Overview - -This guide installs Feast on GKE using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/gcp). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your GCP account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. 
-{% endhint %} - -This Terraform configuration creates the following resources: - -* GKE cluster -* Feast services running on GKE -* Google Memorystore \(Redis\) as online store -* Dataproc cluster -* Kafka running on GKE, exposed to the dataproc cluster via internal load balancer - -### 1. Requirements - -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) -* GCP [authentication](https://cloud.google.com/docs/authentication) and sufficient [privilege](https://cloud.google.com/iam/docs/understanding-roles) to create the resources listed above. - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/gcp`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. Sample configurations are provided below: - -{% code title="my\_feast.tfvars" %} -```typescript -gcp_project_name = "kf-feast" -name_prefix = "feast-0-8" -region = "asia-east1" -gke_machine_type = "n1-standard-2" -network = "default" -subnetwork = "default" -dataproc_staging_bucket = "feast-dataproc" -``` -{% endcode %} - -### 3. 
Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/gcp -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - - - diff --git a/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md b/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md deleted file mode 100644 index 817d4dbe14d..00000000000 --- a/docs/getting-started/install-feast/ibm-cloud-iks-with-kustomize.md +++ /dev/null @@ -1,185 +0,0 @@ -# IBM Cloud Kubernetes Service and Red Hat OpenShift \(with Kustomize\) - -## Overview - -This guide installs Feast on an existing IBM Cloud Kubernetes cluster or Red Hat OpenShift on IBM Cloud , and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Kafka \(Optional\) -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Prerequisites - -1. [IBM Cloud Kubernetes Service](https://www.ibm.com/cloud/kubernetes-service) or [Red Hat OpenShift on IBM Cloud](https://www.ibm.com/cloud/openshift) -2. Install [Kubectl](https://cloud.ibm.com/docs/containers?topic=containers-cs_cli_install#kubectl) that matches the major.minor versions of your IKS or Install the [OpenShift CLI](https://cloud.ibm.com/docs/openshift?topic=openshift-openshift-cli#cli_oc) that matches your local operating system and OpenShift cluster version. -3. Install [Helm 3](https://helm.sh/) -4. Install [Kustomize](https://kubectl.docs.kubernetes.io/installation/kustomize/) - -## 2. Preparation -### IBM Cloud Block Storage Setup (IKS only) - -:warning: If you have Red Hat OpenShift Cluster on IBM Cloud skip to this [section](#Security-Context-Constraint-Setup). - -By default, IBM Cloud Kubernetes cluster uses [IBM Cloud File Storage](https://www.ibm.com/cloud/file-storage) based on NFS as the default storage class, and non-root users do not have write permission on the volume mount path for NFS-backed storage. 
Some common container images in Feast, such as Redis, Postgres, and Kafka specify a non-root user to access the mount path in the images. When containers are deployed using these images, the containers fail to start due to insufficient permissions of the non-root user creating folders on the mount path. - -[IBM Cloud Block Storage](https://www.ibm.com/cloud/block-storage) allows for the creation of raw storage volumes and provides faster performance without the permission restriction of NFS-backed storage - -Therefore, to deploy Feast we need to set up [IBM Cloud Block Storage](https://cloud.ibm.com/docs/containers?topic=containers-block_storage#install_block) as the default storage class so that you can have all the functionalities working and get the best experience from Feast. - -1. [Follow the instructions](https://helm.sh/docs/intro/install/) to install the Helm version 3 client on your local machine. -2. Add the IBM Cloud Helm chart repository to the cluster where you want to use the IBM Cloud Block Storage plug-in. - - ```text - helm repo add iks-charts https://icr.io/helm/iks-charts - helm repo update - ``` - -3. Install the IBM Cloud Block Storage plug-in. When you install the plug-in, pre-defined block storage classes are added to your cluster. - - ```text - helm install v2.0.2 iks-charts/ibmcloud-block-storage-plugin -n kube-system - ``` - - Example output: - - ```text - NAME: v2.0.2 - LAST DEPLOYED: Fri Feb 5 12:29:50 2021 - NAMESPACE: kube-system - STATUS: deployed - REVISION: 1 - NOTES: - Thank you for installing: ibmcloud-block-storage-plugin. Your release is named: v2.0.2 - ... - ``` - -4. Verify that all block storage plugin pods are in a "Running" state. - - ```text - kubectl get pods -n kube-system | grep ibmcloud-block-storage - ``` - -5. Verify that the storage classes for Block Storage were added to your cluster. - - ```text - kubectl get storageclasses | grep ibmc-block - ``` - -6. Set the Block Storage as the default storageclass. 
- - ```text - kubectl patch storageclass ibmc-block-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' - kubectl patch storageclass ibmc-file-gold -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' - - # Check the default storageclass is block storage - kubectl get storageclass | grep \(default\) - ``` - - Example output: - - ```text - ibmc-block-gold (default) ibm.io/ibmc-block 65s - ``` -### Security Context Constraint Setup - -By default, in OpenShift, all pods or containers will use the [Restricted SCC](https://docs.openshift.com/container-platform/4.6/authentication/managing-security-context-constraints.html) which limits the UIDs pods can run with, causing the Feast installation to fail. To overcome this, you can allow Feast pods to run with any UID by executing the following: - -```text -oc adm policy add-scc-to-user anyuid -z default,kf-feast-kafka -n feast -``` -## 3. Installation - -Install Feast using kustomize. The pods may take a few minutes to initialize. - -```bash -git clone https://github.com/kubeflow/manifests -cd manifests/contrib/feast/ -kustomize build feast/base | kubectl apply -n feast -f - -``` -### Optional: Enable Feast Jupyter and Kafka - -You may optionally enable the Feast Jupyter component which contains code examples to demonstrate Feast. Some examples require Kafka to stream real time features to the Feast online serving. To enable, edit the following properties in the `values.yaml` under the `manifests/contrib/feast` folder: -``` -kafka.enabled: true -feast-jupyter.enabled: true -``` - -Then regenerate the resource manifests and deploy: -``` -make feast/base -kustomize build feast/base | kubectl apply -n feast -f - -``` - -## 4. 
Use Feast Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -n feast -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. Uninstall Feast -```text -kustomize build feast/base | kubectl delete -n feast -f - -``` -## 6. Troubleshooting - -When running the minimal\_ride\_hailing\_example Jupyter Notebook example the following errors may occur: - -1. When running `job = client.get_historical_features(...)`: - - ```text - KeyError: 'historical_feature_output_location' - ``` - - or - - ```text - KeyError: 'spark_staging_location' - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "file:///home/jovyan/historical_feature_output" - os.environ["FEAST_SPARK_STAGING_LOCATION"] = "file:///home/jovyan/test_data" - ``` - -2. When running `job.get_status()` - - ```text - - ``` - - Add the following environment variable: - - ```text - os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master" - ``` - -3. 
When running `job = client.start_stream_to_online_ingestion(...)` - - ```text - org.apache.kafka.vendor.common.KafkaException: Failed to construct kafka consumer - ``` - - Add the following environment variable: - - ```text - os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka:9092" - ``` - diff --git a/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md b/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md deleted file mode 100644 index 99ff4a8e81b..00000000000 --- a/docs/getting-started/install-feast/kubernetes-amazon-eks-with-terraform.md +++ /dev/null @@ -1,68 +0,0 @@ -# Amazon EKS \(with Terraform\) - -### Overview - -This guide installs Feast on AWS using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/aws). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your AWS account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Amazon EKS \(3x r3.large nodes\) -* Kafka managed by Amazon MSK \(2x kafka.t3.small nodes\) -* Postgres database for Feast metadata, using serverless Aurora \(min capacity: 2\) -* Redis cluster, using Amazon Elasticache \(1x cache.t2.micro\) -* Amazon EMR cluster to run Spark \(3x spot m4.xlarge\) -* Staging S3 bucket to store temporary data - -![](../../.gitbook/assets/feast-on-aws-3-%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%282%29%20%281%29.png) - -### 1. 
Requirements - -* Create an AWS account and [configure credentials locally](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) -* Install [Terraform](https://www.terraform.io/) > = 0.12 \(tested with 0.13.3\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.3.4\) - -### 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/aws`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and an AWS region: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "my-feast" -region = "us-east-1" -``` -{% endcode %} - -### 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/aws -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -Starting may take a minute. A kubectl configuration file is also created in this directory, and the file's name will start with `kubeconfig_` and end with a random suffix. - -### 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. Replace `kubeconfig_XXXXXXX` below with the kubeconfig file name Terraform generates for you. - -```bash -KUBECONFIG=kubeconfig_XXXXXXX kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. 
- diff --git a/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md b/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md deleted file mode 100644 index 66ba73ef23e..00000000000 --- a/docs/getting-started/install-feast/kubernetes-azure-aks-with-helm.md +++ /dev/null @@ -1,139 +0,0 @@ -# Azure AKS \(with Helm\) - -## Overview - -This guide installs Feast on Azure Kubernetes cluster \(known as AKS\), and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Spark -* Kafka -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -2. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -3. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Create an AKS cluster with Azure CLI. The detailed steps can be found [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough), and a high-level walk through includes: - -```bash -az group create --name myResourceGroup --location eastus -az acr create --resource-group myResourceGroup --name feast-AKS-ACR --sku Basic -az aks create -g myResourceGroup -n feast-AKS --location eastus --attach-acr feast-AKS-ACR --generate-ssh-keys - -az aks install-cli -az aks get-credentials --resource-group myResourceGroup --name feast-AKS -``` - -Add the Feast Helm repository and download the latest charts: - -```bash -helm version # make sure you have the latest Helm installed -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. 
- -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Feast installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Spark operator installation - -Follow the documentation [to install Spark operator on Kubernetes ](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator), and Feast documentation to [configure Spark roles](../../reference/feast-and-spark.md) - -```bash -helm repo add spark-operator https://googlecloudplatform.github.io/spark-on-k8s-operator -helm install my-release spark-operator/spark-operator --set serviceAccounts.spark.name=spark --set image.tag=v1beta2-1.1.2-2.4.5 -``` - -and ensure the service account used by Feast has permissions to manage Spark Application resources. This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < -rules: -- apiGroups: ["sparkoperator.k8s.io"] - resources: ["sparkapplications"] - verbs: ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1beta1 -kind: RoleBinding -metadata: - name: use-spark-operator - namespace: -roleRef: - kind: Role - name: use-spark-operator - apiGroup: rbac.authorization.k8s.io -subjects: - - kind: ServiceAccount - name: default -EOF -``` - -## 5. 
Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 6. Environment variables - -If you are running the [Minimal Ride Hailing Example](https://github.com/feast-dev/feast/blob/master/examples/minimal/minimal_ride_hailing.ipynb), you may want to make sure the following environment variables are correctly set: - -```text -demo_data_location = "wasbs://@.blob.core.windows.net/" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_NAME"] = "" -os.environ["FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY"] = -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "wasbs://@.blob.core.windows.net/out/" -os.environ["FEAST_SPARK_STAGING_LOCATION"] = "wasbs://@.blob.core.windows.net/artifacts/" -os.environ["FEAST_SPARK_LAUNCHER"] = "k8s" -os.environ["FEAST_SPARK_K8S_NAMESPACE"] = "default" -os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT"] = "parquet" -os.environ["FEAST_REDIS_HOST"] = "feast-release-redis-master.default.svc.cluster.local" -os.environ["DEMO_KAFKA_BROKERS"] = "feast-release-kafka.default.svc.cluster.local:9092" -``` - -## 7. 
Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference/configuration-reference.md) -* [Feast and Spark](../../reference/feast-and-spark.md) - diff --git a/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md b/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md deleted file mode 100644 index 71dd15908de..00000000000 --- a/docs/getting-started/install-feast/kubernetes-azure-aks-with-terraform.md +++ /dev/null @@ -1,63 +0,0 @@ -# Azure AKS \(with Terraform\) - -## Overview - -This guide installs Feast on Azure using our [reference Terraform configuration](https://github.com/feast-dev/feast/tree/master/infra/terraform/azure). - -{% hint style="info" %} -The Terraform configuration used here is a greenfield installation that neither assumes anything about, nor integrates with, existing resources in your Azure account. The Terraform configuration presents an easy way to get started, but you may want to customize this set up before using Feast in production. -{% endhint %} - -This Terraform configuration creates the following resources: - -* Kubernetes cluster on Azure AKS -* Kafka managed by HDInsight -* Postgres database for Feast metadata, running as a pod on AKS -* Redis cluster, using Azure Cache for Redis -* [spark-on-k8s-operator](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator) to run Spark -* Staging Azure blob storage container to store temporary data - -## 1. 
Requirements - -* Create an Azure account and [configure credentials locally](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) -* Install [Terraform](https://www.terraform.io/) \(tested with 0.13.5\) -* Install [Helm](https://helm.sh/docs/intro/install/) \(tested with v3.4.2\) - -## 2. Configure Terraform - -Create a `.tfvars` file under`feast/infra/terraform/azure`. Name the file. In our example, we use `my_feast.tfvars`. You can see the full list of configuration variables in `variables.tf`. At a minimum, you need to set `name_prefix` and `resource_group`: - -{% code title="my\_feast.tfvars" %} -```typescript -name_prefix = "feast" -resource_group = "Feast" # pre-existing resource group -``` -{% endcode %} - -## 3. Apply - -After completing the configuration, initialize Terraform and apply: - -```bash -$ cd feast/infra/terraform/azure -$ terraform init -$ terraform apply -var-file=my_feast.tfvars -``` - -## 4. Connect to Feast using Jupyter - -After all pods are running, connect to the Jupyter Notebook Server running in the cluster. - -To connect to the remote Feast server you just created, forward a port from the remote k8s cluster to your local machine. - -```bash -kubectl port-forward $(kubectl get pod -o custom-columns=:metadata.name | grep jupyter) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. 
- diff --git a/docs/getting-started/install-feast/kubernetes-with-helm.md b/docs/getting-started/install-feast/kubernetes-with-helm.md deleted file mode 100644 index f31d666ba9d..00000000000 --- a/docs/getting-started/install-feast/kubernetes-with-helm.md +++ /dev/null @@ -1,69 +0,0 @@ -# Kubernetes \(with Helm\) - -## Overview - -This guide installs Feast on an existing Kubernetes cluster, and ensures the following services are running: - -* Feast Core -* Feast Online Serving -* Postgres -* Redis -* Feast Jupyter \(Optional\) -* Prometheus \(Optional\) - -## 1. Requirements - -1. Install and configure [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) -2. Install [Helm 3](https://helm.sh/) - -## 2. Preparation - -Add the Feast Helm repository and download the latest charts: - -```text -helm repo add feast-charts https://feast-helm-charts.storage.googleapis.com -helm repo update -``` - -Feast includes a Helm chart that installs all necessary components to run Feast Core, Feast Online Serving, and an example Jupyter notebook. - -Feast Core requires Postgres to run, which requires a secret to be set on Kubernetes: - -```bash -kubectl create secret generic feast-postgresql --from-literal=postgresql-password=password -``` - -## 3. Installation - -Install Feast using Helm. The pods may take a few minutes to initialize. - -```bash -helm install feast-release feast-charts/feast -``` - -## 4. Use Jupyter to connect to Feast - -After all the pods are in a `RUNNING` state, port-forward to the Jupyter Notebook Server in the cluster: - -```bash -kubectl port-forward \ -$(kubectl get pod -l app=feast-jupyter -o custom-columns=:metadata.name) 8888:8888 -``` - -```text -Forwarding from 127.0.0.1:8888 -> 8888 -Forwarding from [::1]:8888 -> 8888 -``` - -You can now connect to the bundled Jupyter Notebook Server at `localhost:8888` and follow the example Jupyter notebook. - -{% embed url="http://localhost:8888/tree?" caption="" %} - -## 5. 
Further Reading - -* [Feast Concepts](../../concepts/overview.md) -* [Feast Examples/Tutorials](https://github.com/feast-dev/feast/tree/master/examples) -* [Feast Helm Chart Documentation](https://github.com/feast-dev/feast/blob/master/infra/charts/feast/README.md) -* [Configuring Feast components](../../reference/configuration-reference.md) -* [Feast and Spark](../../reference/feast-and-spark.md) - diff --git a/docs/getting-started/learn-feast.md b/docs/getting-started/learn-feast.md deleted file mode 100644 index 10f2eb6d291..00000000000 --- a/docs/getting-started/learn-feast.md +++ /dev/null @@ -1,15 +0,0 @@ -# Learn Feast - -Explore the following resources to learn more about Feast: - -* [Concepts](../) describes all important Feast API concepts. -* [User guide](../user-guide/define-and-ingest-features.md) provides guidance on completing Feast workflows. -* [Examples](https://github.com/feast-dev/feast/tree/master/examples) contains Jupyter notebooks that you can run on your Feast deployment. -* [Advanced](../advanced/troubleshooting.md) contains information about both advanced and operational aspects of Feast. -* [Reference](../reference/api/) contains detailed API and design documents for advanced users. -* [Contributing](../contributing/contributing.md) contains resources for anyone who wants to contribute to Feast. - -{% hint style="info" %} -The best way to learn Feast is to use it. Jump over to our [Quickstart](../quickstart.md) guide to have one of our examples running in no time at all! -{% endhint %} - diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index f93a3aa714e..c067513d313 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -17,7 +17,7 @@ In this tutorial, we use feature stores to generate training data and power onli 1. **Training-serving skew and complex data joins:** Feature values often exist across multiple tables. 
Joining these datasets can be complicated, slow, and error-prone. * Feast joins these tables with battle-tested logic that ensures _point-in-time_ correctness so future feature values do not leak to models. - * _\*Upcoming_: Feast alerts users to offline / online skew with data quality monitoring. + * Feast alerts users to offline / online skew with data quality monitoring 2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources. * Feast manages deployment to a variety of online stores (e.g. DynamoDB, Redis, Google Cloud Datastore) and ensures necessary features are consistently _available_ and _freshly computed_ at inference time. 3. **Feature reusability and model versioning:** Different teams within an organization are often unable to reuse features across projects, resulting in duplicate feature creation logic. Models have data dependencies that need to be versioned, for example when running A/B tests on model versions. @@ -28,7 +28,7 @@ In this tutorial, we use feature stores to generate training data and power onli Install the Feast SDK and CLI using pip: -* In this tutorial, we focus on a local deployment. For a more in-depth guide on how to use Feast with GCP or AWS deployments, see [Running Feast with GCP/AWS](../how-to-guides/feast-gcp-aws/) +* In this tutorial, we focus on a local deployment. 
For a more in-depth guide on how to use Feast with Snowflake / GCP / AWS deployments, see [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) {% tabs %} {% tab title="Bash" %} @@ -123,11 +123,14 @@ The key line defining the overall architecture of the feature store is the **pro Valid values for `provider` in `feature_store.yaml` are: -* local: use file source / SQLite -* gcp: use BigQuery / Google Cloud Datastore -* aws: use Redshift / DynamoDB +* local: use file source with SQLite/Redis +* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis +* aws: use Redshift/Snowflake with DynamoDB/Redis + +Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. See [Third party integrations](../getting-started/third-party-integrations.md) for all supported datasources. + +A custom setup can also be made by following [adding a custom provider](../how-to-guides/creating-a-custom-provider.md). -To use a custom provider, see [adding a custom provider](../how-to-guides/creating-a-custom-provider.md). There are also several plugins maintained by the community: [Azure](https://github.com/Azure/feast-azure), [Postgres](https://github.com/nossrannug/feast-postgres), and [Hive](https://github.com/baineng/feast-hive). Note that the choice of provider gives sensible defaults but does not enforce those choices; for example, if you choose the AWS provider, you can use [Redis](../reference/online-stores/redis.md) as an online store alongside Redshift as an offline store. ## Step 3: Register feature definitions and deploy your feature store @@ -345,5 +348,5 @@ pprint(feature_vector) * Read the [Concepts](concepts/) page to understand the Feast data model. * Read the [Architecture](architecture-and-components/) page. * Check out our [Tutorials](../tutorials/tutorials-overview.md) section for more examples on how to use Feast. 
-* Follow our [Running Feast with GCP/AWS](../how-to-guides/feast-gcp-aws/) guide for a more in-depth tutorial on using Feast. +* Follow our [Running Feast with Snowflake/GCP/AWS](../how-to-guides/feast-snowflake-gcp-aws/) guide for a more in-depth tutorial on using Feast. * Join other Feast users and contributors in [Slack](https://slack.feast.dev) and become part of the community! diff --git a/docs/getting-started/third-party-integrations.md b/docs/getting-started/third-party-integrations.md index 31b6acdc880..ba1b360fc05 100644 --- a/docs/getting-started/third-party-integrations.md +++ b/docs/getting-started/third-party-integrations.md @@ -13,27 +13,29 @@ Don't see your offline store or online store of choice here? Check out our guide ### **Data Sources** +* [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) +* [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) -* [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) * [ ] HTTP source ### Offline Stores +* [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community 
plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) +* [x] [Trino (community plugin)](https://github.com/Shopify/feast-trino) +* [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) -* [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) -* [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) ### Online Stores @@ -59,7 +61,7 @@ Don't see your offline store or online store of choice here? Check out our guide In order for a plugin integration to be highlighted on this page, it must meet the following requirements: -1. The plugin must have tests. Ideally it would use the Feast universal tests (see this [guide](broken-reference) for an example), but custom tests are fine. +1. The plugin must have tests. Ideally it would use the Feast universal tests (see this [guide](../how-to-guides/adding-or-reusing-tests.md) for an example), but custom tests are fine. 2. The plugin must have some basic documentation on how it should be used. 3. The author must work with a maintainer to pass a basic code review (e.g. to ensure that the implementation roughly matches the core Feast implementations). 
diff --git a/docs/how-to-guides/feast-gcp-aws/README.md b/docs/how-to-guides/feast-snowflake-gcp-aws/README.md similarity index 88% rename from docs/how-to-guides/feast-gcp-aws/README.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/README.md index d120eab3144..753650080b0 100644 --- a/docs/how-to-guides/feast-gcp-aws/README.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/README.md @@ -1,4 +1,4 @@ -# Running Feast with GCP/AWS +# Running Feast with Snowflake/GCP/AWS {% page-ref page="install-feast.md" %} diff --git a/docs/how-to-guides/feast-gcp-aws/build-a-training-dataset.md b/docs/how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/build-a-training-dataset.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/build-a-training-dataset.md diff --git a/docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md b/docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md similarity index 84% rename from docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md index 1add0a92e86..8754bc051a1 100644 --- a/docs/how-to-guides/feast-gcp-aws/create-a-feature-repository.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/create-a-feature-repository.md @@ -13,6 +13,20 @@ Creating a new Feast repository in /<...>/tiny_pika. ``` {% endtab %} +{% tab title="Snowflake template" %} +```bash +feast init -t snowflake +Snowflake Deployment URL: ... +Snowflake User Name: ... +Snowflake Password: ... +Snowflake Role Name: ... +Snowflake Warehouse Name: ... +Snowflake Database Name: ... + +Creating a new Feast repository in /<...>/tiny_pika. +``` +{% endtab %} + {% tab title="GCP template" %} ```text feast init -t gcp @@ -30,7 +44,7 @@ Redshift Database Name: ... Redshift User Name: ... Redshift S3 Staging Location (s3://*): ... 
Redshift IAM Role for S3 (arn:aws:iam::*:role/*): ... -Should I upload example data to Redshift (overwriting 'feast_driver_hourly_stats' table)? (Y/n): +Should I upload example data to Redshift (overwriting 'feast_driver_hourly_stats' table)? (Y/n): Creating a new Feast repository in /<...>/tiny_pika. ``` @@ -63,4 +78,3 @@ You can now use this feature repository for development. You can try the followi * Run `feast apply` to apply these definitions to Feast. * Edit the example feature definitions in `example.py` and run `feast apply` again to change feature definitions. * Initialize a git repository in the same directory and checking the feature repository into version control. - diff --git a/docs/how-to-guides/feast-gcp-aws/deploy-a-feature-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/deploy-a-feature-store.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/deploy-a-feature-store.md diff --git a/docs/how-to-guides/feast-gcp-aws/install-feast.md b/docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md similarity index 80% rename from docs/how-to-guides/feast-gcp-aws/install-feast.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md index 019231be095..26d95c6117a 100644 --- a/docs/how-to-guides/feast-gcp-aws/install-feast.md +++ b/docs/how-to-guides/feast-snowflake-gcp-aws/install-feast.md @@ -6,6 +6,12 @@ Install Feast using [pip](https://pip.pypa.io): pip install feast ``` +Install Feast with Snowflake dependencies (required when using Snowflake): + +``` +pip install 'feast[snowflake]' +``` + Install Feast with GCP dependencies (required when using BigQuery or Firestore): ``` diff --git a/docs/how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/load-data-into-the-online-store.md 
rename to docs/how-to-guides/feast-snowflake-gcp-aws/load-data-into-the-online-store.md diff --git a/docs/how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md b/docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md similarity index 100% rename from docs/how-to-guides/feast-gcp-aws/read-features-from-the-online-store.md rename to docs/how-to-guides/feast-snowflake-gcp-aws/read-features-from-the-online-store.md diff --git a/docs/load-data-into-the-online-store.md b/docs/load-data-into-the-online-store.md deleted file mode 100644 index 48bfb27fc44..00000000000 --- a/docs/load-data-into-the-online-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Load data into the online store - diff --git a/docs/project/release-process.md b/docs/project/release-process.md index 8ecd55a63f4..af573c92c76 100644 --- a/docs/project/release-process.md +++ b/docs/project/release-process.md @@ -22,7 +22,6 @@ For Feast maintainers, these are the concrete steps for making a new release. 2. Add the change log by applying the change log commit created in step 2. 3. Check that versions are updated with `env TARGET_MERGE_BRANCH=master make lint-versions` 7. Create a [GitHub release](https://github.com/feast-dev/feast/releases) which includes a summary of im~~p~~ortant changes as well as any artifacts associated with the release. Make sure to include the same change log as added in [CHANGELOG.md](../../CHANGELOG.md). Use `Feast vX.Y.Z` as the title. -8. Update the[ Upgrade Guide](broken-reference) to include the action required instructions for users to upgrade to this new release. Instructions should include a migration for each breaking change made to this release. When a tag that matches a Semantic Version string is pushed, CI will automatically build and push the relevant artifacts to their repositories or package managers (docker images, Python wheels, etc). 
JVM artifacts are promoted from Sonatype OSSRH to Maven Central, but it sometimes takes some time for them to be available. The `sdk/go/v tag` is required to version the Go SDK go module so that users can go get a specific tagged release of the Go SDK. diff --git a/docs/read-features-from-the-online-store.md b/docs/read-features-from-the-online-store.md deleted file mode 100644 index db082897a25..00000000000 --- a/docs/read-features-from-the-online-store.md +++ /dev/null @@ -1,2 +0,0 @@ -# Read features from the online store - diff --git a/docs/reference/api.md b/docs/reference/api.md deleted file mode 100644 index 16467bb2dc7..00000000000 --- a/docs/reference/api.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast.core.pb.html): This is the gRPC API used by Feast Core. Feast Core has a dual function of schema registry and job manager. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast.types.pb.html): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. The SDK is used to manage feature sets, features, jobs, projects, and entities. 
It can also be used to retrieve training datasets or online features from Feast Serving. - -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/reference/api/README.md b/docs/reference/api/README.md deleted file mode 100644 index cd75f5bf88f..00000000000 --- a/docs/reference/api/README.md +++ /dev/null @@ -1,17 +0,0 @@ -# API Reference - -Please see the following API specific reference documentation: - -* [Feast Core gRPC API](https://api.docs.feast.dev/grpc/feast/core/coreservice.pb.html): This is the gRPC API used by Feast Core. This API contains RPCs for creating and managing feature sets, stores, projects, and jobs. -* [Feast Serving gRPC API](https://api.docs.feast.dev/grpc/feast/serving/servingservice.pb.html): This is the gRPC API used by Feast Serving. It contains RPCs used for the retrieval of online feature data or historical feature data. -* [Feast gRPC Types](https://api.docs.feast.dev/grpc/feast/types/value.pb): These are the gRPC types used by both Feast Core, Feast Serving, and the Go, Java, and Python clients. -* [Go Client SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go): The Go library used for the retrieval of online features from Feast. -* [Java Client SDK](https://javadoc.io/doc/dev.feast/feast-sdk): The Java library used for the retrieval of online features from Feast. -* [Python SDK](https://api.docs.feast.dev/python/): This is the complete reference to the Feast Python SDK. The SDK is used to manage feature sets, features, jobs, projects, and entities. It can also be used to retrieve training datasets or online features from Feast Serving. 
- -## Community Contributions - -The following community provided SDKs are available: - -* [Node.js SDK](https://github.com/MichaelHirn/feast-client/): A Node.js SDK written in TypeScript. The SDK can be used to manage feature sets, features, jobs, projects, and entities. - diff --git a/docs/reference/configuration-reference.md b/docs/reference/configuration-reference.md deleted file mode 100644 index 6f9a97dabfd..00000000000 --- a/docs/reference/configuration-reference.md +++ /dev/null @@ -1,132 +0,0 @@ -# Configuration Reference - -## Overview - -This reference describes how to configure Feast components: - -* [Feast Core and Feast Online Serving](configuration-reference.md#2-feast-core-serving-and-job-controller) -* [Feast CLI and Feast Python SDK](configuration-reference.md#3-feast-cli-and-feast-python-sdk) -* [Feast Go and Feast Java SDK](configuration-reference.md#4-feast-java-and-go-sdk) - -## 1. Feast Core and Feast Online Serving - -Available configuration properties for Feast Core and Feast Online Serving can be referenced from the corresponding `application.yml` of each component: - -| Component | Configuration Reference | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -Configuration properties for Feast Core and Feast Online Serving are defined depending on Feast is deployed: - -* [Docker Compose deployment](configuration-reference.md#docker-compose-deployment) - Feast is deployed with Docker Compose. -* [Kubernetes deployment](configuration-reference.md#kubernetes-deployment) - Feast is deployed with Kubernetes. -* [Direct Configuration](configuration-reference.md#direct-configuration) - Feast is built and run from source code. 
- -## Docker Compose Deployment - -For each Feast component deployed using Docker Compose, configuration properties from `application.yml` can be set at: - -| Component | Configuration Path | -| :--- | :--- | -| Core | `infra/docker-compose/core/core.yml` | -| Online Serving | `infra/docker-compose/serving/online-serving.yml` | - -## Kubernetes Deployment - -The Kubernetes Feast Deployment is configured using `values.yaml` in the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast: - -```yaml -# values.yaml -feast-core: - enabled: true # whether to deploy the feast-core subchart to deploy Feast Core. - # feast-core subchart specific config. - gcpServiceAccount: - enabled: true - # .... -``` - -A reference of the sub-chart-specific configuration can found in its `values.yml`: - -* [feast-core](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-core) -* [feast-serving](https://github.com/feast-dev/feast-java/tree/master/infra/charts/feast-serving) - -Configuration properties can be set via `application-override.yaml` for each component in `values.yaml`: - -```yaml -# values.yaml -feast-core: - # .... - application-override.yaml: - # application.yml config properties for Feast Core. - # ... -``` - -Visit the [Helm chart](https://github.com/feast-dev/feast-helm-charts) included with Feast to learn more about configuration. - -## Direct Configuration - -If Feast is built and running from source, configuration properties can be set directly in the Feast component's `application.yml`: - -| Component | Configuration Path | -| :--- | :--- | -| Core | [core/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/core/src/main/resources/application.yml) | -| Serving \(Online\) | [serving/src/main/resources/application.yml](https://github.com/feast-dev/feast-java/blob/master/serving/src/main/resources/application.yml) | - -## 2. 
Feast CLI and Feast Python SDK - -Configuration options for both the [Feast CLI](../getting-started/connect-to-feast/feast-cli.md) and [Feast Python SDK](https://api.docs.feast.dev/python/) can be defined in the following locations, in order of precedence: - -**1. Command line arguments or initialized arguments:** Passing parameters to the Feast CLI or instantiating the Feast Client object with specific parameters will take precedence above other parameters. - -```bash -# Set option as command line arguments. -feast config set core_url "localhost:6565" -``` - -```python -# Pass options as initialized arguments. -client = Client( - core_url="localhost:6565", - project="default" -) -``` - -**2. Environmental variables:** Environmental variables can be set to provide configuration options. They must be prefixed with `FEAST_`. For example `FEAST_CORE_URL`. - -```bash -FEAST_CORE_URL=my_feast:6565 FEAST_PROJECT=default feast projects list -``` - -**3. Configuration file:** Options with the lowest precedence are configured in the Feast configuration file. Feast looks for or creates this configuration file in `~/.feast/config` if it does not already exist. All options must be defined in the `[general]` section of this file. - -```text -[general] -project = default -core_url = localhost:6565 -``` - -Visit the [available configuration parameters](https://api.docs.feast.dev/python/#module-feast.constants) for Feast Python SDK and Feast CLI to learn more. - -## 3. Feast Java and Go SDK - -The [Feast Java SDK](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) and [Feast Go SDK](https://godoc.org/github.com/feast-dev/feast/sdk/go) are configured via arguments passed when instantiating the respective Clients: - -### Go SDK - -```go -// configure serving host and port. 
-cli := feast.NewGrpcClient("localhost", 6566) -``` - -Visit the[ Feast Go SDK API reference](https://godoc.org/github.com/feast-dev/feast/sdk/go) to learn more about available configuration parameters. - -### Java SDK - -```java -// configure serving host and port. -client = FeastClient.create(servingHost, servingPort); -``` - -Visit the [Feast Java SDK API reference](https://javadoc.io/doc/dev.feast/feast-sdk/latest/com/gojek/feast/package-summary.html) to learn more about available configuration parameters. - diff --git a/docs/reference/data-sources/README.md b/docs/reference/data-sources/README.md index 6732fc16a08..fc6e136a9c2 100644 --- a/docs/reference/data-sources/README.md +++ b/docs/reference/data-sources/README.md @@ -4,7 +4,8 @@ Please see [Data Source](../../getting-started/concepts/feature-view.md#data-sou {% page-ref page="file.md" %} +{% page-ref page="snowflake.md" %} + {% page-ref page="bigquery.md" %} {% page-ref page="redshift.md" %} - diff --git a/docs/reference/data-sources/snowflake.md b/docs/reference/data-sources/snowflake.md new file mode 100644 index 00000000000..0f5304b6cdc --- /dev/null +++ b/docs/reference/data-sources/snowflake.md @@ -0,0 +1,44 @@ +# Snowflake + +## Description + +Snowflake data sources allow for the retrieval of historical feature values from Snowflake for building training datasets as well as materializing features into an online store. + +* Either a table reference or a SQL query can be provided. + +## Examples + +Using a table reference + +```python +from feast import SnowflakeSource + +my_snowflake_source = SnowflakeSource( + database="FEAST", + schema="PUBLIC", + table="FEATURE_TABLE", +) +``` + +Using a query + +```python +from feast import SnowflakeSource + +my_snowflake_source = SnowflakeSource( + query=""" + SELECT + timestamp_column AS "ts", + "created", + "f1", + "f2" + FROM + `FEAST.PUBLIC.FEATURE_TABLE` + """, +) +``` + +One thing to remember is how Snowflake handles table and column name conventions. 
+You can read more about quoted identifiers [here](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html). + +Configuration options are available [here](https://rtd.feast.dev/en/latest/index.html#feast.data_source.SnowflakeSource). diff --git a/docs/reference/dqm.md b/docs/reference/dqm.md new file mode 100644 index 00000000000..5a02413e534 --- /dev/null +++ b/docs/reference/dqm.md @@ -0,0 +1,77 @@ +# Data Quality Monitoring + +Data Quality Monitoring (DQM) is a Feast module aimed at helping users validate their data with a user-curated set of rules. +Validation could be applied during: +* Historical retrieval (training dataset generation) +* [planned] Writing features into an online store +* [planned] Reading features from an online store + +Its goal is to address several complex data problems, namely: +* Data consistency - new training datasets can be significantly different from previous datasets. This might require a change in model architecture. +* Issues/bugs in the upstream pipeline - bugs in upstream pipelines can cause invalid values to overwrite existing valid values in an online store. +* Training/serving skew - distribution shift could significantly decrease the performance of the model. + +> To monitor data quality, we check that the characteristics of the tested dataset (aka the tested dataset's profile) are "equivalent" to the characteristics of the reference dataset. +> How exactly profile equivalency should be measured is up to the user. + +### Overview + +The validation process consists of the following steps: +1. User prepares reference dataset (currently only [saved datasets](../getting-started/concepts/dataset.md) from historical retrieval are supported). +2. User defines profiler function, which should produce a profile for a given dataset (currently only profilers based on [Great Expectations](https://docs.greatexpectations.io) are allowed). +3. 
Validation of tested dataset is performed with reference dataset and profiler provided as parameters. + +### Preparations +Feast with Great Expectations support can be installed via +```shell +pip install 'feast[ge]' +``` + +### Dataset profile +Currently, Feast supports only [Great Expectation's](https://greatexpectations.io/) [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite) +as dataset's profile. Hence, the user needs to define a function (profiler) that would receive a dataset and return an [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite). + +Great Expectations supports automatic profiling as well as manually specifying expectations: +```python +from great_expectations.dataset import Dataset +from great_expectations.core.expectation_suite import ExpectationSuite + +from feast.dqm.profilers.ge_profiler import ge_profiler + +@ge_profiler +def automatic_profiler(dataset: Dataset) -> ExpectationSuite: + from great_expectations.profile.user_configurable_profiler import UserConfigurableProfiler + + return UserConfigurableProfiler( + profile_dataset=dataset, + ignored_columns=['conv_rate'], + value_set_threshold='few' + ).build_suite() +``` +However, from our experience capabilities of automatic profiler are quite limited. 
So we would recommend crafting your own expectations: +```python +@ge_profiler +def manual_profiler(dataset: Dataset) -> ExpectationSuite: + dataset.expect_column_max_to_be_between("column", 1, 2) + return dataset.get_expectation_suite() +``` + + + +### Validating Training Dataset +During retrieval of historical features, `validation_reference` can be passed as a parameter to methods `.to_df(validation_reference=...)` or `.to_arrow(validation_reference=...)` of RetrievalJob. +If the parameter is provided, Feast will run validation once the dataset is materialized. If validation is successful, the materialized dataset is returned. +Otherwise, a `feast.dqm.errors.ValidationFailed` exception will be raised. It will contain the details of all expectations that did not pass. + +```python +from feast import FeatureStore + +fs = FeatureStore(".") + +job = fs.get_historical_features(...) +job.to_df( + validation_reference=fs + .get_saved_dataset("my_reference_dataset") + .as_reference(profiler=manual_profiler) +) +``` diff --git a/docs/reference/feast-and-spark.md b/docs/reference/feast-and-spark.md deleted file mode 100644 index be05f177aeb..00000000000 --- a/docs/reference/feast-and-spark.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -description: Configuring Feast to use Spark for ingestion. ---- - -# Feast and Spark - -Feast relies on Spark to ingest data from the offline store to the online store, streaming ingestion, and running queries to retrieve historical data from the offline store. Feast supports several Spark deployment options. - -## Option 1. Use Kubernetes Operator for Apache Spark - -To install the Spark on K8s Operator - -```bash -helm repo add spark-operator \ - https://googlecloudplatform.github.io/spark-on-k8s-operator - -helm install my-release spark-operator/spark-operator \ - --set serviceAccounts.spark.name=spark -``` - -Currently Feast is tested using `v1beta2-1.1.2-2.4.5`version of the operator image. 
To configure Feast to use it, set the following options in Feast config: - -| Feast Setting | Value | -| :--- | :--- | -| `SPARK_LAUNCHER` | `"k8s"` | -| `SPARK_STAGING_LOCATION` | S3/GCS/Azure Blob Storage URL to use as a staging location, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/artifacts/` | -| `HISTORICAL_FEATURE_OUTPUT_LOCATION` | S3/GCS/Azure Blob Storage URL used to store results of historical retrieval queries, must be readable and writable by Feast. For S3, use `s3a://` prefix here. Ex.: `s3a://some-bucket/some-prefix/out/` | -| `SPARK_K8S_NAMESPACE` | Only needs to be set if you are customizing the spark-on-k8s-operator. The name of the Kubernetes namespace to run Spark jobs in. This should match the value of `sparkJobNamespace` set on spark-on-k8s-operator Helm chart. Typically this is also the namespace Feast itself will run in. | -| `SPARK_K8S_JOB_TEMPLATE_PATH` | Only needs to be set if you are customizing the Spark job template. Local file path with the template of the SparkApplication resource. No prefix required. Ex.: `/home/jovyan/work/sparkapp-template.yaml`. An example template is [here](https://github.com/feast-dev/feast/blob/4059a21dc4eba9cd27b2d5b0fabe476c07a8b3bd/sdk/python/feast/pyspark/launchers/k8s/k8s_utils.py#L280-L317) and the spec is defined in the [k8s-operator User Guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/user-guide.md). | - -Lastly, make sure that the service account used by Feast has permissions to manage Spark Application resources. This depends on your k8s setup, but typically you'd need to configure a Role and a RoleBinding like the one below: - -```text -cat < - - - Limitation - Motivation - - - - - Features names and entity names cannot overlap in feature table definitions - Features and entities become columns in historical stores which may cause - conflicts - - - -

The following field names are reserved in feature tables

-
    -
  • event_timestamp -
  • -
  • datetime -
  • -
  • created_timestamp -
  • -
  • ingestion_id -
  • -
  • job_id -
  • -
- - These keywords are used for column names when persisting metadata in historical - stores - - - - -### Ingestion - -| Limitation | Motivation | -| :--- | :--- | -| Once data has been ingested into Feast, there is currently no way to delete the data without manually going to the database and deleting it. However, during retrieval only the latest rows will be returned for a specific key \(`event_timestamp`, `entity`\) based on its `created_timestamp`. | This functionality simply doesn't exist yet as a Feast API | - -### Storage - -| Limitation | Motivation | -| :--- | :--- | -| Feast does not support offline storage in Feast 0.8 | As part of our re-architecture of Feast, we moved from GCP to cloud-agnostic deployments. Developing offline storage support that is available in all cloud environments is a pending action. | - diff --git a/docs/reference/metrics-reference.md b/docs/reference/metrics-reference.md deleted file mode 100644 index 34c97c7be60..00000000000 --- a/docs/reference/metrics-reference.md +++ /dev/null @@ -1,178 +0,0 @@ -# Metrics Reference - -{% hint style="warning" %} -This page applies to Feast 0.7. The content may be out of date for Feast 0.8+ -{% endhint %} - -Reference of the metrics that each Feast component exports: - -* [Feast Core](metrics-reference.md#feast-core) -* [Feast Serving](metrics-reference.md#feast-serving) -* [Feast Ingestion Job](metrics-reference.md#feast-ingestion-job) - -For how to configure Feast to export Metrics, see the [Metrics user guide.](../advanced/metrics.md) - -## Feast Core - -**Exported Metrics** - -Feast Core exports the following metrics: - -| Metrics | Description | Tags | -| :--- | :--- | :--- | -| `feast_core_request_latency_seconds` | Feast Core's latency in serving Requests in Seconds. | `service`, `method`, `status_code` | -| `feast_core_feature_set_total` | No. of Feature Sets registered with Feast Core. | None | -| `feast_core_store_total` | No. of Stores registered with Feast Core. 
| None | -| `feast_core_max_memory_bytes` | Max amount of memory the Java virtual machine will attempt to use. | None | -| `feast_core_total_memory_bytes` | Total amount of memory in the Java virtual machine | None | -| `feast_core_free_memory_bytes` | Total amount of free memory in the Java virtual machine. | None | -| `feast_core_gc_collection_seconds` | Time spent in a given JVM garbage collector in seconds. | None | - -**Metric Tags** - -Exported Feast Core metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `service` | Name of the Service that request is made to. Should be set to `CoreService` | -| `method` | Name of the Method that the request is calling. \(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | - -## Feast Serving - -**Exported Metrics** - -Feast Serving exports the following metrics: - -| Metric | Description | Tags | -| :--- | :--- | :--- | -| `feast_serving_request_latency_seconds` | Feast Serving's latency in serving Requests in Seconds. | `method` | -| `feast_serving_request_feature_count` | No. of requests retrieving a Feature from Feast Serving. | `project`, `feature_name` | -| `feast_serving_not_found_feature_count` | No. of requests retrieving a Feature has resulted in a [`NOT_FOUND` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_stale_feature_count` | No. of requests retrieving a Feature resulted in a [`OUTSIDE_MAX_AGE` field status.](../user-guide/getting-training-features.md#online-field-statuses) | `project`, `feature_name` | -| `feast_serving_grpc_request_count` | Total gRPC requests served. | `method` | - -**Metric Tags** - -Exported Feast Serving metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `method` | Name of the Method that the request is calling. 
\(ie `ListFeatureSets`\) | -| `status_code` | Status code returned as a result of handling the requests \(ie `OK`\). Can be used to find request failures. | -| `project` | Name of the project that the FeatureSet of the Feature retrieved belongs to. | -| `feature_name` | Name of the Feature being retrieved. | - -## Feast Ingestion Job - -Feast Ingestion computes both metrics an statistics on [data ingestion.](../user-guide/define-and-ingest-features.md) Make sure you familar with data ingestion concepts before proceeding. - -**Metrics Namespace** - -Metrics are computed at two stages of the Feature Row's/Feature Value's life cycle when being processed by the Ingestion Job: - -* `Inflight`- Prior to writing data to stores, but after successful validation of data. -* `WriteToStoreSucess`- After a successful store write. - -Metrics processed by each staged will be tagged with `metrics_namespace` to the stage where the metric was computed. - -**Metrics Bucketing** - -Metrics with a `{BUCKET}` are computed on a 60 second window/bucket. Suffix with the following to select the bucket to use: - -* `min` - minimum value. -* `max` - maximum value. -* `mean`- mean value. -* `percentile_90`- 90 percentile. -* `percentile_95`- 95 percentile. -* `percentile_99`- 99 percentile. - -**Exported Metrics** - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
MetricDescriptionTags
feast_ingestion_feature_row_lag_ms_{BUCKET} - Lag time in milliseconds between succeeding ingested Feature Rows. -

feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_lag_ms_{BUCKET} - Lag time in milliseconds between succeeding ingested values for each Feature. -

feast_store, feast_project_name,feast_featureSet_name,

-

feast_feature_name,

-

ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_{BUCKET} - Last value feature for each Feature.feast_store, feast_project_name, feast_feature_name,feast_featureSet_name, ingestion_job_name, metrics_namespace -
feast_ingestion_feature_row_ingested_count - No. of Ingested Feature Rows -

feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_feature_value_missing_count - No. of times an ingested Feature Row did not provide a value for the - Feature. -

feast_store, feast_project_name,feast_featureSet_name,

-

feast_feature_name,

-

ingestion_job_name,

-

metrics_namespace -

-
feast_ingestion_deadletter_row_count - No. of Feature Rows that the Ingestion Job did not successfully write - to store.feast_store, feast_project_name,feast_featureSet_name,ingestion_job_name -
- -**Metric Tags** - -Exported Feast Ingestion Job metrics may be filtered by the following tags/keys - -| Tag | Description | -| :--- | :--- | -| `feast_store` | Name of the target store the Ingestion Job is writing to. | -| `feast_project_name` | Name of the project that the ingested FeatureSet belongs to. | -| `feast_featureSet_name` | Name of the Feature Set being ingested. | -| `feast_feature_name` | Name of the Feature being ingested. | -| `ingestion_job_name` | Name of the Ingestion Job performing data ingestion. Typically this is set to the Id of the Ingestion Job. | -| `metrics_namespace` | Stage where metrics where computed. Either `Inflight` or `WriteToStoreSuccess` | - diff --git a/docs/reference/offline-stores/README.md b/docs/reference/offline-stores/README.md index 1260fe8b29f..141a34d03b6 100644 --- a/docs/reference/offline-stores/README.md +++ b/docs/reference/offline-stores/README.md @@ -4,7 +4,8 @@ Please see [Offline Store](../../getting-started/architecture-and-components/off {% page-ref page="file.md" %} +{% page-ref page="snowflake.md" %} + {% page-ref page="bigquery.md" %} {% page-ref page="redshift.md" %} - diff --git a/docs/reference/offline-stores/snowflake.md b/docs/reference/offline-stores/snowflake.md new file mode 100644 index 00000000000..aa006b43bb0 --- /dev/null +++ b/docs/reference/offline-stores/snowflake.md @@ -0,0 +1,34 @@ +# Snowflake + +## Description + +The Snowflake offline store provides support for reading [SnowflakeSources](../data-sources/snowflake.md). + +* Snowflake tables and views are allowed as sources. +* All joins happen within Snowflake. +* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to Snowflake in order to complete join operations. +* A `SnowflakeRetrievalJob` is returned when calling `get_historical_features()`. 
+ * This allows you to call + * `to_snowflake` to save the dataset into Snowflake + * `to_sql` to get the SQL query that would execute on `to_df` + * `to_arrow_chunks` to get the result in batches ([Snowflake python connector docs](https://docs.snowflake.com/en/user-guide/python-connector-api.html#get_result_batches)) + +## Example + +{% code title="feature_store.yaml" %} +```yaml +project: my_feature_repo +registry: data/registry.db +provider: local +offline_store: + type: snowflake.offline + account: snowflake_deployment.us-east-1 + user: user_login + password: user_password + role: sysadmin + warehouse: demo_wh + database: FEAST +``` +{% endcode %} + +Configuration options are available in [SnowflakeOfflineStoreConfig](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/infra/offline_stores/snowflake.py#L56). diff --git a/docs/reference/offline-stores/untitled.md b/docs/reference/offline-stores/untitled.md deleted file mode 100644 index 8ffa566a70f..00000000000 --- a/docs/reference/offline-stores/untitled.md +++ /dev/null @@ -1,26 +0,0 @@ -# BigQuery - -### Description - -The BigQuery offline store provides support for reading [BigQuerySources](../data-sources/bigquery.md). - -* BigQuery tables and views are allowed as sources. -* All joins happen within BigQuery. -* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. Pandas dataframes will be uploaded to BigQuery in order to complete join operations. -* A [BigQueryRetrievalJob](https://github.com/feast-dev/feast/blob/c50a36ec1ad5b8d81c6f773c23204db7c7a7d218/sdk/python/feast/infra/offline_stores/bigquery.py#L210) is returned when calling `get_historical_features()`. 
- -### Example - -{% code title="feature\_store.yaml" %} -```yaml -project: my_feature_repo -registry: gs://my-bucket/data/registry.db -provider: gcp -offline_store: - type: bigquery - dataset: feast_bq_dataset -``` -{% endcode %} - -Configuration options are available [here](https://rtd.feast.dev/en/latest/#feast.repo_config.BigQueryOfflineStoreConfig). - diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md index aadcc0eb655..2c2902bc579 100644 --- a/docs/reference/online-stores/README.md +++ b/docs/reference/online-stores/README.md @@ -9,4 +9,3 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli {% page-ref page="datastore.md" %} {% page-ref page="dynamodb.md" %} - diff --git a/docs/reference/providers/README.md b/docs/reference/providers/README.md index 7eb992d5acd..dc52d927264 100644 --- a/docs/reference/providers/README.md +++ b/docs/reference/providers/README.md @@ -7,4 +7,3 @@ Please see [Provider](../../getting-started/architecture-and-components/provider {% page-ref page="google-cloud-platform.md" %} {% page-ref page="amazon-web-services.md" %} - diff --git a/docs/reference/repository-config.md b/docs/reference/repository-config.md deleted file mode 100644 index 128d7730717..00000000000 --- a/docs/reference/repository-config.md +++ /dev/null @@ -1,2 +0,0 @@ -# Repository Config - diff --git a/docs/reference/telemetry.md b/docs/reference/telemetry.md deleted file mode 100644 index f8f76787645..00000000000 --- a/docs/reference/telemetry.md +++ /dev/null @@ -1,12 +0,0 @@ -# Telemetry - -### How telemetry is used - -The Feast project logs anonymous usage statistics and errors in order to inform our planning. Several client methods are tracked, beginning in Feast 0.9. Users are assigned a UUID which is sent along with the name of the method, the Feast version, the OS \(using `sys.platform`\), and the current time. 
- -The [source code](https://github.com/feast-dev/feast/blob/master/sdk/python/feast/telemetry.py) is available here. - -### How to disable telemetry - -Set the environment variable `FEAST_TELEMETRY` to `False`. - diff --git a/docs/repository-config.md b/docs/repository-config.md deleted file mode 100644 index 128d7730717..00000000000 --- a/docs/repository-config.md +++ /dev/null @@ -1,2 +0,0 @@ -# Repository Config - diff --git a/docs/roadmap.md b/docs/roadmap.md index cb67d72b48c..83c43e313e9 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -6,26 +6,29 @@ The list below contains the functionality that contributors are planning to deve * We welcome contribution to all items in the roadmap! * Want to influence our roadmap and prioritization? Submit your feedback to [this form](https://docs.google.com/forms/d/e/1FAIpQLSfa1nRQ0sKz-JEFnMMCi4Jseag\_yDssO\_3nV9qMfxfrkil-wA/viewform). * Want to speak to a Feast contributor? We are more than happy to jump on a call. Please schedule a time using [Calendly](https://calendly.com/d/x2ry-g5bb/meet-with-feast-team). 
+ * **Data Sources** + * [x] [Snowflake source](https://docs.feast.dev/reference/data-sources/snowflake) * [x] [Redshift source](https://docs.feast.dev/reference/data-sources/redshift) * [x] [BigQuery source](https://docs.feast.dev/reference/data-sources/bigquery) * [x] [Parquet file source](https://docs.feast.dev/reference/data-sources/file) * [x] [Synapse source (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) - * [x] Kafka source (with [push support into the online store](reference/alpha-stream-ingestion.md)) - * [x] [Snowflake source (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) + * [x] Kafka source (with [push support into the online store](https://docs.feast.dev/reference/alpha-stream-ingestion)) * [ ] HTTP source * **Offline Stores** + * [x] [Snowflake](https://docs.feast.dev/reference/offline-stores/snowflake) * [x] [Redshift](https://docs.feast.dev/reference/offline-stores/redshift) * [x] [BigQuery](https://docs.feast.dev/reference/offline-stores/bigquery) * [x] [Synapse (community plugin)](https://github.com/Azure/feast-azure) * [x] [Hive (community plugin)](https://github.com/baineng/feast-hive) * [x] [Postgres (community plugin)](https://github.com/nossrannug/feast-postgres) + * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) + * [x] [Spark (community plugin)](https://github.com/Adyen/feast-spark-offline-store) * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) - * [x] [Snowflake (community plugin)](https://github.com/sfc-gh-madkins/feast-snowflake) - * [x] [Trino (communiuty plugin)](https://github.com/Shopify/feast-trino) * 
**Online Stores** * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) @@ -60,7 +63,7 @@ The list below contains the functionality that contributors are planning to deve * [ ] Delete API * [ ] Feature Logging (for training) * **Data Quality Management (See [RFC](https://docs.google.com/document/d/110F72d4NTv80p35wDSONxhhPBqWRwbZXG4f9mNEMd98/edit))** - * [ ] Data profiling and validation (Great Expectations) (Planned for Q1 2022) + * [x] Data profiling and validation (Great Expectations) * [ ] Metric production * [ ] Training-serving skew detection * [ ] Drift detection @@ -68,7 +71,7 @@ The list below contains the functionality that contributors are planning to deve * [x] Python SDK for browsing feature registry * [x] CLI for browsing feature registry * [x] Model-centric feature tracking (feature services) + * [x] Amundsen integration (see [Feast extractor](https://github.com/amundsen-io/amundsen/blob/main/databuilder/databuilder/extractor/feast_extractor.py)) * [ ] REST API for browsing feature registry * [ ] Feast Web UI * [ ] Feature versioning - * [ ] Amundsen integration diff --git a/docs/sources.md b/docs/sources.md deleted file mode 100644 index a76d395d098..00000000000 --- a/docs/sources.md +++ /dev/null @@ -1,2 +0,0 @@ -# Sources - diff --git a/docs/specs/offline_store_format.md b/docs/specs/offline_store_format.md index 6826c501900..ac829dd52f1 100644 --- a/docs/specs/offline_store_format.md +++ b/docs/specs/offline_store_format.md @@ -7,8 +7,8 @@ One of the design goals of Feast is being able to plug seamlessly into existing Feast provides first class support for the following data warehouses (DWH) to store feature data offline out of the box: * [BigQuery](https://cloud.google.com/bigquery) -* [Snowflake](https://www.snowflake.com/) (Coming Soon) -* [Redshift](https://aws.amazon.com/redshift/) (Coming Soon) +* [Snowflake](https://www.snowflake.com/) +* 
[Redshift](https://aws.amazon.com/redshift/) The integration between Feast and the DWH is highly configurable, but at the same time there are some non-configurable implications and assumptions that Feast imposes on table schemas and mapping between database-native types and Feast type system. This is what this document is about. @@ -28,14 +28,14 @@ Feature data is stored in tables in the DWH. There is one DWH table per Feast Fe ## Type mappings #### Pandas types -Here's how Feast types map to Pandas types for Feast APIs that take in or return a Pandas dataframe: +Here's how Feast types map to Pandas types for Feast APIs that take in or return a Pandas dataframe: | Feast Type | Pandas Type | |-------------|--| | Event Timestamp | `datetime64[ns]` | | BYTES | `bytes` | | STRING | `str` , `category`| -| INT32 | `int32`, `uint32` | +| INT32 | `int16`, `uint16`, `int32`, `uint32` | | INT64 | `int64`, `uint64` | | UNIX_TIMESTAMP | `datetime64[ns]`, `datetime64[ns, tz]` | | DOUBLE | `float64` | @@ -80,3 +80,17 @@ Here's how Feast types map to BigQuery types when using BigQuery for offline sto | BOOL\_LIST | `ARRAY`| Values that are not specified by the table above will cause an error on conversion. 
+ +#### Snowflake Types +Here's how Feast types map to Snowflake types when using Snowflake for offline storage +See source here: +https://docs.snowflake.com/en/user-guide/python-connector-pandas.html#snowflake-to-pandas-data-mapping + +| Feast Type | Snowflake Python Type | +|-------------|--| +| Event Timestamp | `DATETIME64[NS]` | +| UNIX_TIMESTAMP | `DATETIME64[NS]` | +| STRING | `STR` | +| INT32 | `INT8 / UINT8 / INT16 / UINT16 / INT32 / UINT32` | +| INT64 | `INT64 / UINT64` | +| DOUBLE | `FLOAT64` | diff --git a/docs/tutorials/driver-stats-on-snowflake.md b/docs/tutorials/driver-stats-on-snowflake.md new file mode 100644 index 00000000000..94ac109c942 --- /dev/null +++ b/docs/tutorials/driver-stats-on-snowflake.md @@ -0,0 +1,130 @@ +--- +description: >- + Initial demonstration of Snowflake as an offline store with Feast, using the Snowflake demo template. +--- + +# Drivers stats on Snowflake + +In the steps below, we will set up a sample Feast project that leverages Snowflake +as an offline store. + +Starting with data in a Snowflake table, we will register that table to the feature store and define features associated with the columns in that table. From there, we will generate historical training data based on those feature definitions and then materialize the latest feature values into the online store. Lastly, we will retrieve the materialized feature values. + +Our template will generate new data containing driver statistics. From there, we will show you code snippets that will call to the offline store for generating training datasets, and then the code for calling the online store to serve you the latest feature values to serve models in production. 
+ +## Snowflake Offline Store Example + +#### Install feast-snowflake + +```shell +pip install 'feast[snowflake]' +``` + +#### Get a Snowflake Trial Account (Optional) + +[Snowflake Trial Account](http://trial.snowflake.com) + +#### Create a feature repository + +```shell +feast init -t snowflake {feature_repo_name} +Snowflake Deployment URL (exclude .snowflakecomputing.com): +Snowflake User Name:: +Snowflake Password:: +Snowflake Role Name (Case Sensitive):: +Snowflake Warehouse Name (Case Sensitive):: +Snowflake Database Name (Case Sensitive):: +Should I upload example data to Snowflake (overwrite table)? [Y/n]: Y +cd {feature_repo_name} +``` + +The following files will automatically be created in your project folder: + +* feature_store.yaml -- This is your main configuration file +* driver_repo.py -- This is your main feature definition file +* test.py -- This is a file to test your feature store configuration + +#### Inspect `feature_store.yaml` + +Here you will see the information that you entered. This template will use Snowflake as an offline store and SQLite as the online store. The main thing to remember is by default, Snowflake objects have ALL CAPS names unless lower case was specified. + +{% code title="feature_store.yaml" %} +```yaml +project: ... +registry: ... 
+provider: local +offline_store: + type: snowflake.offline + account: SNOWFLAKE_DEPLOYMENT_URL #drop .snowflakecomputing.com + user: USERNAME + password: PASSWORD + role: ROLE_NAME #case sensitive + warehouse: WAREHOUSE_NAME #case sensitive + database: DATABASE_NAME #case cap sensitive +``` +{% endcode %} + +#### Run our test python script `test.py` + +```shell +python test.py +``` + +## What we did in `test.py` + +#### Initialize our Feature Store +{% code title="test.py" %} +```python +from datetime import datetime, timedelta + +import pandas as pd +from driver_repo import driver, driver_stats_fv + +from feast import FeatureStore + +fs = FeatureStore(repo_path=".") + +fs.apply([driver, driver_stats_fv]) +``` +{% endcode %} + +#### Create a dummy training dataframe, then call our offline store to add additional columns +{% code title="test.py" %} +```python +entity_df = pd.DataFrame( + { + "event_timestamp": [ + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + for dt in pd.date_range( + start=datetime.now() - timedelta(days=3), + end=datetime.now(), + periods=3, + ) + ], + "driver_id": [1001, 1002, 1003], + } +) + +features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"] + +training_df = fs.get_historical_features( + features=features, entity_df=entity_df +).to_df() +``` +{% endcode %} + +#### Materialize the latest feature values into our online store +{% code title="test.py" %} +```python +fs.materialize_incremental(end_date=datetime.now()) +``` +{% endcode %} + +#### Retrieve the latest values from our online store based on our entity key +{% code title="test.py" %} +```python +online_features = fs.get_online_features( + features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], +).to_dict() +``` +{% endcode %} diff --git a/docs/tutorials/tutorials-overview.md b/docs/tutorials/tutorials-overview.md index a523f9b38e9..32e64071b06 100644 --- a/docs/tutorials/tutorials-overview.md +++ b/docs/tutorials/tutorials-overview.md @@ 
-8,3 +8,6 @@ These Feast tutorials showcase how to use Feast to simplify end to end model tra {% page-ref page="real-time-credit-scoring-on-aws.md" %} +{% page-ref page="driver-stats-on-snowflake.md" %} + +{% page-ref page="validating-historical-features.md" %} diff --git a/docs/tutorials/validating-historical-features.md b/docs/tutorials/validating-historical-features.md new file mode 100644 index 00000000000..19ae4ef434c --- /dev/null +++ b/docs/tutorials/validating-historical-features.md @@ -0,0 +1,915 @@ +# Validating historical features with Great Expectations + +In this tutorial, we will use the public dataset of Chicago taxi trips to present data validation capabilities of Feast. +- The original dataset is stored in BigQuery and consists of raw data for each taxi trip (one row per trip) since 2013. +- We will generate several training datasets (aka historical features in Feast) for different periods and evaluate expectations made on one dataset against another. + +Types of features we're ingesting and generating: +- Features that aggregate raw data with daily intervals (eg, trips per day, average fare or speed for a specific day, etc.). +- Features using SQL while pulling data from BigQuery (like total trips time or total miles travelled). +- Features calculated on the fly when requested using Feast's on-demand transformations + +Our plan: + +0. Prepare environment +1. Pull data from BigQuery (optional) +2. Declare & apply features and feature views in Feast +3. Generate reference dataset +4. Develop & test profiler function +5. Run validation on different dataset using reference dataset & profiler + + +> The original notebook and datasets for this tutorial can be found on [GitHub](https://github.com/feast-dev/dqm-tutorial). + +### 0. Setup + +Install Feast Python SDK and great expectations: + + +```python +!pip install 'feast[ge]' +``` + + +### 1. Dataset preparation (Optional) + +**You can skip this step if you don't have GCP account. 
Please use parquet files that are coming with this tutorial instead** + + +```python +!pip install google-cloud-bigquery +``` + + +```python +import pyarrow.parquet + +from google.cloud.bigquery import Client +``` + + +```python +bq_client = Client(project='kf-feast') +``` + +Running some basic aggregations while pulling data from BigQuery. Grouping by taxi_id and day: + + +```python +data_query = """SELECT + taxi_id, + TIMESTAMP_TRUNC(trip_start_timestamp, DAY) as day, + SUM(trip_miles) as total_miles_travelled, + SUM(trip_seconds) as total_trip_seconds, + SUM(fare) as total_earned, + COUNT(*) as trip_count +FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` +WHERE + trip_miles > 0 AND trip_seconds > 60 AND + trip_start_timestamp BETWEEN '2019-01-01' and '2020-12-31' AND + trip_total < 1000 +GROUP BY taxi_id, TIMESTAMP_TRUNC(trip_start_timestamp, DAY)""" +``` + + +```python +driver_stats_table = bq_client.query(data_query).to_arrow() + +# Storing resulting dataset into parquet file +pyarrow.parquet.write_table(driver_stats_table, "trips_stats.parquet") +``` + + +```python +def entities_query(year): + return f"""SELECT + distinct taxi_id +FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips` +WHERE + trip_miles > 0 AND trip_seconds > 0 AND + trip_start_timestamp BETWEEN '{year}-01-01' and '{year}-12-31' +""" +``` + + +```python +entities_2019_table = bq_client.query(entities_query(2019)).to_arrow() + +# Storing entities (taxi ids) into parquet file +pyarrow.parquet.write_table(entities_2019_table, "entities.parquet") +``` + + +## 2. 
Declaring features + + +```python +import pyarrow.parquet +import pandas as pd + +from feast import Feature, FeatureView, Entity, FeatureStore +from feast.value_type import ValueType +from feast.data_format import ParquetFormat +from feast.on_demand_feature_view import on_demand_feature_view +from feast.infra.offline_stores.file_source import FileSource +from feast.infra.offline_stores.file import SavedDatasetFileStorage + +from google.protobuf.duration_pb2 import Duration +``` + + +```python +batch_source = FileSource( + event_timestamp_column="day", + path="trips_stats.parquet", # using parquet file that we created on previous step + file_format=ParquetFormat() +) +``` + + +```python +taxi_entity = Entity(name='taxi', join_key='taxi_id') +``` + + +```python +trips_stats_fv = FeatureView( + name='trip_stats', + entities=['taxi'], + features=[ + Feature("total_miles_travelled", ValueType.DOUBLE), + Feature("total_trip_seconds", ValueType.DOUBLE), + Feature("total_earned", ValueType.DOUBLE), + Feature("trip_count", ValueType.INT64), + + ], + ttl=Duration(seconds=86400), + batch_source=batch_source, +) +``` + +*Read more about feature views in [Feast docs](https://docs.feast.dev/getting-started/concepts/feature-view)* + + +```python +@on_demand_feature_view( + features=[ + Feature("avg_fare", ValueType.DOUBLE), + Feature("avg_speed", ValueType.DOUBLE), + Feature("avg_trip_seconds", ValueType.DOUBLE), + Feature("earned_per_hour", ValueType.DOUBLE), + ], + inputs={ + "stats": trips_stats_fv + } +) +def on_demand_stats(inp): + out = pd.DataFrame() + out["avg_fare"] = inp["total_earned"] / inp["trip_count"] + out["avg_speed"] = 3600 * inp["total_miles_travelled"] / inp["total_trip_seconds"] + out["avg_trip_seconds"] = inp["total_trip_seconds"] / inp["trip_count"] + out["earned_per_hour"] = 3600 * inp["total_earned"] / inp["total_trip_seconds"] + return out +``` + +*Read more about on demand feature views 
[here](https://docs.feast.dev/reference/alpha-on-demand-feature-view)* + + +```python +store = FeatureStore(".") # using feature_store.yaml that stored in the same directory +``` + + +```python +store.apply([taxi_entity, trips_stats_fv, on_demand_stats]) # writing to the registry +``` + + +## 3. Generating training (reference) dataset + + +```python +taxi_ids = pyarrow.parquet.read_table("entities.parquet").to_pandas() +``` + +Generating range of timestamps with daily frequency: + + +```python +timestamps = pd.DataFrame() +timestamps["event_timestamp"] = pd.date_range("2019-06-01", "2019-07-01", freq='D') +``` + +Cross merge (aka relation multiplication) produces entity dataframe with each taxi_id repeated for each timestamp: + + +```python +entity_df = pd.merge(taxi_ids, timestamps, how='cross') +entity_df +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
taxi_idevent_timestamp
091d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2019-06-01
191d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2019-06-02
291d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2019-06-03
391d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2019-06-04
491d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2019-06-05
.........
1569797ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2019-06-27
1569807ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2019-06-28
1569817ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2019-06-29
1569827ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2019-06-30
1569837ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2019-07-01
+

156984 rows × 2 columns

+
+ + + +Retrieving historical features for resulting entity dataframe and persisting output as a saved dataset: + + +```python +job = store.get_historical_features( + entity_df=entity_df, + features=[ + "trip_stats:total_miles_travelled", + "trip_stats:total_trip_seconds", + "trip_stats:total_earned", + "trip_stats:trip_count", + "on_demand_stats:avg_fare", + "on_demand_stats:avg_trip_seconds", + "on_demand_stats:avg_speed", + "on_demand_stats:earned_per_hour", + ] +) + +store.create_saved_dataset( + from_=job, + name='my_training_ds', + storage=SavedDatasetFileStorage(path='my_training_ds.parquet') +) +``` + +```python +, full_feature_names = False, tags = {}, _retrieval_job = , min_event_timestamp = 2019-06-01 00:00:00, max_event_timestamp = 2019-07-01 00:00:00)> +``` + + +## 4. Developing dataset profiler + +Dataset profiler is a function that accepts dataset and generates set of its characteristics. This charasteristics will be then used to evaluate (validate) next datasets. + +**Important: datasets are not compared to each other! +Feast use a reference dataset and a profiler function to generate a reference profile. +This profile will be then used during validation of the tested dataset.** + + +```python +import numpy as np + +from feast.dqm.profilers.ge_profiler import ge_profiler + +from great_expectations.core.expectation_suite import ExpectationSuite +from great_expectations.dataset import PandasDataset +``` + + +Loading saved dataset first and exploring the data: + + +```python +ds = store.get_saved_dataset('my_training_ds') +ds.to_df() +``` + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
total_earnedavg_trip_secondstaxi_idtotal_miles_travelledtrip_countearned_per_hourevent_timestamptotal_trip_secondsavg_fareavg_speed
068.252270.00000091d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...24.702.054.1189432019-06-01 00:00:00+00:004540.034.12500019.585903
1221.00560.5000007a4a6162eaf27805aef407d25d5cb21fe779cd962922cb...54.1824.059.1436222019-06-01 00:00:00+00:0013452.09.20833314.499554
2160.501010.769231f4c9d05b215d7cbd08eca76252dae51cdb7aca9651d4ef...41.3013.043.9726032019-06-01 00:00:00+00:0013140.012.34615411.315068
3183.75697.550000c1f533318f8480a59173a9728ea0248c0d3eb187f4b897...37.3020.047.4159562019-06-01 00:00:00+00:0013951.09.1875009.625116
4217.751054.076923455b6b5cae6ca5a17cddd251485f2266d13d6a2c92f07c...69.6913.057.2064512019-06-01 00:00:00+00:0013703.016.75000018.308692
.................................
15697938.001980.0000000cccf0ec1f46d1e0beefcfdeaf5188d67e170cdff92618...14.901.069.0909092019-07-01 00:00:00+00:001980.038.00000027.090909
156980135.00551.250000beefd3462e3f5a8e854942a2796876f6db73ebbd25b435...28.4016.055.1020412019-07-01 00:00:00+00:008820.08.43750011.591837
156981NaNNaN9a3c52aa112f46cf0d129fafbd42051b0fb9b0ff8dcb0e...NaNNaNNaN2019-07-01 00:00:00+00:00NaNNaNNaN
15698263.00815.00000008308c31cd99f495dea73ca276d19a6258d7b4c9c88e43...19.964.069.5705522019-07-01 00:00:00+00:003260.015.75000022.041718
156983NaNNaN7ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...NaNNaNNaN2019-07-01 00:00:00+00:00NaNNaNNaN
+

156984 rows × 10 columns

+
+ + + +Feast uses [Great Expectations](https://docs.greatexpectations.io/docs/) as a validation engine and [ExpectationSuite](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/core/expectation_suite/index.html#great_expectations.core.expectation_suite.ExpectationSuite) as a dataset's profile. Hence, we need to develop a function that will generate ExpectationSuite. This function will receive instance of [PandasDataset](https://legacy.docs.greatexpectations.io/en/latest/autoapi/great_expectations/dataset/index.html?highlight=pandasdataset#great_expectations.dataset.PandasDataset) (wrapper around pandas.DataFrame) so we can utilize both Pandas DataFrame API and some helper functions from PandasDataset during profiling. + + +```python +DELTA = 0.1 # controlling allowed window in fraction of the value on scale [0, 1] + +@ge_profiler +def stats_profiler(ds: PandasDataset) -> ExpectationSuite: + # simple checks on data consistency + ds.expect_column_values_to_be_between( + "avg_speed", + min_value=0, + max_value=60, + mostly=0.99 # allow some outliers + ) + + ds.expect_column_values_to_be_between( + "total_miles_travelled", + min_value=0, + max_value=500, + mostly=0.99 # allow some outliers + ) + + # expectation of means based on observed values + observed_mean = ds.trip_count.mean() + ds.expect_column_mean_to_be_between("trip_count", + min_value=observed_mean * (1 - DELTA), + max_value=observed_mean * (1 + DELTA)) + + observed_mean = ds.earned_per_hour.mean() + ds.expect_column_mean_to_be_between("earned_per_hour", + min_value=observed_mean * (1 - DELTA), + max_value=observed_mean * (1 + DELTA)) + + + # expectation of quantiles + qs = [0.5, 0.75, 0.9, 0.95] + observed_quantiles = ds.avg_fare.quantile(qs) + + ds.expect_column_quantile_values_to_be_between( + "avg_fare", + quantile_ranges={ + "quantiles": qs, + "value_ranges": [[None, max_value] for max_value in observed_quantiles] + }) + + return ds.get_expectation_suite() +``` + +Testing our 
profiler function: + + +```python +ds.get_profile(profiler=stats_profiler) +``` + 02/02/2022 02:43:47 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + + + + +**Verify that all expectations that we coded in our profiler are present here. Otherwise (if some expectations are missing) it means that they failed to pass on the reference dataset (failing silently is the default behavior of Great Expectations).** + +Now we can create a validation reference from the dataset and the profiler function: + + +```python +validation_reference = ds.as_reference(profiler=stats_profiler) +``` + +and test it against our existing retrieval job: + + +```python +_ = job.to_df(validation_reference=validation_reference) +``` + + 02/02/2022 02:43:52 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + 02/02/2022 02:43:53 PM INFO: Validating data_asset_name None with expectation_suite_name default + + +Validation passed successfully, as no exceptions were raised. + + +### 5. Validating new historical retrieval + +Creating new timestamps for Dec 2020: + + +```python +from feast.dqm.errors import ValidationFailed +``` + + +```python +timestamps = pd.DataFrame() +timestamps["event_timestamp"] = pd.date_range("2020-12-01", "2020-12-07", freq='D') +``` + + +```python +entity_df = pd.merge(taxi_ids, timestamps, how='cross') +entity_df +``` + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
taxi_idevent_timestamp
091d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2020-12-01
191d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2020-12-02
291d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2020-12-03
391d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2020-12-04
491d5288487e87c5917b813ba6f75ab1c3a9749af906a2d...2020-12-05
.........
354437ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2020-12-03
354447ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2020-12-04
354457ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2020-12-05
354467ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2020-12-06
354477ebf27414a0c7b128e7925e1da56d51a8b81484f7630cf...2020-12-07
+

35448 rows × 2 columns

+
+ + +```python +job = store.get_historical_features( + entity_df=entity_df, + features=[ + "trip_stats:total_miles_travelled", + "trip_stats:total_trip_seconds", + "trip_stats:total_earned", + "trip_stats:trip_count", + "on_demand_stats:avg_fare", + "on_demand_stats:avg_trip_seconds", + "on_demand_stats:avg_speed", + "on_demand_stats:earned_per_hour", + ] +) +``` + +Execute retrieval job with validation reference: + + +```python +try: + df = job.to_df(validation_reference=validation_reference) +except ValidationFailed as exc: + print(exc.validation_report) +``` + + 02/02/2022 02:43:58 PM INFO: 5 expectation(s) included in expectation_suite. result_format settings filtered. + 02/02/2022 02:43:59 PM INFO: Validating data_asset_name None with expectation_suite_name default + + [ + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "trip_count", + "min_value": 10.387244591346153, + "max_value": 12.695521167200855, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": 6.692920555429092, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154 + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + }, + { + "expectation_config": { + "expectation_type": "expect_column_mean_to_be_between", + "kwargs": { + "column": "earned_per_hour", + "min_value": 52.320624975640214, + "max_value": 63.94743052578249, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": 68.99268345164135, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154 + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + }, + { + "expectation_config": { + "expectation_type": "expect_column_quantile_values_to_be_between", + 
"kwargs": { + "column": "avg_fare", + "quantile_ranges": { + "quantiles": [ + 0.5, + 0.75, + 0.9, + 0.95 + ], + "value_ranges": [ + [ + null, + 16.4 + ], + [ + null, + 26.229166666666668 + ], + [ + null, + 36.4375 + ], + [ + null, + 42.0 + ] + ] + }, + "result_format": "COMPLETE" + }, + "meta": {} + }, + "meta": {}, + "result": { + "observed_value": { + "quantiles": [ + 0.5, + 0.75, + 0.9, + 0.95 + ], + "values": [ + 19.5, + 28.1, + 38.0, + 44.125 + ] + }, + "element_count": 35448, + "missing_count": 31055, + "missing_percent": 87.6071992778154, + "details": { + "success_details": [ + false, + false, + false, + false + ] + } + }, + "exception_info": { + "raised_exception": false, + "exception_message": null, + "exception_traceback": null + }, + "success": false + } + ] + + +Validation failed since several expectations didn't pass: +* Trip count (mean) decreased more than 10% (which is expected when comparing Dec 2020 vs June 2019) +* Average Fare increased - all quantiles are higher than expected +* Earn per hour (mean) increased more than 10% (most probably due to increased fare) + diff --git a/docs/user-guide/define-and-ingest-features.md b/docs/user-guide/define-and-ingest-features.md deleted file mode 100644 index d55fcb1d857..00000000000 --- a/docs/user-guide/define-and-ingest-features.md +++ /dev/null @@ -1,56 +0,0 @@ -# Define and ingest features - -In order to retrieve features for both training and serving, Feast requires data being ingested into its offline and online stores. - -{% hint style="warning" %} -Feast 0.8 does not have an offline store. Only Online storage support exists currently. Feast 0.9 will have offline storage support. In Feast 0.8, historical data is retrieved directly from batch sources. -{% endhint %} - -Users are expected to already have either a batch or stream source with data stored in it, ready to be ingested into Feast. 
Once a feature table \(with the corresponding sources\) has been registered with Feast, it is possible to load data from this source into stores. - -The following depicts an example ingestion flow from a data source to the online store. - -### Batch Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Initialize date ranges -today = datetime.now() -yesterday = today - timedelta(1) - -# Launches a short-lived job that ingests data over the provided date range. -client.start_offline_to_online_ingestion( - driver_ft, yesterday, today -) -``` - -### Stream Source to Online Store - -```python -from feast import Client -from datetime import datetime, timedelta - -client = Client(core_url="localhost:6565") -driver_ft = client.get_feature_table("driver_trips") - -# Launches a long running streaming ingestion job -client.start_stream_to_online_ingestion(driver_ft) -``` - -### Batch Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - -### Stream Source to Offline Store - -{% hint style="danger" %} -Not supported in Feast 0.8 -{% endhint %} - diff --git a/docs/user-guide/getting-online-features.md b/docs/user-guide/getting-online-features.md deleted file mode 100644 index c16dc08a013..00000000000 --- a/docs/user-guide/getting-online-features.md +++ /dev/null @@ -1,54 +0,0 @@ -# Getting online features - -Feast provides an API through which online feature values can be retrieved. This allows teams to look up feature values at low latency in production during model serving, in order to make online predictions. - -{% hint style="info" %} -Online stores only maintain the current state of features, i.e latest feature values. No historical data is stored or served. 
-{% endhint %} - -```python -from feast import Client - -online_client = Client( - core_url="localhost:6565", - serving_url="localhost:6566", -) - -entity_rows = [ - {"driver_id": 1001}, - {"driver_id": 1002}, -] - -# Features in format -feature_refs = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", -] - -response = online_client.get_online_features( - feature_refs=feature_refs, # Contains only feature references - entity_rows=entity_rows, # Contains only entities (driver ids) -) - -# Print features in dictionary format -response_dict = response.to_dict() -print(response_dict) -``` - -The online store must be populated through [ingestion jobs](define-and-ingest-features.md#batch-source-to-online-store) prior to being used for online serving. - -Feast Serving provides a [gRPC API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) that is backed by [Redis](https://redis.io/). We have native clients in [Python](https://api.docs.feast.dev/python/), [Go](https://godoc.org/github.com/gojek/feast/sdk/go), and [Java](https://javadoc.io/doc/dev.feast). - -### Online Field Statuses - -Feast also returns status codes when retrieving features from the Feast Serving API. These status code give useful insight into the quality of data being served. - -| Status | Meaning | -| :--- | :--- | -| NOT\_FOUND | The feature value was not found in the online store. This might mean that no feature value was ingested for this feature. | -| NULL\_VALUE | A entity key was successfully found but no feature values had been set. This status code should not occur during normal operation. | -| OUTSIDE\_MAX\_AGE | The age of the feature row in the online store \(in terms of its event timestamp\) has exceeded the maximum age defined within the feature table. | -| PRESENT | The feature values have been found and are within the maximum age. | -| UNKNOWN | Indicates a system failure. 
| - diff --git a/docs/user-guide/getting-training-features.md b/docs/user-guide/getting-training-features.md deleted file mode 100644 index b9d0b050f29..00000000000 --- a/docs/user-guide/getting-training-features.md +++ /dev/null @@ -1,72 +0,0 @@ -# Getting training features - -Feast provides a historical retrieval interface for exporting feature data in order to train machine learning models. Essentially, users are able to enrich their data with features from any feature tables. - -### Retrieving historical features - -Below is an example of the process required to produce a training dataset: - -```python -# Feature references with target feature -features = [ - "driver_trips:average_daily_rides", - "driver_trips:maximum_daily_rides", - "driver_trips:rating", - "driver_trips:rating:trip_completed", -] - -# Define entity source -entity_source = FileSource( - "event_timestamp", - ParquetFormat(), - "gs://some-bucket/customer" -) - -# Retrieve historical dataset from Feast. -historical_feature_retrieval_job = client.get_historical_features( - features=features, - entity_rows=entity_source -) - -output_file_uri = historical_feature_retrieval_job.get_output_file_uri() -``` - -#### 1. Define feature references - -[Feature references](../concepts/glossary.md#feature-references) define the specific features that will be retrieved from Feast. These features can come from multiple feature tables. The only requirement is that the feature tables that make up the feature references have the same entity \(or composite entity\). - -**2. Define an entity dataframe** - -Feast needs to join feature values onto specific entities at specific points in time. Thus, it is necessary to provide an [entity dataframe](../concepts/glossary.md#entity-dataframe) as part of the `get_historical_features` method. In the example above we are defining an entity source. This source is an external file that provides Feast with the entity dataframe. - -**3. 
Launch historical retrieval job** - -Once the feature references and an entity source are defined, it is possible to call `get_historical_features()`. This method launches a job that extracts features from the sources defined in the provided feature tables, joins them onto the provided entity source, and returns a reference to the training dataset that is produced. - -Please see the [Feast SDK](https://api.docs.feast.dev/python) for more details. - -### Point-in-time Joins - -Feast always joins features onto entity data in a point-in-time correct way. The process can be described through an example. - -In the example below there are two tables \(or dataframes\): - -* The dataframe on the left is the [entity dataframe](../concepts/glossary.md#entity-dataframe) that contains timestamps, entities, and the target variable \(trip\_completed\). This dataframe is provided to Feast through an entity source. -* The dataframe on the right contains driver features. This dataframe is represented in Feast through a feature table and its accompanying data source\(s\). - -The user would like to have the driver features joined onto the entity dataframe to produce a training dataset that contains both the target \(trip\_completed\) and features \(average\_daily\_rides, maximum\_daily\_rides, rating\). This dataset will then be used to train their model. - -![](../.gitbook/assets/point_in_time_join%20%281%29%20%282%29%20%282%29%20%283%29%20%283%29%20%283%29%20%283%29%20%281%29.png) - -Feast is able to intelligently join feature data with different timestamps to a single entity dataframe. It does this through a point-in-time join as follows: - -1. Feast loads the entity dataframe and all feature tables \(driver dataframe\) into the same location. This can either be a database or in memory. -2. For each [entity row](../concepts/glossary.md#entity-rows) in the [entity dataframe](getting-online-features.md), Feast tries to find feature values in each feature table to join to it. 
Feast extracts the timestamp and entity key of each row in the entity dataframe and scans backward through the feature table until it finds a matching entity key. -3. If the event timestamp of the matching entity key within the driver feature table is within the maximum age configured for the feature table, then the features at that entity key are joined onto the entity dataframe. If the event timestamp is outside of the maximum age, then only null values are returned. -4. If multiple entity keys are found with the same event timestamp, then they are deduplicated by the created timestamp, with newer values taking precedence. -5. Feast repeats this joining process for all feature tables and returns the resulting dataset. - -{% hint style="info" %} -Point-in-time correct joins attempts to prevent the occurrence of feature leakage by trying to recreate the state of the world at a single point in time, instead of joining features based on exact timestamps only. -{% endhint %} - diff --git a/docs/user-guide/overview.md b/docs/user-guide/overview.md deleted file mode 100644 index 2d6eb9981bb..00000000000 --- a/docs/user-guide/overview.md +++ /dev/null @@ -1,32 +0,0 @@ -# Overview - -### Using Feast - -Feast development happens through three key workflows: - -1. [Define and load feature data into Feast](define-and-ingest-features.md) -2. [Retrieve historical features for training models](getting-training-features.md) -3. [Retrieve online features for serving models](getting-online-features.md) - -### Defining feature tables and ingesting data into Feast - -Feature creators model the data within their organization into Feast through the definition of [feature tables](../concepts/feature-tables.md) that contain [data sources](../concepts/sources.md). Feature tables are both a schema and a means of identifying data sources for features, and allow Feast to know how to interpret your data, and where to find it. 
- -After registering a feature table with Feast, users can trigger an ingestion from their data source into Feast. This loads feature values from an upstream data source into Feast stores through ingestion jobs. - -Visit [feature tables](../concepts/feature-tables.md#overview) to learn more about them. - -{% page-ref page="define-and-ingest-features.md" %} - -### Retrieving historical features for training - -In order to generate a training dataset it is necessary to provide both an [entity dataframe ](../concepts/glossary.md#entity-dataframe)and feature references through the[ Feast SDK](https://api.docs.feast.dev/python/) to retrieve historical features. For historical serving, Feast requires that you provide the entities and timestamps for the corresponding feature data. Feast produces a point-in-time correct dataset using the requested features. These features can be requested from an unlimited number of feature sets. - -{% page-ref page="getting-training-features.md" %} - -### Retrieving online features for online serving - -Online retrieval uses feature references through the [Feast Online Serving API](https://api.docs.feast.dev/grpc/feast.serving.pb.html) to retrieve online features. Online serving allows for very low latency requests to feature data at very high throughput. - -{% page-ref page="getting-online-features.md" %} - diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 3b148137ef9..3679fcc7788 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -27,12 +27,12 @@ "In this tutorial, we use feature stores to generate training data and power online model inference for a ride-sharing driver satisfaction prediction model. Feast addresses several common issues in this flow:\n", "1. **Training-serving skew and complex data joins:** Feature values often exist across multiple tables. 
Joining these datasets can be complicated, slow, and error-prone.\n", " - Feast joins these tables with battle-tested logic that ensures *point-in-time* correctness so future feature values do not leak to models.\n", - " - **Upcoming*: Feast alerts users to offline / online skew with data quality monitoring. \n", + " - Feast alerts users to offline / online skew with data quality monitoring. \n", "2. **Online feature availability:** At inference time, models often need access to features that aren't readily available and need to be precomputed from other datasources. \n", " - Feast manages deployment to a variety of online stores (e.g. DynamoDB, Redis, Google Cloud Datastore) and ensures necessary features are consistently *available* and *freshly computed* at inference time.\n", "3. **Feature reusability and model versioning:** Different teams within an organization are often unable to reuse features across projects, resulting in duplicate feature creation logic. Models have data dependencies that need to be versioned, for example when running A/B tests on model versions.\n", " - Feast enables discovery of and collaboration on previously used features and enables versioning of sets of features (via *feature services*). 
\n", - " - **Upcoming*: Feast enables feature transformation so users can re-use transformation logic across online / offline usecases and across models.\n", + " - Feast enables feature transformation so users can re-use transformation logic across online / offline usecases and across models.\n", "\n", "We will:\n", "- Deploy a local feature store with a Parquet file offline store and Sqlite online store.\n", @@ -188,11 +188,13 @@ "\n", "Valid values for `provider` in `feature_store.yaml` are:\n", "\n", - "* local: use file source / SQLite\n", - "* gcp: use BigQuery / Google Cloud Datastore\n", - "* aws: use Redshift / DynamoDB\n", + "* local: use file source with SQLite/Redis\n", + "* gcp: use BigQuery/Snowflake with Google Cloud Datastore/Redis\n", + "* aws: use Redshift/Snowflake with DynamoDB/Redis\n", "\n", - "A custom setup (e.g. using the built-in support for Redis) can be made by following https://docs.feast.dev/v/master/how-to-guides/creating-a-custom-provider" + "Note that there are many other sources Feast works with, including Azure, Hive, Trino, and PostgreSQL via community plugins. 
See https://docs.feast.dev/getting-started/third-party-integrations for all supported datasources.", + "\n", + "A custom setup can also be made by following https://docs.feast.dev/v/master/how-to-guides/creating-a-custom-provider" ] }, { @@ -794,7 +796,7 @@ "\n", "- Read the [Concepts](https://docs.feast.dev/getting-started/concepts/) page to understand the Feast data model and architecture.\n", "- Check out our [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) section for more examples on how to use Feast.\n", - "- Follow our [Running Feast with GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) guide for a more in-depth tutorial on using Feast.\n", + "- Follow our [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) guide for a more in-depth tutorial on using Feast.\n", "- Join other Feast users and contributors in [Slack](https://slack.feast.dev/) and become part of the community!" ] } diff --git a/go.mod b/go.mod index f4a14550566..109666b7622 100644 --- a/go.mod +++ b/go.mod @@ -25,8 +25,8 @@ require ( go.opencensus.io v0.22.3 // indirect golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect - golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d - golang.org/x/tools v0.1.7 // indirect + golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f + golang.org/x/tools v0.1.8 // indirect google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect google.golang.org/grpc v1.29.1 google.golang.org/protobuf v1.27.1 // indirect diff --git a/go.sum b/go.sum index 5e87ccf6dbf..8b0c2677f3c 100644 --- a/go.sum +++ b/go.sum @@ -345,6 +345,7 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark 
v1.4.0/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.opencensus.io v0.21.0 h1:mU6zScU4U1YAFPHEHYk+3JC4SY7JxgkqS10ZOSyksNg= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= @@ -386,6 +387,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d h1:g9qWBGx4puODJTMVyoPrpoxPFgVGd+z1DZwjfRu4d0I= @@ -415,6 +417,7 @@ golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -454,6 +457,7 @@ 
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= @@ -464,6 +468,7 @@ golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -525,6 +530,8 @@ golang.org/x/tools v0.0.0-20201124005743-911501bfb504 h1:jOKV2ysikH1GANB7t2Lotmh golang.org/x/tools v0.0.0-20201124005743-911501bfb504/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.7 h1:6j8CgantCy3yc8JGBqkDLMKWqZ0RDU2g1HVgacojGWQ= golang.org/x/tools v0.1.7/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= +golang.org/x/tools v0.1.8 h1:P1HhGGuLW4aAclzjtmJdf0mJOjVUZUzOTqkAkWL+l6w= +golang.org/x/tools 
v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= diff --git a/infra/charts/feast-python-server/Chart.yaml b/infra/charts/feast-python-server/Chart.yaml index fc20d180bc0..d7e9d7a1471 100644 --- a/infra/charts/feast-python-server/Chart.yaml +++ b/infra/charts/feast-python-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-python-server description: Feast Feature Server in Python type: application -version: 0.1.0 +version: 0.18.0 keywords: - machine learning - big data diff --git a/infra/charts/feast-python-server/README.md b/infra/charts/feast-python-server/README.md index b8516bc6dcb..45b5f73b3cb 100644 --- a/infra/charts/feast-python-server/README.md +++ b/infra/charts/feast-python-server/README.md @@ -1,6 +1,6 @@ # feast-python-server -![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.18.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Feast Feature Server in Python @@ -56,16 +56,4 @@ RUN pip install pip --upgrade RUN pip install feast COPY feature_store.yaml /feature_store.yaml -``` - -Make sure that you have enabled the flags for the python server. 
Example `feature_store.yaml`: -``` -project: feature_repo -registry: data/registry.db -provider: local -online_store: - path: data/online_store.db -flags: - alpha_features: true - python_feature_server: true ``` \ No newline at end of file diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index 4dd16aa906a..c0a3849e59e 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.101.0 +version: 0.18.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index b8411cc9f78..40e67b0857c 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -10,7 +10,7 @@ This repo contains Helm charts for Feast components that are being installed on ## Chart: Feast -Feature store for machine learning Current chart version is `0.101.0` +Feature store for machine learning Current chart version is `0.18.0` ## Installation @@ -57,8 +57,8 @@ For more details, please see: https://docs.feast.dev/how-to-guides/running-feast | Repository | Name | Version | |------------|------|---------| | https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.101.0 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.101.0 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.18.0 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.18.0 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index f0336cee2f0..006acfc4b45 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 
description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.100.4 -appVersion: v0.15.0 +version: 0.18.0 +appVersion: v0.18.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index 773f03af5e9..23ed4102b85 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.100.4](https://img.shields.io/badge/Version-0.100.4-informational?style=flat-square) ![AppVersion: v0.15.0](https://img.shields.io/badge/AppVersion-v0.15.0-informational?style=flat-square) +![Version: 0.18.0](https://img.shields.io/badge/Version-0.18.0-informational?style=flat-square) ![AppVersion: v0.18.0](https://img.shields.io/badge/AppVersion-v0.18.0-informational?style=flat-square) Feast Feature Server: Online feature serving service for Feast @@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.17.0"` | Image tag | +| image.tag | string | `"0.18.0"` | Image tag | | ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | | ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | | ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | diff --git a/infra/charts/feast/charts/feature-server/templates/deployment.yaml b/infra/charts/feast/charts/feature-server/templates/deployment.yaml index 9327747423e..02323cbffc6 100644 --- a/infra/charts/feast/charts/feature-server/templates/deployment.yaml +++ b/infra/charts/feast/charts/feature-server/templates/deployment.yaml @@ -89,8 +89,7 @@ spec: - 
java - -jar - /opt/feast/feast-serving.jar - - --spring.config.location= - {{- if index .Values "application.yaml" "enabled" -}} + - {{ if index .Values "application.yaml" "enabled" -}} classpath:/application.yml {{- end }} {{- if index .Values "application-generated.yaml" "enabled" -}} diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml index 92de49763c7..a6cf0f41b7c 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.17.0 + tag: 0.18.0 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index 2760aa93fd9..ea4f9ccfe5f 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.100.4 -appVersion: v0.15.0 +version: 0.18.0 +appVersion: v0.18.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index 8089c1572b2..f101f911b45 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.100.4](https://img.shields.io/badge/Version-0.100.4-informational?style=flat-square) ![AppVersion: v0.15.0](https://img.shields.io/badge/AppVersion-v0.15.0-informational?style=flat-square) +![Version: 
0.18.0](https://img.shields.io/badge/Version-0.18.0-informational?style=flat-square) ![AppVersion: v0.18.0](https://img.shields.io/badge/AppVersion-v0.18.0-informational?style=flat-square) Transformation service: to compute on-demand features @@ -13,7 +13,7 @@ Transformation service: to compute on-demand features | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.17.0"` | Image tag | +| image.tag | string | `"0.18.0"` | Image tag | | nodeSelector | object | `{}` | Node labels for pod assignment | | podLabels | object | `{}` | Labels to be added to Feast Serving pods | | replicaCount | int | `1` | Number of pods that will be created | diff --git a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml index 234471fb968..555e93a306a 100644 --- a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml +++ b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml @@ -5,5 +5,4 @@ provider: local project: {{ .Values.global.project }} flags: on_demand_transforms: true - python_feature_server: true alpha_features: true \ No newline at end of file diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index 7babb5f6b62..e758a535963 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # image.tag -- Image tag - tag: 0.17.0 + tag: 0.18.0 # image.pullPolicy -- Image pull policy pullPolicy: 
IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index a1ccdde0f33..60eaf7f67de 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.101.0 + version: 0.18.0 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.101.0 + version: 0.18.0 condition: transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/infra/docker-compose/docker-compose.yml b/infra/docker-compose/docker-compose.yml index 98131d6ccf0..579dc6d65fb 100644 --- a/infra/docker-compose/docker-compose.yml +++ b/infra/docker-compose/docker-compose.yml @@ -16,7 +16,7 @@ services: - java - -jar - /opt/feast/feast-core.jar - - --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml + - classpath:/application.yml,file:/etc/feast/application.yml jobservice: image: gcr.io/kf-feast/feast-jobservice:${FEAST_VERSION} @@ -104,7 +104,7 @@ services: - java - -jar - /opt/feast/feast-serving.jar - - --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml + - classpath:/application.yml,file:/etc/feast/application.yml redis: image: redis:5-alpine diff --git a/infra/scripts/helm/push-helm-charts.sh b/infra/scripts/helm/push-helm-charts.sh index 74961b196a5..f9750ccecc4 100755 --- a/infra/scripts/helm/push-helm-charts.sh +++ b/infra/scripts/helm/push-helm-charts.sh @@ -10,13 +10,13 @@ fi bucket=gs://feast-helm-charts repo_url=https://feast-helm-charts.storage.googleapis.com/ -helm plugin install https://github.com/hayorov/helm-gcs.git --version 0.2.2 || true +helm plugin install https://github.com/hayorov/helm-gcs.git --version 0.3.18 || true helm repo add feast-helm-chart-repo $bucket -mkdir -p feast -cp -R * feast/ 
|| true - +cd infra/charts helm package feast +helm package feast-python-server -helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force \ No newline at end of file +helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force +helm gcs push --public feast-python-server-${1}.tgz feast-helm-chart-repo --force \ No newline at end of file diff --git a/infra/scripts/helm/validate-helm-chart-versions.sh b/infra/scripts/helm/validate-helm-chart-versions.sh index 8d0b2941f5f..8a6b464cbb2 100755 --- a/infra/scripts/helm/validate-helm-chart-versions.sh +++ b/infra/scripts/helm/validate-helm-chart-versions.sh @@ -3,7 +3,7 @@ set -e # Amount of file locations that need to be bumped in unison when versions increment -UNIQUE_VERSIONS_COUNT=4 +UNIQUE_VERSIONS_COUNT=18 if [ $# -ne 1 ]; then echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0" diff --git a/infra/scripts/publish-java-sdk.sh b/infra/scripts/publish-java-sdk.sh index ed00799e84a..ce1f79d2f1d 100755 --- a/infra/scripts/publish-java-sdk.sh +++ b/infra/scripts/publish-java-sdk.sh @@ -69,4 +69,4 @@ gpg --import --batch --yes $GPG_KEY_IMPORT_DIR/private-key echo "============================================================" echo "Deploying Java SDK with revision: $REVISION" echo "============================================================" -mvn -f java/pom.xml --projects datatypes/java,sdk/java -Drevision=$REVISION --batch-mode clean deploy +mvn -f java/pom.xml --projects .,datatypes/java,sdk/java -Drevision=$REVISION --batch-mode clean deploy diff --git a/infra/templates/README.md.jinja2 b/infra/templates/README.md.jinja2 index a9277bb0700..7d08c0d36f1 100644 --- a/infra/templates/README.md.jinja2 +++ b/infra/templates/README.md.jinja2 @@ -21,9 +21,9 @@ Feast is an open source feature store for machine learning. Feast is the fastest Please see our [documentation](https://docs.feast.dev/) for more information about the project. 
## 📐 Architecture - +![](docs/assets/feast-marchitecture.png) -The above architecture is the minimal Feast deployment. Want to run the full Feast on GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-gcp-aws). +The above architecture is the minimal Feast deployment. Want to run the full Feast on Snowflake/GCP/AWS? Click [here](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws). ## 🐣 Getting Started @@ -133,7 +133,7 @@ pprint(feature_vector) Please refer to the official documentation at [Documentation](https://docs.feast.dev/) * [Quickstart](https://docs.feast.dev/getting-started/quickstart) * [Tutorials](https://docs.feast.dev/tutorials/tutorials-overview) - * [Running Feast with GCP/AWS](https://docs.feast.dev/how-to-guides/feast-gcp-aws) + * [Running Feast with Snowflake/GCP/AWS](https://docs.feast.dev/how-to-guides/feast-snowflake-gcp-aws) * [Change Log](https://github.com/feast-dev/feast/blob/master/CHANGELOG.md) * [Slack (#Feast)](https://slack.feast.dev/) diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md index 1694b3f33f9..86eacfef419 100644 --- a/java/CONTRIBUTING.md +++ b/java/CONTRIBUTING.md @@ -5,7 +5,6 @@ ### Overview This guide is targeted at developers looking to contribute to Feast components in the feast-java Repository: -- [Feast Core](#feast-core) - [Feast Serving](#feast-serving) - [Feast Java Client](#feast-java-client) @@ -15,11 +14,14 @@ the feast-java Repository: #### Common Setup Common Environment Setup for all feast-java Feast components: -1. . Ensure following development tools are installed: -- Java SE Development Kit 11, Maven 3.6, `make` + +Ensure following development tools are installed: +- Java SE Development Kit 11 +- Maven 3.6 +- `make` #### Code Style -feast-java's codebase conforms to the [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html). +Feast's Java codebase conforms to the [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html). 
Automatically format the code to conform the style guide by: @@ -59,82 +61,8 @@ Specifically, proto-generated code is not indexed by IntelliJ. To fix this, navi - target/generated-sources/protobuf/java - target/generated-sources/annotations - -## Feast Core -### Environment Setup -Setting up your development environment for Feast Core: -1. Complete the feast-java [Common Setup](#common-setup) -2. Boot up a PostgreSQL instance (version 11 and above). Example of doing so via Docker: -```sh -# spawn a PostgreSQL instance as a Docker container running in the background -docker run \ - --rm -it -d \ - --name postgres \ - -e POSTGRES_DB=postgres \ - -e POSTGRES_USER=postgres \ - -e POSTGRES_PASSWORD=password \ - -p 5432:5432 postgres:12-alpine -``` - -### Configuration -Feast Core is configured using it's [application.yml](https://docs.feast.dev/reference/configuration-reference#1-feast-core-and-feast-online-serving). - -### Building and Running -1. Build / Compile Feast Core with Maven to produce an executable Feast Core JAR -```sh -mvn package -pl core --also-make -Dmaven.test.skip=true -``` - -2. Run Feast Core using the built JAR: -```sh -# where X.X.X is the version of the Feast Core JAR built -java -jar core/target/feast-core-X.X.X-exec.jar -``` - -### Unit / Integration Tests -Unit & Integration Tests can be used to verify functionality: -```sh -# run unit tests -mvn test -pl core --also-make -# run integration tests -mvn verify -pl core --also-make -``` - ## Feast Serving -### Environment Setup -Setting up your development environment for Feast Serving: -1. Complete the feast-java [Common Setup](#common-setup) -2. Boot up a Redis instance (version 5.x). Example of doing so via Docker: -```sh -docker run --name redis --rm -it -d -p 6379:6379 redis:5-alpine -``` - -> Feast Serving requires a running Feast Core instance to retrieve Feature metadata -> in order to serve features. 
See the [Feast Core section](#feast-core) for -> how to get a Feast Core instance running. - -### Configuration -Feast Serving is configured using it's [application.yml](https://docs.feast.dev/reference/configuration-reference#1-feast-core-and-feast-online-serving). - -### Building and Running -1. Build / Compile Feast Serving with Maven to produce an executable Feast Serving JAR -```sh -mvn package -pl serving --also-make -Dmaven.test.skip=true - -2. Run Feast Serving using the built JAR: -```sh -# where X.X.X is the version of the Feast serving JAR built -java -jar serving/target/feast-serving-X.X.X-exec.jar -``` - -### Unit / Integration Tests -Unit & Integration Tests can be used to verify functionality: -```sh -# run unit tests -mvn test -pl serving --also-make -# run integration tests -mvn verify -pl serving --also-make -``` +See instructions [here](serving/README.md) for developing. ## Feast Java Client ### Environment Setup @@ -144,9 +72,6 @@ Setting up your development environment for Feast Java SDK: > Feast Java Client is a Java Client for retrieving Features from a running Feast Serving instance. > See the [Feast Serving Section](#feast-serving) section for how to get a Feast Serving instance running. -### Configuration -Feast Java Client is [configured as code](https://docs.feast.dev/v/master/reference/configuration-reference#4-feast-java-and-go-sdk) - ### Building 1. 
Build / Compile Feast Java Client with Maven: diff --git a/java/README.md b/java/README.md index 8d6141faa84..ff5a1b85539 100644 --- a/java/README.md +++ b/java/README.md @@ -1,5 +1,4 @@ # Feast Java components -[![complete](https://github.com/feast-dev/feast-java/actions/workflows/complete.yml/badge.svg)](https://github.com/feast-dev/feast-java/actions/workflows/complete.yml) ### Overview @@ -19,4 +18,4 @@ Guides on Contributing: - [Development Guide for feast-java (this repository)](CONTRIBUTING.md) ### Installing using Helm -Please see the Helm charts in [charts](https://github.com/feast-dev/feast-helm-charts). +Please see the Helm charts in [infra/charts/feast](../infra/charts/feast). diff --git a/java/common/pom.xml b/java/common/pom.xml index 0c5651876ea..e5a648a7f95 100644 --- a/java/common/pom.xml +++ b/java/common/pom.xml @@ -33,13 +33,14 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} compile com.google.protobuf protobuf-java-util + ${protobuf.version} @@ -52,75 +53,34 @@ org.projectlombok lombok + ${lombok.version} com.google.auto.value auto-value-annotations + ${auto.value.version} com.google.code.gson gson + ${gson.version} io.gsonfire gson-fire + ${gson.fire.version} com.fasterxml.jackson.core jackson-databind + 2.10.5.1 com.fasterxml.jackson.datatype jackson-datatype-jsr310 - - - - - org.springframework - spring-context-support - - - net.devh - grpc-server-spring-boot-starter - - - org.springframework.boot - spring-boot-starter-logging - - - - - org.springframework.boot - spring-boot-starter-data-jpa - - - org.springframework.boot - spring-boot-starter-web - - - org.hibernate.validator - hibernate-validator - 6.1.5.Final - - - - - org.springframework.security - spring-security-core - - - org.springframework.security - spring-security-config - - - org.springframework.security - spring-security-oauth2-resource-server - - - org.springframework.security - spring-security-oauth2-jose + 2.10.1 @@ -134,7 +94,6 @@ 0.3.1 - javax.xml.bind 
jaxb-api @@ -156,6 +115,7 @@ org.hamcrest hamcrest-library test + ${hamcrest.version} @@ -163,28 +123,12 @@ junit 4.13.2 - - org.springframework - spring-test - test - org.mockito mockito-core ${mockito.version} test - - org.springframework.boot - spring-boot-starter-test - test - - - org.junit.vintage - junit-vintage-engine - - - @@ -206,6 +150,13 @@ -Xms2048m -Xmx2048m -Djdk.net.URLClassPath.disableClassPathURLCheck=true + + org.sonatype.plugins + nexus-staging-maven-plugin + + true + + diff --git a/java/common/src/main/java/feast/common/logging/AuditLogger.java b/java/common/src/main/java/feast/common/logging/AuditLogger.java index 5f70fbfc97b..f3538a794b8 100644 --- a/java/common/src/main/java/feast/common/logging/AuditLogger.java +++ b/java/common/src/main/java/feast/common/logging/AuditLogger.java @@ -32,26 +32,23 @@ import org.slf4j.Marker; import org.slf4j.MarkerFactory; import org.slf4j.event.Level; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.info.BuildProperties; -import org.springframework.stereotype.Component; @Slf4j -@Component public class AuditLogger { private static final String FLUENTD_DESTINATION = "fluentd"; private static final Marker AUDIT_MARKER = MarkerFactory.getMarker("AUDIT_MARK"); private static FluentLogger fluentLogger; private static AuditLogProperties properties; - private static BuildProperties buildProperties; + private static String artifact; + private static String version; - @Autowired - public AuditLogger(LoggingProperties loggingProperties, BuildProperties buildProperties) { + public AuditLogger(LoggingProperties loggingProperties, String artifact, String version) { // Spring runs this constructor when creating the AuditLogger bean, // which allows us to populate the AuditLogger class with dependencies. 
// This allows us to use the dependencies in the AuditLogger's static methods AuditLogger.properties = loggingProperties.getAudit(); - AuditLogger.buildProperties = buildProperties; + AuditLogger.artifact = artifact; + AuditLogger.version = version; if (AuditLogger.properties.getMessageLogging() != null && AuditLogger.properties.getMessageLogging().isEnabled()) { AuditLogger.fluentLogger = @@ -69,12 +66,7 @@ public AuditLogger(LoggingProperties loggingProperties, BuildProperties buildPro * @param entryBuilder with all fields set except instance. */ public static void logMessage(Level level, MessageAuditLogEntry.Builder entryBuilder) { - log( - level, - entryBuilder - .setComponent(buildProperties.getArtifact()) - .setVersion(buildProperties.getVersion()) - .build()); + log(level, entryBuilder.setComponent(artifact).setVersion(version).build()); } /** @@ -90,10 +82,7 @@ public static void logAction( log( level, ActionAuditLogEntry.of( - buildProperties.getArtifact(), - buildProperties.getArtifact(), - LogResource.of(resourceType, resourceId), - action)); + artifact, version, LogResource.of(resourceType, resourceId), action)); } /** @@ -109,10 +98,7 @@ public static void logTransition( log( level, TransitionAuditLogEntry.of( - buildProperties.getArtifact(), - buildProperties.getArtifact(), - LogResource.of(resourceType, resourceId), - status)); + artifact, version, LogResource.of(resourceType, resourceId), status)); } /** diff --git a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java b/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java index ffd7c6b9543..661642a89ad 100644 --- a/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java +++ b/java/common/src/main/java/feast/common/logging/interceptors/GrpcMessageInterceptor.java @@ -30,10 +30,6 @@ import io.grpc.ServerInterceptor; import io.grpc.Status; import org.slf4j.event.Level; -import 
org.springframework.beans.factory.annotation.Autowired; -import org.springframework.security.core.Authentication; -import org.springframework.security.core.context.SecurityContextHolder; -import org.springframework.stereotype.Component; /** * GrpcMessageInterceptor intercepts a GRPC calls to log handling of GRPC messages to the Audit Log. @@ -41,7 +37,6 @@ * name and assumed authenticated identity (if authentication is enabled). NOTE: * GrpcMessageInterceptor assumes that all service calls are unary (ie single request/response). */ -@Component public class GrpcMessageInterceptor implements ServerInterceptor { private LoggingProperties loggingProperties; @@ -50,7 +45,6 @@ public class GrpcMessageInterceptor implements ServerInterceptor { * * @param loggingProperties properties used to configure logging interceptor. */ - @Autowired public GrpcMessageInterceptor(LoggingProperties loggingProperties) { this.loggingProperties = loggingProperties; } @@ -80,9 +74,7 @@ public Listener interceptCall( entryBuilder.setMethod(fullMethodName.substring(fullMethodName.indexOf("/") + 1)); // Attempt Extract current authenticated identity. - Authentication authentication = SecurityContextHolder.getContext().getAuthentication(); - String identity = (authentication != null) ? getIdentity(authentication) : ""; - entryBuilder.setIdentity(identity); + entryBuilder.setIdentity(""); // Register forwarding call to intercept outgoing response and log to audit log call = @@ -115,13 +107,4 @@ public void onMessage(ReqT message) { } }; } - - /** - * Extract current authenticated identity from given {@link Authentication}. Extracts subject - * claim if specified in AuthorizationProperties, otherwise returns authentication subject. 
- */ - private String getIdentity(Authentication authentication) { - // use subject claim as identity if set in security authorization properties - return authentication.getName(); - } } diff --git a/java/common/src/main/java/feast/common/models/FeatureV2.java b/java/common/src/main/java/feast/common/models/Feature.java similarity index 74% rename from java/common/src/main/java/feast/common/models/FeatureV2.java rename to java/common/src/main/java/feast/common/models/Feature.java index 8420cca80c6..340a8cbe69e 100644 --- a/java/common/src/main/java/feast/common/models/FeatureV2.java +++ b/java/common/src/main/java/feast/common/models/Feature.java @@ -18,7 +18,7 @@ import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; -public class FeatureV2 { +public class Feature { /** * Accepts FeatureReferenceV2 object and returns its reference in String @@ -27,10 +27,10 @@ public class FeatureV2 { * @param featureReference {@link FeatureReferenceV2} * @return String format of FeatureReferenceV2 */ - public static String getFeatureStringRef(FeatureReferenceV2 featureReference) { - String ref = featureReference.getName(); - if (!featureReference.getFeatureTable().isEmpty()) { - ref = featureReference.getFeatureTable() + ":" + ref; + public static String getFeatureReference(FeatureReferenceV2 featureReference) { + String ref = featureReference.getFeatureName(); + if (!featureReference.getFeatureViewName().isEmpty()) { + ref = featureReference.getFeatureViewName() + ":" + ref; } return ref; } @@ -47,4 +47,12 @@ public static String getFeatureName(String featureReference) { String[] tokens = featureReference.split(":", 2); return tokens[tokens.length - 1]; } + + public static FeatureReferenceV2 parseFeatureReference(String featureReference) { + String[] tokens = featureReference.split(":", 2); + return FeatureReferenceV2.newBuilder() + .setFeatureViewName(tokens[0]) + .setFeatureName(tokens[1]) + .build(); + } } diff --git 
a/java/common/src/main/java/feast/common/models/FeatureTable.java b/java/common/src/main/java/feast/common/models/FeatureTable.java deleted file mode 100644 index 88fac151ce7..00000000000 --- a/java/common/src/main/java/feast/common/models/FeatureTable.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2020 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.common.models; - -import feast.proto.core.FeatureTableProto.FeatureTableSpec; -import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; - -public class FeatureTable { - - /** - * Accepts FeatureTableSpec object and returns its reference in String - * "project/featuretable_name". - * - * @param project project name - * @param featureTableSpec {@link FeatureTableSpec} - * @return String format of FeatureTableReference - */ - public static String getFeatureTableStringRef(String project, FeatureTableSpec featureTableSpec) { - return String.format("%s/%s", project, featureTableSpec.getName()); - } - - /** - * Accepts FeatureReferenceV2 object and returns its reference in String - * "project/featuretable_name". 
- * - * @param project project name - * @param featureReference {@link FeatureReferenceV2} - * @return String format of FeatureTableReference - */ - public static String getFeatureTableStringRef( - String project, FeatureReferenceV2 featureReference) { - return String.format("%s/%s", project, featureReference.getFeatureTable()); - } -} diff --git a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java b/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java index cf355e09e4b..bc3dcbcf748 100644 --- a/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java +++ b/java/common/src/test/java/feast/common/logging/entry/AuditLogEntryTest.java @@ -21,11 +21,12 @@ import com.google.gson.JsonObject; import com.google.gson.JsonParser; +import com.google.protobuf.Timestamp; import feast.common.logging.entry.LogResource.ResourceType; +import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; import feast.proto.types.ValueProto.Value; import io.grpc.Status; import java.util.Arrays; @@ -39,26 +40,35 @@ public List getTestAuditLogs() { .addAllFeatures( Arrays.asList( FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature1") + .setFeatureViewName("featuretable_1") + .setFeatureName("feature1") .build(), FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature2") + .setFeatureViewName("featuretable_1") + .setFeatureName("feature2") .build())) .build(); GetOnlineFeaturesResponse responseSpec = GetOnlineFeaturesResponse.newBuilder() - .addAllFieldValues( + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + 
ServingAPIProto.FeatureList.newBuilder() + .addAllVal( + Arrays.asList( + "featuretable_1:feature_1", "featuretable_1:feature2")))) + .addAllResults( Arrays.asList( - FieldValues.newBuilder() - .putFields( - "featuretable_1:feature_1", Value.newBuilder().setInt32Val(32).build()) + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(Value.newBuilder().setInt32Val(32).build()) + .addStatuses(ServingAPIProto.FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder().build()) .build(), - FieldValues.newBuilder() - .putFields( - "featuretable_1:feature2", Value.newBuilder().setInt32Val(64).build()) + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(Value.newBuilder().setInt32Val(64).build()) + .addStatuses(ServingAPIProto.FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder().build()) .build())) .build(); diff --git a/java/common/src/test/java/feast/common/models/FeaturesTest.java b/java/common/src/test/java/feast/common/models/FeaturesTest.java index 180f7e4e697..953da61afeb 100644 --- a/java/common/src/test/java/feast/common/models/FeaturesTest.java +++ b/java/common/src/test/java/feast/common/models/FeaturesTest.java @@ -31,14 +31,14 @@ public class FeaturesTest { public void setUp() { featureReference = FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature1") + .setFeatureViewName("featuretable_1") + .setFeatureName("feature1") .build(); } @Test public void shouldReturnFeatureStringRef() { - String actualFeatureStringRef = FeatureV2.getFeatureStringRef(featureReference); + String actualFeatureStringRef = Feature.getFeatureReference(featureReference); String expectedFeatureStringRef = "featuretable_1:feature1"; assertThat(actualFeatureStringRef, equalTo(expectedFeatureStringRef)); diff --git a/java/datatypes/java/README.md b/java/datatypes/README.md similarity index 100% rename from java/datatypes/java/README.md rename to java/datatypes/README.md diff --git 
a/java/datatypes/java/src/main/proto/feast b/java/datatypes/java/src/main/proto/feast deleted file mode 120000 index 53364e5f45f..00000000000 --- a/java/datatypes/java/src/main/proto/feast +++ /dev/null @@ -1 +0,0 @@ -../../../../../../protos/feast \ No newline at end of file diff --git a/java/datatypes/java/pom.xml b/java/datatypes/pom.xml similarity index 85% rename from java/datatypes/java/pom.xml rename to java/datatypes/pom.xml index fe6c380a10e..a5c82d4c45c 100644 --- a/java/datatypes/java/pom.xml +++ b/java/datatypes/pom.xml @@ -30,13 +30,13 @@ 11 11 - datatypes-java + feast-datatypes dev.feast feast-parent ${revision} - ../.. + ../ @@ -75,6 +75,13 @@ + + org.sonatype.plugins + nexus-staging-maven-plugin + + false + + @@ -83,29 +90,34 @@ com.google.guava guava + ${guava.version} com.google.protobuf protobuf-java + ${protobuf.version} io.grpc grpc-core + ${grpc.version} io.grpc grpc-protobuf + ${grpc.version} io.grpc grpc-services + ${grpc.version} io.grpc grpc-stub + ${grpc.version} - javax.annotation javax.annotation-api diff --git a/java/datatypes/src/main/proto/feast b/java/datatypes/src/main/proto/feast new file mode 120000 index 00000000000..463e4045de1 --- /dev/null +++ b/java/datatypes/src/main/proto/feast @@ -0,0 +1 @@ +../../../../../protos/feast \ No newline at end of file diff --git a/java/docs/coverage/java/pom.xml b/java/docs/coverage/pom.xml similarity index 96% rename from java/docs/coverage/java/pom.xml rename to java/docs/coverage/pom.xml index 5f794224969..f6e08909ee6 100644 --- a/java/docs/coverage/java/pom.xml +++ b/java/docs/coverage/pom.xml @@ -30,7 +30,7 @@ dev.feast feast-parent ${revision} - ../../.. + ../.. 
Feast Coverage Java @@ -61,7 +61,7 @@ dev.feast - feast-sdk + feast-serving-client ${project.version} diff --git a/java/infra/docker/feature-server/Dockerfile b/java/infra/docker/feature-server/Dockerfile index a07d3301b2c..dbd8c914724 100644 --- a/java/infra/docker/feature-server/Dockerfile +++ b/java/infra/docker/feature-server/Dockerfile @@ -7,14 +7,14 @@ FROM maven:3.6-jdk-11 as builder WORKDIR /build COPY java/pom.xml . -COPY java/datatypes/java/pom.xml datatypes/java/pom.xml +COPY java/datatypes/pom.xml datatypes/pom.xml COPY java/common/pom.xml common/pom.xml COPY java/serving/pom.xml serving/pom.xml COPY java/storage/api/pom.xml storage/api/pom.xml COPY java/storage/connectors/pom.xml storage/connectors/pom.xml COPY java/storage/connectors/redis/pom.xml storage/connectors/redis/pom.xml -COPY java/sdk/java/pom.xml sdk/java/pom.xml -COPY java/docs/coverage/java/pom.xml docs/coverage/java/pom.xml +COPY java/sdk/pom.xml sdk/pom.xml +COPY java/docs/coverage/pom.xml docs/coverage/pom.xml # Setting Maven repository .m2 directory relative to /build folder gives the # user to optionally use cached repository when building the image by copying @@ -24,7 +24,7 @@ COPY java/pom.xml .m2/* .m2/ RUN mvn dependency:go-offline -DexcludeGroupIds:dev.feast 2>/dev/null || true COPY java/ . 
-COPY protos/feast datatypes/java/src/main/proto/feast +COPY protos/feast datatypes/src/main/proto/feast ARG VERSION=dev RUN mvn --also-make --projects serving -Drevision=$VERSION \ diff --git a/java/pom.xml b/java/pom.xml index ead8af13096..afbd6298055 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -28,17 +28,17 @@ pom - datatypes/java + datatypes storage/api storage/connectors serving - sdk/java - docs/coverage/java + sdk + docs/coverage common - 0.15.2-SNAPSHOT + 0.18.0 https://github.com/feast-dev/feast UTF-8 @@ -46,11 +46,7 @@ 1.30.2 3.12.2 - 3.12.2 - 2.3.1.RELEASE - 5.2.7.RELEASE - 5.3.0.RELEASE - 2.9.0.RELEASE + 3.16.1 1.111.1 0.8.0 1.9.10 @@ -59,10 +55,9 @@ 0.26.0 - 2.17.0 + 2.17.1 2.9.9 2.0.2 - 2.5.0.RELEASE 1.18.12 1.8.4 2.8.6 @@ -72,9 +67,9 @@ 2.3.1 1.3.2 2.0.1.Final - 2.8.0 0.20.0 1.6.6 + 29.0-jre - - org.apache.commons - commons-lang3 - ${commons.lang3.version} - - - - com.google.inject - guice - 5.0.1 - - - - - com.google.cloud - google-cloud-bigquery - ${com.google.cloud.version} - - - com.google.cloud - google-cloud-storage - ${com.google.cloud.version} - - - - - com.google.cloud - google-cloud-nio - 0.83.0-alpha - - - - io.opencensus - opencensus-api - ${opencensus.version} - - - io.opencensus - opencensus-contrib-grpc-util - ${opencensus.version} - - - io.opencensus - opencensus-contrib-http-util - ${opencensus.version} - - - - - io.grpc - grpc-core - ${grpc.version} - - - io.grpc - grpc-api - ${grpc.version} - - - io.grpc - grpc-context - ${grpc.version} - - - io.grpc - grpc-all - ${grpc.version} - - - io.grpc - grpc-okhttp - ${grpc.version} - - - io.grpc - grpc-auth - ${grpc.version} - - - io.grpc - grpc-grpclb - ${grpc.version} - - - io.grpc - grpc-alts - ${grpc.version} - - - io.grpc - grpc-netty - ${grpc.version} - - - io.grpc - grpc-netty-shaded - ${grpc.version} - - - io.grpc - grpc-protobuf - ${grpc.version} - - - io.grpc - grpc-services - ${grpc.version} - - - io.grpc - grpc-stub - ${grpc.version} - - - io.grpc - grpc-testing - 
${grpc.version} - test - - - - - org.apache.arrow - arrow-java-root - 5.0.0 - pom - - - - - org.apache.arrow - arrow-vector - 5.0.0 - - - - - org.apache.arrow - arrow-memory - 5.0.0 - pom - - - - - org.apache.arrow - arrow-memory-netty - 5.0.0 - runtime - - - - - net.devh - grpc-server-spring-boot-starter - ${grpc.spring.boot.starter.version} - - - - - io.prometheus - simpleclient - ${io.prometheus.version} - - - io.prometheus - simpleclient_servlet - ${io.prometheus.version} - - - - - org.springframework.security - spring-security-core - ${spring.security.version} - - - org.springframework.security - spring-security-config - ${spring.security.version} - - - org.springframework.security - spring-security-oauth2-resource-server - ${spring.security.version} - - - org.springframework.security - spring-security-oauth2-jose - ${spring.security.version} - - - com.google.auth - google-auth-library-oauth2-http - ${google.auth.library.oauth2.http.version} - - - - - joda-time - joda-time - ${joda.time.version} - - - com.datadoghq - java-dogstatsd-client - 2.6.1 - - - com.google.guava - guava - 29.0-jre - - - com.google.protobuf - protobuf-java - ${protobuf.version} - - - com.google.protobuf - protobuf-java-util - ${protobuf.version} - - - org.projectlombok - lombok - ${lombok.version} - provided - - - com.google.auto.value - auto-value-annotations - ${auto.value.version} - - - com.google.auto.value - auto-value - ${auto.value.version} - - - com.google.code.gson - gson - ${gson.version} - - - io.gsonfire - gson-fire - ${gson.fire.version} - - - - com.github.kstyrc - embedded-redis - 0.6 - test - - - - - - net.bytebuddy - byte-buddy - ${byte-buddy.version} - - - org.mockito - mockito-core - ${mockito.version} - test - - - org.springframework.boot - spring-boot-starter-web - ${spring.boot.version} - - - org.springframework.boot - spring-boot-starter-logging - - - - - org.apache.logging.log4j - log4j-api - ${log4jVersion} - - - org.apache.logging.log4j - log4j-core - 
${log4jVersion} - - - org.apache.logging.log4j - log4j-jul - ${log4jVersion} - - - org.apache.logging.log4j - log4j-web - ${log4jVersion} - org.apache.logging.log4j log4j-slf4j-impl @@ -462,26 +156,6 @@ 1.7.30 - - - - org.springframework.boot - spring-boot-dependencies - ${spring.boot.version} - pom - import - - - com.squareup.okio - okio - 1.17.2 - javax.xml.bind jaxb-api @@ -497,6 +171,19 @@ validation-api ${javax.validation.version} + + + org.junit.platform + junit-platform-engine + 1.8.2 + test + + + org.junit.platform + junit-platform-commons + 1.8.2 + test + @@ -693,22 +380,6 @@ - - org.springframework.boot - spring-boot-maven-plugin - - - true - - - - build-info - - build-info - - - - org.sonatype.plugins @@ -720,6 +391,7 @@ https://oss.sonatype.org/ true + true @@ -137,6 +142,13 @@ org.jacoco jacoco-maven-plugin + + org.sonatype.plugins + nexus-staging-maven-plugin + + false + + diff --git a/java/sdk/java/src/main/java/com/gojek/feast/FeastClient.java b/java/sdk/src/main/java/dev/feast/FeastClient.java similarity index 74% rename from java/sdk/java/src/main/java/com/gojek/feast/FeastClient.java rename to java/sdk/src/main/java/dev/feast/FeastClient.java index 0c0b279be6b..c10a76ecf81 100644 --- a/java/sdk/java/src/main/java/com/gojek/feast/FeastClient.java +++ b/java/sdk/src/main/java/dev/feast/FeastClient.java @@ -14,16 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.gojek.feast; +package dev.feast; -import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; +import com.google.common.collect.Lists; +import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequest; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.serving.ServingServiceGrpc; import feast.proto.serving.ServingServiceGrpc.ServingServiceBlockingStub; +import feast.proto.types.ValueProto; import io.grpc.CallCredentials; import io.grpc.ManagedChannel; import io.grpc.ManagedChannelBuilder; @@ -32,9 +33,8 @@ import io.opentracing.contrib.grpc.TracingClientInterceptor; import io.opentracing.util.GlobalTracer; import java.io.File; -import java.util.HashSet; -import java.util.List; -import java.util.Optional; +import java.time.Instant; +import java.util.*; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.net.ssl.SSLException; @@ -118,11 +118,60 @@ public GetFeastServingInfoResponse getFeastServingInfo() { * @param featureRefs list of string feature references to retrieve in the following format * featureTable:feature, where 'featureTable' and 'feature' refer to the FeatureTable and * Feature names respectively. Only the Feature name is required. - * @param rows list of {@link Row} to select the entities to retrieve the features for. + * @param entities list of {@link Row} to select the entities to retrieve the features for. * @return list of {@link Row} containing retrieved data fields. 
*/ - public List getOnlineFeatures(List featureRefs, List rows) { - return getOnlineFeatures(featureRefs, rows, ""); + public List getOnlineFeatures(List featureRefs, List entities) { + GetOnlineFeaturesRequest.Builder requestBuilder = GetOnlineFeaturesRequest.newBuilder(); + + requestBuilder.setFeatures( + ServingAPIProto.FeatureList.newBuilder().addAllVal(featureRefs).build()); + + requestBuilder.putAllEntities(getEntityValuesMap(entities)); + + GetOnlineFeaturesResponse response = stub.getOnlineFeatures(requestBuilder.build()); + + List results = Lists.newArrayList(); + if (response.getResultsCount() == 0) { + return results; + } + + for (int rowIdx = 0; rowIdx < response.getResults(0).getValuesCount(); rowIdx++) { + Row row = Row.create(); + for (int featureIdx = 0; featureIdx < response.getResultsCount(); featureIdx++) { + row.set( + response.getMetadata().getFeatureNames().getVal(featureIdx), + response.getResults(featureIdx).getValues(rowIdx), + response.getResults(featureIdx).getStatuses(rowIdx)); + + row.setEntityTimestamp( + Instant.ofEpochSecond( + response.getResults(featureIdx).getEventTimestamps(rowIdx).getSeconds())); + } + for (Map.Entry entry : + entities.get(rowIdx).getFields().entrySet()) { + row.set(entry.getKey(), entry.getValue()); + } + + results.add(row); + } + return results; + } + + private Map getEntityValuesMap(List entities) { + Map columnarEntities = new HashMap<>(); + for (Row row : entities) { + for (Map.Entry field : row.getFields().entrySet()) { + if (!columnarEntities.containsKey(field.getKey())) { + columnarEntities.put(field.getKey(), ValueProto.RepeatedValue.newBuilder()); + } + columnarEntities.get(field.getKey()).addVal(field.getValue()); + } + } + + return columnarEntities.entrySet().stream() + .map((e) -> Map.entry(e.getKey(), e.getValue().build())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } /** @@ -149,42 +198,7 @@ public List getOnlineFeatures(List featureRefs, List rows) { * @return list of 
{@link Row} containing retrieved data fields. */ public List getOnlineFeatures(List featureRefs, List rows, String project) { - List features = RequestUtil.createFeatureRefs(featureRefs); - // build entity rows and collect entity references - HashSet entityRefs = new HashSet<>(); - List entityRows = - rows.stream() - .map( - row -> { - entityRefs.addAll(row.getFields().keySet()); - return EntityRow.newBuilder() - .setTimestamp(row.getEntityTimestamp()) - .putAllFields(row.getFields()) - .build(); - }) - .collect(Collectors.toList()); - - GetOnlineFeaturesResponse response = - stub.getOnlineFeaturesV2( - GetOnlineFeaturesRequestV2.newBuilder() - .addAllFeatures(features) - .addAllEntityRows(entityRows) - .setProject(project) - .build()); - - return response.getFieldValuesList().stream() - .map( - fieldValues -> { - Row row = Row.create(); - for (String fieldName : fieldValues.getFieldsMap().keySet()) { - row.set( - fieldName, - fieldValues.getFieldsMap().get(fieldName), - fieldValues.getStatusesMap().get(fieldName)); - } - return row; - }) - .collect(Collectors.toList()); + return getOnlineFeatures(featureRefs, rows); } protected FeastClient(ManagedChannel channel, Optional credentials) { diff --git a/java/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java b/java/sdk/src/main/java/dev/feast/RequestUtil.java similarity index 95% rename from java/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java rename to java/sdk/src/main/java/dev/feast/RequestUtil.java index 69c8f9f737a..fc13c453119 100644 --- a/java/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java +++ b/java/sdk/src/main/java/dev/feast/RequestUtil.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.gojek.feast; +package dev.feast; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import java.util.List; @@ -71,8 +71,8 @@ public static FeatureReferenceV2 parseFeatureRef(String featureRefString) { String[] featureReferenceParts = featureRefString.split(":"); FeatureReferenceV2 featureRef = FeatureReferenceV2.newBuilder() - .setFeatureTable(featureReferenceParts[0]) - .setName(featureReferenceParts[1]) + .setFeatureViewName(featureReferenceParts[0]) + .setFeatureName(featureReferenceParts[1]) .build(); return featureRef; diff --git a/java/sdk/java/src/main/java/com/gojek/feast/Row.java b/java/sdk/src/main/java/dev/feast/Row.java similarity index 97% rename from java/sdk/java/src/main/java/com/gojek/feast/Row.java rename to java/sdk/src/main/java/dev/feast/Row.java index 51f820e320a..308daa5a2f0 100644 --- a/java/sdk/java/src/main/java/com/gojek/feast/Row.java +++ b/java/sdk/src/main/java/dev/feast/Row.java @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.gojek.feast; +package dev.feast; import com.google.protobuf.ByteString; import com.google.protobuf.Timestamp; import com.google.protobuf.util.Timestamps; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus; +import feast.proto.serving.ServingAPIProto.FieldStatus; import feast.proto.types.ValueProto.Value; import feast.proto.types.ValueProto.Value.ValCase; import java.time.Instant; diff --git a/java/sdk/java/src/main/java/com/gojek/feast/SecurityConfig.java b/java/sdk/src/main/java/dev/feast/SecurityConfig.java similarity index 98% rename from java/sdk/java/src/main/java/com/gojek/feast/SecurityConfig.java rename to java/sdk/src/main/java/dev/feast/SecurityConfig.java index 94c779cf440..29acb97631a 100644 --- a/java/sdk/java/src/main/java/com/gojek/feast/SecurityConfig.java +++ b/java/sdk/src/main/java/dev/feast/SecurityConfig.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.gojek.feast; +package dev.feast; import com.google.auto.value.AutoValue; import io.grpc.CallCredentials; diff --git a/java/sdk/java/src/test/java/com/gojek/feast/FeastClientTest.java b/java/sdk/src/test/java/dev/feast/FeastClientTest.java similarity index 71% rename from java/sdk/java/src/test/java/com/gojek/feast/FeastClientTest.java rename to java/sdk/src/test/java/dev/feast/FeastClientTest.java index 29185cd153c..1dfb9989c95 100644 --- a/java/sdk/java/src/test/java/com/gojek/feast/FeastClientTest.java +++ b/java/sdk/src/test/java/dev/feast/FeastClientTest.java @@ -14,20 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.gojek.feast; +package dev.feast; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.AdditionalAnswers.delegatesTo; import static org.mockito.Mockito.mock; import com.google.protobuf.Timestamp; -import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow; +import feast.proto.serving.ServingAPIProto; +import feast.proto.serving.ServingAPIProto.FieldStatus; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequest; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; import feast.proto.serving.ServingServiceGrpc.ServingServiceImplBase; +import feast.proto.types.ValueProto; import feast.proto.types.ValueProto.Value; import io.grpc.*; import io.grpc.inprocess.InProcessChannelBuilder; @@ -56,8 +55,8 @@ public class FeastClientTest { delegatesTo( new ServingServiceImplBase() { @Override - public void getOnlineFeaturesV2( - GetOnlineFeaturesRequestV2 request, + public void getOnlineFeatures( + GetOnlineFeaturesRequest request, StreamObserver responseObserver) { if (!request.equals(FeastClientTest.getFakeRequest())) { responseObserver.onError(Status.FAILED_PRECONDITION.asRuntimeException()); @@ -125,35 +124,46 @@ private void shouldGetOnlineFeaturesWithClient(FeastClient client) { }); } - private static GetOnlineFeaturesRequestV2 getFakeRequest() { + private static GetOnlineFeaturesRequest getFakeRequest() { // setup mock serving service stub - return GetOnlineFeaturesRequestV2.newBuilder() - .addFeatures( - FeatureReferenceV2.newBuilder().setFeatureTable("driver").setName("name").build()) - .addFeatures( - 
FeatureReferenceV2.newBuilder().setFeatureTable("driver").setName("rating").build()) - .addFeatures( - FeatureReferenceV2.newBuilder().setFeatureTable("driver").setName("null_value").build()) - .addEntityRows( - EntityRow.newBuilder() - .setTimestamp(Timestamp.newBuilder().setSeconds(100)) - .putFields("driver_id", intValue(1))) - .setProject("driver_project") + return GetOnlineFeaturesRequest.newBuilder() + .setFeatures( + ServingAPIProto.FeatureList.newBuilder() + .addVal("driver:name") + .addVal("driver:rating") + .addVal("driver:null_value") + .build()) + .putEntities("driver_id", ValueProto.RepeatedValue.newBuilder().addVal(intValue(1)).build()) .build(); } private static GetOnlineFeaturesResponse getFakeResponse() { return GetOnlineFeaturesResponse.newBuilder() - .addFieldValues( - FieldValues.newBuilder() - .putFields("driver_id", intValue(1)) - .putStatuses("driver_id", FieldStatus.PRESENT) - .putFields("driver:name", strValue("david")) - .putStatuses("driver:name", FieldStatus.PRESENT) - .putFields("driver:rating", intValue(3)) - .putStatuses("driver:rating", FieldStatus.PRESENT) - .putFields("driver:null_value", Value.newBuilder().build()) - .putStatuses("driver:null_value", FieldStatus.NULL_VALUE) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(strValue("david")) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder()) + .build()) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(intValue(3)) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(Timestamp.newBuilder()) + .build()) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(Value.newBuilder().build()) + .addStatuses(FieldStatus.NULL_VALUE) + .addEventTimestamps(Timestamp.newBuilder()) + .build()) + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + ServingAPIProto.FeatureList.newBuilder() + .addVal("driver:name") + 
.addVal("driver:rating") + .addVal("driver:null_value")) .build()) .build(); } diff --git a/java/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java b/java/sdk/src/test/java/dev/feast/RequestUtilTest.java similarity index 89% rename from java/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java rename to java/sdk/src/test/java/dev/feast/RequestUtilTest.java index 1592e20664d..e5684ecd18a 100644 --- a/java/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java +++ b/java/sdk/src/test/java/dev/feast/RequestUtilTest.java @@ -14,14 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.gojek.feast; +package dev.feast; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import com.google.common.collect.ImmutableList; import com.google.protobuf.TextFormat; -import feast.common.models.FeatureV2; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import java.util.Arrays; import java.util.Comparator; @@ -41,8 +40,8 @@ private static Stream provideValidFeatureRefs() { Arrays.asList("driver:driver_id"), Arrays.asList( FeatureReferenceV2.newBuilder() - .setFeatureTable("driver") - .setName("driver_id") + .setFeatureViewName("driver") + .setFeatureName("driver_id") .build()))); } @@ -52,8 +51,8 @@ void createFeatureRefs_ShouldReturnFeaturesForValidFeatureRefs( List input, List expected) { List actual = RequestUtil.createFeatureRefs(input); // Order of the actual and expected FeatureTables do no not matter - actual.sort(Comparator.comparing(FeatureReferenceV2::getName)); - expected.sort(Comparator.comparing(FeatureReferenceV2::getName)); + actual.sort(Comparator.comparing(FeatureReferenceV2::getFeatureName)); + expected.sort(Comparator.comparing(FeatureReferenceV2::getFeatureName)); assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { String expectedString = 
TextFormat.printer().printToString(expected.get(i)); @@ -68,7 +67,9 @@ void renderFeatureRef_ShouldReturnFeatureRefString( List expected, List input) { input = input.stream().map(ref -> ref.toBuilder().build()).collect(Collectors.toList()); List actual = - input.stream().map(ref -> FeatureV2.getFeatureStringRef(ref)).collect(Collectors.toList()); + input.stream() + .map(ref -> String.format("%s:%s", ref.getFeatureViewName(), ref.getFeatureName())) + .collect(Collectors.toList()); assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { assertEquals(expected.get(i), actual.get(i)); diff --git a/java/serving/README.md b/java/serving/README.md index cce8c7d6e29..0ce2edc091d 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -1,101 +1,126 @@ -### Getting Started Guide for Feast Serving Developers +## Getting Started Guide for Feast Serving Developers -Pre-requisites: +### Overview +This guide is targeted at developers looking to contribute to Feast Serving: +- [Building and running Feast Serving locally](#building-and-running-feast-serving-locally) +- [Feast Java Client](#feast-java-client) + +### Pre-requisites: - [Maven](https://maven.apache.org/install.html) build tool version 3.6.x -- A running Feast Core instance -- A running Store instance e.g. local Redis Store instance +- A Feast feature repo (e.g. https://github.com/feast-dev/feast-demo) +- A running Store instance e.g. local Redis instance with `redis-server` -From the Feast project root directory, run the following Maven command to start Feast Serving gRPC service running on port 6566 locally: +### Building and running Feast Serving locally: +From the Feast GitHub root, run: -```bash -# Assumptions: -# - Local Feast Core is running on localhost:6565 -# Uses configuration from serving/src/main/resources/application.yml -mvn -pl serving spring-boot:run -Dspring-boot.run.arguments=\ ---feast.core-host=localhost,\ ---feast.core-port=6565 -``` +1. 
`mvn -f java/pom.xml install -Dmaven.test.skip=true` +2. Package an executable jar for serving: `mvn -f java/serving/pom.xml package -Dmaven.test.skip=true` +3. Make a file called `application-override.yaml` that specifies your Feast repo project and registry path: + 1. Note if you have a remote registry, you can specify that too (e.g. `gs://...`) + ```yaml + feast: + project: "feast_demo" + registry: "/Users/[your username]/GitHub/feast-demo/feature_repo/data/registry.db" + ``` +4. Run the jar with dependencies that was built from Maven (note the version might vary): + ``` + java \ + -Xms1g \ + -Xmx4g \ + -jar java/serving/target/feast-serving-0.17.1-SNAPSHOT-jar-with-dependencies.jar \ + classpath:/application.yml,file:./application-override.yaml + ``` +5. Now you have a Feast Serving gRPC service running on port 6566 locally! +### Running test queries If you have [grpc_cli](https://github.com/grpc/grpc/blob/master/doc/command_line_tool.md) installed, you can check that Feast Serving is running ``` grpc_cli ls localhost:6566 -grpc_cli call localhost:6566 GetFeastServingVersion '' -grpc_cli call localhost:6566 GetFeastServingType '' ``` +An example of fetching features ```bash -grpc_cli call localhost:6565 ApplyFeatureSet ' -feature_set { - name: "driver" - entities { - name: "driver_id" - value_type: STRING - } - features { - name: "city" - value_type: STRING - } - features { - name: "booking_completed_count" - value_type: INT64 - } - source { - type: KAFKA - kafka_source_config { - bootstrap_servers: "localhost:9092" +grpc_cli call localhost:6566 GetOnlineFeatures ' +features { + val: "driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" +} +entities { + key: "driver_id" + value { + val { + int64_val: 1001 + } + val { + int64_val: 1002 } } } ' - -grpc_cli call localhost:6565 GetFeatureSets ' -filter { - feature_set_name: "driver" +``` +Example output: +``` +connecting to localhost:6566 +metadata { + feature_names { + val: 
"driver_hourly_stats:conv_rate" + val: "driver_hourly_stats:acc_rate" + } } -' - -grpc_cli call localhost:6566 GetBatchFeatures ' -feature_sets { - name: "driver" - feature_names: "booking_completed_count" - max_age { - seconds: 86400 +results { + values { + float_val: 0.812357187 + } + values { + float_val: 0.379484832 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 } } -entity_dataset { - entity_names: "driver_id" - entity_dataset_rows { - entity_timestamp { - seconds: 1569873954 - } +results { + values { + float_val: 0.840873241 + } + values { + float_val: 0.151376978 + } + statuses: PRESENT + statuses: PRESENT + event_timestamps { + seconds: 1631725200 + } + event_timestamps { + seconds: 1631725200 } } -' +Rpc succeeded with OK status ``` +### Debugging Feast Serving +You can debug this like any other Java executable. Swap the java command above with: ``` -python3 < org.apache.maven.plugins maven-jar-plugin + 3.2.2 @@ -87,10 +88,15 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} + + dev.feast + feast-common + ${project.version} + dev.feast @@ -119,38 +125,50 @@ org.slf4j slf4j-simple + 1.7.30 org.apache.logging.log4j log4j-web + ${log4jVersion} io.grpc grpc-services + ${grpc.version} io.grpc grpc-stub + ${grpc.version} + + + io.grpc + grpc-netty-shaded + ${grpc.version} com.google.protobuf protobuf-java-util + ${protobuf.version} com.google.guava guava + ${guava.version} joda-time joda-time + ${joda.time.version} @@ -198,7 +216,7 @@ com.google.auto.value auto-value-annotations - 1.6.6 + ${auto.value.version} @@ -231,11 +249,13 @@ io.grpc grpc-testing + ${grpc.version} org.mockito mockito-core + ${mockito.version} test @@ -281,11 +301,19 @@ com.fasterxml.jackson.dataformat jackson-dataformat-yaml + 2.11.0 + + + + com.fasterxml.jackson.core + jackson-annotations + 2.12.2 com.github.kstyrc embedded-redis + 0.6 test @@ -340,6 +368,13 @@ false + + 
org.sonatype.plugins + nexus-staging-maven-plugin + + true + + diff --git a/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java b/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java index 224c3e8e55e..664d6dd4ec5 100644 --- a/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java +++ b/java/serving/src/main/java/feast/serving/ServingGuiceApplication.java @@ -27,7 +27,7 @@ public class ServingGuiceApplication { public static void main(String[] args) throws InterruptedException, IOException { if (args.length == 0) { throw new RuntimeException( - "Path to application configuration file needs to be specifed via CLI"); + "Path to application configuration file needs to be specified via CLI"); } final Injector i = diff --git a/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java b/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java index 4d822d8dbcd..791c871e59b 100644 --- a/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java +++ b/java/serving/src/main/java/feast/serving/config/ApplicationProperties.java @@ -21,6 +21,8 @@ // https://www.baeldung.com/configuration-properties-in-spring-boot // https://docs.spring.io/spring-boot/docs/current/reference/html/boot-features-external-config.html#boot-features-external-config-typesafe-configuration-properties +import com.fasterxml.jackson.annotation.JsonMerge; +import com.fasterxml.jackson.annotation.OptBoolean; import feast.common.logging.config.LoggingProperties; import feast.storage.connectors.redis.retriever.RedisClusterStoreConfig; import feast.storage.connectors.redis.retriever.RedisStoreConfig; @@ -42,22 +44,32 @@ public void setRegistry(String registry) { this.registry = registry; } - public void setRegistryRefreshInterval(int registryRefreshInterval) { - this.registryRefreshInterval = registryRefreshInterval; - } - @NotBlank private String registry; public String getRegistry() { return registry; } + 
@NotBlank private String project; + + public String getProject() { + return project; + } + + public void setProject(final String project) { + this.project = project; + } + private int registryRefreshInterval; public int getRegistryRefreshInterval() { return registryRefreshInterval; } + public void setRegistryRefreshInterval(int registryRefreshInterval) { + this.registryRefreshInterval = registryRefreshInterval; + } + /** * Finds and returns the active store * @@ -83,6 +95,7 @@ public void setActiveStore(String activeStore) { /** * Collection of store configurations. The active store is selected by the "activeStore" field. */ + @JsonMerge(OptBoolean.FALSE) private List stores = new ArrayList<>(); /* Metric tracing properties. */ @@ -134,38 +147,46 @@ public TracingProperties getTracing() { public LoggingProperties getLogging() { return logging; } - } - private FeastProperties feast; + private String gcpProject; - public void setFeast(FeastProperties feast) { - this.feast = feast; - } + public String getGcpProject() { + return gcpProject; + } - public FeastProperties getFeast() { - return feast; - } + public void setGcpProject(String gcpProject) { + this.gcpProject = gcpProject; + } - private String gcpProject; + public void setAwsRegion(String awsRegion) { + this.awsRegion = awsRegion; + } - public String getGcpProject() { - return gcpProject; - } + private String awsRegion; - public void setAwsRegion(String awsRegion) { - this.awsRegion = awsRegion; - } + public String getAwsRegion() { + return awsRegion; + } + + private String transformationServiceEndpoint; - private String awsRegion; + public String getTransformationServiceEndpoint() { + return transformationServiceEndpoint; + } - public String getAwsRegion() { - return awsRegion; + public void setTransformationServiceEndpoint(String transformationServiceEndpoint) { + this.transformationServiceEndpoint = transformationServiceEndpoint; + } } - private String transformationServiceEndpoint; + private FeastProperties 
feast; + + public void setFeast(FeastProperties feast) { + this.feast = feast; + } - public String getTransformationServiceEndpoint() { - return transformationServiceEndpoint; + public FeastProperties getFeast() { + return feast; } /** Store configuration class for database that this Feast Serving uses. */ @@ -177,6 +198,9 @@ public static class Store { private Map config = new HashMap<>(); + // default construct for deserialization + public Store() {} + public Store(String name, String type, Map config) { this.name = name; this.type = type; @@ -210,6 +234,10 @@ public StoreType getType() { return StoreType.valueOf(this.type); } + public void setType(String type) { + this.type = type; + } + /** * Gets the configuration to this specific store. This is a map of strings. These options are * unique to the store. Please see protos/feast/core/Store.proto for the store specific @@ -217,10 +245,6 @@ public StoreType getType() { * * @return Returns the store specific configuration */ - public Map getConfig() { - return config; - } - public RedisClusterStoreConfig getRedisClusterConfig() { return new RedisClusterStoreConfig( this.config.get("connection_string"), @@ -235,6 +259,10 @@ public RedisStoreConfig getRedisConfig() { Boolean.valueOf(this.config.getOrDefault("ssl", "false")), this.config.getOrDefault("password", "")); } + + public void setConfig(Map config) { + this.config = config; + } } public static class Server { @@ -243,6 +271,10 @@ public static class Server { public int getPort() { return port; } + + public void setPort(int port) { + this.port = port; + } } public static class GrpcServer { @@ -251,6 +283,10 @@ public static class GrpcServer { public Server getServer() { return server; } + + public void setServer(Server server) { + this.server = server; + } } public static class RestServer { @@ -259,6 +295,10 @@ public static class RestServer { public Server getServer() { return server; } + + public void setServer(Server server) { + this.server = server; + } } 
private GrpcServer grpc; @@ -268,10 +308,18 @@ public GrpcServer getGrpc() { return grpc; } + public void setGrpc(GrpcServer grpc) { + this.grpc = grpc; + } + public RestServer getRest() { return rest; } + public void setRest(RestServer rest) { + this.rest = rest; + } + public enum StoreType { REDIS, REDIS_CLUSTER; diff --git a/java/serving/src/main/java/feast/serving/config/ApplicationPropertiesModule.java b/java/serving/src/main/java/feast/serving/config/ApplicationPropertiesModule.java index f5a542137c8..07183fc7101 100644 --- a/java/serving/src/main/java/feast/serving/config/ApplicationPropertiesModule.java +++ b/java/serving/src/main/java/feast/serving/config/ApplicationPropertiesModule.java @@ -17,12 +17,15 @@ package feast.serving.config; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import com.google.common.io.Resources; import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; -import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; public class ApplicationPropertiesModule extends AbstractModule { private final String[] args; @@ -36,9 +39,37 @@ public ApplicationPropertiesModule(String[] args) { public ApplicationProperties provideApplicationProperties() throws IOException { ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); mapper.findAndRegisterModules(); - ApplicationProperties properties = - mapper.readValue(new File(this.args[0]), ApplicationProperties.class); + mapper.setDefaultMergeable(Boolean.TRUE); + + ApplicationProperties properties = new ApplicationProperties(); + ObjectReader objectReader = mapper.readerForUpdating(properties); + + String[] filePaths = this.args[0].split(","); + for (String filePath : filePaths) { + objectReader.readValue(readPropertiesFile(filePath)); + } return properties; } + + /** + * Read file 
path in spring compatible format, eg classpath:/application.yml or + * file:/path/application.yml + */ + private byte[] readPropertiesFile(String filePath) throws IOException { + if (filePath.startsWith("classpath:")) { + filePath = filePath.substring("classpath:".length()); + if (filePath.startsWith("/")) { + filePath = filePath.substring(1); + } + + return Resources.toByteArray(Resources.getResource(filePath)); + } + + if (filePath.startsWith("file")) { + filePath = filePath.substring("file:".length()); + } + + return Files.readAllBytes(Path.of(filePath)); + } } diff --git a/java/serving/src/main/java/feast/serving/config/RegistryConfig.java b/java/serving/src/main/java/feast/serving/config/RegistryConfig.java index d23ab374d85..3e7cbe3f1f9 100644 --- a/java/serving/src/main/java/feast/serving/config/RegistryConfig.java +++ b/java/serving/src/main/java/feast/serving/config/RegistryConfig.java @@ -31,7 +31,7 @@ public class RegistryConfig extends AbstractModule { @Provides Storage googleStorage(ApplicationProperties applicationProperties) { return StorageOptions.newBuilder() - .setProjectId(applicationProperties.getGcpProject()) + .setProjectId(applicationProperties.getFeast().getGcpProject()) .build() .getService(); } @@ -39,7 +39,7 @@ Storage googleStorage(ApplicationProperties applicationProperties) { @Provides public AmazonS3 awsStorage(ApplicationProperties applicationProperties) { return AmazonS3ClientBuilder.standard() - .withRegion(applicationProperties.getAwsRegion()) + .withRegion(applicationProperties.getFeast().getAwsRegion()) .build(); } diff --git a/java/serving/src/main/java/feast/serving/config/ServerModule.java b/java/serving/src/main/java/feast/serving/config/ServerModule.java index 6993857935b..5428306f2b7 100644 --- a/java/serving/src/main/java/feast/serving/config/ServerModule.java +++ b/java/serving/src/main/java/feast/serving/config/ServerModule.java @@ -17,9 +17,13 @@ package feast.serving.config; import com.google.inject.AbstractModule; 
+import com.google.inject.Provides; +import feast.serving.controller.HealthServiceController; import feast.serving.grpc.OnlineServingGrpcServiceV2; +import feast.serving.service.ServingServiceV2; import io.grpc.Server; import io.grpc.ServerBuilder; +import io.grpc.health.v1.HealthGrpc; import io.grpc.protobuf.services.ProtoReflectionService; import io.opentracing.contrib.grpc.TracingServerInterceptor; @@ -30,17 +34,24 @@ protected void configure() { bind(OnlineServingGrpcServiceV2.class); } - // @Provides + @Provides public Server provideGrpcServer( ApplicationProperties applicationProperties, OnlineServingGrpcServiceV2 onlineServingGrpcServiceV2, - TracingServerInterceptor tracingServerInterceptor) { + TracingServerInterceptor tracingServerInterceptor, + HealthGrpc.HealthImplBase healthImplBase) { ServerBuilder serverBuilder = ServerBuilder.forPort(applicationProperties.getGrpc().getServer().getPort()); serverBuilder .addService(ProtoReflectionService.newInstance()) - .addService(tracingServerInterceptor.intercept(onlineServingGrpcServiceV2)); + .addService(tracingServerInterceptor.intercept(onlineServingGrpcServiceV2)) + .addService(healthImplBase); return serverBuilder.build(); } + + @Provides + public HealthGrpc.HealthImplBase healthService(ServingServiceV2 servingServiceV2) { + return new HealthServiceController(servingServiceV2); + } } diff --git a/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java b/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java deleted file mode 100644 index ce4fe134373..00000000000 --- a/java/serving/src/main/java/feast/serving/config/ServingApiConfiguration.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.config; - -import java.util.List; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.protobuf.ProtobufJsonFormatHttpMessageConverter; -import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; - -@Configuration -public class ServingApiConfiguration implements WebMvcConfigurer { - @Autowired private ProtobufJsonFormatHttpMessageConverter protobufConverter; - - @Bean - ProtobufJsonFormatHttpMessageConverter protobufHttpMessageConverter() { - return new ProtobufJsonFormatHttpMessageConverter(); - } - - @Override - public void configureMessageConverters(List> converters) { - converters.add(protobufConverter); - } -} diff --git a/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java b/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java index 52d7d1c8d61..4ea0692ccd5 100644 --- a/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java +++ b/java/serving/src/main/java/feast/serving/config/ServingServiceConfigV2.java @@ -44,12 +44,20 @@ public ServingServiceV2 registryBasedServingServiceV2( case REDIS_CLUSTER: RedisClientAdapter redisClusterClient = RedisClusterClient.create(store.getRedisClusterConfig()); - retrieverV2 = new OnlineRetriever(redisClusterClient, new EntityKeySerializerV2()); + retrieverV2 = + new 
OnlineRetriever( + applicationProperties.getFeast().getProject(), + redisClusterClient, + new EntityKeySerializerV2()); break; case REDIS: RedisClientAdapter redisClient = RedisClient.create(store.getRedisConfig()); log.info("Created EntityKeySerializerV2"); - retrieverV2 = new OnlineRetriever(redisClient, new EntityKeySerializerV2()); + retrieverV2 = + new OnlineRetriever( + applicationProperties.getFeast().getProject(), + redisClient, + new EntityKeySerializerV2()); break; default: throw new RuntimeException( @@ -60,14 +68,18 @@ public ServingServiceV2 registryBasedServingServiceV2( log.info("Working Directory = " + System.getProperty("user.dir")); - final String transformationServiceEndpoint = - applicationProperties.getTransformationServiceEndpoint(); final OnlineTransformationService onlineTransformationService = - new OnlineTransformationService(transformationServiceEndpoint, registryRepository); + new OnlineTransformationService( + applicationProperties.getFeast().getTransformationServiceEndpoint(), + registryRepository); servingService = new OnlineServingServiceV2( - retrieverV2, tracer, registryRepository, onlineTransformationService); + retrieverV2, + tracer, + registryRepository, + onlineTransformationService, + applicationProperties.getFeast().getProject()); return servingService; } diff --git a/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java b/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java deleted file mode 100644 index 04d3f4b5afb..00000000000 --- a/java/serving/src/main/java/feast/serving/config/WebSecurityConfig.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2020 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.config; - -import org.springframework.context.annotation.Configuration; -import org.springframework.security.config.annotation.web.builders.HttpSecurity; -import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity; -import org.springframework.security.config.annotation.web.configuration.WebSecurityConfigurerAdapter; - -/** - * WebSecurityConfig disables auto configuration of Spring HTTP Security and allows security methods - * to be overridden - */ -@Configuration -@EnableWebSecurity -public class WebSecurityConfig extends WebSecurityConfigurerAdapter { - - /** - * Allows for custom web security rules to be applied. 
- * - * @param http {@link HttpSecurity} for configuring web based security - * @throws Exception exception - */ - @Override - protected void configure(HttpSecurity http) throws Exception { - - // Bypasses security/authentication for the following paths - http.authorizeRequests() - .antMatchers("/actuator/**", "/metrics/**") - .permitAll() - .anyRequest() - .authenticated() - .and() - .csrf() - .disable(); - } -} diff --git a/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java b/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java index ef675d4c157..2f98ae032f9 100644 --- a/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java +++ b/java/serving/src/main/java/feast/serving/controller/HealthServiceController.java @@ -16,24 +16,20 @@ */ package feast.serving.controller; +import com.google.inject.Inject; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.serving.interceptors.GrpcMonitoringInterceptor; import feast.serving.service.ServingServiceV2; import io.grpc.health.v1.HealthGrpc.HealthImplBase; import io.grpc.health.v1.HealthProto.HealthCheckRequest; import io.grpc.health.v1.HealthProto.HealthCheckResponse; import io.grpc.health.v1.HealthProto.ServingStatus; import io.grpc.stub.StreamObserver; -import net.devh.boot.grpc.server.service.GrpcService; -import org.springframework.beans.factory.annotation.Autowired; // Reference: https://github.com/grpc/grpc/blob/master/doc/health-checking.md - -@GrpcService(interceptors = {GrpcMonitoringInterceptor.class}) public class HealthServiceController extends HealthImplBase { private final ServingServiceV2 servingService; - @Autowired + @Inject public HealthServiceController(final ServingServiceV2 servingService) { this.servingService = servingService; } diff --git a/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java 
b/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java deleted file mode 100644 index 0a406930e6c..00000000000 --- a/java/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.controller; - -import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingServiceGrpc.ServingServiceImplBase; -import feast.serving.config.ApplicationProperties; -import feast.serving.exception.SpecRetrievalException; -import feast.serving.interceptors.GrpcMonitoringContext; -import feast.serving.service.ServingServiceV2; -import feast.serving.util.RequestHelper; -import io.grpc.Status; -import io.grpc.stub.StreamObserver; -import io.opentracing.Span; -import io.opentracing.Tracer; -import org.slf4j.Logger; - -public class ServingServiceGRpcController extends ServingServiceImplBase { - - private static final Logger log = - org.slf4j.LoggerFactory.getLogger(ServingServiceGRpcController.class); - private final ServingServiceV2 servingServiceV2; - private final String version; - private final Tracer tracer; 
- - public ServingServiceGRpcController( - ServingServiceV2 servingServiceV2, - ApplicationProperties applicationProperties, - Tracer tracer) { - this.servingServiceV2 = servingServiceV2; - this.version = applicationProperties.getFeast().getVersion(); - this.tracer = tracer; - } - - @Override - public void getFeastServingInfo( - GetFeastServingInfoRequest request, - StreamObserver responseObserver) { - GetFeastServingInfoResponse feastServingInfo = servingServiceV2.getFeastServingInfo(request); - feastServingInfo = feastServingInfo.toBuilder().setVersion(version).build(); - responseObserver.onNext(feastServingInfo); - responseObserver.onCompleted(); - } - - @Override - public void getOnlineFeaturesV2( - ServingAPIProto.GetOnlineFeaturesRequestV2 request, - StreamObserver responseObserver) { - try { - // authorize for the project in request object. - request.getProject(); - if (!request.getProject().isEmpty()) { - // update monitoring context - GrpcMonitoringContext.getInstance().setProject(request.getProject()); - } - RequestHelper.validateOnlineRequest(request); - Span span = tracer.buildSpan("getOnlineFeaturesV2").start(); - GetOnlineFeaturesResponse onlineFeatures = servingServiceV2.getOnlineFeatures(request); - if (span != null) { - span.finish(); - } - responseObserver.onNext(onlineFeatures); - responseObserver.onCompleted(); - } catch (SpecRetrievalException e) { - log.error("Failed to retrieve specs from Registry", e); - responseObserver.onError( - Status.NOT_FOUND.withDescription(e.getMessage()).withCause(e).asException()); - } catch (Exception e) { - log.warn("Failed to get Online Features", e); - responseObserver.onError( - Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); - } - } -} diff --git a/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java b/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java deleted file mode 100644 index 2f446adf675..00000000000 
--- a/java/serving/src/main/java/feast/serving/controller/ServingServiceRestController.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.serving.controller; - -import static feast.serving.util.mappers.ResponseJSONMapper.mapGetOnlineFeaturesResponse; - -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.serving.config.ApplicationProperties; -import feast.serving.service.ServingServiceV2; -import feast.serving.util.RequestHelper; -import java.util.List; -import java.util.Map; -import org.springframework.web.bind.annotation.RequestBody; -import org.springframework.web.bind.annotation.RequestMapping; - -public class ServingServiceRestController { - - private final ServingServiceV2 servingService; - private final String version; - - public ServingServiceRestController( - ServingServiceV2 servingService, ApplicationProperties applicationProperties) { - this.servingService = servingService; - this.version = applicationProperties.getFeast().getVersion(); - } - - @RequestMapping(value = "/api/v1/info", produces = "application/json") - public GetFeastServingInfoResponse getInfo() { - 
GetFeastServingInfoResponse feastServingInfo = - servingService.getFeastServingInfo(GetFeastServingInfoRequest.getDefaultInstance()); - return feastServingInfo.toBuilder().setVersion(version).build(); - } - - @RequestMapping( - value = "/api/v1/features/online", - produces = "application/json", - consumes = "application/json") - public List> getOnlineFeatures( - @RequestBody GetOnlineFeaturesRequestV2 request) { - RequestHelper.validateOnlineRequest(request); - GetOnlineFeaturesResponse onlineFeatures = servingService.getOnlineFeatures(request); - return mapGetOnlineFeaturesResponse(onlineFeatures); - } -} diff --git a/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java b/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java index 68a17539abb..fe024404f33 100644 --- a/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java +++ b/java/serving/src/main/java/feast/serving/grpc/OnlineServingGrpcServiceV2.java @@ -19,11 +19,15 @@ import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingServiceGrpc; import feast.serving.service.ServingServiceV2; +import io.grpc.Status; import io.grpc.stub.StreamObserver; import javax.inject.Inject; +import org.slf4j.Logger; public class OnlineServingGrpcServiceV2 extends ServingServiceGrpc.ServingServiceImplBase { private final ServingServiceV2 servingServiceV2; + private static final Logger log = + org.slf4j.LoggerFactory.getLogger(OnlineServingGrpcServiceV2.class); @Inject OnlineServingGrpcServiceV2(ServingServiceV2 servingServiceV2) { @@ -34,15 +38,27 @@ public class OnlineServingGrpcServiceV2 extends ServingServiceGrpc.ServingServic public void getFeastServingInfo( ServingAPIProto.GetFeastServingInfoRequest request, StreamObserver responseObserver) { - responseObserver.onNext(this.servingServiceV2.getFeastServingInfo(request)); - responseObserver.onCompleted(); + try { + 
responseObserver.onNext(this.servingServiceV2.getFeastServingInfo(request)); + responseObserver.onCompleted(); + } catch (RuntimeException e) { + log.warn("Failed to get Serving Info", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } } @Override - public void getOnlineFeaturesV2( - ServingAPIProto.GetOnlineFeaturesRequestV2 request, + public void getOnlineFeatures( + ServingAPIProto.GetOnlineFeaturesRequest request, StreamObserver responseObserver) { - responseObserver.onNext(this.servingServiceV2.getOnlineFeatures(request)); - responseObserver.onCompleted(); + try { + responseObserver.onNext(this.servingServiceV2.getOnlineFeatures(request)); + responseObserver.onCompleted(); + } catch (RuntimeException e) { + log.warn("Failed to get Online Features", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } } } diff --git a/java/serving/src/main/java/feast/serving/modules/ServerModule.java b/java/serving/src/main/java/feast/serving/modules/ServerModule.java deleted file mode 100644 index 29d1f574321..00000000000 --- a/java/serving/src/main/java/feast/serving/modules/ServerModule.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2021 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.serving.modules; - -public class ServerModule {} diff --git a/java/serving/src/main/java/feast/serving/registry/Registry.java b/java/serving/src/main/java/feast/serving/registry/Registry.java index 144135c3cae..37fae3d8dcb 100644 --- a/java/serving/src/main/java/feast/serving/registry/Registry.java +++ b/java/serving/src/main/java/feast/serving/registry/Registry.java @@ -16,10 +16,7 @@ */ package feast.serving.registry; -import feast.proto.core.FeatureProto; -import feast.proto.core.FeatureViewProto; -import feast.proto.core.OnDemandFeatureViewProto; -import feast.proto.core.RegistryProto; +import feast.proto.core.*; import feast.proto.serving.ServingAPIProto; import feast.serving.exception.SpecRetrievalException; import java.util.List; @@ -32,6 +29,7 @@ public class Registry { private Map featureViewNameToSpec; private Map onDemandFeatureViewNameToSpec; + private Map featureServiceNameToSpec; Registry(RegistryProto.Registry registry) { this.registry = registry; @@ -53,6 +51,12 @@ public class Registry { Collectors.toMap( OnDemandFeatureViewProto.OnDemandFeatureViewSpec::getName, Function.identity())); + this.featureServiceNameToSpec = + registry.getFeatureServicesList().stream() + .map(fs -> fs.getSpec()) + .collect( + Collectors.toMap( + FeatureServiceProto.FeatureServiceSpec::getName, Function.identity())); } public RegistryProto.Registry getRegistry() { @@ -60,8 +64,8 @@ public RegistryProto.Registry getRegistry() { } public FeatureViewProto.FeatureViewSpec getFeatureViewSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - String featureViewName = featureReference.getFeatureTable(); + ServingAPIProto.FeatureReferenceV2 featureReference) { + String featureViewName = featureReference.getFeatureViewName(); if (featureViewNameToSpec.containsKey(featureViewName)) { return featureViewNameToSpec.get(featureViewName); } @@ -70,11 +74,10 @@ public FeatureViewProto.FeatureViewSpec getFeatureViewSpec( } public 
FeatureProto.FeatureSpecV2 getFeatureSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - final FeatureViewProto.FeatureViewSpec spec = - this.getFeatureViewSpec(projectName, featureReference); + ServingAPIProto.FeatureReferenceV2 featureReference) { + final FeatureViewProto.FeatureViewSpec spec = this.getFeatureViewSpec(featureReference); for (final FeatureProto.FeatureSpecV2 featureSpec : spec.getFeaturesList()) { - if (featureSpec.getName().equals(featureReference.getName())) { + if (featureSpec.getName().equals(featureReference.getFeatureName())) { return featureSpec; } } @@ -82,12 +85,12 @@ public FeatureProto.FeatureSpecV2 getFeatureSpec( throw new SpecRetrievalException( String.format( "Unable to find feature with name: %s in feature view: %s", - featureReference.getName(), featureReference.getFeatureTable())); + featureReference.getFeatureName(), featureReference.getFeatureViewName())); } public OnDemandFeatureViewProto.OnDemandFeatureViewSpec getOnDemandFeatureViewSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - String onDemandFeatureViewName = featureReference.getFeatureTable(); + ServingAPIProto.FeatureReferenceV2 featureReference) { + String onDemandFeatureViewName = featureReference.getFeatureViewName(); if (onDemandFeatureViewNameToSpec.containsKey(onDemandFeatureViewName)) { return onDemandFeatureViewNameToSpec.get(onDemandFeatureViewName); } @@ -97,7 +100,16 @@ public OnDemandFeatureViewProto.OnDemandFeatureViewSpec getOnDemandFeatureViewSp } public boolean isOnDemandFeatureReference(ServingAPIProto.FeatureReferenceV2 featureReference) { - String onDemandFeatureViewName = featureReference.getFeatureTable(); + String onDemandFeatureViewName = featureReference.getFeatureViewName(); return onDemandFeatureViewNameToSpec.containsKey(onDemandFeatureViewName); } + + public FeatureServiceProto.FeatureServiceSpec getFeatureServiceSpec(String name) { + FeatureServiceProto.FeatureServiceSpec 
spec = featureServiceNameToSpec.get(name); + if (spec == null) { + throw new SpecRetrievalException( + String.format("Unable to find feature service with name: %s", name)); + } + return spec; + } } diff --git a/java/serving/src/main/java/feast/serving/registry/RegistryRepository.java b/java/serving/src/main/java/feast/serving/registry/RegistryRepository.java index 23c204b5822..369493ee0fe 100644 --- a/java/serving/src/main/java/feast/serving/registry/RegistryRepository.java +++ b/java/serving/src/main/java/feast/serving/registry/RegistryRepository.java @@ -18,6 +18,7 @@ import com.google.protobuf.Duration; import feast.proto.core.FeatureProto; +import feast.proto.core.FeatureServiceProto; import feast.proto.core.FeatureViewProto; import feast.proto.core.OnDemandFeatureViewProto; import feast.proto.core.RegistryProto; @@ -72,31 +73,33 @@ private void refresh() { } public FeatureViewProto.FeatureViewSpec getFeatureViewSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - return this.registry.getFeatureViewSpec(projectName, featureReference); + ServingAPIProto.FeatureReferenceV2 featureReference) { + return this.registry.getFeatureViewSpec(featureReference); } public FeatureProto.FeatureSpecV2 getFeatureSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - return this.registry.getFeatureSpec(projectName, featureReference); + ServingAPIProto.FeatureReferenceV2 featureReference) { + return this.registry.getFeatureSpec(featureReference); } public OnDemandFeatureViewProto.OnDemandFeatureViewSpec getOnDemandFeatureViewSpec( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - return this.registry.getOnDemandFeatureViewSpec(projectName, featureReference); + ServingAPIProto.FeatureReferenceV2 featureReference) { + return this.registry.getOnDemandFeatureViewSpec(featureReference); } public boolean isOnDemandFeatureReference(ServingAPIProto.FeatureReferenceV2 featureReference) { return 
this.registry.isOnDemandFeatureReference(featureReference); } - public Duration getMaxAge( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - return getFeatureViewSpec(projectName, featureReference).getTtl(); + public FeatureServiceProto.FeatureServiceSpec getFeatureServiceSpec(String name) { + return this.registry.getFeatureServiceSpec(name); } - public List getEntitiesList( - String projectName, ServingAPIProto.FeatureReferenceV2 featureReference) { - return getFeatureViewSpec(projectName, featureReference).getEntitiesList(); + public Duration getMaxAge(ServingAPIProto.FeatureReferenceV2 featureReference) { + return getFeatureViewSpec(featureReference).getTtl(); + } + + public List getEntitiesList(ServingAPIProto.FeatureReferenceV2 featureReference) { + return getFeatureViewSpec(featureReference).getEntitiesList(); } } diff --git a/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java b/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java index 486e2ca39c6..4b122a5de03 100644 --- a/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java +++ b/java/serving/src/main/java/feast/serving/registry/S3RegistryFile.java @@ -33,7 +33,8 @@ public S3RegistryFile(AmazonS3 s3Client, String url) { this.s3Client = s3Client; String[] split = url.replace("s3://", "").split("/"); - this.s3Object = this.s3Client.getObject(split[0], split[1]); + String objectPath = String.join("/", java.util.Arrays.copyOfRange(split, 1, split.length)); + this.s3Object = this.s3Client.getObject(split[0], objectPath); } @Override diff --git a/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java b/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java index 2d5621e4b4f..f4e330fbf73 100644 --- a/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java +++ b/java/serving/src/main/java/feast/serving/service/OnlineServingServiceV2.java @@ -16,32 +16,29 @@ */ package 
feast.serving.service; -import static feast.common.models.FeatureTable.getFeatureTableStringRef; - +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.protobuf.Duration; -import feast.common.models.FeatureV2; -import feast.proto.serving.ServingAPIProto.FeastServingType; +import com.google.protobuf.Timestamp; +import feast.common.models.Feature; +import feast.proto.core.FeatureServiceProto; +import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; +import feast.proto.serving.ServingAPIProto.FieldStatus; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoRequest; import feast.proto.serving.ServingAPIProto.GetFeastServingInfoResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesRequest; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesResponse; import feast.proto.serving.TransformationServiceAPIProto.ValueType; import feast.proto.types.ValueProto; -import feast.serving.exception.SpecRetrievalException; import feast.serving.registry.RegistryRepository; import feast.serving.util.Metrics; -import feast.storage.api.retriever.Feature; import feast.storage.api.retriever.OnlineRetrieverV2; import io.grpc.Status; import io.opentracing.Span; import io.opentracing.Tracer; import java.util.*; -import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; @@ -52,126 +49,78 @@ public class OnlineServingServiceV2 implements ServingServiceV2 { private final OnlineRetrieverV2 retriever; private final RegistryRepository registryRepository; private final OnlineTransformationService onlineTransformationService; + private final String project; public OnlineServingServiceV2( 
OnlineRetrieverV2 retriever, Tracer tracer, RegistryRepository registryRepository, - OnlineTransformationService onlineTransformationService) { + OnlineTransformationService onlineTransformationService, + String project) { this.retriever = retriever; this.tracer = tracer; this.registryRepository = registryRepository; this.onlineTransformationService = onlineTransformationService; + this.project = project; } /** {@inheritDoc} */ @Override public GetFeastServingInfoResponse getFeastServingInfo( GetFeastServingInfoRequest getFeastServingInfoRequest) { - return GetFeastServingInfoResponse.newBuilder() - .setType(FeastServingType.FEAST_SERVING_TYPE_ONLINE) - .build(); + return GetFeastServingInfoResponse.getDefaultInstance(); } @Override - public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequestV2 request) { - // Autofill default project if project is not specified - String projectName = request.getProject(); - if (projectName.isEmpty()) { - projectName = "default"; - } - + public ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures( + ServingAPIProto.GetOnlineFeaturesRequest request) { // Split all feature references into non-ODFV (e.g. batch and stream) references and ODFV. - List allFeatureReferences = request.getFeaturesList(); - List featureReferences = + List allFeatureReferences = getFeaturesList(request); + List retrievedFeatureReferences = allFeatureReferences.stream() .filter(r -> !this.registryRepository.isOnDemandFeatureReference(r)) .collect(Collectors.toList()); + int userRequestedFeaturesSize = retrievedFeatureReferences.size(); + List onDemandFeatureReferences = allFeatureReferences.stream() .filter(r -> this.registryRepository.isOnDemandFeatureReference(r)) .collect(Collectors.toList()); - // Get the set of request data feature names and feature inputs from the ODFV references. 
- Pair, List> pair = - this.onlineTransformationService.extractRequestDataFeatureNamesAndOnDemandFeatureInputs( - onDemandFeatureReferences, projectName); - Set requestDataFeatureNames = pair.getLeft(); - List onDemandFeatureInputs = pair.getRight(); + // ToDo (pyalex): refactor transformation service to delete unused left part of the returned + // Pair from extractRequestDataFeatureNamesAndOnDemandFeatureInputs. + // Currently, we can retrieve context variables directly from GetOnlineFeaturesRequest. + List onDemandFeatureInputs = + this.onlineTransformationService.extractOnDemandFeaturesDependencies( + onDemandFeatureReferences); // Add on demand feature inputs to list of feature references to retrieve. - Set addedFeatureReferences = new HashSet(); for (FeatureReferenceV2 onDemandFeatureInput : onDemandFeatureInputs) { - if (!featureReferences.contains(onDemandFeatureInput)) { - featureReferences.add(onDemandFeatureInput); - addedFeatureReferences.add(onDemandFeatureInput); + if (!retrievedFeatureReferences.contains(onDemandFeatureInput)) { + retrievedFeatureReferences.add(onDemandFeatureInput); } } - // Separate entity rows into entity data and request feature data. - Pair, Map>> - entityRowsAndRequestDataFeatures = - this.onlineTransformationService.separateEntityRows(requestDataFeatureNames, request); - List entityRows = - entityRowsAndRequestDataFeatures.getLeft(); - Map> requestDataFeatures = - entityRowsAndRequestDataFeatures.getRight(); - // TODO: error checking on lengths of lists in entityRows and requestDataFeatures - - // Extract values and statuses to be used later in constructing FieldValues for the response. - // The online features retrieved will augment these two data structures. 
- List> values = - entityRows.stream().map(r -> new HashMap<>(r.getFieldsMap())).collect(Collectors.toList()); - List> statuses = - entityRows.stream() - .map( - r -> - r.getFieldsMap().entrySet().stream() - .map(entry -> Pair.of(entry.getKey(), getMetadata(entry.getValue(), false))) - .collect(Collectors.toMap(Pair::getLeft, Pair::getRight))) - .collect(Collectors.toList()); + List> entityRows = getEntityRows(request); - String finalProjectName = projectName; - Map featureMaxAges = - featureReferences.stream() - .distinct() - .collect( - Collectors.toMap( - Function.identity(), - ref -> this.registryRepository.getMaxAge(finalProjectName, ref))); - List entityNames = - featureReferences.stream() - .map(ref -> this.registryRepository.getEntitiesList(finalProjectName, ref)) - .findFirst() - .get(); - - Map featureValueTypes = - featureReferences.stream() - .distinct() - .collect( - Collectors.toMap( - Function.identity(), - ref -> { - try { - return this.registryRepository - .getFeatureSpec(finalProjectName, ref) - .getValueType(); - } catch (SpecRetrievalException e) { - return ValueProto.ValueType.Enum.INVALID; - } - })); + List entityNames; + if (retrievedFeatureReferences.size() > 0) { + entityNames = this.registryRepository.getEntitiesList(retrievedFeatureReferences.get(0)); + } else { + throw new RuntimeException("Requested features list must not be empty"); + } Span storageRetrievalSpan = tracer.buildSpan("storageRetrieval").start(); if (storageRetrievalSpan != null) { storageRetrievalSpan.setTag("entities", entityRows.size()); - storageRetrievalSpan.setTag("features", featureReferences.size()); + storageRetrievalSpan.setTag("features", retrievedFeatureReferences.size()); } - List> features = - retriever.getOnlineFeatures(projectName, entityRows, featureReferences, entityNames); + List> features = + retriever.getOnlineFeatures(entityRows, retrievedFeatureReferences, entityNames); + if (storageRetrievalSpan != null) { storageRetrievalSpan.finish(); } - if 
(features.size() != entityRows.size()) { throw Status.INTERNAL .withDescription( @@ -182,132 +131,193 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequestV2 re Span postProcessingSpan = tracer.buildSpan("postProcessing").start(); - for (int i = 0; i < entityRows.size(); i++) { - GetOnlineFeaturesRequestV2.EntityRow entityRow = entityRows.get(i); - Map featureRow = features.get(i); + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder = + ServingAPIProto.GetOnlineFeaturesResponse.newBuilder(); - Map rowValues = values.get(i); - Map rowStatuses = statuses.get(i); + Timestamp now = Timestamp.newBuilder().setSeconds(System.currentTimeMillis() / 1000).build(); + Timestamp nullTimestamp = Timestamp.newBuilder().build(); + ValueProto.Value nullValue = ValueProto.Value.newBuilder().build(); - for (FeatureReferenceV2 featureReference : featureReferences) { - if (featureRow.containsKey(featureReference)) { - Feature feature = featureRow.get(featureReference); + for (int featureIdx = 0; featureIdx < userRequestedFeaturesSize; featureIdx++) { + FeatureReferenceV2 featureReference = retrievedFeatureReferences.get(featureIdx); - ValueProto.Value value = feature.getFeatureValue(featureValueTypes.get(featureReference)); + ValueProto.ValueType.Enum valueType = + this.registryRepository.getFeatureSpec(featureReference).getValueType(); - Boolean isOutsideMaxAge = - checkOutsideMaxAge(feature, entityRow, featureMaxAges.get(featureReference)); + Duration maxAge = this.registryRepository.getMaxAge(featureReference); - if (value != null) { - rowValues.put(FeatureV2.getFeatureStringRef(featureReference), value); - } else { - rowValues.put( - FeatureV2.getFeatureStringRef(featureReference), - ValueProto.Value.newBuilder().build()); - } + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector.Builder vectorBuilder = + responseBuilder.addResultsBuilder(); - rowStatuses.put( - FeatureV2.getFeatureStringRef(featureReference), getMetadata(value, 
isOutsideMaxAge)); - } else { - rowValues.put( - FeatureV2.getFeatureStringRef(featureReference), - ValueProto.Value.newBuilder().build()); + for (int rowIdx = 0; rowIdx < features.size(); rowIdx++) { + feast.storage.api.retriever.Feature feature = features.get(rowIdx).get(featureIdx); + if (feature == null) { + vectorBuilder.addValues(nullValue); + vectorBuilder.addStatuses(FieldStatus.NOT_FOUND); + vectorBuilder.addEventTimestamps(nullTimestamp); + continue; + } - rowStatuses.put( - FeatureV2.getFeatureStringRef(featureReference), getMetadata(null, false)); + ValueProto.Value featureValue = feature.getFeatureValue(valueType); + if (featureValue == null) { + vectorBuilder.addValues(nullValue); + vectorBuilder.addStatuses(FieldStatus.NOT_FOUND); + vectorBuilder.addEventTimestamps(nullTimestamp); + continue; } + + vectorBuilder.addValues(featureValue); + vectorBuilder.addStatuses( + getFeatureStatus(featureValue, checkOutsideMaxAge(feature, now, maxAge))); + vectorBuilder.addEventTimestamps(feature.getEventTimestamp()); } - // Populate metrics/log request - populateCountMetrics(rowStatuses, projectName); + + populateCountMetrics(featureReference, vectorBuilder); } + responseBuilder.setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + ServingAPIProto.FeatureList.newBuilder() + .addAllVal( + retrievedFeatureReferences.stream() + .map(Feature::getFeatureReference) + .collect(Collectors.toList())))); + if (postProcessingSpan != null) { postProcessingSpan.finish(); } - populateHistogramMetrics(entityRows, featureReferences, projectName); - populateFeatureCountMetrics(featureReferences, projectName); - - // Handle ODFVs. For each ODFV reference, we send a TransformFeaturesRequest to the FTS. - // The request should contain the entity data, the retrieved features, and the request data. 
if (!onDemandFeatureReferences.isEmpty()) { - // Augment values, which contains the entity data and retrieved features, with the request - // data. Also augment statuses. - for (int i = 0; i < values.size(); i++) { - Map rowValues = values.get(i); - Map rowStatuses = statuses.get(i); - - for (Map.Entry> entry : requestDataFeatures.entrySet()) { - String key = entry.getKey(); - List fieldValues = entry.getValue(); - rowValues.put(key, fieldValues.get(i)); - rowStatuses.put(key, GetOnlineFeaturesResponse.FieldStatus.PRESENT); - } - } + // Handle ODFVs. For each ODFV reference, we send a TransformFeaturesRequest to the FTS. + // The request should contain the entity data, the retrieved features, and the request context + // data. + this.populateOnDemandFeatures( + onDemandFeatureReferences, + onDemandFeatureInputs, + retrievedFeatureReferences, + request, + features, + responseBuilder); + } - // Serialize the augmented values. - ValueType transformationInput = - this.onlineTransformationService.serializeValuesIntoArrowIPC(values); - - // Send out requests to the FTS and process the responses. 
- Set onDemandFeatureStringReferences = - onDemandFeatureReferences.stream() - .map(r -> FeatureV2.getFeatureStringRef(r)) - .collect(Collectors.toSet()); - for (FeatureReferenceV2 featureReference : onDemandFeatureReferences) { - String onDemandFeatureViewName = featureReference.getFeatureTable(); - TransformFeaturesRequest transformFeaturesRequest = - TransformFeaturesRequest.newBuilder() - .setOnDemandFeatureViewName(onDemandFeatureViewName) - .setProject(projectName) - .setTransformationInput(transformationInput) - .build(); - - TransformFeaturesResponse transformFeaturesResponse = - this.onlineTransformationService.transformFeatures(transformFeaturesRequest); - - this.onlineTransformationService.processTransformFeaturesResponse( - transformFeaturesResponse, - onDemandFeatureViewName, - onDemandFeatureStringReferences, - values, - statuses); - } + populateHistogramMetrics(entityRows, retrievedFeatureReferences); + populateFeatureCountMetrics(retrievedFeatureReferences); - // Remove all features that were added as inputs for ODFVs. 
- Set addedFeatureStringReferences = - addedFeatureReferences.stream() - .map(r -> FeatureV2.getFeatureStringRef(r)) - .collect(Collectors.toSet()); - for (int i = 0; i < values.size(); i++) { - Map rowValues = values.get(i); - Map rowStatuses = statuses.get(i); - List keysToRemove = - rowValues.keySet().stream() - .filter(k -> addedFeatureStringReferences.contains(k)) - .collect(Collectors.toList()); - for (String key : keysToRemove) { - rowValues.remove(key); - rowStatuses.remove(key); + return responseBuilder.build(); + } + + private List getFeaturesList( + ServingAPIProto.GetOnlineFeaturesRequest request) { + if (request.getFeatures().getValCount() > 0) { + return request.getFeatures().getValList().stream() + .map(Feature::parseFeatureReference) + .collect(Collectors.toList()); + } + + FeatureServiceProto.FeatureServiceSpec featureServiceSpec = + this.registryRepository.getFeatureServiceSpec(request.getFeatureService()); + + return featureServiceSpec.getFeaturesList().stream() + .flatMap( + featureViewProjection -> + featureViewProjection.getFeatureColumnsList().stream() + .map( + f -> + FeatureReferenceV2.newBuilder() + .setFeatureViewName(featureViewProjection.getFeatureViewName()) + .setFeatureName(f.getName()) + .build())) + .collect(Collectors.toList()); + } + + private List> getEntityRows( + ServingAPIProto.GetOnlineFeaturesRequest request) { + if (request.getEntitiesCount() == 0) { + throw new RuntimeException("Entities map shouldn't be empty"); + } + + Set entityNames = request.getEntitiesMap().keySet(); + String firstEntity = entityNames.stream().findFirst().get(); + int rowsCount = request.getEntitiesMap().get(firstEntity).getValCount(); + List> entityRows = Lists.newArrayListWithExpectedSize(rowsCount); + + for (Map.Entry entity : request.getEntitiesMap().entrySet()) { + for (int i = 0; i < rowsCount; i++) { + if (entityRows.size() < i + 1) { + entityRows.add(i, Maps.newHashMapWithExpectedSize(entityNames.size())); } + + 
entityRows.get(i).put(entity.getKey(), entity.getValue().getVal(i)); } } - // Build response field values from entityValuesMap and entityStatusesMap - // Response field values should be in the same order as the entityRows provided by the user. - List fieldValuesList = - IntStream.range(0, entityRows.size()) - .mapToObj( - entityRowIdx -> - GetOnlineFeaturesResponse.FieldValues.newBuilder() - .putAllFields(values.get(entityRowIdx)) - .putAllStatuses(statuses.get(entityRowIdx)) - .build()) + return entityRows; + } + + private void populateOnDemandFeatures( + List onDemandFeatureReferences, + List onDemandFeatureInputs, + List retrievedFeatureReferences, + ServingAPIProto.GetOnlineFeaturesRequest request, + List> features, + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder) { + + List>> onDemandContext = + request.getRequestContextMap().entrySet().stream() + .map(e -> Pair.of(e.getKey(), e.getValue().getValList())) .collect(Collectors.toList()); - return GetOnlineFeaturesResponse.newBuilder().addAllFieldValues(fieldValuesList).build(); - } + for (int featureIdx = 0; featureIdx < retrievedFeatureReferences.size(); featureIdx++) { + FeatureReferenceV2 featureReference = retrievedFeatureReferences.get(featureIdx); + if (!onDemandFeatureInputs.contains(featureReference)) { + continue; + } + + ValueProto.ValueType.Enum valueType = + this.registryRepository.getFeatureSpec(featureReference).getValueType(); + + List valueList = Lists.newArrayListWithExpectedSize(features.size()); + for (int rowIdx = 0; rowIdx < features.size(); rowIdx++) { + valueList.add(features.get(rowIdx).get(featureIdx).getFeatureValue(valueType)); + } + + onDemandContext.add( + Pair.of( + String.format( + "%s__%s", + featureReference.getFeatureViewName(), featureReference.getFeatureName()), + valueList)); + } + // Serialize the augmented values. 
+ ValueType transformationInput = + this.onlineTransformationService.serializeValuesIntoArrowIPC(onDemandContext); + + // Send out requests to the FTS and process the responses. + Set onDemandFeatureStringReferences = + onDemandFeatureReferences.stream() + .map(r -> Feature.getFeatureReference(r)) + .collect(Collectors.toSet()); + + for (FeatureReferenceV2 featureReference : onDemandFeatureReferences) { + String onDemandFeatureViewName = featureReference.getFeatureViewName(); + TransformFeaturesRequest transformFeaturesRequest = + TransformFeaturesRequest.newBuilder() + .setOnDemandFeatureViewName(onDemandFeatureViewName) + .setTransformationInput(transformationInput) + .build(); + + TransformFeaturesResponse transformFeaturesResponse = + this.onlineTransformationService.transformFeatures(transformFeaturesRequest); + + this.onlineTransformationService.processTransformFeaturesResponse( + transformFeaturesResponse, + onDemandFeatureViewName, + onDemandFeatureStringReferences, + responseBuilder); + } + } /** * Generate Field level Status metadata for the given valueMap. * @@ -317,17 +327,16 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequestV2 re * @return a 1:1 map keyed by field name containing field status metadata instead of values in the * given valueMap. 
*/ - private static GetOnlineFeaturesResponse.FieldStatus getMetadata( - ValueProto.Value value, boolean isOutsideMaxAge) { + private static FieldStatus getFeatureStatus(ValueProto.Value value, boolean isOutsideMaxAge) { if (value == null) { - return GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND; + return FieldStatus.NOT_FOUND; } else if (isOutsideMaxAge) { - return GetOnlineFeaturesResponse.FieldStatus.OUTSIDE_MAX_AGE; + return FieldStatus.OUTSIDE_MAX_AGE; } else if (value.getValCase().equals(ValueProto.Value.ValCase.VAL_NOT_SET)) { - return GetOnlineFeaturesResponse.FieldStatus.NULL_VALUE; + return FieldStatus.NULL_VALUE; } - return GetOnlineFeaturesResponse.FieldStatus.PRESENT; + return FieldStatus.PRESENT; } /** @@ -336,17 +345,17 @@ private static GetOnlineFeaturesResponse.FieldStatus getMetadata( * in entity row exceeds FeatureTable max age. * * @param feature contains the ingestion timing and feature data. - * @param entityRow contains the retrieval timing of when features are pulled. + * @param entityTimestamp contains the retrieval timing of when features are pulled. * @param maxAge feature's max age. 
*/ private static boolean checkOutsideMaxAge( - Feature feature, GetOnlineFeaturesRequestV2.EntityRow entityRow, Duration maxAge) { + feast.storage.api.retriever.Feature feature, Timestamp entityTimestamp, Duration maxAge) { if (maxAge.equals(Duration.getDefaultInstance())) { // max age is not set return false; } - long givenTimestamp = entityRow.getTimestamp().getSeconds(); + long givenTimestamp = entityTimestamp.getSeconds(); if (givenTimestamp == 0) { givenTimestamp = System.currentTimeMillis() / 1000; } @@ -359,54 +368,45 @@ private static boolean checkOutsideMaxAge( * * @param entityRows entity rows provided in request * @param featureReferences feature references provided in request - * @param project project name provided in request */ private void populateHistogramMetrics( - List entityRows, - List featureReferences, - String project) { + List> entityRows, List featureReferences) { Metrics.requestEntityCountDistribution - .labels(project) + .labels(this.project) .observe(Double.valueOf(entityRows.size())); Metrics.requestFeatureCountDistribution - .labels(project) + .labels(this.project) .observe(Double.valueOf(featureReferences.size())); - - long countDistinctFeatureTables = - featureReferences.stream() - .map(featureReference -> getFeatureTableStringRef(project, featureReference)) - .distinct() - .count(); - Metrics.requestFeatureTableCountDistribution - .labels(project) - .observe(Double.valueOf(countDistinctFeatureTables)); } /** * Populate count metrics that can be used for analysing online retrieval calls * - * @param statusMap Statuses of features which have been requested - * @param project Project where request for features was called from + * @param featureRef singe Feature Reference + * @param featureVector Feature Vector built for this requested feature */ private void populateCountMetrics( - Map statusMap, String project) { - statusMap.forEach( - (featureRefString, status) -> { - if (status == GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND) { 
- Metrics.notFoundKeyCount.labels(project, featureRefString).inc(); - } - if (status == GetOnlineFeaturesResponse.FieldStatus.OUTSIDE_MAX_AGE) { - Metrics.staleKeyCount.labels(project, featureRefString).inc(); - } - }); + FeatureReferenceV2 featureRef, + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVectorOrBuilder featureVector) { + String featureRefString = Feature.getFeatureReference(featureRef); + featureVector + .getStatusesList() + .forEach( + (status) -> { + if (status == FieldStatus.NOT_FOUND) { + Metrics.notFoundKeyCount.labels(this.project, featureRefString).inc(); + } + if (status == FieldStatus.OUTSIDE_MAX_AGE) { + Metrics.staleKeyCount.labels(this.project, featureRefString).inc(); + } + }); } - private void populateFeatureCountMetrics( - List featureReferences, String project) { + private void populateFeatureCountMetrics(List featureReferences) { featureReferences.forEach( featureReference -> Metrics.requestFeatureCount - .labels(project, FeatureV2.getFeatureStringRef(featureReference)) + .labels(project, Feature.getFeatureReference(featureReference)) .inc()); } } diff --git a/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java b/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java index 23ee9854b23..d1df763f6ed 100644 --- a/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java +++ b/java/serving/src/main/java/feast/serving/service/OnlineTransformationService.java @@ -16,12 +16,10 @@ */ package feast.serving.service; +import com.google.common.collect.Lists; import com.google.protobuf.ByteString; -import feast.common.models.FeatureV2; -import feast.proto.core.DataSourceProto; -import feast.proto.core.FeatureProto; -import feast.proto.core.FeatureViewProto; -import feast.proto.core.OnDemandFeatureViewProto; +import com.google.protobuf.Timestamp; +import feast.proto.core.*; import feast.proto.serving.ServingAPIProto; import 
feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesRequest; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesResponse; @@ -32,6 +30,7 @@ import io.grpc.ManagedChannel; import io.grpc.ManagedChannelBuilder; import io.grpc.Status; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; import java.util.*; @@ -45,9 +44,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.tomcat.util.http.fileupload.ByteArrayOutputStream; import org.slf4j.Logger; public class OnlineTransformationService implements TransformationService { @@ -75,20 +72,21 @@ public OnlineTransformationService( @Override public TransformFeaturesResponse transformFeatures( TransformFeaturesRequest transformFeaturesRequest) { + if (this.stub == null) { + throw new RuntimeException( + "Transformation service endpoint must be configured to enable this functionality."); + } return this.stub.transformFeatures(transformFeaturesRequest); } /** {@inheritDoc} */ @Override - public Pair, List> - extractRequestDataFeatureNamesAndOnDemandFeatureInputs( - List onDemandFeatureReferences, String projectName) { - Set requestDataFeatureNames = new HashSet(); - List onDemandFeatureInputs = - new ArrayList(); + public List extractOnDemandFeaturesDependencies( + List onDemandFeatureReferences) { + List onDemandFeatureInputs = new ArrayList<>(); for (ServingAPIProto.FeatureReferenceV2 featureReference : onDemandFeatureReferences) { OnDemandFeatureViewProto.OnDemandFeatureViewSpec onDemandFeatureViewSpec = - this.registryRepository.getOnDemandFeatureViewSpec(projectName, featureReference); + this.registryRepository.getOnDemandFeatureViewSpec(featureReference); Map inputs = 
onDemandFeatureViewSpec.getInputsMap(); @@ -96,11 +94,20 @@ public TransformFeaturesResponse transformFeatures( OnDemandFeatureViewProto.OnDemandInput.InputCase inputCase = input.getInputCase(); switch (inputCase) { case REQUEST_DATA_SOURCE: - DataSourceProto.DataSource requestDataSource = input.getRequestDataSource(); - DataSourceProto.DataSource.RequestDataOptions requestDataOptions = - requestDataSource.getRequestDataOptions(); - Set requestDataNames = requestDataOptions.getSchemaMap().keySet(); - requestDataFeatureNames.addAll(requestDataNames); + // Do nothing. The value should be provided as dedicated request parameter + break; + case FEATURE_VIEW_PROJECTION: + FeatureReferenceProto.FeatureViewProjection projection = + input.getFeatureViewProjection(); + for (FeatureProto.FeatureSpecV2 featureSpec : projection.getFeatureColumnsList()) { + String featureName = featureSpec.getName(); + ServingAPIProto.FeatureReferenceV2 onDemandFeatureInput = + ServingAPIProto.FeatureReferenceV2.newBuilder() + .setFeatureViewName(projection.getFeatureViewName()) + .setFeatureName(featureName) + .build(); + onDemandFeatureInputs.add(onDemandFeatureInput); + } break; case FEATURE_VIEW: FeatureViewProto.FeatureView featureView = input.getFeatureView(); @@ -110,8 +117,8 @@ public TransformFeaturesResponse transformFeatures( String featureName = featureSpec.getName(); ServingAPIProto.FeatureReferenceV2 onDemandFeatureInput = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable(featureViewName) - .setName(featureName) + .setFeatureViewName(featureViewName) + .setFeatureName(featureName) .build(); onDemandFeatureInputs.add(onDemandFeatureInput); } @@ -124,61 +131,7 @@ public TransformFeaturesResponse transformFeatures( } } } - Pair, List> pair = - new ImmutablePair, List>( - requestDataFeatureNames, onDemandFeatureInputs); - return pair; - } - - /** {@inheritDoc} */ - public Pair< - List, - Map>> - separateEntityRows( - Set requestDataFeatureNames, 
ServingAPIProto.GetOnlineFeaturesRequestV2 request) { - // Separate entity rows into entity data and request feature data. - List entityRows = - new ArrayList(); - Map> requestDataFeatures = - new HashMap>(); - - for (ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow entityRow : - request.getEntityRowsList()) { - Map fieldsMap = new HashMap(); - - for (Map.Entry entry : entityRow.getFieldsMap().entrySet()) { - String key = entry.getKey(); - ValueProto.Value value = entry.getValue(); - - if (requestDataFeatureNames.contains(key)) { - if (!requestDataFeatures.containsKey(key)) { - requestDataFeatures.put(key, new ArrayList()); - } - requestDataFeatures.get(key).add(value); - } else { - fieldsMap.put(key, value); - } - } - - // Construct new entity row containing the extracted entity data, if necessary. - if (!fieldsMap.isEmpty()) { - ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow newEntityRow = - ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow.newBuilder() - .setTimestamp(entityRow.getTimestamp()) - .putAllFields(fieldsMap) - .build(); - entityRows.add(newEntityRow); - } - } - - Pair< - List, - Map>> - pair = - new ImmutablePair< - List, - Map>>(entityRows, requestDataFeatures); - return pair; + return onDemandFeatureInputs; } /** {@inheritDoc} */ @@ -187,8 +140,7 @@ public void processTransformFeaturesResponse( transformFeaturesResponse, String onDemandFeatureViewName, Set onDemandFeatureStringReferences, - List> values, - List> statuses) { + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder) { try { BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); ArrowFileReader reader = @@ -203,10 +155,11 @@ public void processTransformFeaturesResponse( VectorSchemaRoot readBatch = reader.getVectorSchemaRoot(); Schema responseSchema = readBatch.getSchema(); List responseFields = responseSchema.getFields(); + Timestamp now = Timestamp.newBuilder().setSeconds(System.currentTimeMillis() / 1000).build(); for (Field field : responseFields) { 
String columnName = field.getName(); - String fullFeatureName = onDemandFeatureViewName + ":" + columnName; + String fullFeatureName = columnName.replace("__", ":"); ArrowType columnType = field.getType(); // The response will contain all features for the specified ODFV, so we @@ -217,6 +170,9 @@ public void processTransformFeaturesResponse( FieldVector fieldVector = readBatch.getVector(field); int valueCount = fieldVector.getValueCount(); + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector.Builder vectorBuilder = + responseBuilder.addResultsBuilder(); + List valueList = Lists.newArrayListWithExpectedSize(valueCount); // TODO: support all Feast types // TODO: clean up the switch statement @@ -226,27 +182,13 @@ public void processTransformFeaturesResponse( case INT64_BITWIDTH: for (int i = 0; i < valueCount; i++) { long int64Value = ((BigIntVector) fieldVector).get(i); - Map rowValues = values.get(i); - Map rowStatuses = - statuses.get(i); - ValueProto.Value value = - ValueProto.Value.newBuilder().setInt64Val(int64Value).build(); - rowValues.put(fullFeatureName, value); - rowStatuses.put( - fullFeatureName, ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.PRESENT); + valueList.add(ValueProto.Value.newBuilder().setInt64Val(int64Value).build()); } break; case INT32_BITWIDTH: for (int i = 0; i < valueCount; i++) { - int intValue = ((IntVector) fieldVector).get(i); - Map rowValues = values.get(i); - Map rowStatuses = - statuses.get(i); - ValueProto.Value value = - ValueProto.Value.newBuilder().setInt32Val(intValue).build(); - rowValues.put(fullFeatureName, value); - rowStatuses.put( - fullFeatureName, ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.PRESENT); + int int32Value = ((IntVector) fieldVector).get(i); + valueList.add(ValueProto.Value.newBuilder().setInt32Val(int32Value).build()); } break; default: @@ -265,27 +207,13 @@ public void processTransformFeaturesResponse( case DOUBLE: for (int i = 0; i < valueCount; i++) { double doubleValue = 
((Float8Vector) fieldVector).get(i); - Map rowValues = values.get(i); - Map rowStatuses = - statuses.get(i); - ValueProto.Value value = - ValueProto.Value.newBuilder().setDoubleVal(doubleValue).build(); - rowValues.put(fullFeatureName, value); - rowStatuses.put( - fullFeatureName, ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.PRESENT); + valueList.add(ValueProto.Value.newBuilder().setDoubleVal(doubleValue).build()); } break; case SINGLE: for (int i = 0; i < valueCount; i++) { float floatValue = ((Float4Vector) fieldVector).get(i); - Map rowValues = values.get(i); - Map rowStatuses = - statuses.get(i); - ValueProto.Value value = - ValueProto.Value.newBuilder().setFloatVal(floatValue).build(); - rowValues.put(fullFeatureName, value); - rowStatuses.put( - fullFeatureName, ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.PRESENT); + valueList.add(ValueProto.Value.newBuilder().setFloatVal(floatValue).build()); } break; default: @@ -299,6 +227,14 @@ public void processTransformFeaturesResponse( .asRuntimeException(); } } + + for (ValueProto.Value v : valueList) { + vectorBuilder.addValues(v); + vectorBuilder.addStatuses(ServingAPIProto.FieldStatus.PRESENT); + vectorBuilder.addEventTimestamps(now); + } + + responseBuilder.getMetadataBuilder().getFeatureNamesBuilder().addVal(fullFeatureName); } } catch (IOException e) { log.info(e.toString()); @@ -310,30 +246,52 @@ public void processTransformFeaturesResponse( } /** {@inheritDoc} */ - public ValueType serializeValuesIntoArrowIPC(List> values) { + public ValueType serializeValuesIntoArrowIPC(List>> values) { // In order to be serialized correctly, the data must be packaged in a VectorSchemaRoot. // We first construct all the columns. 
Map columnNameToColumn = new HashMap(); BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); - Map firstAugmentedRowValues = values.get(0); - for (Map.Entry entry : firstAugmentedRowValues.entrySet()) { + + List columnFields = new ArrayList(); + List columns = new ArrayList(); + + for (Pair> columnEntry : values) { // The Python FTS does not expect full feature names, so we extract the feature name. - String columnName = FeatureV2.getFeatureName(entry.getKey()); - ValueProto.Value.ValCase valCase = entry.getValue().getValCase(); + String columnName = columnEntry.getKey(); + + List columnValues = columnEntry.getValue(); FieldVector column; + ValueProto.Value.ValCase valCase = columnValues.get(0).getValCase(); // TODO: support all Feast types switch (valCase) { case INT32_VAL: column = new IntVector(columnName, allocator); + column.setValueCount(columnValues.size()); + for (int idx = 0; idx < columnValues.size(); idx++) { + ((IntVector) column).set(idx, columnValues.get(idx).getInt32Val()); + } break; case INT64_VAL: column = new BigIntVector(columnName, allocator); + column.setValueCount(columnValues.size()); + for (int idx = 0; idx < columnValues.size(); idx++) { + ((BigIntVector) column).set(idx, columnValues.get(idx).getInt64Val()); + } + break; case DOUBLE_VAL: column = new Float8Vector(columnName, allocator); + column.setValueCount(columnValues.size()); + for (int idx = 0; idx < columnValues.size(); idx++) { + ((Float8Vector) column).set(idx, columnValues.get(idx).getDoubleVal()); + } break; case FLOAT_VAL: column = new Float4Vector(columnName, allocator); + column.setValueCount(columnValues.size()); + for (int idx = 0; idx < columnValues.size(); idx++) { + ((Float4Vector) column).set(idx, columnValues.get(idx).getFloatVal()); + } break; default: throw Status.INTERNAL @@ -341,53 +299,11 @@ public ValueType serializeValuesIntoArrowIPC(List> "Column " + columnName + " has a type that is currently not handled: " + valCase) .asRuntimeException(); } - 
column.allocateNew(); - columnNameToColumn.put(columnName, column); - } - - // Add the data, row by row. - for (int i = 0; i < values.size(); i++) { - Map augmentedRowValues = values.get(i); - for (Map.Entry entry : augmentedRowValues.entrySet()) { - String columnName = FeatureV2.getFeatureName(entry.getKey()); - ValueProto.Value value = entry.getValue(); - ValueProto.Value.ValCase valCase = value.getValCase(); - FieldVector column = columnNameToColumn.get(columnName); - // TODO: support all Feast types - switch (valCase) { - case INT32_VAL: - ((IntVector) column).setSafe(i, value.getInt32Val()); - break; - case INT64_VAL: - ((BigIntVector) column).setSafe(i, value.getInt64Val()); - break; - case DOUBLE_VAL: - ((Float8Vector) column).setSafe(i, value.getDoubleVal()); - break; - case FLOAT_VAL: - ((Float4Vector) column).setSafe(i, value.getFloatVal()); - break; - default: - throw Status.INTERNAL - .withDescription( - "Column " - + columnName - + " has a type that is currently not handled: " - + valCase) - .asRuntimeException(); - } - } - } - - // Construct the VectorSchemaRoot. - List columnFields = new ArrayList(); - List columns = new ArrayList(); - for (FieldVector column : columnNameToColumn.values()) { - column.setValueCount(values.size()); - columnFields.add(column.getField()); columns.add(column); + columnFields.add(column.getField()); } + VectorSchemaRoot schemaRoot = new VectorSchemaRoot(columnFields, columns); // Serialize the VectorSchemaRoot into Arrow IPC format. 
diff --git a/java/serving/src/main/java/feast/serving/service/ServingServiceV2.java b/java/serving/src/main/java/feast/serving/service/ServingServiceV2.java index 05acb31b78e..4a44f4e09e5 100644 --- a/java/serving/src/main/java/feast/serving/service/ServingServiceV2.java +++ b/java/serving/src/main/java/feast/serving/service/ServingServiceV2.java @@ -17,8 +17,6 @@ package feast.serving.service; import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; public interface ServingServiceV2 { /** @@ -36,23 +34,15 @@ ServingAPIProto.GetFeastServingInfoResponse getFeastServingInfo( /** * Get features from an online serving store, given a list of {@link - * feast.proto.serving.ServingAPIProto.FeatureReferenceV2}s to retrieve, and list of {@link - * feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow}s to join the - * retrieved values to. - * - *

Features can be queried across feature tables, but each {@link - * feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow} must contain all - * entities for all feature tables included in the request. + * feast.proto.serving.ServingAPIProto.FeatureReferenceV2}s to retrieve or name of the feature + * service, and vectorized entities Map<String, {@link + * feast.proto.types.ValueProto.RepeatedValue}> to join the retrieved values to. * *

This request is fulfilled synchronously. * - * @param getFeaturesRequest {@link GetOnlineFeaturesRequestV2} containing list of {@link - * feast.proto.serving.ServingAPIProto.FeatureReferenceV2}s to retrieve and list of {@link - * feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow}s to join the - * retrieved values to. - * @return {@link GetOnlineFeaturesResponse} with list of {@link - * feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues} for each {@link - * feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow} supplied. + * @return {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse} with list of + * {@link feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector}. */ - GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequestV2 getFeaturesRequest); + ServingAPIProto.GetOnlineFeaturesResponse getOnlineFeatures( + ServingAPIProto.GetOnlineFeaturesRequest getFeaturesRequest); } diff --git a/java/serving/src/main/java/feast/serving/service/TransformationService.java b/java/serving/src/main/java/feast/serving/service/TransformationService.java index caa52793020..e993e76e0a7 100644 --- a/java/serving/src/main/java/feast/serving/service/TransformationService.java +++ b/java/serving/src/main/java/feast/serving/service/TransformationService.java @@ -17,14 +17,11 @@ package feast.serving.service; import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesRequest; import feast.proto.serving.TransformationServiceAPIProto.TransformFeaturesResponse; import feast.proto.serving.TransformationServiceAPIProto.ValueType; import feast.proto.types.ValueProto; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.commons.lang3.tuple.Pair; @@ 
-38,28 +35,13 @@ public interface TransformationService { TransformFeaturesResponse transformFeatures(TransformFeaturesRequest transformFeaturesRequest); /** - * Extract the set of request data feature names and the list of on demand feature inputs from a - * list of ODFV references. + * Extract the list of on demand feature inputs from a list of ODFV references. * * @param onDemandFeatureReferences list of ODFV references to be parsed - * @param projectName project name - * @return a pair containing the set of request data feature names and list of on demand feature - * inputs + * @return list of on demand feature inputs */ - Pair, List> - extractRequestDataFeatureNamesAndOnDemandFeatureInputs( - List onDemandFeatureReferences, String projectName); - - /** - * Separate the entity rows of a request into entity data and request feature data. - * - * @param requestDataFeatureNames set of feature names for the request data - * @param request the GetOnlineFeaturesRequestV2 containing the entity rows - * @return a pair containing the set of request data feature names and list of on demand feature - * inputs - */ - Pair, Map>> - separateEntityRows(Set requestDataFeatureNames, GetOnlineFeaturesRequestV2 request); + List extractOnDemandFeaturesDependencies( + List onDemandFeatureReferences); /** * Process a response from the feature transformation server by augmenting the given lists of @@ -68,15 +50,13 @@ public interface TransformationService { * @param transformFeaturesResponse response to be processed * @param onDemandFeatureViewName name of ODFV to which the response corresponds * @param onDemandFeatureStringReferences set of all ODFV references that should be kept - * @param values list of field maps to be augmented with additional fields from the response - * @param statuses list of status maps to be augmented + * @param responseBuilder {@link ServingAPIProto.GetOnlineFeaturesResponse.Builder} */ void processTransformFeaturesResponse( TransformFeaturesResponse 
transformFeaturesResponse, String onDemandFeatureViewName, Set onDemandFeatureStringReferences, - List> values, - List> statuses); + ServingAPIProto.GetOnlineFeaturesResponse.Builder responseBuilder); /** * Serialize data into Arrow IPC format, to be sent to the Python feature transformation server. @@ -84,5 +64,5 @@ void processTransformFeaturesResponse( * @param values list of field maps to be serialized * @return the data packaged into a ValueType proto object */ - ValueType serializeValuesIntoArrowIPC(List> values); + ValueType serializeValuesIntoArrowIPC(List>> values); } diff --git a/java/serving/src/main/java/feast/serving/util/RequestHelper.java b/java/serving/src/main/java/feast/serving/util/RequestHelper.java index 4d478f430f2..f730e019821 100644 --- a/java/serving/src/main/java/feast/serving/util/RequestHelper.java +++ b/java/serving/src/main/java/feast/serving/util/RequestHelper.java @@ -16,27 +16,28 @@ */ package feast.serving.util; +import feast.common.models.Feature; +import feast.proto.serving.ServingAPIProto; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; public class RequestHelper { - public static void validateOnlineRequest(GetOnlineFeaturesRequestV2 request) { + public static void validateOnlineRequest(ServingAPIProto.GetOnlineFeaturesRequest request) { // All EntityRows should not be empty - if (request.getEntityRowsCount() <= 0) { + if (request.getEntitiesCount() <= 0) { throw new IllegalArgumentException("Entity value must be provided"); } // All FeatureReferences should have FeatureTable name and Feature name - for (FeatureReferenceV2 featureReference : request.getFeaturesList()) { - validateOnlineRequestFeatureReference(featureReference); + for (String featureReference : request.getFeatures().getValList()) { + validateOnlineRequestFeatureReference(Feature.parseFeatureReference(featureReference)); } } public static void 
validateOnlineRequestFeatureReference(FeatureReferenceV2 featureReference) { - if (featureReference.getFeatureTable().isEmpty()) { + if (featureReference.getFeatureViewName().isEmpty()) { throw new IllegalArgumentException("FeatureTable name must be provided in FeatureReference"); } - if (featureReference.getName().isEmpty()) { + if (featureReference.getFeatureName().isEmpty()) { throw new IllegalArgumentException("Feature name must be provided in FeatureReference"); } } diff --git a/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java b/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java index 238df549513..3ab9f43c341 100644 --- a/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java +++ b/java/serving/src/main/java/feast/serving/util/mappers/ResponseJSONMapper.java @@ -16,8 +16,7 @@ */ package feast.serving.util.mappers; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; +import feast.proto.serving.ServingAPIProto; import feast.proto.types.ValueProto.Value; import java.util.List; import java.util.Map; @@ -27,15 +26,23 @@ public class ResponseJSONMapper { public static List> mapGetOnlineFeaturesResponse( - GetOnlineFeaturesResponse response) { - return response.getFieldValuesList().stream() + ServingAPIProto.GetOnlineFeaturesResponse response) { + return response.getResultsList().stream() .map(fieldValues -> convertFieldValuesToMap(fieldValues)) .collect(Collectors.toList()); } - private static Map convertFieldValuesToMap(FieldValues fieldValues) { - return fieldValues.getFieldsMap().entrySet().stream() - .collect(Collectors.toMap(es -> es.getKey(), es -> extractValue(es.getValue()))); + private static Map convertFieldValuesToMap( + ServingAPIProto.GetOnlineFeaturesResponse.FeatureVector vec) { + return Map.of( + "values", + vec.getValuesList().stream() + 
.map(ResponseJSONMapper::extractValue) + .collect(Collectors.toList()), + "statuses", + vec.getStatusesList(), + "event_timestamp", + vec.getEventTimestampsList()); } private static Object extractValue(Value value) { diff --git a/java/serving/src/main/resources/application.yml b/java/serving/src/main/resources/application.yml index 4fba32a0ae6..1f6d5b34c43 100644 --- a/java/serving/src/main/resources/application.yml +++ b/java/serving/src/main/resources/application.yml @@ -1,4 +1,5 @@ feast: + project: "" registry: "prompt_dory/data/registry.db" registryRefreshInterval: 0 diff --git a/java/serving/src/test/java/feast/serving/it/ServingBase.java b/java/serving/src/test/java/feast/serving/it/ServingBase.java deleted file mode 100644 index 3a42f9a85e5..00000000000 --- a/java/serving/src/test/java/feast/serving/it/ServingBase.java +++ /dev/null @@ -1,307 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2021 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.serving.it; - -import static org.awaitility.Awaitility.await; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.jupiter.api.Assertions.*; - -import com.google.common.collect.ImmutableList; -import com.google.inject.*; -import com.google.inject.Module; -import com.google.inject.util.Modules; -import com.google.protobuf.Timestamp; -import feast.proto.core.FeatureProto; -import feast.proto.core.FeatureViewProto; -import feast.proto.core.RegistryProto; -import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingServiceGrpc; -import feast.proto.types.ValueProto; -import feast.serving.config.*; -import feast.serving.grpc.OnlineServingGrpcServiceV2; -import feast.serving.util.DataGenerator; -import io.grpc.*; -import io.grpc.inprocess.InProcessChannelBuilder; -import io.grpc.inprocess.InProcessServerBuilder; -import io.grpc.protobuf.services.ProtoReflectionService; -import io.grpc.util.MutableHandlerRegistry; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.time.Duration; -import java.util.concurrent.TimeUnit; -import org.junit.jupiter.api.*; -import org.testcontainers.containers.DockerComposeContainer; -import org.testcontainers.containers.wait.strategy.Wait; -import org.testcontainers.junit.jupiter.Testcontainers; - -@Testcontainers -abstract class ServingBase { - static DockerComposeContainer environment; - - ServingServiceGrpc.ServingServiceBlockingStub servingStub; - Injector injector; - String serverName; - ManagedChannel channel; - Server server; - MutableHandlerRegistry serviceRegistry; - - @BeforeAll - static void globalSetup() { - environment = - new DockerComposeContainer( - new File("src/test/resources/docker-compose/docker-compose-redis-it.yml")) - .withExposedService("redis", 6379) - .withOptions() - .waitingFor( - "materialize", - Wait.forLogMessage(".*Materialization finished.*\\n", 1) - 
.withStartupTimeout(Duration.ofMinutes(5))); - environment.start(); - } - - @AfterAll - static void globalTeardown() { - environment.stop(); - } - - @BeforeEach - public void envSetUp() throws Exception { - - AbstractModule appPropertiesModule = - new AbstractModule() { - @Override - protected void configure() { - bind(OnlineServingGrpcServiceV2.class); - } - - @Provides - ApplicationProperties applicationProperties() { - final ApplicationProperties p = new ApplicationProperties(); - p.setAwsRegion("us-east-1"); - - final ApplicationProperties.FeastProperties feastProperties = createFeastProperties(); - p.setFeast(feastProperties); - - final ApplicationProperties.TracingProperties tracingProperties = - new ApplicationProperties.TracingProperties(); - feastProperties.setTracing(tracingProperties); - - tracingProperties.setEnabled(false); - return p; - } - }; - - Module overrideConfig = registryConfig(); - Module registryConfig; - if (overrideConfig != null) { - registryConfig = Modules.override(new RegistryConfig()).with(registryConfig()); - } else { - registryConfig = new RegistryConfig(); - } - - injector = - Guice.createInjector( - new ServingServiceConfigV2(), - registryConfig, - new InstrumentationConfig(), - appPropertiesModule); - - OnlineServingGrpcServiceV2 onlineServingGrpcServiceV2 = - injector.getInstance(OnlineServingGrpcServiceV2.class); - - serverName = InProcessServerBuilder.generateName(); - - server = - InProcessServerBuilder.forName(serverName) - .fallbackHandlerRegistry(serviceRegistry) - .addService(onlineServingGrpcServiceV2) - .addService(ProtoReflectionService.newInstance()) - .build(); - server.start(); - - channel = InProcessChannelBuilder.forName(serverName).usePlaintext().directExecutor().build(); - - servingStub = - ServingServiceGrpc.newBlockingStub(channel) - .withDeadlineAfter(5, TimeUnit.SECONDS) - .withWaitForReady(); - } - - @AfterEach - public void envTeardown() throws Exception { - // assume channel and server are not null - 
channel.shutdown(); - server.shutdown(); - // fail the test if cannot gracefully shutdown - try { - assert channel.awaitTermination(5, TimeUnit.SECONDS) - : "channel cannot be gracefully shutdown"; - assert server.awaitTermination(5, TimeUnit.SECONDS) : "server cannot be gracefully shutdown"; - } finally { - channel.shutdownNow(); - server.shutdownNow(); - } - } - - protected ServingAPIProto.GetOnlineFeaturesRequestV2 buildOnlineRequest(int driverId) { - // getOnlineFeatures Information - String projectName = "feast_project"; - String entityName = "driver_id"; - - // Instantiate EntityRows - final Timestamp timestamp = Timestamp.getDefaultInstance(); - ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow entityRow1 = - DataGenerator.createEntityRow( - entityName, DataGenerator.createInt64Value(driverId), timestamp.getSeconds()); - ImmutableList entityRows = - ImmutableList.of(entityRow1); - - // Instantiate FeatureReferences - ServingAPIProto.FeatureReferenceV2 feature1Reference = - DataGenerator.createFeatureReference("driver_hourly_stats", "conv_rate"); - ServingAPIProto.FeatureReferenceV2 feature2Reference = - DataGenerator.createFeatureReference("driver_hourly_stats", "avg_daily_trips"); - ImmutableList featureReferences = - ImmutableList.of(feature1Reference, feature2Reference); - - // Build GetOnlineFeaturesRequestV2 - return TestUtils.createOnlineFeatureRequest(projectName, featureReferences, entityRows); - } - - static RegistryProto.Registry registryProto = readLocalRegistry(); - - private static RegistryProto.Registry readLocalRegistry() { - try { - return RegistryProto.Registry.parseFrom( - Files.readAllBytes(Paths.get("src/test/resources/docker-compose/feast10/registry.db"))); - } catch (IOException e) { - e.printStackTrace(); - } - - return null; - } - - @Test - public void shouldGetOnlineFeatures() { - ServingAPIProto.GetOnlineFeaturesRequestV2 req = buildOnlineRequest(1005); - ServingAPIProto.GetOnlineFeaturesResponse featureResponse = - 
servingStub.withDeadlineAfter(1000, TimeUnit.MILLISECONDS).getOnlineFeaturesV2(req); - - assertEquals(1, featureResponse.getFieldValuesCount()); - - final ServingAPIProto.GetOnlineFeaturesResponse.FieldValues fieldValue = - featureResponse.getFieldValues(0); - for (final String key : - ImmutableList.of( - "driver_hourly_stats:avg_daily_trips", "driver_hourly_stats:conv_rate", "driver_id")) { - assertTrue(fieldValue.containsFields(key)); - assertTrue(fieldValue.containsStatuses(key)); - assertEquals( - ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.PRESENT, - fieldValue.getStatusesOrThrow(key)); - } - - assertEquals( - 500, fieldValue.getFieldsOrThrow("driver_hourly_stats:avg_daily_trips").getInt64Val()); - assertEquals(1005, fieldValue.getFieldsOrThrow("driver_id").getInt64Val()); - assertEquals( - 0.5, fieldValue.getFieldsOrThrow("driver_hourly_stats:conv_rate").getDoubleVal(), 0.0001); - } - - @Test - public void shouldGetOnlineFeaturesWithOutsideMaxAgeStatus() { - ServingAPIProto.GetOnlineFeaturesResponse featureResponse = - servingStub.getOnlineFeaturesV2(buildOnlineRequest(1001)); - - assertEquals(1, featureResponse.getFieldValuesCount()); - - final ServingAPIProto.GetOnlineFeaturesResponse.FieldValues fieldValue = - featureResponse.getFieldValues(0); - for (final String key : - ImmutableList.of("driver_hourly_stats:avg_daily_trips", "driver_hourly_stats:conv_rate")) { - assertTrue(fieldValue.containsFields(key)); - assertTrue(fieldValue.containsStatuses(key)); - assertEquals( - ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.OUTSIDE_MAX_AGE, - fieldValue.getStatusesOrThrow(key)); - } - - assertEquals( - 100, fieldValue.getFieldsOrThrow("driver_hourly_stats:avg_daily_trips").getInt64Val()); - assertEquals(1001, fieldValue.getFieldsOrThrow("driver_id").getInt64Val()); - assertEquals( - 0.1, fieldValue.getFieldsOrThrow("driver_hourly_stats:conv_rate").getDoubleVal(), 0.0001); - } - - @Test - public void shouldGetOnlineFeaturesWithNotFoundStatus() { 
- ServingAPIProto.GetOnlineFeaturesResponse featureResponse = - servingStub.getOnlineFeaturesV2(buildOnlineRequest(-1)); - - assertEquals(1, featureResponse.getFieldValuesCount()); - - final ServingAPIProto.GetOnlineFeaturesResponse.FieldValues fieldValue = - featureResponse.getFieldValues(0); - for (final String key : - ImmutableList.of("driver_hourly_stats:avg_daily_trips", "driver_hourly_stats:conv_rate")) { - assertTrue(fieldValue.containsFields(key)); - assertTrue(fieldValue.containsStatuses(key)); - assertEquals( - ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND, - fieldValue.getStatusesOrThrow(key)); - } - } - - @Test - public void shouldRefreshRegistryAndServeNewFeatures() throws InterruptedException { - updateRegistryFile( - registryProto - .toBuilder() - .addFeatureViews( - FeatureViewProto.FeatureView.newBuilder() - .setSpec( - FeatureViewProto.FeatureViewSpec.newBuilder() - .setName("new_view") - .addEntities("driver_id") - .addFeatures( - FeatureProto.FeatureSpecV2.newBuilder() - .setName("new_feature") - .setValueType(ValueProto.ValueType.Enum.BOOL)))) - .build()); - - ServingAPIProto.GetOnlineFeaturesRequestV2 request = - buildOnlineRequest(1005) - .toBuilder() - .addFeatures(DataGenerator.createFeatureReference("new_view", "new_feature")) - .build(); - - await() - .ignoreException(StatusRuntimeException.class) - .atMost(5, TimeUnit.SECONDS) - .until(() -> servingStub.getOnlineFeaturesV2(request).getFieldValuesCount(), equalTo(1)); - } - - abstract ApplicationProperties.FeastProperties createFeastProperties(); - - AbstractModule registryConfig() { - return null; - } - - abstract void updateRegistryFile(RegistryProto.Registry registry); -} diff --git a/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java b/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java new file mode 100644 index 00000000000..c610d7df6b1 --- /dev/null +++ b/java/serving/src/test/java/feast/serving/it/ServingBaseTests.java @@ -0,0 +1,184 
@@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2021 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.serving.it; + +import static org.awaitility.Awaitility.await; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import feast.proto.core.FeatureProto; +import feast.proto.core.FeatureViewProto; +import feast.proto.core.RegistryProto; +import feast.proto.serving.ServingAPIProto; +import feast.proto.serving.ServingAPIProto.FieldStatus; +import feast.proto.types.ValueProto; +import feast.serving.util.DataGenerator; +import io.grpc.StatusRuntimeException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.*; + +abstract class ServingBaseTests extends ServingEnvironment { + + protected ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest(int driverId) { + // getOnlineFeatures Information + String entityName = "driver_id"; + + // Instantiate EntityRows + Map entityRows = + ImmutableMap.of( + entityName, + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createInt64Value(driverId)) + .build()); + + ImmutableList featureReferences = + ImmutableList.of("driver_hourly_stats:conv_rate", 
"driver_hourly_stats:avg_daily_trips"); + + // Build GetOnlineFeaturesRequestV2 + return TestUtils.createOnlineFeatureRequest(featureReferences, entityRows); + } + + static RegistryProto.Registry registryProto = readLocalRegistry(); + + private static RegistryProto.Registry readLocalRegistry() { + try { + return RegistryProto.Registry.parseFrom( + Files.readAllBytes(Paths.get("src/test/resources/docker-compose/feast10/registry.db"))); + } catch (IOException e) { + e.printStackTrace(); + } + + return null; + } + + @Test + public void shouldGetOnlineFeatures() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(1005)); + + assertEquals(2, featureResponse.getResultsCount()); + assertEquals(1, featureResponse.getResults(0).getValuesCount()); + + assertEquals( + ImmutableList.of("driver_hourly_stats:conv_rate", "driver_hourly_stats:avg_daily_trips"), + featureResponse.getMetadata().getFeatureNames().getValList()); + + for (int featureIdx : List.of(0, 1)) { + assertEquals( + List.of(ServingAPIProto.FieldStatus.PRESENT), + featureResponse.getResults(featureIdx).getStatusesList()); + } + + assertEquals(0.5, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + assertEquals(500, featureResponse.getResults(1).getValues(0).getInt64Val()); + } + + @Test + public void shouldGetOnlineFeaturesWithOutsideMaxAgeStatus() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(1001)); + + assertEquals(2, featureResponse.getResultsCount()); + assertEquals(1, featureResponse.getResults(0).getValuesCount()); + + for (int featureIdx : List.of(0, 1)) { + assertEquals( + FieldStatus.OUTSIDE_MAX_AGE, featureResponse.getResults(featureIdx).getStatuses(0)); + } + + assertEquals(0.1, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + assertEquals(100, featureResponse.getResults(1).getValues(0).getInt64Val()); + } + + @Test + public void 
shouldGetOnlineFeaturesWithNotFoundStatus() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(-1)); + + assertEquals(2, featureResponse.getResultsCount()); + assertEquals(1, featureResponse.getResults(0).getValuesCount()); + + for (final int featureIdx : List.of(0, 1)) { + assertEquals(FieldStatus.NOT_FOUND, featureResponse.getResults(featureIdx).getStatuses(0)); + } + } + + @Test + public void shouldRefreshRegistryAndServeNewFeatures() throws InterruptedException { + updateRegistryFile( + registryProto + .toBuilder() + .addFeatureViews( + FeatureViewProto.FeatureView.newBuilder() + .setSpec( + FeatureViewProto.FeatureViewSpec.newBuilder() + .setName("new_view") + .addEntities("driver_id") + .addFeatures( + FeatureProto.FeatureSpecV2.newBuilder() + .setName("new_feature") + .setValueType(ValueProto.ValueType.Enum.BOOL)))) + .build()); + + ServingAPIProto.GetOnlineFeaturesRequest request = buildOnlineRequest(1005); + + ServingAPIProto.GetOnlineFeaturesRequest requestWithNewFeature = + request + .toBuilder() + .setFeatures(request.getFeatures().toBuilder().addVal("new_view:new_feature")) + .build(); + + await() + .ignoreException(StatusRuntimeException.class) + .atMost(5, TimeUnit.SECONDS) + .until( + () -> servingStub.getOnlineFeatures(requestWithNewFeature).getResultsCount(), + equalTo(3)); + } + + /** https://github.com/feast-dev/feast/issues/2253 */ + @Test + public void shouldGetOnlineFeaturesWithStringEntity() { + Map entityRows = + ImmutableMap.of( + "entity", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createStrValue("key-1")) + .build()); + + ImmutableList featureReferences = + ImmutableList.of("feature_view_0:feature_0", "feature_view_0:feature_1"); + + ServingAPIProto.GetOnlineFeaturesRequest req = + TestUtils.createOnlineFeatureRequest(featureReferences, entityRows); + + ServingAPIProto.GetOnlineFeaturesResponse resp = servingStub.getOnlineFeatures(req); + + for (final 
int featureIdx : List.of(0, 1)) { + assertEquals(FieldStatus.PRESENT, resp.getResults(featureIdx).getStatuses(0)); + } + } + + abstract void updateRegistryFile(RegistryProto.Registry registry); +} diff --git a/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java b/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java new file mode 100644 index 00000000000..1d77c2e4f7c --- /dev/null +++ b/java/serving/src/test/java/feast/serving/it/ServingBenchmarkIT.java @@ -0,0 +1,152 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2021 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.serving.it; + +import com.google.api.client.util.Lists; +import com.google.common.base.Stopwatch; +import com.google.common.collect.ImmutableMap; +import com.google.common.math.Quantiles; +import feast.proto.serving.ServingAPIProto; +import feast.proto.types.ValueProto; +import feast.serving.config.ApplicationProperties; +import feast.serving.util.DataGenerator; +import java.util.List; +import java.util.LongSummaryStatistics; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ServingBenchmarkIT extends ServingEnvironment { + private Random rand = new Random(); + public static final Logger log = LoggerFactory.getLogger(ServingBenchmarkIT.class); + + private static int WARM_UP_COUNT = 10; + + @Override + ApplicationProperties.FeastProperties createFeastProperties() { + return TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); + } + + protected ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest( + int rowsCount, int featuresCount) { + List entities = + IntStream.range(0, rowsCount) + .mapToObj( + i -> DataGenerator.createStrValue(String.format("key-%s", rand.nextInt(1000)))) + .collect(Collectors.toList()); + + List featureReferences = + IntStream.range(0, featuresCount) + .mapToObj(i -> String.format("feature_view_%d:feature_%d", i / 10, i)) + .collect(Collectors.toList()); + + Map entityRows = + ImmutableMap.of( + "entity", ValueProto.RepeatedValue.newBuilder().addAllVal(entities).build()); + + return TestUtils.createOnlineFeatureRequest(featureReferences, entityRows); + } + + protected ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest(int rowsCount) { + List entities = + IntStream.range(0, rowsCount) + .mapToObj(i -> 
DataGenerator.createInt64Value(rand.nextInt(1000))) + .collect(Collectors.toList()); + + Map entityRows = + ImmutableMap.of( + "entity", ValueProto.RepeatedValue.newBuilder().addAllVal(entities).build()); + + return TestUtils.createOnlineFeatureRequest("benchmark_feature_service", entityRows); + } + + @Test + public void benchmarkServing100rows10features() { + ServingAPIProto.GetOnlineFeaturesRequest req = buildOnlineRequest(100, 10); + + measure( + () -> servingStub.withDeadlineAfter(1, TimeUnit.SECONDS).getOnlineFeatures(req), + "100 rows; 10 features", + 1000); + } + + @Test + public void benchmarkServing100rows50features() { + ServingAPIProto.GetOnlineFeaturesRequest req = buildOnlineRequest(100, 50); + + measure( + () -> servingStub.withDeadlineAfter(1, TimeUnit.SECONDS).getOnlineFeatures(req), + "100 rows; 50 features", + 1000); + } + + @Test + public void benchmarkServing100rows100features() { + ServingAPIProto.GetOnlineFeaturesRequest req = buildOnlineRequest(100, 100); + + measure( + () -> servingStub.withDeadlineAfter(1, TimeUnit.SECONDS).getOnlineFeatures(req), + "100 rows; 100 features", + 1000); + } + + @Test + public void benchmarkServing100rowsFullFeatureService() { + ServingAPIProto.GetOnlineFeaturesRequest req = buildOnlineRequest(100); + + measure( + () -> servingStub.withDeadlineAfter(1, TimeUnit.SECONDS).getOnlineFeatures(req), + "100 rows; Full FS", + 1000); + } + + private void measure(Runnable target, String name, int runs) { + Stopwatch timer = Stopwatch.createUnstarted(); + + List records = Lists.newArrayList(); + + for (int i = 0; i < runs; i++) { + timer.reset(); + timer.start(); + target.run(); + timer.stop(); + if (i >= WARM_UP_COUNT) { + records.add(timer.elapsed(TimeUnit.MILLISECONDS)); + } + } + + LongSummaryStatistics summary = + records.stream().collect(Collectors.summarizingLong(Long::longValue)); + + log.info(String.format("Test %s took (min): %d ms", name, summary.getMin())); + log.info(String.format("Test %s took (avg): %f ms", 
name, summary.getAverage())); + log.info( + String.format("Test %s took (median): %f ms", name, Quantiles.median().compute(records))); + log.info( + String.format( + "Test %s took (95p): %f ms", name, Quantiles.percentiles().index(95).compute(records))); + log.info( + String.format( + "Test %s took (99p): %f ms", name, Quantiles.percentiles().index(99).compute(records))); + } +} diff --git a/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java b/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java new file mode 100644 index 00000000000..c00dc7b1f31 --- /dev/null +++ b/java/serving/src/test/java/feast/serving/it/ServingEnvironment.java @@ -0,0 +1,172 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2021 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.serving.it; + +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThan; + +import com.google.inject.*; +import com.google.inject.Module; +import com.google.inject.util.Modules; +import feast.proto.serving.ServingServiceGrpc; +import feast.serving.config.*; +import feast.serving.grpc.OnlineServingGrpcServiceV2; +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; +import io.grpc.Server; +import io.grpc.util.MutableHandlerRegistry; +import java.io.File; +import java.io.IOException; +import java.net.ServerSocket; +import java.util.concurrent.TimeUnit; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.testcontainers.containers.DockerComposeContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.junit.jupiter.Testcontainers; + +@Testcontainers +abstract class ServingEnvironment { + static DockerComposeContainer environment; + + ServingServiceGrpc.ServingServiceBlockingStub servingStub; + Injector injector; + String serverName; + ManagedChannel channel; + Server server; + MutableHandlerRegistry serviceRegistry; + + static int serverPort = getFreePort(); + + @BeforeAll + static void globalSetup() { + environment = + new DockerComposeContainer( + new File("src/test/resources/docker-compose/docker-compose-redis-it.yml")) + .withExposedService("redis", 6379) + .withExposedService("feast", 8080) + .waitingFor("feast", Wait.forListeningPort()); + environment.start(); + } + + @AfterAll + static void globalTeardown() { + environment.stop(); + } + + @BeforeEach + public void envSetUp() throws Exception { + AbstractModule appPropertiesModule = + new AbstractModule() { + @Override + protected void configure() { + bind(OnlineServingGrpcServiceV2.class); + } + + @Provides + ApplicationProperties 
applicationProperties() { + final ApplicationProperties p = new ApplicationProperties(); + + ApplicationProperties.GrpcServer grpcServer = new ApplicationProperties.GrpcServer(); + ApplicationProperties.Server server = new ApplicationProperties.Server(); + server.setPort(serverPort); + grpcServer.setServer(server); + p.setGrpc(grpcServer); + + final ApplicationProperties.FeastProperties feastProperties = createFeastProperties(); + feastProperties.setAwsRegion("us-east-1"); + p.setFeast(feastProperties); + + final ApplicationProperties.TracingProperties tracingProperties = + new ApplicationProperties.TracingProperties(); + feastProperties.setTracing(tracingProperties); + + tracingProperties.setEnabled(false); + return p; + } + }; + + Module overrideConfig = registryConfig(); + Module registryConfig; + if (overrideConfig != null) { + registryConfig = Modules.override(new RegistryConfig()).with(registryConfig()); + } else { + registryConfig = new RegistryConfig(); + } + + injector = + Guice.createInjector( + new ServingServiceConfigV2(), + registryConfig, + new InstrumentationConfig(), + appPropertiesModule, + new ServerModule()); + + server = injector.getInstance(Server.class); + server.start(); + + channel = ManagedChannelBuilder.forAddress("localhost", serverPort).usePlaintext().build(); + + servingStub = + ServingServiceGrpc.newBlockingStub(channel) + .withDeadlineAfter(5, TimeUnit.SECONDS) + .withWaitForReady(); + } + + @AfterEach + public void envTeardown() throws Exception { + // assume channel and server are not null + channel.shutdown(); + server.shutdown(); + // fail the test if cannot gracefully shutdown + try { + assert channel.awaitTermination(5, TimeUnit.SECONDS) + : "channel cannot be gracefully shutdown"; + assert server.awaitTermination(5, TimeUnit.SECONDS) : "server cannot be gracefully shutdown"; + } finally { + channel.shutdownNow(); + server.shutdownNow(); + } + + server = null; + channel = null; + servingStub = null; + } + + abstract 
ApplicationProperties.FeastProperties createFeastProperties(); + + AbstractModule registryConfig() { + return null; + } + + private static int getFreePort() { + ServerSocket serverSocket; + try { + serverSocket = new ServerSocket(0); + } catch (IOException e) { + throw new RuntimeException("Couldn't allocate port"); + } + + assertThat(serverSocket, is(notNullValue())); + assertThat(serverSocket.getLocalPort(), greaterThan(0)); + + return serverSocket.getLocalPort(); + } +} diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java index 36e0eebe8d4..78871cd45c2 100644 --- a/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java +++ b/java/serving/src/test/java/feast/serving/it/ServingRedisGSRegistryIT.java @@ -20,8 +20,6 @@ import com.google.cloud.storage.*; import com.google.cloud.storage.testing.RemoteStorageHelper; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import feast.proto.core.RegistryProto; import feast.serving.config.ApplicationProperties; import java.util.concurrent.ExecutionException; @@ -29,7 +27,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -public class ServingRedisGSRegistryIT extends ServingBase { +public class ServingRedisGSRegistryIT extends ServingBaseTests { static Storage storage = RemoteStorageHelper.create() .getOptions() @@ -64,16 +62,9 @@ static void tearDown() throws ExecutionException, InterruptedException { @Override ApplicationProperties.FeastProperties createFeastProperties() { final ApplicationProperties.FeastProperties feastProperties = - new ApplicationProperties.FeastProperties(); + TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); feastProperties.setRegistry(blobId.toGsUtilUri()); - feastProperties.setRegistryRefreshInterval(1); - - 
feastProperties.setActiveStore("online"); - - feastProperties.setStores( - ImmutableList.of( - new ApplicationProperties.Store( - "online", "REDIS", ImmutableMap.of("host", "localhost", "port", "6379")))); return feastProperties; } diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisLocalRegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisLocalRegistryIT.java index 53fda39466e..c83d8dbbf1c 100644 --- a/java/serving/src/test/java/feast/serving/it/ServingRedisLocalRegistryIT.java +++ b/java/serving/src/test/java/feast/serving/it/ServingRedisLocalRegistryIT.java @@ -16,27 +16,14 @@ */ package feast.serving.it; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import feast.proto.core.RegistryProto; import feast.serving.config.ApplicationProperties; -public class ServingRedisLocalRegistryIT extends ServingBase { +public class ServingRedisLocalRegistryIT extends ServingBaseTests { @Override ApplicationProperties.FeastProperties createFeastProperties() { - final ApplicationProperties.FeastProperties feastProperties = - new ApplicationProperties.FeastProperties(); - feastProperties.setRegistry("src/test/resources/docker-compose/feast10/registry.db"); - feastProperties.setRegistryRefreshInterval(1); - - feastProperties.setActiveStore("online"); - - feastProperties.setStores( - ImmutableList.of( - new ApplicationProperties.Store( - "online", "REDIS", ImmutableMap.of("host", "localhost", "port", "6379")))); - - return feastProperties; + return TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); } @Override diff --git a/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java b/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java index 648fdaa5b59..d67fbf26215 100644 --- a/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java +++ 
b/java/serving/src/test/java/feast/serving/it/ServingRedisS3RegistryIT.java @@ -21,8 +21,6 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import com.google.inject.AbstractModule; import com.google.inject.Provides; import feast.proto.core.RegistryProto; @@ -31,7 +29,7 @@ import org.junit.jupiter.api.BeforeAll; import org.testcontainers.junit.jupiter.Container; -public class ServingRedisS3RegistryIT extends ServingBase { +public class ServingRedisS3RegistryIT extends ServingBaseTests { @Container static final S3MockContainer s3Mock = new S3MockContainer("2.2.3"); private static AmazonS3 createClient() { @@ -64,16 +62,9 @@ static void setUp() { @Override ApplicationProperties.FeastProperties createFeastProperties() { final ApplicationProperties.FeastProperties feastProperties = - new ApplicationProperties.FeastProperties(); + TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); feastProperties.setRegistry("s3://test-bucket/registry.db"); - feastProperties.setRegistryRefreshInterval(1); - - feastProperties.setActiveStore("online"); - - feastProperties.setStores( - ImmutableList.of( - new ApplicationProperties.Store( - "online", "REDIS", ImmutableMap.of("host", "localhost", "port", "6379")))); return feastProperties; } diff --git a/java/serving/src/test/java/feast/serving/it/TestUtils.java b/java/serving/src/test/java/feast/serving/it/TestUtils.java index fb88b2fb372..867fa4afb06 100644 --- a/java/serving/src/test/java/feast/serving/it/TestUtils.java +++ b/java/serving/src/test/java/feast/serving/it/TestUtils.java @@ -16,9 +16,13 @@ */ package feast.serving.it; -import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; 
+import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import feast.proto.serving.ServingAPIProto; +import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequest; import feast.proto.serving.ServingServiceGrpc; +import feast.proto.types.ValueProto; +import feast.serving.config.ApplicationProperties; import io.grpc.Channel; import io.grpc.ManagedChannelBuilder; import java.util.*; @@ -32,14 +36,55 @@ public static ServingServiceGrpc.ServingServiceBlockingStub getServingServiceStu return ServingServiceGrpc.newBlockingStub(secureChannel); } - public static GetOnlineFeaturesRequestV2 createOnlineFeatureRequest( - String projectName, - List featureReferences, - List entityRows) { - return GetOnlineFeaturesRequestV2.newBuilder() - .setProject(projectName) - .addAllFeatures(featureReferences) - .addAllEntityRows(entityRows) + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + List featureReferences, Map entityRows) { + return createOnlineFeatureRequest(featureReferences, entityRows, new HashMap<>()); + } + + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + List featureReferences, + Map entityRows, + Map requestContext) { + return GetOnlineFeaturesRequest.newBuilder() + .setFeatures(ServingAPIProto.FeatureList.newBuilder().addAllVal(featureReferences)) + .putAllEntities(entityRows) + .putAllRequestContext(requestContext) + .build(); + } + + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + String featureService, Map entityRows) { + return createOnlineFeatureRequest(featureService, entityRows, new HashMap<>()); + } + + public static GetOnlineFeaturesRequest createOnlineFeatureRequest( + String featureService, + Map entityRows, + Map requestContext) { + return GetOnlineFeaturesRequest.newBuilder() + .setFeatureService(featureService) + .putAllEntities(entityRows) + .putAllRequestContext(requestContext) .build(); } + + public static ApplicationProperties.FeastProperties 
createBasicFeastProperties( + String redisHost, Integer redisPort) { + final ApplicationProperties.FeastProperties feastProperties = + new ApplicationProperties.FeastProperties(); + feastProperties.setRegistry("src/test/resources/docker-compose/feast10/registry.db"); + feastProperties.setRegistryRefreshInterval(1); + + feastProperties.setActiveStore("online"); + feastProperties.setProject("feast_project"); + + feastProperties.setStores( + ImmutableList.of( + new ApplicationProperties.Store( + "online", + "REDIS", + ImmutableMap.of("host", redisHost, "port", redisPort.toString())))); + + return feastProperties; + } } diff --git a/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java b/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java new file mode 100644 index 00000000000..102d8515285 --- /dev/null +++ b/java/serving/src/test/java/feast/serving/it/TransformationServiceIT.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2022 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.serving.it; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import feast.proto.serving.ServingAPIProto; +import feast.proto.types.ValueProto; +import feast.serving.config.ApplicationProperties; +import feast.serving.util.DataGenerator; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +public class TransformationServiceIT extends ServingEnvironment { + @Override + ApplicationProperties.FeastProperties createFeastProperties() { + ApplicationProperties.FeastProperties feastProperties = + TestUtils.createBasicFeastProperties( + environment.getServiceHost("redis", 6379), environment.getServicePort("redis", 6379)); + feastProperties.setTransformationServiceEndpoint( + String.format( + "%s:%d", + environment.getServiceHost("feast", 8080), environment.getServicePort("feast", 8080))); + return feastProperties; + } + + private ServingAPIProto.GetOnlineFeaturesRequest buildOnlineRequest( + int driverId, boolean transformedFeaturesOnly) { + Map entityRows = + ImmutableMap.of( + "driver_id", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createInt64Value(driverId)) + .build()); + + Map requestContext = + ImmutableMap.of( + "val_to_add", + ValueProto.RepeatedValue.newBuilder().addVal(DataGenerator.createInt64Value(3)).build(), + "val_to_add_2", + ValueProto.RepeatedValue.newBuilder() + .addVal(DataGenerator.createInt64Value(5)) + .build()); + + List featureReferences = + Lists.newArrayList( + "transformed_conv_rate:conv_rate_plus_val1", + "transformed_conv_rate:conv_rate_plus_val2"); + + if (!transformedFeaturesOnly) { + featureReferences.add("driver_hourly_stats:conv_rate"); + } + + return TestUtils.createOnlineFeatureRequest(featureReferences, entityRows, requestContext); + } + + @Test + public void shouldCalculateOnDemandFeatures() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + 
servingStub.getOnlineFeatures(buildOnlineRequest(1005, false)); + + for (int featureIdx : List.of(0, 1, 2)) { + assertEquals( + List.of(ServingAPIProto.FieldStatus.PRESENT), + featureResponse.getResults(featureIdx).getStatusesList()); + } + + // conv_rate + assertEquals(0.5, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add (3.0) + assertEquals(3.5, featureResponse.getResults(1).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add_2 (5.0) + assertEquals(5.5, featureResponse.getResults(2).getValues(0).getDoubleVal(), 0.0001); + } + + @Test + public void shouldCorrectlyFetchDependantFeatures() { + ServingAPIProto.GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeatures(buildOnlineRequest(1005, true)); + + // conv_rate + val_to_add (3.0) + assertEquals(3.5, featureResponse.getResults(0).getValues(0).getDoubleVal(), 0.0001); + // conv_rate + val_to_add_2 (5.0) + assertEquals(5.5, featureResponse.getResults(1).getValues(0).getDoubleVal(), 0.0001); + } +} diff --git a/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java b/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java index c43e3218c7f..64d2e20c9b3 100644 --- a/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java +++ b/java/serving/src/test/java/feast/serving/service/OnlineServingServiceTest.java @@ -29,10 +29,8 @@ import feast.proto.core.FeatureProto; import feast.proto.core.FeatureViewProto; import feast.proto.serving.ServingAPIProto; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; +import feast.proto.serving.ServingAPIProto.FieldStatus; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldStatus; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; import feast.proto.types.ValueProto; import 
feast.serving.registry.Registry; import feast.serving.registry.RegistryRepository; @@ -42,8 +40,9 @@ import io.opentracing.Tracer; import io.opentracing.Tracer.SpanBuilder; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.stream.Collectors; import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentMatchers; @@ -62,6 +61,8 @@ public class OnlineServingServiceTest { List mockedFeatureRows; List featureSpecs; + Timestamp now = Timestamp.newBuilder().setSeconds(System.currentTimeMillis() / 1000).build(); + @Before public void setUp() { initMocks(this); @@ -71,56 +72,57 @@ public void setUp() { OnlineTransformationService onlineTransformationService = new OnlineTransformationService(transformationServiceEndpoint, registryRepo); onlineServingServiceV2 = - new OnlineServingServiceV2(retrieverV2, tracer, registryRepo, onlineTransformationService); + new OnlineServingServiceV2( + retrieverV2, tracer, registryRepo, onlineTransformationService, "feast_project"); mockedFeatureRows = new ArrayList<>(); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(), - Timestamp.newBuilder().setSeconds(100).build(), + now, createStrValue("1"))); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_2") .build(), - Timestamp.newBuilder().setSeconds(100).build(), + now, createStrValue("2"))); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(), - Timestamp.newBuilder().setSeconds(100).build(), + now, 
createStrValue("3"))); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_2") .build(), - Timestamp.newBuilder().setSeconds(100).build(), + now, createStrValue("4"))); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_3") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_3") .build(), - Timestamp.newBuilder().setSeconds(100).build(), + now, createStrValue("5"))); mockedFeatureRows.add( new ProtoFeature( ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(), - Timestamp.newBuilder().setSeconds(50).build(), + Timestamp.newBuilder().setSeconds(1).build(), createStrValue("6"))); featureSpecs = new ArrayList<>(); @@ -141,66 +143,63 @@ public void shouldReturnResponseWithValuesAndMetadataIfKeysPresent() { String projectName = "default"; ServingAPIProto.FeatureReferenceV2 featureReference1 = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(); ServingAPIProto.FeatureReferenceV2 featureReference2 = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_2") .build(); List featureReferences = List.of(featureReference1, featureReference2); - GetOnlineFeaturesRequestV2 request = getOnlineFeaturesRequestV2(projectName, featureReferences); + ServingAPIProto.GetOnlineFeaturesRequest request = getOnlineFeaturesRequest(featureReferences); - List> featureRows = + List> featureRows = List.of( - ImmutableMap.of( - 
mockedFeatureRows.get(0).getFeatureReference(), mockedFeatureRows.get(0), - mockedFeatureRows.get(1).getFeatureReference(), mockedFeatureRows.get(1)), - ImmutableMap.of( - mockedFeatureRows.get(2).getFeatureReference(), mockedFeatureRows.get(2), - mockedFeatureRows.get(3).getFeatureReference(), mockedFeatureRows.get(3))); + List.of(mockedFeatureRows.get(0), mockedFeatureRows.get(1)), + List.of(mockedFeatureRows.get(2), mockedFeatureRows.get(3))); - when(retrieverV2.getOnlineFeatures(any(), any(), any(), any())).thenReturn(featureRows); - when(registry.getFeatureViewSpec(any(), any())).thenReturn(getFeatureViewSpec()); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(0).getFeatureReference())) + when(retrieverV2.getOnlineFeatures(any(), any(), any())).thenReturn(featureRows); + when(registry.getFeatureViewSpec(any())).thenReturn(getFeatureViewSpec()); + when(registry.getFeatureSpec(mockedFeatureRows.get(0).getFeatureReference())) .thenReturn(featureSpecs.get(0)); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(1).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(1).getFeatureReference())) .thenReturn(featureSpecs.get(1)); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(2).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(2).getFeatureReference())) .thenReturn(featureSpecs.get(0)); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(3).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(3).getFeatureReference())) .thenReturn(featureSpecs.get(1)); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); GetOnlineFeaturesResponse expected = GetOnlineFeaturesResponse.newBuilder() - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(1)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("a")) - .putStatuses("entity2", 
FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createStrValue("1")) - .putStatuses("featuretable_1:feature_1", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_2", createStrValue("2")) - .putStatuses("featuretable_1:feature_2", FieldStatus.PRESENT) - .build()) - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(2)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("b")) - .putStatuses("entity2", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createStrValue("3")) - .putStatuses("featuretable_1:feature_1", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_2", createStrValue("4")) - .putStatuses("featuretable_1:feature_2", FieldStatus.PRESENT) - .build()) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("1")) + .addValues(createStrValue("3")) + .addStatuses(FieldStatus.PRESENT) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(now) + .addEventTimestamps(now)) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("2")) + .addValues(createStrValue("4")) + .addStatuses(FieldStatus.PRESENT) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(now) + .addEventTimestamps(now)) + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + ServingAPIProto.FeatureList.newBuilder() + .addVal("featureview_1:feature_1") + .addVal("featureview_1:feature_2"))) .build(); - GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); + ServingAPIProto.GetOnlineFeaturesResponse actual = + onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); } @@ -209,17 +208,17 @@ public void shouldReturnResponseWithUnsetValuesAndMetadataIfKeysNotPresent() { String projectName = "default"; ServingAPIProto.FeatureReferenceV2 featureReference1 = 
ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(); ServingAPIProto.FeatureReferenceV2 featureReference2 = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_2") .build(); List featureReferences = List.of(featureReference1, featureReference2); - GetOnlineFeaturesRequestV2 request = getOnlineFeaturesRequestV2(projectName, featureReferences); + ServingAPIProto.GetOnlineFeaturesRequest request = getOnlineFeaturesRequest(featureReferences); List entityKeyList1 = new ArrayList<>(); List entityKeyList2 = new ArrayList<>(); @@ -227,47 +226,44 @@ public void shouldReturnResponseWithUnsetValuesAndMetadataIfKeysNotPresent() { entityKeyList1.add(mockedFeatureRows.get(1)); entityKeyList2.add(mockedFeatureRows.get(4)); - List> featureRows = + List> featureRows = List.of( - ImmutableMap.of( - mockedFeatureRows.get(0).getFeatureReference(), mockedFeatureRows.get(0), - mockedFeatureRows.get(1).getFeatureReference(), mockedFeatureRows.get(1)), - ImmutableMap.of( - mockedFeatureRows.get(4).getFeatureReference(), mockedFeatureRows.get(4))); + List.of(mockedFeatureRows.get(0), mockedFeatureRows.get(1)), + Arrays.asList(null, mockedFeatureRows.get(4))); - when(retrieverV2.getOnlineFeatures(any(), any(), any(), any())).thenReturn(featureRows); - when(registry.getFeatureViewSpec(any(), any())).thenReturn(getFeatureViewSpec()); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(0).getFeatureReference())) + when(retrieverV2.getOnlineFeatures(any(), any(), any())).thenReturn(featureRows); + when(registry.getFeatureViewSpec(any())).thenReturn(getFeatureViewSpec()); + when(registry.getFeatureSpec(mockedFeatureRows.get(0).getFeatureReference())) .thenReturn(featureSpecs.get(0)); - when(registry.getFeatureSpec(projectName, 
mockedFeatureRows.get(1).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(1).getFeatureReference())) .thenReturn(featureSpecs.get(1)); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); GetOnlineFeaturesResponse expected = GetOnlineFeaturesResponse.newBuilder() - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(1)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("a")) - .putStatuses("entity2", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createStrValue("1")) - .putStatuses("featuretable_1:feature_1", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_2", createStrValue("2")) - .putStatuses("featuretable_1:feature_2", FieldStatus.PRESENT) - .build()) - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(2)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("b")) - .putStatuses("entity2", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createEmptyValue()) - .putStatuses("featuretable_1:feature_1", FieldStatus.NOT_FOUND) - .putFields("featuretable_1:feature_2", createEmptyValue()) - .putStatuses("featuretable_1:feature_2", FieldStatus.NOT_FOUND) - .build()) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("1")) + .addValues(createEmptyValue()) + .addStatuses(FieldStatus.PRESENT) + .addStatuses(FieldStatus.NOT_FOUND) + .addEventTimestamps(now) + .addEventTimestamps(Timestamp.newBuilder().build())) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("2")) + .addValues(createStrValue("5")) + .addStatuses(FieldStatus.PRESENT) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(now) + .addEventTimestamps(now)) + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + 
ServingAPIProto.FeatureList.newBuilder() + .addVal("featureview_1:feature_1") + .addVal("featureview_1:feature_2"))) .build(); GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); @@ -278,32 +274,28 @@ public void shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { String projectName = "default"; ServingAPIProto.FeatureReferenceV2 featureReference1 = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_1") .build(); ServingAPIProto.FeatureReferenceV2 featureReference2 = ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + .setFeatureViewName("featureview_1") + .setFeatureName("feature_2") .build(); List featureReferences = List.of(featureReference1, featureReference2); - GetOnlineFeaturesRequestV2 request = getOnlineFeaturesRequestV2(projectName, featureReferences); + ServingAPIProto.GetOnlineFeaturesRequest request = getOnlineFeaturesRequest(featureReferences); - List> featureRows = + List> featureRows = List.of( - ImmutableMap.of( - mockedFeatureRows.get(5).getFeatureReference(), mockedFeatureRows.get(5), - mockedFeatureRows.get(1).getFeatureReference(), mockedFeatureRows.get(1)), - ImmutableMap.of( - mockedFeatureRows.get(5).getFeatureReference(), mockedFeatureRows.get(5), - mockedFeatureRows.get(1).getFeatureReference(), mockedFeatureRows.get(1))); + List.of(mockedFeatureRows.get(5), mockedFeatureRows.get(1)), + List.of(mockedFeatureRows.get(5), mockedFeatureRows.get(1))); - when(retrieverV2.getOnlineFeatures(any(), any(), any(), any())).thenReturn(featureRows); - when(registry.getFeatureViewSpec(any(), any())) + when(retrieverV2.getOnlineFeatures(any(), any(), any())).thenReturn(featureRows); + when(registry.getFeatureViewSpec(any())) .thenReturn( FeatureViewProto.FeatureViewSpec.newBuilder() - 
.setName("featuretable_1") + .setName("featureview_1") .addEntities("entity1") .addEntities("entity2") .addFeatures( @@ -316,39 +308,39 @@ public void shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { .setName("feature_2") .setValueType(ValueProto.ValueType.Enum.STRING) .build()) - .setTtl(Duration.newBuilder().setSeconds(1)) + .setTtl(Duration.newBuilder().setSeconds(3600)) .build()); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(1).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(1).getFeatureReference())) .thenReturn(featureSpecs.get(1)); - when(registry.getFeatureSpec(projectName, mockedFeatureRows.get(5).getFeatureReference())) + when(registry.getFeatureSpec(mockedFeatureRows.get(5).getFeatureReference())) .thenReturn(featureSpecs.get(0)); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); GetOnlineFeaturesResponse expected = GetOnlineFeaturesResponse.newBuilder() - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(1)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("a")) - .putStatuses("entity2", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createStrValue("6")) - .putStatuses("featuretable_1:feature_1", FieldStatus.OUTSIDE_MAX_AGE) - .putFields("featuretable_1:feature_2", createStrValue("2")) - .putStatuses("featuretable_1:feature_2", FieldStatus.PRESENT) - .build()) - .addFieldValues( - FieldValues.newBuilder() - .putFields("entity1", createInt64Value(2)) - .putStatuses("entity1", FieldStatus.PRESENT) - .putFields("entity2", createStrValue("b")) - .putStatuses("entity2", FieldStatus.PRESENT) - .putFields("featuretable_1:feature_1", createStrValue("6")) - .putStatuses("featuretable_1:feature_1", FieldStatus.OUTSIDE_MAX_AGE) - .putFields("featuretable_1:feature_2", createStrValue("2")) - .putStatuses("featuretable_1:feature_2", FieldStatus.PRESENT) - .build()) + 
.addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("6")) + .addValues(createStrValue("6")) + .addStatuses(FieldStatus.OUTSIDE_MAX_AGE) + .addStatuses(FieldStatus.OUTSIDE_MAX_AGE) + .addEventTimestamps(Timestamp.newBuilder().setSeconds(1).build()) + .addEventTimestamps(Timestamp.newBuilder().setSeconds(1).build())) + .addResults( + GetOnlineFeaturesResponse.FeatureVector.newBuilder() + .addValues(createStrValue("2")) + .addValues(createStrValue("2")) + .addStatuses(FieldStatus.PRESENT) + .addStatuses(FieldStatus.PRESENT) + .addEventTimestamps(now) + .addEventTimestamps(now)) + .setMetadata( + ServingAPIProto.GetOnlineFeaturesResponseMetadata.newBuilder() + .setFeatureNames( + ServingAPIProto.FeatureList.newBuilder() + .addVal("featureview_1:feature_1") + .addVal("featureview_1:feature_2"))) .build(); GetOnlineFeaturesResponse actual = onlineServingServiceV2.getOnlineFeatures(request); assertThat(actual, equalTo(expected)); @@ -356,7 +348,7 @@ public void shouldReturnResponseWithValuesAndMetadataIfMaxAgeIsExceeded() { private FeatureViewProto.FeatureViewSpec getFeatureViewSpec() { return FeatureViewProto.FeatureViewSpec.newBuilder() - .setName("featuretable_1") + .setName("featureview_1") .addEntities("entity1") .addEntities("entity2") .addFeatures( @@ -373,31 +365,26 @@ private FeatureViewProto.FeatureViewSpec getFeatureViewSpec() { .build(); } - private GetOnlineFeaturesRequestV2 getOnlineFeaturesRequestV2( - String projectName, List featureReferences) { - return GetOnlineFeaturesRequestV2.newBuilder() - .setProject(projectName) - .addAllFeatures(featureReferences) - .addEntityRows( - GetOnlineFeaturesRequestV2.EntityRow.newBuilder() - .setTimestamp(Timestamp.newBuilder().setSeconds(100)) - .putFields("entity1", createInt64Value(1)) - .putFields("entity2", createStrValue("a"))) - .addEntityRows( - GetOnlineFeaturesRequestV2.EntityRow.newBuilder() - .setTimestamp(Timestamp.newBuilder().setSeconds(100)) - 
.putFields("entity1", createInt64Value(2)) - .putFields("entity2", createStrValue("b"))) - .addFeatures( - ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_1") - .build()) - .addFeatures( - ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretable_1") - .setName("feature_2") + private ServingAPIProto.GetOnlineFeaturesRequest getOnlineFeaturesRequest( + List featureReferences) { + return ServingAPIProto.GetOnlineFeaturesRequest.newBuilder() + .setFeatures( + ServingAPIProto.FeatureList.newBuilder() + .addAllVal( + featureReferences.stream() + .map(feast.common.models.Feature::getFeatureReference) + .collect(Collectors.toList())) .build()) + .putAllEntities( + ImmutableMap.of( + "entity1", + ValueProto.RepeatedValue.newBuilder() + .addAllVal(List.of(createInt64Value(1), createInt64Value(2))) + .build(), + "entity2", + ValueProto.RepeatedValue.newBuilder() + .addAllVal(List.of(createStrValue("a"), createStrValue("b"))) + .build())) .build(); } } diff --git a/java/serving/src/test/java/feast/serving/util/DataGenerator.java b/java/serving/src/test/java/feast/serving/util/DataGenerator.java index ab537fa6f9b..d53632d0d64 100644 --- a/java/serving/src/test/java/feast/serving/util/DataGenerator.java +++ b/java/serving/src/test/java/feast/serving/util/DataGenerator.java @@ -260,8 +260,8 @@ public static ValueProto.Value createInt64Value(long value) { public static ServingAPIProto.FeatureReferenceV2 createFeatureReference( String featureTableName, String featureName) { return ServingAPIProto.FeatureReferenceV2.newBuilder() - .setFeatureTable(featureTableName) - .setName(featureName) + .setFeatureViewName(featureTableName) + .setFeatureName(featureName) .build(); } diff --git a/java/serving/src/test/java/feast/serving/util/RequestHelperTest.java b/java/serving/src/test/java/feast/serving/util/RequestHelperTest.java index 140d46cd569..fc19dbb02e0 100644 --- 
a/java/serving/src/test/java/feast/serving/util/RequestHelperTest.java +++ b/java/serving/src/test/java/feast/serving/util/RequestHelperTest.java @@ -16,39 +16,40 @@ */ package feast.serving.util; -import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; -import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2; +import feast.proto.serving.ServingAPIProto; import org.junit.Test; public class RequestHelperTest { @Test(expected = IllegalArgumentException.class) public void shouldErrorIfEntityRowEmpty() { - FeatureReferenceV2 featureReference = - FeatureReferenceV2.newBuilder() - .setFeatureTable("featuretablename") - .setName("featurename") + + ServingAPIProto.GetOnlineFeaturesRequest getOnlineFeaturesRequest = + ServingAPIProto.GetOnlineFeaturesRequest.newBuilder() + .setFeatures( + ServingAPIProto.FeatureList.newBuilder().addVal("view:featurename").build()) .build(); - GetOnlineFeaturesRequestV2 getOnlineFeaturesRequestV2 = - GetOnlineFeaturesRequestV2.newBuilder().addFeatures(featureReference).build(); - RequestHelper.validateOnlineRequest(getOnlineFeaturesRequestV2); + + RequestHelper.validateOnlineRequest(getOnlineFeaturesRequest); } @Test(expected = IllegalArgumentException.class) public void shouldErrorIfFeatureReferenceTableEmpty() { - FeatureReferenceV2 featureReference = - FeatureReferenceV2.newBuilder().setName("featurename").build(); - GetOnlineFeaturesRequestV2 getOnlineFeaturesRequestV2 = - GetOnlineFeaturesRequestV2.newBuilder().addFeatures(featureReference).build(); - RequestHelper.validateOnlineRequest(getOnlineFeaturesRequestV2); + ServingAPIProto.GetOnlineFeaturesRequest getOnlineFeaturesRequest = + ServingAPIProto.GetOnlineFeaturesRequest.newBuilder() + .setFeatures(ServingAPIProto.FeatureList.newBuilder().addVal("featurename").build()) + .build(); + + RequestHelper.validateOnlineRequest(getOnlineFeaturesRequest); } @Test(expected = IllegalArgumentException.class) public void shouldErrorIfFeatureReferenceNameEmpty() { - 
FeatureReferenceV2 featureReference = - FeatureReferenceV2.newBuilder().setFeatureTable("featuretablename").build(); - GetOnlineFeaturesRequestV2 getOnlineFeaturesRequestV2 = - GetOnlineFeaturesRequestV2.newBuilder().addFeatures(featureReference).build(); - RequestHelper.validateOnlineRequest(getOnlineFeaturesRequestV2); + ServingAPIProto.GetOnlineFeaturesRequest getOnlineFeaturesRequest = + ServingAPIProto.GetOnlineFeaturesRequest.newBuilder() + .setFeatures(ServingAPIProto.FeatureList.newBuilder().addVal("view").build()) + .build(); + + RequestHelper.validateOnlineRequest(getOnlineFeaturesRequest); } } diff --git a/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml b/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml index 08a50233df1..13835e07d41 100644 --- a/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml +++ b/java/serving/src/test/resources/docker-compose/docker-compose-redis-it.yml @@ -2,11 +2,13 @@ version: '3' services: redis: - image: redis:5-alpine + image: redis:6.2 ports: - "6379:6379" - materialize: + feast: build: feast10 + ports: + - "8080:8080" links: - redis diff --git a/java/serving/src/test/resources/docker-compose/feast10/Dockerfile b/java/serving/src/test/resources/docker-compose/feast10/Dockerfile index bde9f11592f..df14bb592b4 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/Dockerfile +++ b/java/serving/src/test/resources/docker-compose/feast10/Dockerfile @@ -5,6 +5,11 @@ WORKDIR /usr/src/ COPY requirements.txt ./ RUN pip install --no-cache-dir -r requirements.txt +RUN git clone https://github.com/feast-dev/feast.git /root/feast +RUN cd /root/feast/sdk/python && pip install -e '.[redis]' + +WORKDIR /app COPY . . 
+EXPOSE 8080 -CMD [ "python", "./materialize.py" ] +CMD ["/bin/sh", "-c", "python materialize.py && feast serve_transformations --port 8080"] diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py new file mode 100644 index 00000000000..c7ed6c96193 --- /dev/null +++ b/java/serving/src/test/resources/docker-compose/feast10/definitions.py @@ -0,0 +1,97 @@ +import pandas as pd + +from google.protobuf.duration_pb2 import Duration + +from feast.value_type import ValueType +from feast.feature import Feature +from feast.feature_view import FeatureView +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.on_demand_feature_view import RequestDataSource, on_demand_feature_view +from feast import FileSource + + +file_path = "driver_stats.parquet" +driver_hourly_stats = FileSource( + path=file_path, + event_timestamp_column="event_timestamp", + created_timestamp_column="created", +) + +# Define an entity for the driver. You can think of entity as a primary key used to +# fetch features. +driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) + +# Our parquet files contain sample data that includes a driver_id column, timestamps and +# three feature column. Here we define a Feature View that will allow us to serve this +# data to our model online. 
+driver_hourly_stats_view = FeatureView( + name="driver_hourly_stats", + entities=["driver_id"], + ttl=Duration(seconds=86400 * 7), + features=[ + Feature(name="conv_rate", dtype=ValueType.DOUBLE), + Feature(name="acc_rate", dtype=ValueType.FLOAT), + Feature(name="avg_daily_trips", dtype=ValueType.INT64), + ], + online=True, + batch_source=driver_hourly_stats, + tags={}, +) + + +input_request = RequestDataSource( + name="vals_to_add", + schema={ + "val_to_add": ValueType.INT64, + "val_to_add_2": ValueType.INT64 + } +) + + +@on_demand_feature_view( + inputs={ + 'driver_hourly_stats': driver_hourly_stats_view, + 'vals_to_add': input_request + }, + features=[ + Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE), + Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE) + ] +) +def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df['conv_rate_plus_val1'] = (features_df['conv_rate'] + features_df['val_to_add']) + df['conv_rate_plus_val2'] = (features_df['conv_rate'] + features_df['val_to_add_2']) + return df + + +generated_data_source = FileSource( + path="benchmark_data.parquet", + event_timestamp_column="event_timestamp", +) + +entity = Entity( + name="entity", + value_type=ValueType.STRING, +) + +benchmark_feature_views = [ + FeatureView( + name=f"feature_view_{i}", + entities=["entity"], + ttl=Duration(seconds=86400), + features=[ + Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64) + for j in range(10) + ], + online=True, + batch_source=generated_data_source, + ) + for i in range(25) +] + +benchmark_feature_service = FeatureService( + name=f"benchmark_feature_service", + features=benchmark_feature_views, +) diff --git a/java/serving/src/test/resources/docker-compose/feast10/materialize.py b/java/serving/src/test/resources/docker-compose/feast10/materialize.py index c347728c68b..8389d8527bf 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/materialize.py +++ 
b/java/serving/src/test/resources/docker-compose/feast10/materialize.py @@ -1,12 +1,12 @@ -# This is an example feature definition file - import pandas as pd import numpy as np -from google.protobuf.duration_pb2 import Duration - from datetime import datetime, timedelta -from feast import Entity, Feature, FeatureView, FileSource, ValueType, FeatureService, FeatureStore +from feast import FeatureStore + +from definitions import driver_hourly_stats_view, driver, entity,\ + benchmark_feature_service, benchmark_feature_views, transformed_conv_rate + print("Running materialize.py") @@ -23,41 +23,39 @@ # some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(lambda days: timedelta(days=days)) -df.to_parquet("driver_stats.parquet") - -# Read data from parquet files. Parquet is convenient for local development mode. For +# Store data in parquet files. Parquet is convenient for local development mode. For # production, you can use your favorite DWH, such as BigQuery. See Feast documentation # for more info. -file_path = "driver_stats.parquet" -driver_hourly_stats = FileSource( - path=file_path, - event_timestamp_column="event_timestamp", - created_timestamp_column="created", -) - -# Define an entity for the driver. You can think of entity as a primary key used to -# fetch features. -driver = Entity(name="driver_id", value_type=ValueType.INT64, description="driver id",) - -# Our parquet files contain sample data that includes a driver_id column, timestamps and -# three feature column. Here we define a Feature View that will allow us to serve this -# data to our model online. 
-driver_hourly_stats_view = FeatureView( - name="driver_hourly_stats", - entities=["driver_id"], - ttl=Duration(seconds=86400 * 7), - features=[ - Feature(name="conv_rate", dtype=ValueType.DOUBLE), - Feature(name="acc_rate", dtype=ValueType.FLOAT), - Feature(name="avg_daily_trips", dtype=ValueType.INT64), - ], - online=True, - batch_source=driver_hourly_stats, - tags={}, -) +df.to_parquet("driver_stats.parquet") + + +# For Benchmarks +# Please read more in Feast RFC-031 +# (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) +# about this benchmark setup +def generate_data(num_rows: int, num_features: int, destination: str) -> pd.DataFrame: + features = [f"feature_{i}" for i in range(num_features)] + columns = ["entity", "event_timestamp"] + features + df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns) + df["event_timestamp"] = datetime.utcnow() + for column in features: + df[column] = np.random.randint(1, num_rows, num_rows) + + df["entity"] = "key-" + \ + pd.Series(np.arange(1, num_rows + 1)).astype(pd.StringDtype()) + + df.to_parquet(destination) + + +generate_data(10**3, 250, "benchmark_data.parquet") + fs = FeatureStore(".") -fs.apply([driver_hourly_stats_view, driver]) +fs.apply([driver_hourly_stats_view, + transformed_conv_rate, + driver, + entity, benchmark_feature_service, + *benchmark_feature_views]) now = datetime.now() fs.materialize(start, now) diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db index b9a19475af0..746934e3d0a 100644 Binary files a/java/serving/src/test/resources/docker-compose/feast10/registry.db and b/java/serving/src/test/resources/docker-compose/feast10/registry.db differ diff --git a/java/serving/src/test/resources/docker-compose/feast10/requirements.txt b/java/serving/src/test/resources/docker-compose/feast10/requirements.txt index 447f126392b..94e4771de2a 100644 --- 
a/java/serving/src/test/resources/docker-compose/feast10/requirements.txt +++ b/java/serving/src/test/resources/docker-compose/feast10/requirements.txt @@ -1,5 +1,3 @@ -feast[redis]>=0.13,<1 - # for source generation pyarrow==6.0.0 diff --git a/java/storage/api/pom.xml b/java/storage/api/pom.xml index 583bcd06406..90f656e281e 100644 --- a/java/storage/api/pom.xml +++ b/java/storage/api/pom.xml @@ -32,16 +32,10 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} - - - - - - com.google.auto.value auto-value-annotations diff --git a/java/storage/api/src/main/java/feast/storage/api/retriever/FeatureTableRequest.java b/java/storage/api/src/main/java/feast/storage/api/retriever/FeatureTableRequest.java index 6188a270c40..2f181e6de83 100644 --- a/java/storage/api/src/main/java/feast/storage/api/retriever/FeatureTableRequest.java +++ b/java/storage/api/src/main/java/feast/storage/api/retriever/FeatureTableRequest.java @@ -56,6 +56,7 @@ public Builder addFeatureReference(FeatureReferenceV2 featureReference) { public Map getFeatureRefsByName() { return getFeatureReferences().stream() .collect( - Collectors.toMap(FeatureReferenceV2::getName, featureReference -> featureReference)); + Collectors.toMap( + FeatureReferenceV2::getFeatureName, featureReference -> featureReference)); } } diff --git a/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java b/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java index db5db8b63c4..fde8ba7396d 100644 --- a/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java +++ b/java/storage/api/src/main/java/feast/storage/api/retriever/OnlineRetrieverV2.java @@ -17,6 +17,7 @@ package feast.storage.api.retriever; import feast.proto.serving.ServingAPIProto; +import feast.proto.types.ValueProto; import java.util.List; import java.util.Map; @@ -31,16 +32,14 @@ public interface OnlineRetrieverV2 { * Feature} returned should match the no. 
of given {@link * ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow}s * - * @param project name of project to request features from. * @param entityRows list of entity rows to request features for. * @param featureReferences specifies the FeatureTable to retrieve data from * @param entityNames name of entities * @return list of {@link Feature}s corresponding to data retrieved for each entity row from * FeatureTable specified in FeatureTable request. */ - List> getOnlineFeatures( - String project, - List entityRows, + List> getOnlineFeatures( + List> entityRows, List featureReferences, List entityNames); } diff --git a/java/storage/connectors/pom.xml b/java/storage/connectors/pom.xml index e896910e73d..11e32a154c2 100644 --- a/java/storage/connectors/pom.xml +++ b/java/storage/connectors/pom.xml @@ -41,7 +41,7 @@ dev.feast - datatypes-java + feast-datatypes ${project.version} diff --git a/java/storage/connectors/redis/pom.xml b/java/storage/connectors/redis/pom.xml index 7b0c944a66e..ce25f41da6c 100644 --- a/java/storage/connectors/redis/pom.xml +++ b/java/storage/connectors/redis/pom.xml @@ -48,6 +48,7 @@ com.google.guava guava + ${guava.version} @@ -61,6 +62,7 @@ com.github.kstyrc embedded-redis + 0.6 test @@ -68,12 +70,14 @@ org.hamcrest hamcrest-core test + ${hamcrest.version} org.hamcrest hamcrest-library test + ${hamcrest.version} @@ -93,7 +97,7 @@ org.slf4j slf4j-simple - 1.7.30 + 1.7.32 test diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java index fd0f0a56dc1..78b64fd141e 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisHashDecoder.java @@ -16,7 +16,6 @@ */ package feast.storage.connectors.redis.common; -import 
com.google.common.collect.Maps; import com.google.common.hash.Hashing; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Timestamp; @@ -35,13 +34,14 @@ public class RedisHashDecoder { * Converts all retrieved Redis Hash values based on EntityRows into {@link Feature} * * @param redisHashValues retrieved Redis Hash values based on EntityRows - * @param byteToFeatureReferenceMap map to decode bytes back to FeatureReference + * @param byteToFeatureIdxMap map to decode bytes back to FeatureReference * @param timestampPrefix timestamp prefix * @return Map of {@link ServingAPIProto.FeatureReferenceV2} to {@link Feature} */ - public static Map retrieveFeature( + public static List retrieveFeature( Map redisHashValues, - Map byteToFeatureReferenceMap, + Map byteToFeatureIdxMap, + List featureReferences, String timestampPrefix) { Map featureTableTimestampMap = redisHashValues.entrySet().stream() @@ -57,14 +57,11 @@ public static Map retrieveFeature( "Couldn't parse timestamp proto while pulling data from Redis"); } })); - Map results = - Maps.newHashMapWithExpectedSize(byteToFeatureReferenceMap.size()); + List results = new ArrayList<>(Collections.nCopies(featureReferences.size(), null)); for (Map.Entry entry : redisHashValues.entrySet()) { - ServingAPIProto.FeatureReferenceV2 featureReference = - byteToFeatureReferenceMap.get(ByteBuffer.wrap(entry.getKey())); - - if (featureReference == null) { + Integer featureIdx = byteToFeatureIdxMap.get(ByteBuffer.wrap(entry.getKey())); + if (featureIdx == null) { continue; } @@ -75,11 +72,11 @@ public static Map retrieveFeature( throw new RuntimeException( "Couldn't parse feature value proto while pulling data from Redis"); } - results.put( - featureReference, + results.set( + featureIdx, new ProtoFeature( - featureReference, - featureTableTimestampMap.get(featureReference.getFeatureTable()), + featureReferences.get(featureIdx), + 
featureTableTimestampMap.get(featureReferences.get(featureIdx).getFeatureViewName()), v)); } @@ -94,7 +91,7 @@ public static byte[] getTimestampRedisHashKeyBytes(String featureTable, String t public static byte[] getFeatureReferenceRedisHashKeyBytes( ServingAPIProto.FeatureReferenceV2 featureReference) { String delimitedFeatureReference = - featureReference.getFeatureTable() + ":" + featureReference.getName(); + featureReference.getFeatureViewName() + ":" + featureReference.getFeatureName(); return Hashing.murmur3_32() .hashString(delimitedFeatureReference, StandardCharsets.UTF_8) .asBytes(); diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java index 797dd522151..389ca0abfde 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/common/RedisKeyGenerator.java @@ -28,7 +28,7 @@ public class RedisKeyGenerator { public static List buildRedisKeys( - String project, List entityRows) { + String project, List> entityRows) { List redisKeys = entityRows.stream() .map(entityRow -> makeRedisKey(project, entityRow)) @@ -45,17 +45,16 @@ public static List buildRedisKeys( * @return {@link RedisProto.RedisKeyV2} */ private static RedisProto.RedisKeyV2 makeRedisKey( - String project, ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow entityRow) { + String project, Map entityRow) { RedisProto.RedisKeyV2.Builder builder = RedisProto.RedisKeyV2.newBuilder().setProject(project); - Map fieldsMap = entityRow.getFieldsMap(); - List entityNames = new ArrayList<>(new HashSet<>(fieldsMap.keySet())); + List entityNames = new ArrayList<>(new HashSet<>(entityRow.keySet())); // Sort entity names by alphabetical order entityNames.sort(String::compareTo); for (String entityName : 
entityNames) { builder.addEntityNames(entityName); - builder.addEntityValues(fieldsMap.get(entityName)); + builder.addEntityValues(entityRow.get(entityName)); } return builder.build(); } diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java index 922a09d3f55..3e9ab7e8ab5 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/EntityKeySerializerV2.java @@ -16,16 +16,14 @@ */ package feast.storage.connectors.redis.retriever; -import com.google.common.primitives.UnsignedBytes; import com.google.protobuf.ProtocolStringList; import feast.proto.storage.RedisProto; import feast.proto.types.ValueProto; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; +import java.util.*; +import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.Pair; // This is derived from @@ -48,70 +46,52 @@ public byte[] serialize(RedisProto.RedisKeyV2 entityKey) { } tuples.sort(Comparator.comparing(Pair::getLeft)); - ByteBuffer stringBytes = ByteBuffer.allocate(Integer.BYTES); - stringBytes.order(ByteOrder.LITTLE_ENDIAN); - stringBytes.putInt(ValueProto.ValueType.Enum.STRING.getNumber()); - for (Pair pair : tuples) { - for (final byte b : stringBytes.array()) { - buffer.add(b); - } - for (final byte b : pair.getLeft().getBytes(StandardCharsets.UTF_8)) { - buffer.add(b); - } + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.STRING.getNumber())); + buffer.addAll(encodeString(pair.getLeft())); } for (Pair pair : tuples) { final ValueProto.Value val = pair.getRight(); switch (val.getValCase()) { case 
STRING_VAL: - buffer.add(UnsignedBytes.checkedCast(ValueProto.ValueType.Enum.STRING.getNumber())); - buffer.add( - UnsignedBytes.checkedCast( - val.getStringVal().getBytes(StandardCharsets.UTF_8).length)); - for (final byte b : val.getStringVal().getBytes(StandardCharsets.UTF_8)) { - buffer.add(b); - } + String stringVal = val.getStringVal(); + + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.STRING.getNumber())); + buffer.addAll(encodeInteger(stringVal.length())); + buffer.addAll(encodeString(stringVal)); + break; case BYTES_VAL: - buffer.add(UnsignedBytes.checkedCast(ValueProto.ValueType.Enum.BYTES.getNumber())); - for (final byte b : val.getBytesVal().toByteArray()) { - buffer.add(b); - } + byte[] bytes = val.getBytesVal().toByteArray(); + + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.BYTES.getNumber())); + buffer.addAll(encodeInteger(bytes.length)); + buffer.addAll(encodeBytes(bytes)); + break; case INT32_VAL: - ByteBuffer int32ByteBuffer = - ByteBuffer.allocate(Integer.BYTES + Integer.BYTES + Integer.BYTES); - int32ByteBuffer.order(ByteOrder.LITTLE_ENDIAN); - int32ByteBuffer.putInt(ValueProto.ValueType.Enum.INT32.getNumber()); - int32ByteBuffer.putInt(Integer.BYTES); - int32ByteBuffer.putInt(val.getInt32Val()); - for (final byte b : int32ByteBuffer.array()) { - buffer.add(b); - } + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT32.getNumber())); + buffer.addAll(encodeInteger(Integer.BYTES)); + buffer.addAll(encodeInteger(val.getInt32Val())); + break; case INT64_VAL: - ByteBuffer int64ByteBuffer = - ByteBuffer.allocate(Integer.BYTES + Integer.BYTES + Integer.BYTES); - int64ByteBuffer.order(ByteOrder.LITTLE_ENDIAN); - int64ByteBuffer.putInt(ValueProto.ValueType.Enum.INT64.getNumber()); - int64ByteBuffer.putInt(Integer.BYTES); + buffer.addAll(encodeInteger(ValueProto.ValueType.Enum.INT64.getNumber())); + buffer.addAll(encodeInteger(Integer.BYTES)); /* This is super dumb - but in 
https://github.com/feast-dev/feast/blob/dcae1606f53028ce5413567fb8b66f92cfef0f8e/sdk/python/feast/infra/key_encoding_utils.py#L9 we use `struct.pack(" encodeBytes(byte[] toByteArray) { + return Arrays.asList(ArrayUtils.toObject(toByteArray)); + } + + private List encodeInteger(Integer value) { + ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(value); + + return Arrays.asList(ArrayUtils.toObject(buffer.array())); + } + + private List encodeString(String value) { + byte[] stringBytes = value.getBytes(StandardCharsets.UTF_8); + return encodeBytes(stringBytes); + } } diff --git a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/OnlineRetriever.java b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/OnlineRetriever.java index ab03049b9fb..a71812e875e 100644 --- a/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/OnlineRetriever.java +++ b/java/storage/connectors/redis/src/main/java/feast/storage/connectors/redis/retriever/OnlineRetriever.java @@ -19,6 +19,7 @@ import com.google.common.collect.Lists; import feast.proto.serving.ServingAPIProto; import feast.proto.storage.RedisProto; +import feast.proto.types.ValueProto; import feast.storage.api.retriever.Feature; import feast.storage.api.retriever.OnlineRetrieverV2; import feast.storage.connectors.redis.common.RedisHashDecoder; @@ -38,52 +39,52 @@ public class OnlineRetriever implements OnlineRetrieverV2 { private static final String timestampPrefix = "_ts"; private final RedisClientAdapter redisClientAdapter; private final EntityKeySerializer keySerializer; + private final String project; // Number of fields in request to Redis which requires using HGETALL instead of HMGET public static final int HGETALL_NUMBER_OF_FIELDS_THRESHOLD = 50; - public OnlineRetriever(RedisClientAdapter redisClientAdapter, EntityKeySerializer keySerializer) { + public 
OnlineRetriever( + String project, RedisClientAdapter redisClientAdapter, EntityKeySerializer keySerializer) { + this.project = project; this.redisClientAdapter = redisClientAdapter; this.keySerializer = keySerializer; } @Override - public List> getOnlineFeatures( - String project, - List entityRows, + public List> getOnlineFeatures( + List> entityRows, List featureReferences, List entityNames) { - List redisKeys = RedisKeyGenerator.buildRedisKeys(project, entityRows); + List redisKeys = + RedisKeyGenerator.buildRedisKeys(this.project, entityRows); return getFeaturesFromRedis(redisKeys, featureReferences); } - private List> getFeaturesFromRedis( + private List> getFeaturesFromRedis( List redisKeys, List featureReferences) { - List> features = new ArrayList<>(); - // To decode bytes back to Feature Reference - Map byteToFeatureReferenceMap = new HashMap<>(); + // To decode bytes back to Feature + Map byteToFeatureIdxMap = new HashMap<>(); // Serialize using proto List binaryRedisKeys = redisKeys.stream().map(this.keySerializer::serialize).collect(Collectors.toList()); List retrieveFields = new ArrayList<>(); - featureReferences.stream() - .forEach( - featureReference -> { - - // eg. murmur() - byte[] featureReferenceBytes = - RedisHashDecoder.getFeatureReferenceRedisHashKeyBytes(featureReference); - retrieveFields.add(featureReferenceBytes); - byteToFeatureReferenceMap.put( - ByteBuffer.wrap(featureReferenceBytes), featureReference); - }); + for (int idx = 0; + idx < featureReferences.size(); + idx++) { // eg. 
murmur() + byte[] featureReferenceBytes = + RedisHashDecoder.getFeatureReferenceRedisHashKeyBytes(featureReferences.get(idx)); + retrieveFields.add(featureReferenceBytes); + + byteToFeatureIdxMap.put(ByteBuffer.wrap(featureReferenceBytes), idx); + } featureReferences.stream() - .map(ServingAPIProto.FeatureReferenceV2::getFeatureTable) + .map(ServingAPIProto.FeatureReferenceV2::getFeatureViewName) .distinct() .forEach( table -> { @@ -121,12 +122,12 @@ private List> getFeaturesFromRe } } - List> results = - Lists.newArrayListWithExpectedSize(futures.size()); + List> results = Lists.newArrayListWithExpectedSize(futures.size()); for (Future> f : futures) { try { results.add( - RedisHashDecoder.retrieveFeature(f.get(), byteToFeatureReferenceMap, timestampPrefix)); + RedisHashDecoder.retrieveFeature( + f.get(), byteToFeatureIdxMap, featureReferences, timestampPrefix)); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException("Unexpected error when pulling data from Redis"); } diff --git a/protos/feast/core/DataSource.proto b/protos/feast/core/DataSource.proto index ee5c6939d79..41bba6443fd 100644 --- a/protos/feast/core/DataSource.proto +++ b/protos/feast/core/DataSource.proto @@ -32,19 +32,22 @@ message DataSource { reserved 6 to 10; // Type of Data Source. + // Next available id: 9 enum SourceType { INVALID = 0; BATCH_FILE = 1; + BATCH_SNOWFLAKE = 8; BATCH_BIGQUERY = 2; + BATCH_REDSHIFT = 5; STREAM_KAFKA = 3; STREAM_KINESIS = 4; - BATCH_REDSHIFT = 5; CUSTOM_SOURCE = 6; REQUEST_SOURCE = 7; + } SourceType type = 1; - // Defines mapping between fields in the sourced data + // Defines mapping between fields in the sourced data // and fields in parent FeatureTable. 
map field_mapping = 2; @@ -128,6 +131,22 @@ message DataSource { string schema = 3; } + // Defines options for DataSource that sources features from a Snowflake Query + message SnowflakeOptions { + // Snowflake table name + string table = 1; + + // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective + // entity columns + string query = 2; + + // Snowflake schema name + string schema = 3; + + // Snowflake schema name + string database = 4; + } + // Defines configuration for custom third-party data sources. message CustomSourceOptions { // Serialized configuration information for the data source. The implementer of the custom data source is @@ -153,5 +172,6 @@ message DataSource { RedshiftOptions redshift_options = 15; RequestDataOptions request_data_options = 18; CustomSourceOptions custom_options = 16; + SnowflakeOptions snowflake_options = 19; } } diff --git a/protos/feast/core/OnDemandFeatureView.proto b/protos/feast/core/OnDemandFeatureView.proto index e1169416973..58feff5bfdb 100644 --- a/protos/feast/core/OnDemandFeatureView.proto +++ b/protos/feast/core/OnDemandFeatureView.proto @@ -22,13 +22,16 @@ option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; option java_outer_classname = "OnDemandFeatureViewProto"; option java_package = "feast.proto.core"; +import "google/protobuf/timestamp.proto"; import "feast/core/FeatureView.proto"; +import "feast/core/FeatureViewProjection.proto"; import "feast/core/Feature.proto"; import "feast/core/DataSource.proto"; message OnDemandFeatureView { // User-specified specifications of this feature view. OnDemandFeatureViewSpec spec = 1; + OnDemandFeatureViewMeta meta = 2; } message OnDemandFeatureViewSpec { @@ -41,15 +44,26 @@ message OnDemandFeatureViewSpec { // List of features specifications for each feature defined with this feature view. 
repeated FeatureSpecV2 features = 3; - // List of features specifications for each feature defined with this feature view. + // Map of inputs for this feature view. map inputs = 4; UserDefinedFunction user_defined_function = 5; + + +} + +message OnDemandFeatureViewMeta { + // Time where this Feature View is created + google.protobuf.Timestamp created_timestamp = 1; + + // Time where this Feature View is last updated + google.protobuf.Timestamp last_updated_timestamp = 2; } message OnDemandInput { oneof input { FeatureView feature_view = 1; + FeatureViewProjection feature_view_projection = 3; DataSource request_data_source = 2; } } diff --git a/protos/feast/core/Registry.proto b/protos/feast/core/Registry.proto index 035e87a49f8..3deeb972385 100644 --- a/protos/feast/core/Registry.proto +++ b/protos/feast/core/Registry.proto @@ -25,8 +25,10 @@ import "feast/core/Entity.proto"; import "feast/core/FeatureService.proto"; import "feast/core/FeatureTable.proto"; import "feast/core/FeatureView.proto"; +import "feast/core/InfraObject.proto"; import "feast/core/OnDemandFeatureView.proto"; import "feast/core/RequestFeatureView.proto"; +import "feast/core/SavedDataset.proto"; import "google/protobuf/timestamp.proto"; message Registry { @@ -36,6 +38,8 @@ message Registry { repeated OnDemandFeatureView on_demand_feature_views = 8; repeated RequestFeatureView request_feature_views = 9; repeated FeatureService feature_services = 7; + repeated SavedDataset saved_datasets = 11; + Infra infra = 10; string registry_schema_version = 3; // to support migrations; incremented when schema is changed string version_id = 4; // version id, random string generated on each update of the data; now used only for debugging purposes diff --git a/protos/feast/core/SavedDataset.proto b/protos/feast/core/SavedDataset.proto new file mode 100644 index 00000000000..ebd2e56d350 --- /dev/null +++ b/protos/feast/core/SavedDataset.proto @@ -0,0 +1,77 @@ +// +// Copyright 2021 The Feast Authors +// +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + + +syntax = "proto3"; + +package feast.core; +option java_package = "feast.proto.core"; +option java_outer_classname = "SavedDatasetProto"; +option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; + +import "google/protobuf/timestamp.proto"; +import "feast/core/FeatureViewProjection.proto"; +import "feast/core/DataSource.proto"; + +message SavedDatasetSpec { + // Name of the dataset. Must be unique since it's possible to overwrite dataset by name + string name = 1; + + // Name of Feast project that this Dataset belongs to. 
+ string project = 2; + + // list of feature references with format ":" + repeated string features = 3; + + // entity columns + request columns from all feature views used during retrieval + repeated string join_keys = 4; + + // Whether full feature names are used in stored data + bool full_feature_names = 5; + + SavedDatasetStorage storage = 6; + + // User defined metadata + map tags = 7; +} + +message SavedDatasetStorage { + oneof kind { + DataSource.FileOptions file_storage = 4; + DataSource.BigQueryOptions bigquery_storage = 5; + DataSource.RedshiftOptions redshift_storage = 6; + DataSource.SnowflakeOptions snowflake_storage = 7; + } +} + +message SavedDatasetMeta { + // Time when this saved dataset is created + google.protobuf.Timestamp created_timestamp = 1; + + // Time when this saved dataset is last updated + google.protobuf.Timestamp last_updated_timestamp = 2; + + // Min timestamp in the dataset (needed for retrieval) + google.protobuf.Timestamp min_event_timestamp = 3; + + // Max timestamp in the dataset (needed for retrieval) + google.protobuf.Timestamp max_event_timestamp = 4; +} + +message SavedDataset { + SavedDatasetSpec spec = 1; + SavedDatasetMeta meta = 2; +} diff --git a/protos/feast/core/ValidationProfile.proto b/protos/feast/core/ValidationProfile.proto new file mode 100644 index 00000000000..31c4e150a07 --- /dev/null +++ b/protos/feast/core/ValidationProfile.proto @@ -0,0 +1,48 @@ +// +// Copyright 2021 The Feast Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + + +syntax = "proto3"; + +package feast.core; +option java_package = "feast.proto.core"; +option java_outer_classname = "ValidationProfile"; +option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core"; + +import "google/protobuf/timestamp.proto"; +import "feast/core/SavedDataset.proto"; + +message GEValidationProfiler { + message UserDefinedProfiler { + // The python-syntax function body (serialized by dill) + bytes body = 1; + } + + UserDefinedProfiler profiler = 1; +} + +message GEValidationProfile { + // JSON-serialized ExpectationSuite object + bytes expectation_suite = 1; +} + +message ValidationReference { + SavedDataset dataset = 1; + + oneof profiler { + GEValidationProfiler ge_profiler = 2; + } +} diff --git a/protos/feast/serving/ServingService.proto b/protos/feast/serving/ServingService.proto index e37ecbbdde4..6c551a97baf 100644 --- a/protos/feast/serving/ServingService.proto +++ b/protos/feast/serving/ServingService.proto @@ -29,8 +29,8 @@ service ServingService { // Get information about this Feast serving. rpc GetFeastServingInfo (GetFeastServingInfoRequest) returns (GetFeastServingInfoResponse); - // Get online features (v2) synchronously. - rpc GetOnlineFeaturesV2 (GetOnlineFeaturesRequestV2) returns (GetOnlineFeaturesResponse); + // Get online features synchronously. + rpc GetOnlineFeatures (GetOnlineFeaturesRequest) returns (GetOnlineFeaturesResponse); } message GetFeastServingInfoRequest {} @@ -38,24 +38,17 @@ message GetFeastServingInfoRequest {} message GetFeastServingInfoResponse { // Feast version of this serving deployment. string version = 1; - - // Type of serving deployment, either ONLINE or BATCH. Different store types support different - // feature retrieval methods. - FeastServingType type = 2; - - // Note: Batch specific options start from 10. - // Staging location for this serving store, if any. 
- string job_staging_location = 10; } message FeatureReferenceV2 { - // Name of the Feature Table to retrieve the feature from. - string feature_table = 1; + // Name of the Feature View to retrieve the feature from. + string feature_view_name = 1; // Name of the Feature to retrieve the feature from. - string name = 2; + string feature_name = 2; } +// ToDo (oleksii): remove this message (since it's not used) and move EntityRow on package level message GetOnlineFeaturesRequestV2 { // List of features that are being retrieved repeated FeatureReferenceV2 features = 4; @@ -94,48 +87,48 @@ message GetOnlineFeaturesRequest { // A map of entity name -> list of values map entities = 3; bool full_feature_names = 4; + + // Context for OnDemand Feature Transformation + // (was moved to dedicated parameter to avoid unnecessary separation logic on serving side) + // A map of variable name -> list of values + map request_context = 5; } message GetOnlineFeaturesResponse { - // Feature values retrieved from feast. - repeated FieldValues field_values = 1; - - message FieldValues { - // Map of feature or entity name to feature/entity values. - // Timestamps are not returned in this response. - map fields = 1; - // Map of feature or entity name to feature/entity statuses/metadata. - map statuses = 2; - } - - enum FieldStatus { - // Status is unset for this field. - INVALID = 0; - - // Field value is present for this field and age is within max age. - PRESENT = 1; - - // Values could be found for entity key and age is within max age, but - // this field value is assigned a value on ingestion into feast. - NULL_VALUE = 2; - - // Entity key did not return any values as they do not exist in Feast. - // This could suggest that the feature values have not yet been ingested - // into feast or the ingestion failed. - NOT_FOUND = 3; - - // Values could be found for entity key, but field values are outside the maximum - // allowable range. 
- OUTSIDE_MAX_AGE = 4; + GetOnlineFeaturesResponseMetadata metadata = 1; + + // Length of "results" array should match length of requested features. + // We also preserve the same order of features here as in metadata.feature_names + repeated FeatureVector results = 2; + + message FeatureVector { + repeated feast.types.Value values = 1; + repeated FieldStatus statuses = 2; + repeated google.protobuf.Timestamp event_timestamps = 3; } } -enum FeastServingType { - FEAST_SERVING_TYPE_INVALID = 0; - // Online serving receives entity data directly and synchronously and will - // respond immediately. - FEAST_SERVING_TYPE_ONLINE = 1; - // Batch serving receives entity data asynchronously and orchestrates the - // retrieval through a staging location. - FEAST_SERVING_TYPE_BATCH = 2; +message GetOnlineFeaturesResponseMetadata { + FeatureList feature_names = 1; +} + +enum FieldStatus { + // Status is unset for this field. + INVALID = 0; + + // Field value is present for this field and age is within max age. + PRESENT = 1; + + // Values could be found for entity key and age is within max age, but + // this field value is assigned a value on ingestion into feast. + NULL_VALUE = 2; + + // Entity key did not return any values as they do not exist in Feast. + // This could suggest that the feature values have not yet been ingested + // into feast or the ingestion failed. + NOT_FOUND = 3; + + // Values could be found for entity key, but field values are outside the maximum + // allowable range. 
+ OUTSIDE_MAX_AGE = 4; } diff --git a/sdk/go/client.go b/sdk/go/client.go index 4deb0a789cc..c7251e33195 100644 --- a/sdk/go/client.go +++ b/sdk/go/client.go @@ -110,7 +110,7 @@ func (fc *GrpcClient) GetOnlineFeatures(ctx context.Context, req *OnlineFeatures if err != nil { return nil, err } - resp, err := fc.cli.GetOnlineFeaturesV2(ctx, featuresRequest) + resp, err := fc.cli.GetOnlineFeatures(ctx, featuresRequest) // collect unqiue entity refs from entity rows entityRefs := make(map[string]struct{}) diff --git a/sdk/go/client_test.go b/sdk/go/client_test.go index a94a577e84c..cb15f66654b 100644 --- a/sdk/go/client_test.go +++ b/sdk/go/client_test.go @@ -34,18 +34,25 @@ func TestGetOnlineFeatures(t *testing.T) { }, want: OnlineFeaturesResponse{ RawResponse: &serving.GetOnlineFeaturesResponse{ - FieldValues: []*serving.GetOnlineFeaturesResponse_FieldValues{ + Results: []*serving.GetOnlineFeaturesResponse_FeatureVector{ { - Fields: map[string]*types.Value{ - "driver:rating": Int64Val(1), - "driver:null_value": {}, + Values: []*types.Value{Int64Val(1)}, + Statuses: []serving.FieldStatus{ + serving.FieldStatus_PRESENT, }, - Statuses: map[string]serving.GetOnlineFeaturesResponse_FieldStatus{ - "driver:rating": serving.GetOnlineFeaturesResponse_PRESENT, - "driver:null_value": serving.GetOnlineFeaturesResponse_NULL_VALUE, + }, + { + Values: []*types.Value{{}}, + Statuses: []serving.FieldStatus{ + serving.FieldStatus_NULL_VALUE, }, }, }, + Metadata: &serving.GetOnlineFeaturesResponseMetadata{ + FeatureNames: &serving.FeatureList{ + Val: []string{"driver:rating", "driver:null_value"}, + }, + }, }, }, }, @@ -60,7 +67,7 @@ func TestGetOnlineFeatures(t *testing.T) { ctx := context.Background() rawRequest, _ := tc.req.buildRequest() resp := tc.want.RawResponse - cli.EXPECT().GetOnlineFeaturesV2(ctx, rawRequest).Return(resp, nil).Times(1) + cli.EXPECT().GetOnlineFeatures(ctx, rawRequest).Return(resp, nil).Times(1) client := &GrpcClient{ cli: cli, diff --git 
a/sdk/go/mocks/serving_mock.go b/sdk/go/mocks/serving_mock.go index 00d2e768ef8..038d49f5e53 100644 --- a/sdk/go/mocks/serving_mock.go +++ b/sdk/go/mocks/serving_mock.go @@ -57,21 +57,21 @@ func (mr *MockServingServiceClientMockRecorder) GetFeastServingInfo(arg0, arg1 i } // GetOnlineFeaturesV2 mocks base method -func (m *MockServingServiceClient) GetOnlineFeaturesV2(arg0 context.Context, arg1 *serving.GetOnlineFeaturesRequestV2, arg2 ...grpc.CallOption) (*serving.GetOnlineFeaturesResponse, error) { +func (m *MockServingServiceClient) GetOnlineFeatures(arg0 context.Context, arg1 *serving.GetOnlineFeaturesRequest, arg2 ...grpc.CallOption) (*serving.GetOnlineFeaturesResponse, error) { m.ctrl.T.Helper() varargs := []interface{}{arg0, arg1} for _, a := range arg2 { varargs = append(varargs, a) } - ret := m.ctrl.Call(m, "GetOnlineFeaturesV2", varargs...) + ret := m.ctrl.Call(m, "GetOnlineFeatures", varargs...) ret0, _ := ret[0].(*serving.GetOnlineFeaturesResponse) ret1, _ := ret[1].(error) return ret0, ret1 } // GetOnlineFeaturesV2 indicates an expected call of GetOnlineFeaturesV2 -func (mr *MockServingServiceClientMockRecorder) GetOnlineFeaturesV2(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { +func (mr *MockServingServiceClientMockRecorder) GetOnlineFeatures(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() varargs := append([]interface{}{arg0, arg1}, arg2...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetOnlineFeaturesV2", reflect.TypeOf((*MockServingServiceClient)(nil).GetOnlineFeaturesV2), varargs...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetOnlineFeatures", reflect.TypeOf((*MockServingServiceClient)(nil).GetOnlineFeatures), varargs...) 
} diff --git a/sdk/go/protos/feast/core/DataFormat.pb.go b/sdk/go/protos/feast/core/DataFormat.pb.go index 13c6cdda989..64c4ec80714 100644 --- a/sdk/go/protos/feast/core/DataFormat.pb.go +++ b/sdk/go/protos/feast/core/DataFormat.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/DataFormat.proto package core diff --git a/sdk/go/protos/feast/core/DataSource.pb.go b/sdk/go/protos/feast/core/DataSource.pb.go index 8af638a834f..d0d42c66dea 100644 --- a/sdk/go/protos/feast/core/DataSource.pb.go +++ b/sdk/go/protos/feast/core/DataSource.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/DataSource.proto package core @@ -38,17 +38,19 @@ const ( ) // Type of Data Source. +// Next available id: 9 type DataSource_SourceType int32 const ( - DataSource_INVALID DataSource_SourceType = 0 - DataSource_BATCH_FILE DataSource_SourceType = 1 - DataSource_BATCH_BIGQUERY DataSource_SourceType = 2 - DataSource_STREAM_KAFKA DataSource_SourceType = 3 - DataSource_STREAM_KINESIS DataSource_SourceType = 4 - DataSource_BATCH_REDSHIFT DataSource_SourceType = 5 - DataSource_CUSTOM_SOURCE DataSource_SourceType = 6 - DataSource_REQUEST_SOURCE DataSource_SourceType = 7 + DataSource_INVALID DataSource_SourceType = 0 + DataSource_BATCH_FILE DataSource_SourceType = 1 + DataSource_BATCH_SNOWFLAKE DataSource_SourceType = 8 + DataSource_BATCH_BIGQUERY DataSource_SourceType = 2 + DataSource_BATCH_REDSHIFT DataSource_SourceType = 5 + DataSource_STREAM_KAFKA DataSource_SourceType = 3 + DataSource_STREAM_KINESIS DataSource_SourceType = 4 + DataSource_CUSTOM_SOURCE DataSource_SourceType = 6 + DataSource_REQUEST_SOURCE DataSource_SourceType = 7 ) // Enum value maps for DataSource_SourceType. 
@@ -56,22 +58,24 @@ var ( DataSource_SourceType_name = map[int32]string{ 0: "INVALID", 1: "BATCH_FILE", + 8: "BATCH_SNOWFLAKE", 2: "BATCH_BIGQUERY", + 5: "BATCH_REDSHIFT", 3: "STREAM_KAFKA", 4: "STREAM_KINESIS", - 5: "BATCH_REDSHIFT", 6: "CUSTOM_SOURCE", 7: "REQUEST_SOURCE", } DataSource_SourceType_value = map[string]int32{ - "INVALID": 0, - "BATCH_FILE": 1, - "BATCH_BIGQUERY": 2, - "STREAM_KAFKA": 3, - "STREAM_KINESIS": 4, - "BATCH_REDSHIFT": 5, - "CUSTOM_SOURCE": 6, - "REQUEST_SOURCE": 7, + "INVALID": 0, + "BATCH_FILE": 1, + "BATCH_SNOWFLAKE": 8, + "BATCH_BIGQUERY": 2, + "BATCH_REDSHIFT": 5, + "STREAM_KAFKA": 3, + "STREAM_KINESIS": 4, + "CUSTOM_SOURCE": 6, + "REQUEST_SOURCE": 7, } ) @@ -132,6 +136,7 @@ type DataSource struct { // *DataSource_RedshiftOptions_ // *DataSource_RequestDataOptions_ // *DataSource_CustomOptions + // *DataSource_SnowflakeOptions_ Options isDataSource_Options `protobuf_oneof:"options"` } @@ -265,6 +270,13 @@ func (x *DataSource) GetCustomOptions() *DataSource_CustomSourceOptions { return nil } +func (x *DataSource) GetSnowflakeOptions() *DataSource_SnowflakeOptions { + if x, ok := x.GetOptions().(*DataSource_SnowflakeOptions_); ok { + return x.SnowflakeOptions + } + return nil +} + type isDataSource_Options interface { isDataSource_Options() } @@ -297,6 +309,10 @@ type DataSource_CustomOptions struct { CustomOptions *DataSource_CustomSourceOptions `protobuf:"bytes,16,opt,name=custom_options,json=customOptions,proto3,oneof"` } +type DataSource_SnowflakeOptions_ struct { + SnowflakeOptions *DataSource_SnowflakeOptions `protobuf:"bytes,19,opt,name=snowflake_options,json=snowflakeOptions,proto3,oneof"` +} + func (*DataSource_FileOptions_) isDataSource_Options() {} func (*DataSource_BigqueryOptions) isDataSource_Options() {} @@ -311,6 +327,8 @@ func (*DataSource_RequestDataOptions_) isDataSource_Options() {} func (*DataSource_CustomOptions) isDataSource_Options() {} +func (*DataSource_SnowflakeOptions_) isDataSource_Options() {} + // Defines 
options for DataSource that sources features from a file type DataSource_FileOptions struct { state protoimpl.MessageState @@ -646,6 +664,83 @@ func (x *DataSource_RedshiftOptions) GetSchema() string { return "" } +// Defines options for DataSource that sources features from a Snowflake Query +type DataSource_SnowflakeOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Snowflake table name + Table string `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"` + // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective + // entity columns + Query string `protobuf:"bytes,2,opt,name=query,proto3" json:"query,omitempty"` + // Snowflake schema name + Schema string `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"` + // Snowflake schema name + Database string `protobuf:"bytes,4,opt,name=database,proto3" json:"database,omitempty"` +} + +func (x *DataSource_SnowflakeOptions) Reset() { + *x = DataSource_SnowflakeOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_feast_core_DataSource_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DataSource_SnowflakeOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DataSource_SnowflakeOptions) ProtoMessage() {} + +func (x *DataSource_SnowflakeOptions) ProtoReflect() protoreflect.Message { + mi := &file_feast_core_DataSource_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DataSource_SnowflakeOptions.ProtoReflect.Descriptor instead. 
+func (*DataSource_SnowflakeOptions) Descriptor() ([]byte, []int) { + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 6} +} + +func (x *DataSource_SnowflakeOptions) GetTable() string { + if x != nil { + return x.Table + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetQuery() string { + if x != nil { + return x.Query + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetSchema() string { + if x != nil { + return x.Schema + } + return "" +} + +func (x *DataSource_SnowflakeOptions) GetDatabase() string { + if x != nil { + return x.Database + } + return "" +} + // Defines configuration for custom third-party data sources. type DataSource_CustomSourceOptions struct { state protoimpl.MessageState @@ -660,7 +755,7 @@ type DataSource_CustomSourceOptions struct { func (x *DataSource_CustomSourceOptions) Reset() { *x = DataSource_CustomSourceOptions{} if protoimpl.UnsafeEnabled { - mi := &file_feast_core_DataSource_proto_msgTypes[7] + mi := &file_feast_core_DataSource_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -673,7 +768,7 @@ func (x *DataSource_CustomSourceOptions) String() string { func (*DataSource_CustomSourceOptions) ProtoMessage() {} func (x *DataSource_CustomSourceOptions) ProtoReflect() protoreflect.Message { - mi := &file_feast_core_DataSource_proto_msgTypes[7] + mi := &file_feast_core_DataSource_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -686,7 +781,7 @@ func (x *DataSource_CustomSourceOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use DataSource_CustomSourceOptions.ProtoReflect.Descriptor instead. 
func (*DataSource_CustomSourceOptions) Descriptor() ([]byte, []int) { - return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 6} + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 7} } func (x *DataSource_CustomSourceOptions) GetConfiguration() []byte { @@ -711,7 +806,7 @@ type DataSource_RequestDataOptions struct { func (x *DataSource_RequestDataOptions) Reset() { *x = DataSource_RequestDataOptions{} if protoimpl.UnsafeEnabled { - mi := &file_feast_core_DataSource_proto_msgTypes[8] + mi := &file_feast_core_DataSource_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -724,7 +819,7 @@ func (x *DataSource_RequestDataOptions) String() string { func (*DataSource_RequestDataOptions) ProtoMessage() {} func (x *DataSource_RequestDataOptions) ProtoReflect() protoreflect.Message { - mi := &file_feast_core_DataSource_proto_msgTypes[8] + mi := &file_feast_core_DataSource_proto_msgTypes[9] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -737,7 +832,7 @@ func (x *DataSource_RequestDataOptions) ProtoReflect() protoreflect.Message { // Deprecated: Use DataSource_RequestDataOptions.ProtoReflect.Descriptor instead. 
func (*DataSource_RequestDataOptions) Descriptor() ([]byte, []int) { - return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 7} + return file_feast_core_DataSource_proto_rawDescGZIP(), []int{0, 8} } func (x *DataSource_RequestDataOptions) GetName() string { @@ -763,7 +858,7 @@ var file_feast_core_DataSource_proto_rawDesc = []byte{ 0x2f, 0x63, 0x6f, 0x72, 0x65, 0x2f, 0x44, 0x61, 0x74, 0x61, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x17, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2f, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, - 0x85, 0x10, 0x0a, 0x0a, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x12, 0x35, + 0xe6, 0x11, 0x0a, 0x0a, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x12, 0x35, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x21, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, 0x52, @@ -822,82 +917,96 @@ var file_feast_core_DataSource_proto_rawDesc = []byte{ 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x48, 0x00, 0x52, 0x0d, 0x63, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x4f, 0x70, 0x74, 0x69, 0x6f, - 0x6e, 0x73, 0x1a, 0x3f, 0x0a, 0x11, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x70, 0x70, 0x69, - 0x6e, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, - 0x02, 0x38, 0x01, 0x1a, 0x93, 0x01, 0x0a, 0x0b, 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, - 0x6f, 0x6e, 0x73, 0x12, 
0x37, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, - 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, - 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x75, 0x72, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x66, 0x69, 0x6c, 0x65, 0x55, 0x72, 0x6c, 0x12, 0x30, 0x0a, 0x14, 0x73, 0x33, 0x5f, 0x65, 0x6e, - 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x73, 0x33, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, - 0x74, 0x4f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x1a, 0x44, 0x0a, 0x0f, 0x42, 0x69, 0x67, - 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1b, 0x0a, 0x09, - 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x72, 0x65, 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x08, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x66, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, - 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x1a, - 0x92, 0x01, 0x0a, 0x0c, 0x4b, 0x61, 0x66, 0x6b, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x12, 0x2b, 0x0a, 0x11, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x5f, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x62, 0x6f, 0x6f, - 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x14, 0x0a, - 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x6f, - 0x70, 0x69, 0x63, 0x12, 0x3f, 0x0a, 0x0e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x5f, 0x66, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, - 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 
0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, - 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, - 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x88, 0x01, 0x0a, 0x0e, 0x4b, 0x69, 0x6e, 0x65, 0x73, 0x69, 0x73, - 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, - 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x12, - 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x4e, 0x61, 0x6d, 0x65, - 0x12, 0x3d, 0x0a, 0x0d, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, - 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, - 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, - 0x74, 0x52, 0x0c, 0x72, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, - 0x55, 0x0a, 0x0f, 0x52, 0x65, 0x64, 0x73, 0x68, 0x69, 0x66, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x56, 0x0a, 0x11, 0x73, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x5f, + 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x13, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x27, 0x2e, + 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, + 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x53, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x48, 0x00, 0x52, 0x10, 0x73, 0x6e, 0x6f, 0x77, 0x66, 0x6c, + 0x61, 0x6b, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x1a, 0x3f, 0x0a, 0x11, 0x46, 0x69, + 0x65, 0x6c, 0x64, 0x4d, 0x61, 0x70, 0x70, 0x69, 0x6e, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, + 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, + 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 
0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x93, 0x01, 0x0a, 0x0b, + 0x46, 0x69, 0x6c, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x0b, 0x66, + 0x69, 0x6c, 0x65, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x16, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x46, 0x69, + 0x6c, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x46, 0x6f, + 0x72, 0x6d, 0x61, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x75, 0x72, 0x6c, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x66, 0x69, 0x6c, 0x65, 0x55, 0x72, 0x6c, 0x12, + 0x30, 0x0a, 0x14, 0x73, 0x33, 0x5f, 0x65, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x5f, 0x6f, + 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x73, + 0x33, 0x45, 0x6e, 0x64, 0x70, 0x6f, 0x69, 0x6e, 0x74, 0x4f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, + 0x65, 0x1a, 0x44, 0x0a, 0x0f, 0x42, 0x69, 0x67, 0x51, 0x75, 0x65, 0x72, 0x79, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x1b, 0x0a, 0x09, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x5f, 0x72, 0x65, + 0x66, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, + 0x66, 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x1a, 0x92, 0x01, 0x0a, 0x0c, 0x4b, 0x61, 0x66, 0x6b, + 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2b, 0x0a, 0x11, 0x62, 0x6f, 0x6f, 0x74, + 0x73, 0x74, 0x72, 0x61, 0x70, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x10, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x53, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x6f, 0x70, 0x69, 0x63, 0x12, 0x3f, 0x0a, 0x0e, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 
0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, + 0x2e, 0x53, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0d, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x88, 0x01, 0x0a, + 0x0e, 0x4b, 0x69, 0x6e, 0x65, 0x73, 0x69, 0x73, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, + 0x16, 0x0a, 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x72, 0x65, 0x67, 0x69, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x74, 0x72, 0x65, 0x61, + 0x6d, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x74, + 0x72, 0x65, 0x61, 0x6d, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x3d, 0x0a, 0x0d, 0x72, 0x65, 0x63, 0x6f, + 0x72, 0x64, 0x5f, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x18, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x53, 0x74, 0x72, + 0x65, 0x61, 0x6d, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x52, 0x0c, 0x72, 0x65, 0x63, 0x6f, 0x72, + 0x64, 0x46, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x1a, 0x55, 0x0a, 0x0f, 0x52, 0x65, 0x64, 0x73, 0x68, + 0x69, 0x66, 0x74, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, + 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, + 0x12, 0x14, 0x0a, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x72, + 0x0a, 0x10, 0x53, 0x6e, 0x6f, 0x77, 0x66, 0x6c, 0x61, 0x6b, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x14, 0x0a, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x71, 
0x75, 0x65, 0x72, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x71, 0x75, 0x65, 0x72, 0x79, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, - 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x3b, 0x0a, 0x13, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, - 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, - 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x1a, 0xcf, 0x01, 0x0a, 0x12, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, - 0x61, 0x74, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, - 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, - 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, - 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, - 0x74, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x56, 0x0a, - 0x0b, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, - 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, - 0x65, 0x54, 0x79, 0x70, 0x65, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x9e, 0x01, 0x0a, 0x0a, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, - 0x54, 0x79, 0x70, 0x65, 
0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, - 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x46, 0x49, 0x4c, 0x45, 0x10, - 0x01, 0x12, 0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x42, 0x49, 0x47, 0x51, 0x55, - 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x5f, - 0x4b, 0x41, 0x46, 0x4b, 0x41, 0x10, 0x03, 0x12, 0x12, 0x0a, 0x0e, 0x53, 0x54, 0x52, 0x45, 0x41, - 0x4d, 0x5f, 0x4b, 0x49, 0x4e, 0x45, 0x53, 0x49, 0x53, 0x10, 0x04, 0x12, 0x12, 0x0a, 0x0e, 0x42, - 0x41, 0x54, 0x43, 0x48, 0x5f, 0x52, 0x45, 0x44, 0x53, 0x48, 0x49, 0x46, 0x54, 0x10, 0x05, 0x12, - 0x11, 0x0a, 0x0d, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, 0x45, - 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x52, 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x5f, 0x53, 0x4f, - 0x55, 0x52, 0x43, 0x45, 0x10, 0x07, 0x42, 0x09, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x0b, 0x42, 0x58, 0x0a, 0x10, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0f, 0x44, 0x61, 0x74, - 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, 0x64, - 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, 0x2f, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x63, 0x6f, 0x72, - 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, + 0x73, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x62, 0x61, + 0x73, 0x65, 0x1a, 0x3b, 0x0a, 0x13, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x53, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x24, 0x0a, 0x0d, 0x63, 0x6f, 0x6e, + 0x66, 
0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x0d, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x1a, + 0xcf, 0x01, 0x0a, 0x12, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, 0x74, 0x61, 0x4f, + 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x4d, 0x0a, 0x06, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x66, 0x65, 0x61, + 0x73, 0x74, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x2e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x44, 0x61, 0x74, 0x61, 0x4f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x1a, 0x56, 0x0a, 0x0b, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x31, 0x0a, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x1b, 0x2e, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x54, 0x79, 0x70, + 0x65, 0x2e, 0x45, 0x6e, 0x75, 0x6d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, + 0x01, 0x22, 0xb3, 0x01, 0x0a, 0x0a, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0e, 0x0a, + 0x0a, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x46, 0x49, 0x4c, 0x45, 0x10, 0x01, 0x12, 0x13, 0x0a, + 0x0f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x53, 0x4e, 0x4f, 0x57, 0x46, 0x4c, 0x41, 0x4b, 0x45, + 0x10, 0x08, 0x12, 0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, 0x42, 0x49, 0x47, 0x51, + 0x55, 0x45, 0x52, 0x59, 0x10, 0x02, 0x12, 
0x12, 0x0a, 0x0e, 0x42, 0x41, 0x54, 0x43, 0x48, 0x5f, + 0x52, 0x45, 0x44, 0x53, 0x48, 0x49, 0x46, 0x54, 0x10, 0x05, 0x12, 0x10, 0x0a, 0x0c, 0x53, 0x54, + 0x52, 0x45, 0x41, 0x4d, 0x5f, 0x4b, 0x41, 0x46, 0x4b, 0x41, 0x10, 0x03, 0x12, 0x12, 0x0a, 0x0e, + 0x53, 0x54, 0x52, 0x45, 0x41, 0x4d, 0x5f, 0x4b, 0x49, 0x4e, 0x45, 0x53, 0x49, 0x53, 0x10, 0x04, + 0x12, 0x11, 0x0a, 0x0d, 0x43, 0x55, 0x53, 0x54, 0x4f, 0x4d, 0x5f, 0x53, 0x4f, 0x55, 0x52, 0x43, + 0x45, 0x10, 0x06, 0x12, 0x12, 0x0a, 0x0e, 0x52, 0x45, 0x51, 0x55, 0x45, 0x53, 0x54, 0x5f, 0x53, + 0x4f, 0x55, 0x52, 0x43, 0x45, 0x10, 0x07, 0x42, 0x09, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x4a, 0x04, 0x08, 0x06, 0x10, 0x0b, 0x42, 0x58, 0x0a, 0x10, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x63, 0x6f, 0x72, 0x65, 0x42, 0x0f, 0x44, 0x61, + 0x74, 0x61, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, + 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, + 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x63, 0x6f, + 0x72, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -913,7 +1022,7 @@ func file_feast_core_DataSource_proto_rawDescGZIP() []byte { } var file_feast_core_DataSource_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_feast_core_DataSource_proto_msgTypes = make([]protoimpl.MessageInfo, 10) +var file_feast_core_DataSource_proto_msgTypes = make([]protoimpl.MessageInfo, 11) var file_feast_core_DataSource_proto_goTypes = []interface{}{ (DataSource_SourceType)(0), // 0: feast.core.DataSource.SourceType (*DataSource)(nil), // 1: feast.core.DataSource @@ -923,12 +1032,13 @@ var file_feast_core_DataSource_proto_goTypes = []interface{}{ (*DataSource_KafkaOptions)(nil), // 5: feast.core.DataSource.KafkaOptions (*DataSource_KinesisOptions)(nil), 
// 6: feast.core.DataSource.KinesisOptions (*DataSource_RedshiftOptions)(nil), // 7: feast.core.DataSource.RedshiftOptions - (*DataSource_CustomSourceOptions)(nil), // 8: feast.core.DataSource.CustomSourceOptions - (*DataSource_RequestDataOptions)(nil), // 9: feast.core.DataSource.RequestDataOptions - nil, // 10: feast.core.DataSource.RequestDataOptions.SchemaEntry - (*FileFormat)(nil), // 11: feast.core.FileFormat - (*StreamFormat)(nil), // 12: feast.core.StreamFormat - (types.ValueType_Enum)(0), // 13: feast.types.ValueType.Enum + (*DataSource_SnowflakeOptions)(nil), // 8: feast.core.DataSource.SnowflakeOptions + (*DataSource_CustomSourceOptions)(nil), // 9: feast.core.DataSource.CustomSourceOptions + (*DataSource_RequestDataOptions)(nil), // 10: feast.core.DataSource.RequestDataOptions + nil, // 11: feast.core.DataSource.RequestDataOptions.SchemaEntry + (*FileFormat)(nil), // 12: feast.core.FileFormat + (*StreamFormat)(nil), // 13: feast.core.StreamFormat + (types.ValueType_Enum)(0), // 14: feast.types.ValueType.Enum } var file_feast_core_DataSource_proto_depIdxs = []int32{ 0, // 0: feast.core.DataSource.type:type_name -> feast.core.DataSource.SourceType @@ -938,18 +1048,19 @@ var file_feast_core_DataSource_proto_depIdxs = []int32{ 5, // 4: feast.core.DataSource.kafka_options:type_name -> feast.core.DataSource.KafkaOptions 6, // 5: feast.core.DataSource.kinesis_options:type_name -> feast.core.DataSource.KinesisOptions 7, // 6: feast.core.DataSource.redshift_options:type_name -> feast.core.DataSource.RedshiftOptions - 9, // 7: feast.core.DataSource.request_data_options:type_name -> feast.core.DataSource.RequestDataOptions - 8, // 8: feast.core.DataSource.custom_options:type_name -> feast.core.DataSource.CustomSourceOptions - 11, // 9: feast.core.DataSource.FileOptions.file_format:type_name -> feast.core.FileFormat - 12, // 10: feast.core.DataSource.KafkaOptions.message_format:type_name -> feast.core.StreamFormat - 12, // 11: 
feast.core.DataSource.KinesisOptions.record_format:type_name -> feast.core.StreamFormat - 10, // 12: feast.core.DataSource.RequestDataOptions.schema:type_name -> feast.core.DataSource.RequestDataOptions.SchemaEntry - 13, // 13: feast.core.DataSource.RequestDataOptions.SchemaEntry.value:type_name -> feast.types.ValueType.Enum - 14, // [14:14] is the sub-list for method output_type - 14, // [14:14] is the sub-list for method input_type - 14, // [14:14] is the sub-list for extension type_name - 14, // [14:14] is the sub-list for extension extendee - 0, // [0:14] is the sub-list for field type_name + 10, // 7: feast.core.DataSource.request_data_options:type_name -> feast.core.DataSource.RequestDataOptions + 9, // 8: feast.core.DataSource.custom_options:type_name -> feast.core.DataSource.CustomSourceOptions + 8, // 9: feast.core.DataSource.snowflake_options:type_name -> feast.core.DataSource.SnowflakeOptions + 12, // 10: feast.core.DataSource.FileOptions.file_format:type_name -> feast.core.FileFormat + 13, // 11: feast.core.DataSource.KafkaOptions.message_format:type_name -> feast.core.StreamFormat + 13, // 12: feast.core.DataSource.KinesisOptions.record_format:type_name -> feast.core.StreamFormat + 11, // 13: feast.core.DataSource.RequestDataOptions.schema:type_name -> feast.core.DataSource.RequestDataOptions.SchemaEntry + 14, // 14: feast.core.DataSource.RequestDataOptions.SchemaEntry.value:type_name -> feast.types.ValueType.Enum + 15, // [15:15] is the sub-list for method output_type + 15, // [15:15] is the sub-list for method input_type + 15, // [15:15] is the sub-list for extension type_name + 15, // [15:15] is the sub-list for extension extendee + 0, // [0:15] is the sub-list for field type_name } func init() { file_feast_core_DataSource_proto_init() } @@ -1032,7 +1143,7 @@ func file_feast_core_DataSource_proto_init() { } } file_feast_core_DataSource_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { - switch v := 
v.(*DataSource_CustomSourceOptions); i { + switch v := v.(*DataSource_SnowflakeOptions); i { case 0: return &v.state case 1: @@ -1044,6 +1155,18 @@ func file_feast_core_DataSource_proto_init() { } } file_feast_core_DataSource_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DataSource_CustomSourceOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_feast_core_DataSource_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*DataSource_RequestDataOptions); i { case 0: return &v.state @@ -1064,6 +1187,7 @@ func file_feast_core_DataSource_proto_init() { (*DataSource_RedshiftOptions_)(nil), (*DataSource_RequestDataOptions_)(nil), (*DataSource_CustomOptions)(nil), + (*DataSource_SnowflakeOptions_)(nil), } type x struct{} out := protoimpl.TypeBuilder{ @@ -1071,7 +1195,7 @@ func file_feast_core_DataSource_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_feast_core_DataSource_proto_rawDesc, NumEnums: 1, - NumMessages: 10, + NumMessages: 11, NumExtensions: 0, NumServices: 0, }, diff --git a/sdk/go/protos/feast/core/Entity.pb.go b/sdk/go/protos/feast/core/Entity.pb.go index c6d9014791e..245f724e0a2 100644 --- a/sdk/go/protos/feast/core/Entity.pb.go +++ b/sdk/go/protos/feast/core/Entity.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/Entity.proto package core diff --git a/sdk/go/protos/feast/core/Feature.pb.go b/sdk/go/protos/feast/core/Feature.pb.go index 5d332dddff7..a30fafb9d37 100644 --- a/sdk/go/protos/feast/core/Feature.pb.go +++ b/sdk/go/protos/feast/core/Feature.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/Feature.proto package core diff --git a/sdk/go/protos/feast/core/FeatureTable.pb.go b/sdk/go/protos/feast/core/FeatureTable.pb.go index 355ef50fb86..144d46d8e2b 100644 --- a/sdk/go/protos/feast/core/FeatureTable.pb.go +++ b/sdk/go/protos/feast/core/FeatureTable.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/FeatureTable.proto package core diff --git a/sdk/go/protos/feast/core/Store.pb.go b/sdk/go/protos/feast/core/Store.pb.go index 6c46f10d24a..c56a4ede6dd 100644 --- a/sdk/go/protos/feast/core/Store.pb.go +++ b/sdk/go/protos/feast/core/Store.pb.go @@ -17,7 +17,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/core/Store.proto package core diff --git a/sdk/go/protos/feast/serving/ServingService.pb.go b/sdk/go/protos/feast/serving/ServingService.pb.go index 32e3461dfdf..3527c6688ea 100644 --- a/sdk/go/protos/feast/serving/ServingService.pb.go +++ b/sdk/go/protos/feast/serving/ServingService.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/serving/ServingService.proto package serving @@ -24,7 +24,6 @@ package serving import ( context "context" types "github.com/feast-dev/feast/sdk/go/protos/feast/types" - _ "github.com/feast-dev/feast/sdk/go/protos/tensorflow_metadata/proto/v0" grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" @@ -42,88 +41,35 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -type FeastServingType int32 - -const ( - FeastServingType_FEAST_SERVING_TYPE_INVALID FeastServingType = 0 - // Online serving receives entity data directly and synchronously and will - // respond immediately. - FeastServingType_FEAST_SERVING_TYPE_ONLINE FeastServingType = 1 - // Batch serving receives entity data asynchronously and orchestrates the - // retrieval through a staging location. - FeastServingType_FEAST_SERVING_TYPE_BATCH FeastServingType = 2 -) - -// Enum value maps for FeastServingType. 
-var ( - FeastServingType_name = map[int32]string{ - 0: "FEAST_SERVING_TYPE_INVALID", - 1: "FEAST_SERVING_TYPE_ONLINE", - 2: "FEAST_SERVING_TYPE_BATCH", - } - FeastServingType_value = map[string]int32{ - "FEAST_SERVING_TYPE_INVALID": 0, - "FEAST_SERVING_TYPE_ONLINE": 1, - "FEAST_SERVING_TYPE_BATCH": 2, - } -) - -func (x FeastServingType) Enum() *FeastServingType { - p := new(FeastServingType) - *p = x - return p -} - -func (x FeastServingType) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (FeastServingType) Descriptor() protoreflect.EnumDescriptor { - return file_feast_serving_ServingService_proto_enumTypes[0].Descriptor() -} - -func (FeastServingType) Type() protoreflect.EnumType { - return &file_feast_serving_ServingService_proto_enumTypes[0] -} - -func (x FeastServingType) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Use FeastServingType.Descriptor instead. -func (FeastServingType) EnumDescriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{0} -} - -type GetOnlineFeaturesResponse_FieldStatus int32 +type FieldStatus int32 const ( // Status is unset for this field. - GetOnlineFeaturesResponse_INVALID GetOnlineFeaturesResponse_FieldStatus = 0 + FieldStatus_INVALID FieldStatus = 0 // Field value is present for this field and age is within max age. - GetOnlineFeaturesResponse_PRESENT GetOnlineFeaturesResponse_FieldStatus = 1 + FieldStatus_PRESENT FieldStatus = 1 // Values could be found for entity key and age is within max age, but // this field value is assigned a value on ingestion into feast. - GetOnlineFeaturesResponse_NULL_VALUE GetOnlineFeaturesResponse_FieldStatus = 2 + FieldStatus_NULL_VALUE FieldStatus = 2 // Entity key did not return any values as they do not exist in Feast. // This could suggest that the feature values have not yet been ingested // into feast or the ingestion failed. 
- GetOnlineFeaturesResponse_NOT_FOUND GetOnlineFeaturesResponse_FieldStatus = 3 + FieldStatus_NOT_FOUND FieldStatus = 3 // Values could be found for entity key, but field values are outside the maximum // allowable range. - GetOnlineFeaturesResponse_OUTSIDE_MAX_AGE GetOnlineFeaturesResponse_FieldStatus = 4 + FieldStatus_OUTSIDE_MAX_AGE FieldStatus = 4 ) -// Enum value maps for GetOnlineFeaturesResponse_FieldStatus. +// Enum value maps for FieldStatus. var ( - GetOnlineFeaturesResponse_FieldStatus_name = map[int32]string{ + FieldStatus_name = map[int32]string{ 0: "INVALID", 1: "PRESENT", 2: "NULL_VALUE", 3: "NOT_FOUND", 4: "OUTSIDE_MAX_AGE", } - GetOnlineFeaturesResponse_FieldStatus_value = map[string]int32{ + FieldStatus_value = map[string]int32{ "INVALID": 0, "PRESENT": 1, "NULL_VALUE": 2, @@ -132,31 +78,31 @@ var ( } ) -func (x GetOnlineFeaturesResponse_FieldStatus) Enum() *GetOnlineFeaturesResponse_FieldStatus { - p := new(GetOnlineFeaturesResponse_FieldStatus) +func (x FieldStatus) Enum() *FieldStatus { + p := new(FieldStatus) *p = x return p } -func (x GetOnlineFeaturesResponse_FieldStatus) String() string { +func (x FieldStatus) String() string { return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) } -func (GetOnlineFeaturesResponse_FieldStatus) Descriptor() protoreflect.EnumDescriptor { - return file_feast_serving_ServingService_proto_enumTypes[1].Descriptor() +func (FieldStatus) Descriptor() protoreflect.EnumDescriptor { + return file_feast_serving_ServingService_proto_enumTypes[0].Descriptor() } -func (GetOnlineFeaturesResponse_FieldStatus) Type() protoreflect.EnumType { - return &file_feast_serving_ServingService_proto_enumTypes[1] +func (FieldStatus) Type() protoreflect.EnumType { + return &file_feast_serving_ServingService_proto_enumTypes[0] } -func (x GetOnlineFeaturesResponse_FieldStatus) Number() protoreflect.EnumNumber { +func (x FieldStatus) Number() protoreflect.EnumNumber { return protoreflect.EnumNumber(x) } -// 
Deprecated: Use GetOnlineFeaturesResponse_FieldStatus.Descriptor instead. -func (GetOnlineFeaturesResponse_FieldStatus) EnumDescriptor() ([]byte, []int) { - return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6, 0} +// Deprecated: Use FieldStatus.Descriptor instead. +func (FieldStatus) EnumDescriptor() ([]byte, []int) { + return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{0} } type GetFeastServingInfoRequest struct { @@ -204,12 +150,6 @@ type GetFeastServingInfoResponse struct { // Feast version of this serving deployment. Version string `protobuf:"bytes,1,opt,name=version,proto3" json:"version,omitempty"` - // Type of serving deployment, either ONLINE or BATCH. Different store types support different - // feature retrieval methods. - Type FeastServingType `protobuf:"varint,2,opt,name=type,proto3,enum=feast.serving.FeastServingType" json:"type,omitempty"` - // Note: Batch specific options start from 10. - // Staging location for this serving store, if any. - JobStagingLocation string `protobuf:"bytes,10,opt,name=job_staging_location,json=jobStagingLocation,proto3" json:"job_staging_location,omitempty"` } func (x *GetFeastServingInfoResponse) Reset() { @@ -251,29 +191,15 @@ func (x *GetFeastServingInfoResponse) GetVersion() string { return "" } -func (x *GetFeastServingInfoResponse) GetType() FeastServingType { - if x != nil { - return x.Type - } - return FeastServingType_FEAST_SERVING_TYPE_INVALID -} - -func (x *GetFeastServingInfoResponse) GetJobStagingLocation() string { - if x != nil { - return x.JobStagingLocation - } - return "" -} - type FeatureReferenceV2 struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Name of the Feature Table to retrieve the feature from. - FeatureTable string `protobuf:"bytes,1,opt,name=feature_table,json=featureTable,proto3" json:"feature_table,omitempty"` + // Name of the Feature View to retrieve the feature from. 
+ FeatureViewName string `protobuf:"bytes,1,opt,name=feature_view_name,json=featureViewName,proto3" json:"feature_view_name,omitempty"` // Name of the Feature to retrieve the feature from. - Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` + FeatureName string `protobuf:"bytes,2,opt,name=feature_name,json=featureName,proto3" json:"feature_name,omitempty"` } func (x *FeatureReferenceV2) Reset() { @@ -308,20 +234,21 @@ func (*FeatureReferenceV2) Descriptor() ([]byte, []int) { return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{2} } -func (x *FeatureReferenceV2) GetFeatureTable() string { +func (x *FeatureReferenceV2) GetFeatureViewName() string { if x != nil { - return x.FeatureTable + return x.FeatureViewName } return "" } -func (x *FeatureReferenceV2) GetName() string { +func (x *FeatureReferenceV2) GetFeatureName() string { if x != nil { - return x.Name + return x.FeatureName } return "" } +// ToDo (oleksii): remove this message (since it's not used) and move EntityRow on package level type GetOnlineFeaturesRequestV2 struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -453,6 +380,10 @@ type GetOnlineFeaturesRequest struct { // A map of entity name -> list of values Entities map[string]*types.RepeatedValue `protobuf:"bytes,3,rep,name=entities,proto3" json:"entities,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` FullFeatureNames bool `protobuf:"varint,4,opt,name=full_feature_names,json=fullFeatureNames,proto3" json:"full_feature_names,omitempty"` + // Context for OnDemand Feature Transformation + // (was moved to dedicated parameter to avoid unnecessary separation logic on serving side) + // A map of variable name -> list of values + RequestContext map[string]*types.RepeatedValue `protobuf:"bytes,5,rep,name=request_context,json=requestContext,proto3" json:"request_context,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" 
protobuf_val:"bytes,2,opt,name=value,proto3"` } func (x *GetOnlineFeaturesRequest) Reset() { @@ -522,6 +453,13 @@ func (x *GetOnlineFeaturesRequest) GetFullFeatureNames() bool { return false } +func (x *GetOnlineFeaturesRequest) GetRequestContext() map[string]*types.RepeatedValue { + if x != nil { + return x.RequestContext + } + return nil +} + type isGetOnlineFeaturesRequest_Kind interface { isGetOnlineFeaturesRequest_Kind() } @@ -543,8 +481,10 @@ type GetOnlineFeaturesResponse struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Feature values retrieved from feast. - FieldValues []*GetOnlineFeaturesResponse_FieldValues `protobuf:"bytes,1,rep,name=field_values,json=fieldValues,proto3" json:"field_values,omitempty"` + Metadata *GetOnlineFeaturesResponseMetadata `protobuf:"bytes,1,opt,name=metadata,proto3" json:"metadata,omitempty"` + // Length of "results" array should match length of requested features. + // We also preserve the same order of features here as in metadata.feature_names + Results []*GetOnlineFeaturesResponse_FeatureVector `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"` } func (x *GetOnlineFeaturesResponse) Reset() { @@ -579,9 +519,63 @@ func (*GetOnlineFeaturesResponse) Descriptor() ([]byte, []int) { return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6} } -func (x *GetOnlineFeaturesResponse) GetFieldValues() []*GetOnlineFeaturesResponse_FieldValues { +func (x *GetOnlineFeaturesResponse) GetMetadata() *GetOnlineFeaturesResponseMetadata { + if x != nil { + return x.Metadata + } + return nil +} + +func (x *GetOnlineFeaturesResponse) GetResults() []*GetOnlineFeaturesResponse_FeatureVector { if x != nil { - return x.FieldValues + return x.Results + } + return nil +} + +type GetOnlineFeaturesResponseMetadata struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + FeatureNames *FeatureList 
`protobuf:"bytes,1,opt,name=feature_names,json=featureNames,proto3" json:"feature_names,omitempty"` +} + +func (x *GetOnlineFeaturesResponseMetadata) Reset() { + *x = GetOnlineFeaturesResponseMetadata{} + if protoimpl.UnsafeEnabled { + mi := &file_feast_serving_ServingService_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetOnlineFeaturesResponseMetadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetOnlineFeaturesResponseMetadata) ProtoMessage() {} + +func (x *GetOnlineFeaturesResponseMetadata) ProtoReflect() protoreflect.Message { + mi := &file_feast_serving_ServingService_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetOnlineFeaturesResponseMetadata.ProtoReflect.Descriptor instead. +func (*GetOnlineFeaturesResponseMetadata) Descriptor() ([]byte, []int) { + return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{7} +} + +func (x *GetOnlineFeaturesResponseMetadata) GetFeatureNames() *FeatureList { + if x != nil { + return x.FeatureNames } return nil } @@ -601,7 +595,7 @@ type GetOnlineFeaturesRequestV2_EntityRow struct { func (x *GetOnlineFeaturesRequestV2_EntityRow) Reset() { *x = GetOnlineFeaturesRequestV2_EntityRow{} if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[7] + mi := &file_feast_serving_ServingService_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -614,7 +608,7 @@ func (x *GetOnlineFeaturesRequestV2_EntityRow) String() string { func (*GetOnlineFeaturesRequestV2_EntityRow) ProtoMessage() {} func (x *GetOnlineFeaturesRequestV2_EntityRow) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[7] + mi := 
&file_feast_serving_ServingService_proto_msgTypes[8] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -644,35 +638,33 @@ func (x *GetOnlineFeaturesRequestV2_EntityRow) GetFields() map[string]*types.Val return nil } -type GetOnlineFeaturesResponse_FieldValues struct { +type GetOnlineFeaturesResponse_FeatureVector struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Map of feature or entity name to feature/entity values. - // Timestamps are not returned in this response. - Fields map[string]*types.Value `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - // Map of feature or entity name to feature/entity statuses/metadata. - Statuses map[string]GetOnlineFeaturesResponse_FieldStatus `protobuf:"bytes,2,rep,name=statuses,proto3" json:"statuses,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"varint,2,opt,name=value,proto3,enum=feast.serving.GetOnlineFeaturesResponse_FieldStatus"` + Values []*types.Value `protobuf:"bytes,1,rep,name=values,proto3" json:"values,omitempty"` + Statuses []FieldStatus `protobuf:"varint,2,rep,packed,name=statuses,proto3,enum=feast.serving.FieldStatus" json:"statuses,omitempty"` + EventTimestamps []*timestamppb.Timestamp `protobuf:"bytes,3,rep,name=event_timestamps,json=eventTimestamps,proto3" json:"event_timestamps,omitempty"` } -func (x *GetOnlineFeaturesResponse_FieldValues) Reset() { - *x = GetOnlineFeaturesResponse_FieldValues{} +func (x *GetOnlineFeaturesResponse_FeatureVector) Reset() { + *x = GetOnlineFeaturesResponse_FeatureVector{} if protoimpl.UnsafeEnabled { - mi := &file_feast_serving_ServingService_proto_msgTypes[10] + mi := &file_feast_serving_ServingService_proto_msgTypes[12] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x 
*GetOnlineFeaturesResponse_FieldValues) String() string { +func (x *GetOnlineFeaturesResponse_FeatureVector) String() string { return protoimpl.X.MessageStringOf(x) } -func (*GetOnlineFeaturesResponse_FieldValues) ProtoMessage() {} +func (*GetOnlineFeaturesResponse_FeatureVector) ProtoMessage() {} -func (x *GetOnlineFeaturesResponse_FieldValues) ProtoReflect() protoreflect.Message { - mi := &file_feast_serving_ServingService_proto_msgTypes[10] +func (x *GetOnlineFeaturesResponse_FeatureVector) ProtoReflect() protoreflect.Message { + mi := &file_feast_serving_ServingService_proto_msgTypes[12] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -683,25 +675,32 @@ func (x *GetOnlineFeaturesResponse_FieldValues) ProtoReflect() protoreflect.Mess return mi.MessageOf(x) } -// Deprecated: Use GetOnlineFeaturesResponse_FieldValues.ProtoReflect.Descriptor instead. -func (*GetOnlineFeaturesResponse_FieldValues) Descriptor() ([]byte, []int) { +// Deprecated: Use GetOnlineFeaturesResponse_FeatureVector.ProtoReflect.Descriptor instead. 
+func (*GetOnlineFeaturesResponse_FeatureVector) Descriptor() ([]byte, []int) { return file_feast_serving_ServingService_proto_rawDescGZIP(), []int{6, 0} } -func (x *GetOnlineFeaturesResponse_FieldValues) GetFields() map[string]*types.Value { +func (x *GetOnlineFeaturesResponse_FeatureVector) GetValues() []*types.Value { if x != nil { - return x.Fields + return x.Values } return nil } -func (x *GetOnlineFeaturesResponse_FieldValues) GetStatuses() map[string]GetOnlineFeaturesResponse_FieldStatus { +func (x *GetOnlineFeaturesResponse_FeatureVector) GetStatuses() []FieldStatus { if x != nil { return x.Statuses } return nil } +func (x *GetOnlineFeaturesResponse_FeatureVector) GetEventTimestamps() []*timestamppb.Timestamp { + if x != nil { + return x.EventTimestamps + } + return nil +} + var File_feast_serving_ServingService_proto protoreflect.FileDescriptor var file_feast_serving_ServingService_proto_rawDesc = []byte{ @@ -711,146 +710,140 @@ var file_feast_serving_ServingService_proto_rawDesc = []byte{ 0x69, 0x6e, 0x67, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x17, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x74, 0x79, 0x70, 0x65, - 0x73, 0x2f, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x2d, 0x74, - 0x65, 0x6e, 0x73, 0x6f, 0x72, 0x66, 0x6c, 0x6f, 0x77, 0x5f, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x76, 0x30, 0x2f, 0x73, 0x74, 0x61, 0x74, - 0x69, 0x73, 0x74, 0x69, 0x63, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x1c, 0x0a, 0x1a, - 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, - 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x9e, 0x01, 0x0a, 0x1b, 0x47, + 0x73, 0x2f, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x1c, 0x0a, + 0x1a, 
0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, + 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x37, 0x0a, 0x1b, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x33, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x1f, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, - 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x54, - 0x79, 0x70, 0x65, 0x52, 0x04, 0x74, 0x79, 0x70, 0x65, 0x12, 0x30, 0x0a, 0x14, 0x6a, 0x6f, 0x62, - 0x5f, 0x73, 0x74, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x5f, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x6a, 0x6f, 0x62, 0x53, 0x74, 0x61, 0x67, - 0x69, 0x6e, 0x67, 0x4c, 0x6f, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x4d, 0x0a, 0x12, 0x46, - 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x56, - 0x32, 0x12, 0x23, 0x0a, 0x0d, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x74, 0x61, 0x62, - 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x22, 0xbb, 0x03, 0x0a, 0x1a, 0x47, - 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x12, 0x3d, 0x0a, 0x08, 0x66, 0x65, 0x61, - 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x66, 0x65, - 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 
0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x56, 0x32, 0x52, 0x08, - 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x54, 0x0a, 0x0b, 0x65, 0x6e, 0x74, 0x69, - 0x74, 0x79, 0x5f, 0x72, 0x6f, 0x77, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, - 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, - 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x52, - 0x6f, 0x77, 0x52, 0x0a, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x52, 0x6f, 0x77, 0x73, 0x12, 0x18, - 0x0a, 0x07, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x07, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x1a, 0xed, 0x01, 0x0a, 0x09, 0x45, 0x6e, 0x74, - 0x69, 0x74, 0x79, 0x52, 0x6f, 0x77, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, - 0x61, 0x6d, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, - 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, - 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x12, 0x57, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x3f, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, - 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x2e, 0x45, 0x6e, 0x74, 0x69, - 0x74, 0x79, 0x52, 0x6f, 0x77, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x1a, 0x4d, 0x0a, 0x0b, 0x46, 0x69, 0x65, - 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 
0x79, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x65, 0x61, 0x73, - 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x1f, 0x0a, 0x0b, 0x46, 0x65, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x76, 0x61, 0x6c, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x09, 0x52, 0x03, 0x76, 0x61, 0x6c, 0x22, 0xe1, 0x02, 0x0a, 0x18, 0x47, 0x65, + 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x63, 0x0a, 0x12, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x52, + 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x56, 0x32, 0x12, 0x2a, 0x0a, 0x11, 0x66, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x76, 0x69, 0x65, 0x77, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x69, + 0x65, 0x77, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x21, 0x0a, 0x0c, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0b, 0x66, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0xbb, 0x03, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x29, 0x0a, 0x0f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, - 0x00, 0x52, 0x0e, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x12, 0x38, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, - 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x48, - 0x00, 0x52, 0x08, 0x66, 
0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x51, 0x0a, 0x08, 0x65, - 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, - 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, - 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x12, 0x2c, - 0x0a, 0x12, 0x66, 0x75, 0x6c, 0x6c, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6e, - 0x61, 0x6d, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x66, 0x75, 0x6c, 0x6c, - 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x1a, 0x57, 0x0a, 0x0d, - 0x45, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x30, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, - 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x52, 0x65, 0x70, - 0x65, 0x61, 0x74, 0x65, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, - 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x06, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x22, 0xdd, 0x04, - 0x0a, 0x19, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, - 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x57, 0x0a, 0x0c, 0x66, - 0x69, 0x65, 0x6c, 0x64, 0x5f, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x34, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, - 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, - 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 
0x65, 0x2e, 0x46, 0x69, 0x65, 0x6c, - 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x52, 0x0b, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, - 0x6c, 0x75, 0x65, 0x73, 0x1a, 0x89, 0x03, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, - 0x6c, 0x75, 0x65, 0x73, 0x12, 0x58, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x01, - 0x20, 0x03, 0x28, 0x0b, 0x32, 0x40, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, - 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, - 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, - 0x69, 0x65, 0x6c, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x12, 0x5e, - 0x0a, 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, - 0x32, 0x42, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, - 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, - 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, - 0x56, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x45, - 0x6e, 0x74, 0x72, 0x79, 0x52, 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x1a, 0x4d, - 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, - 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, - 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, - 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, - 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x71, 0x0a, - 0x0d, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, - 
0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, - 0x12, 0x4a, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, - 0x34, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x12, 0x3d, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, + 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x66, 0x65, 0x61, + 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x56, 0x32, 0x52, 0x08, 0x66, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x54, 0x0a, 0x0b, 0x65, 0x6e, 0x74, 0x69, 0x74, + 0x79, 0x5f, 0x72, 0x6f, 0x77, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x66, + 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, + 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x52, 0x6f, + 0x77, 0x52, 0x0a, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x52, 0x6f, 0x77, 0x73, 0x12, 0x18, 0x0a, + 0x07, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, + 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x1a, 0xed, 0x01, 0x0a, 0x09, 0x45, 0x6e, 0x74, 0x69, + 0x74, 0x79, 0x52, 0x6f, 0x77, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, + 0x6d, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, + 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, + 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, + 0x57, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x3f, 0x2e, 0x66, 0x65, 0x61, 0x73, 
0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, - 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, - 0x22, 0x5b, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, - 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0b, 0x0a, 0x07, - 0x50, 0x52, 0x45, 0x53, 0x45, 0x4e, 0x54, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4e, 0x55, 0x4c, - 0x4c, 0x5f, 0x56, 0x41, 0x4c, 0x55, 0x45, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x4e, 0x4f, 0x54, - 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x4f, 0x55, 0x54, 0x53, - 0x49, 0x44, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x41, 0x47, 0x45, 0x10, 0x04, 0x2a, 0x6f, 0x0a, - 0x10, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x54, 0x79, 0x70, - 0x65, 0x12, 0x1e, 0x0a, 0x1a, 0x46, 0x45, 0x41, 0x53, 0x54, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x49, - 0x4e, 0x47, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, - 0x00, 0x12, 0x1d, 0x0a, 0x19, 0x46, 0x45, 0x41, 0x53, 0x54, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x49, - 0x4e, 0x47, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x4f, 0x4e, 0x4c, 0x49, 0x4e, 0x45, 0x10, 0x01, - 0x12, 0x1c, 0x0a, 0x18, 0x46, 0x45, 0x41, 0x53, 0x54, 0x5f, 0x53, 0x45, 0x52, 0x56, 0x49, 0x4e, - 0x47, 0x5f, 0x54, 0x59, 0x50, 0x45, 0x5f, 0x42, 0x41, 0x54, 0x43, 0x48, 0x10, 0x02, 0x32, 0xea, - 0x01, 0x0a, 0x0e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, - 0x65, 0x12, 0x6c, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, - 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x29, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, - 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 
0x65, 0x61, 0x73, - 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, - 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, - 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, - 0x6a, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x73, 0x56, 0x32, 0x12, 0x29, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, - 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, - 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, - 0x32, 0x1a, 0x28, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, + 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x56, 0x32, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, + 0x79, 0x52, 0x6f, 0x77, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, + 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x1a, 0x4d, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, + 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x28, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, + 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x1f, 0x0a, 0x0b, 0x46, 0x65, 0x61, 0x74, 0x75, + 0x72, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x76, 0x61, 0x6c, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x09, 0x52, 0x03, 0x76, 0x61, 0x6c, 0x22, 0xa6, 0x04, 0x0a, 0x18, 0x47, 0x65, 0x74, + 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 
0x73, 0x74, 0x12, 0x29, 0x0a, 0x0f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x5f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, + 0x52, 0x0e, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, + 0x12, 0x38, 0x0a, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, + 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x48, 0x00, + 0x52, 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x51, 0x0a, 0x08, 0x65, 0x6e, + 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x66, + 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, + 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x52, 0x08, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x12, 0x2c, 0x0a, + 0x12, 0x66, 0x75, 0x6c, 0x6c, 0x5f, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6e, 0x61, + 0x6d, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x66, 0x75, 0x6c, 0x6c, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x12, 0x64, 0x0a, 0x0f, 0x72, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3b, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x2e, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x52, 0x0e, 0x72, 0x65, 0x71, 0x75, 0x65, 0x73, 
0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, + 0x74, 0x1a, 0x57, 0x0a, 0x0d, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x69, 0x65, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x6b, 0x65, 0x79, 0x12, 0x30, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, + 0x73, 0x2e, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x1a, 0x5d, 0x0a, 0x13, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x45, 0x6e, 0x74, 0x72, + 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, + 0x6b, 0x65, 0x79, 0x12, 0x30, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, + 0x2e, 0x52, 0x65, 0x70, 0x65, 0x61, 0x74, 0x65, 0x64, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x06, 0x0a, 0x04, 0x6b, 0x69, 0x6e, + 0x64, 0x22, 0xf8, 0x02, 0x0a, 0x19, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x4c, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x30, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, - 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x5e, 0x0a, 0x13, 0x66, - 0x65, 0x61, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, - 0x6e, 0x67, 0x42, 0x0f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x41, 0x50, 0x49, 0x50, 0x72, 
- 0x6f, 0x74, 0x6f, 0x5a, 0x36, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, - 0x66, 0x65, 0x61, 0x73, 0x74, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, - 0x73, 0x64, 0x6b, 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, - 0x61, 0x73, 0x74, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x62, 0x06, 0x70, 0x72, 0x6f, - 0x74, 0x6f, 0x33, + 0x72, 0x65, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, + 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x50, 0x0a, + 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x36, + 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, + 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x52, 0x07, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x73, 0x1a, + 0xba, 0x01, 0x0a, 0x0d, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x56, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x12, 0x2a, 0x0a, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x12, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x74, 0x79, 0x70, 0x65, 0x73, 0x2e, + 0x56, 0x61, 0x6c, 0x75, 0x65, 0x52, 0x06, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x73, 0x12, 0x36, 0x0a, + 0x08, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0e, 0x32, + 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, + 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x08, 0x73, 0x74, 0x61, + 0x74, 0x75, 0x73, 0x65, 0x73, 0x12, 0x45, 0x0a, 0x10, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x5f, 0x74, + 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x1a, 0x2e, 0x67, 
0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, + 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0f, 0x65, 0x76, 0x65, + 0x6e, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x73, 0x22, 0x64, 0x0a, 0x21, + 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, + 0x61, 0x12, 0x3f, 0x0a, 0x0d, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x5f, 0x6e, 0x61, 0x6d, + 0x65, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, + 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x4c, 0x69, 0x73, 0x74, 0x52, 0x0c, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x4e, 0x61, 0x6d, + 0x65, 0x73, 0x2a, 0x5b, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, + 0x73, 0x12, 0x0b, 0x0a, 0x07, 0x49, 0x4e, 0x56, 0x41, 0x4c, 0x49, 0x44, 0x10, 0x00, 0x12, 0x0b, + 0x0a, 0x07, 0x50, 0x52, 0x45, 0x53, 0x45, 0x4e, 0x54, 0x10, 0x01, 0x12, 0x0e, 0x0a, 0x0a, 0x4e, + 0x55, 0x4c, 0x4c, 0x5f, 0x56, 0x41, 0x4c, 0x55, 0x45, 0x10, 0x02, 0x12, 0x0d, 0x0a, 0x09, 0x4e, + 0x4f, 0x54, 0x5f, 0x46, 0x4f, 0x55, 0x4e, 0x44, 0x10, 0x03, 0x12, 0x13, 0x0a, 0x0f, 0x4f, 0x55, + 0x54, 0x53, 0x49, 0x44, 0x45, 0x5f, 0x4d, 0x41, 0x58, 0x5f, 0x41, 0x47, 0x45, 0x10, 0x04, 0x32, + 0xe6, 0x01, 0x0a, 0x0e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x53, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x12, 0x6c, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x12, 0x29, 0x2e, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, + 0x73, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x1a, 0x2a, 0x2e, 0x66, 0x65, 
0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, + 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x46, 0x65, 0x61, 0x73, 0x74, 0x53, 0x65, 0x72, + 0x76, 0x69, 0x6e, 0x67, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x66, 0x0a, 0x11, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, + 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x27, 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, + 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, + 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x28, + 0x2e, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x2e, 0x47, + 0x65, 0x74, 0x4f, 0x6e, 0x6c, 0x69, 0x6e, 0x65, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, + 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x5e, 0x0a, 0x13, 0x66, 0x65, 0x61, 0x73, + 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x42, + 0x0f, 0x53, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x41, 0x50, 0x49, 0x50, 0x72, 0x6f, 0x74, 0x6f, + 0x5a, 0x36, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x66, 0x65, 0x61, + 0x73, 0x74, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, 0x2f, 0x73, 0x64, 0x6b, + 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x66, 0x65, 0x61, 0x73, 0x74, + 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x6e, 0x67, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -865,52 +858,53 @@ func file_feast_serving_ServingService_proto_rawDescGZIP() []byte { return file_feast_serving_ServingService_proto_rawDescData } -var file_feast_serving_ServingService_proto_enumTypes = make([]protoimpl.EnumInfo, 2) +var file_feast_serving_ServingService_proto_enumTypes = make([]protoimpl.EnumInfo, 1) var file_feast_serving_ServingService_proto_msgTypes = make([]protoimpl.MessageInfo, 13) var file_feast_serving_ServingService_proto_goTypes = 
[]interface{}{ - (FeastServingType)(0), // 0: feast.serving.FeastServingType - (GetOnlineFeaturesResponse_FieldStatus)(0), // 1: feast.serving.GetOnlineFeaturesResponse.FieldStatus - (*GetFeastServingInfoRequest)(nil), // 2: feast.serving.GetFeastServingInfoRequest - (*GetFeastServingInfoResponse)(nil), // 3: feast.serving.GetFeastServingInfoResponse - (*FeatureReferenceV2)(nil), // 4: feast.serving.FeatureReferenceV2 - (*GetOnlineFeaturesRequestV2)(nil), // 5: feast.serving.GetOnlineFeaturesRequestV2 - (*FeatureList)(nil), // 6: feast.serving.FeatureList - (*GetOnlineFeaturesRequest)(nil), // 7: feast.serving.GetOnlineFeaturesRequest - (*GetOnlineFeaturesResponse)(nil), // 8: feast.serving.GetOnlineFeaturesResponse + (FieldStatus)(0), // 0: feast.serving.FieldStatus + (*GetFeastServingInfoRequest)(nil), // 1: feast.serving.GetFeastServingInfoRequest + (*GetFeastServingInfoResponse)(nil), // 2: feast.serving.GetFeastServingInfoResponse + (*FeatureReferenceV2)(nil), // 3: feast.serving.FeatureReferenceV2 + (*GetOnlineFeaturesRequestV2)(nil), // 4: feast.serving.GetOnlineFeaturesRequestV2 + (*FeatureList)(nil), // 5: feast.serving.FeatureList + (*GetOnlineFeaturesRequest)(nil), // 6: feast.serving.GetOnlineFeaturesRequest + (*GetOnlineFeaturesResponse)(nil), // 7: feast.serving.GetOnlineFeaturesResponse + (*GetOnlineFeaturesResponseMetadata)(nil), // 8: feast.serving.GetOnlineFeaturesResponseMetadata (*GetOnlineFeaturesRequestV2_EntityRow)(nil), // 9: feast.serving.GetOnlineFeaturesRequestV2.EntityRow nil, // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry nil, // 11: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry - (*GetOnlineFeaturesResponse_FieldValues)(nil), // 12: feast.serving.GetOnlineFeaturesResponse.FieldValues - nil, // 13: feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry - nil, // 14: feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry - (*timestamppb.Timestamp)(nil), // 15: google.protobuf.Timestamp - 
(*types.Value)(nil), // 16: feast.types.Value - (*types.RepeatedValue)(nil), // 17: feast.types.RepeatedValue + nil, // 12: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry + (*GetOnlineFeaturesResponse_FeatureVector)(nil), // 13: feast.serving.GetOnlineFeaturesResponse.FeatureVector + (*timestamppb.Timestamp)(nil), // 14: google.protobuf.Timestamp + (*types.Value)(nil), // 15: feast.types.Value + (*types.RepeatedValue)(nil), // 16: feast.types.RepeatedValue } var file_feast_serving_ServingService_proto_depIdxs = []int32{ - 0, // 0: feast.serving.GetFeastServingInfoResponse.type:type_name -> feast.serving.FeastServingType - 4, // 1: feast.serving.GetOnlineFeaturesRequestV2.features:type_name -> feast.serving.FeatureReferenceV2 - 9, // 2: feast.serving.GetOnlineFeaturesRequestV2.entity_rows:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow - 6, // 3: feast.serving.GetOnlineFeaturesRequest.features:type_name -> feast.serving.FeatureList - 11, // 4: feast.serving.GetOnlineFeaturesRequest.entities:type_name -> feast.serving.GetOnlineFeaturesRequest.EntitiesEntry - 12, // 5: feast.serving.GetOnlineFeaturesResponse.field_values:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldValues - 15, // 6: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.timestamp:type_name -> google.protobuf.Timestamp - 10, // 7: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.fields:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry - 16, // 8: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry.value:type_name -> feast.types.Value - 17, // 9: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry.value:type_name -> feast.types.RepeatedValue - 13, // 10: feast.serving.GetOnlineFeaturesResponse.FieldValues.fields:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry - 14, // 11: feast.serving.GetOnlineFeaturesResponse.FieldValues.statuses:type_name -> 
feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry - 16, // 12: feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry.value:type_name -> feast.types.Value - 1, // 13: feast.serving.GetOnlineFeaturesResponse.FieldValues.StatusesEntry.value:type_name -> feast.serving.GetOnlineFeaturesResponse.FieldStatus - 2, // 14: feast.serving.ServingService.GetFeastServingInfo:input_type -> feast.serving.GetFeastServingInfoRequest - 5, // 15: feast.serving.ServingService.GetOnlineFeaturesV2:input_type -> feast.serving.GetOnlineFeaturesRequestV2 - 3, // 16: feast.serving.ServingService.GetFeastServingInfo:output_type -> feast.serving.GetFeastServingInfoResponse - 8, // 17: feast.serving.ServingService.GetOnlineFeaturesV2:output_type -> feast.serving.GetOnlineFeaturesResponse - 16, // [16:18] is the sub-list for method output_type - 14, // [14:16] is the sub-list for method input_type - 14, // [14:14] is the sub-list for extension type_name - 14, // [14:14] is the sub-list for extension extendee - 0, // [0:14] is the sub-list for field type_name + 3, // 0: feast.serving.GetOnlineFeaturesRequestV2.features:type_name -> feast.serving.FeatureReferenceV2 + 9, // 1: feast.serving.GetOnlineFeaturesRequestV2.entity_rows:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow + 5, // 2: feast.serving.GetOnlineFeaturesRequest.features:type_name -> feast.serving.FeatureList + 11, // 3: feast.serving.GetOnlineFeaturesRequest.entities:type_name -> feast.serving.GetOnlineFeaturesRequest.EntitiesEntry + 12, // 4: feast.serving.GetOnlineFeaturesRequest.request_context:type_name -> feast.serving.GetOnlineFeaturesRequest.RequestContextEntry + 8, // 5: feast.serving.GetOnlineFeaturesResponse.metadata:type_name -> feast.serving.GetOnlineFeaturesResponseMetadata + 13, // 6: feast.serving.GetOnlineFeaturesResponse.results:type_name -> feast.serving.GetOnlineFeaturesResponse.FeatureVector + 5, // 7: feast.serving.GetOnlineFeaturesResponseMetadata.feature_names:type_name 
-> feast.serving.FeatureList + 14, // 8: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.timestamp:type_name -> google.protobuf.Timestamp + 10, // 9: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.fields:type_name -> feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry + 15, // 10: feast.serving.GetOnlineFeaturesRequestV2.EntityRow.FieldsEntry.value:type_name -> feast.types.Value + 16, // 11: feast.serving.GetOnlineFeaturesRequest.EntitiesEntry.value:type_name -> feast.types.RepeatedValue + 16, // 12: feast.serving.GetOnlineFeaturesRequest.RequestContextEntry.value:type_name -> feast.types.RepeatedValue + 15, // 13: feast.serving.GetOnlineFeaturesResponse.FeatureVector.values:type_name -> feast.types.Value + 0, // 14: feast.serving.GetOnlineFeaturesResponse.FeatureVector.statuses:type_name -> feast.serving.FieldStatus + 14, // 15: feast.serving.GetOnlineFeaturesResponse.FeatureVector.event_timestamps:type_name -> google.protobuf.Timestamp + 1, // 16: feast.serving.ServingService.GetFeastServingInfo:input_type -> feast.serving.GetFeastServingInfoRequest + 6, // 17: feast.serving.ServingService.GetOnlineFeatures:input_type -> feast.serving.GetOnlineFeaturesRequest + 2, // 18: feast.serving.ServingService.GetFeastServingInfo:output_type -> feast.serving.GetFeastServingInfoResponse + 7, // 19: feast.serving.ServingService.GetOnlineFeatures:output_type -> feast.serving.GetOnlineFeaturesResponse + 18, // [18:20] is the sub-list for method output_type + 16, // [16:18] is the sub-list for method input_type + 16, // [16:16] is the sub-list for extension type_name + 16, // [16:16] is the sub-list for extension extendee + 0, // [0:16] is the sub-list for field type_name } func init() { file_feast_serving_ServingService_proto_init() } @@ -1004,6 +998,18 @@ func file_feast_serving_ServingService_proto_init() { } } file_feast_serving_ServingService_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*GetOnlineFeaturesResponseMetadata); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_feast_serving_ServingService_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { switch v := v.(*GetOnlineFeaturesRequestV2_EntityRow); i { case 0: return &v.state @@ -1015,8 +1021,8 @@ func file_feast_serving_ServingService_proto_init() { return nil } } - file_feast_serving_ServingService_proto_msgTypes[10].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*GetOnlineFeaturesResponse_FieldValues); i { + file_feast_serving_ServingService_proto_msgTypes[12].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetOnlineFeaturesResponse_FeatureVector); i { case 0: return &v.state case 1: @@ -1037,7 +1043,7 @@ func file_feast_serving_ServingService_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_feast_serving_ServingService_proto_rawDesc, - NumEnums: 2, + NumEnums: 1, NumMessages: 13, NumExtensions: 0, NumServices: 1, @@ -1067,8 +1073,8 @@ const _ = grpc.SupportPackageIsVersion6 type ServingServiceClient interface { // Get information about this Feast serving. GetFeastServingInfo(ctx context.Context, in *GetFeastServingInfoRequest, opts ...grpc.CallOption) (*GetFeastServingInfoResponse, error) - // Get online features (v2) synchronously. - GetOnlineFeaturesV2(ctx context.Context, in *GetOnlineFeaturesRequestV2, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) + // Get online features synchronously. 
+ GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) } type servingServiceClient struct { @@ -1088,9 +1094,9 @@ func (c *servingServiceClient) GetFeastServingInfo(ctx context.Context, in *GetF return out, nil } -func (c *servingServiceClient) GetOnlineFeaturesV2(ctx context.Context, in *GetOnlineFeaturesRequestV2, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) { +func (c *servingServiceClient) GetOnlineFeatures(ctx context.Context, in *GetOnlineFeaturesRequest, opts ...grpc.CallOption) (*GetOnlineFeaturesResponse, error) { out := new(GetOnlineFeaturesResponse) - err := c.cc.Invoke(ctx, "/feast.serving.ServingService/GetOnlineFeaturesV2", in, out, opts...) + err := c.cc.Invoke(ctx, "/feast.serving.ServingService/GetOnlineFeatures", in, out, opts...) if err != nil { return nil, err } @@ -1101,8 +1107,8 @@ func (c *servingServiceClient) GetOnlineFeaturesV2(ctx context.Context, in *GetO type ServingServiceServer interface { // Get information about this Feast serving. GetFeastServingInfo(context.Context, *GetFeastServingInfoRequest) (*GetFeastServingInfoResponse, error) - // Get online features (v2) synchronously. - GetOnlineFeaturesV2(context.Context, *GetOnlineFeaturesRequestV2) (*GetOnlineFeaturesResponse, error) + // Get online features synchronously. + GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponse, error) } // UnimplementedServingServiceServer can be embedded to have forward compatible implementations. 
@@ -1112,8 +1118,8 @@ type UnimplementedServingServiceServer struct { func (*UnimplementedServingServiceServer) GetFeastServingInfo(context.Context, *GetFeastServingInfoRequest) (*GetFeastServingInfoResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method GetFeastServingInfo not implemented") } -func (*UnimplementedServingServiceServer) GetOnlineFeaturesV2(context.Context, *GetOnlineFeaturesRequestV2) (*GetOnlineFeaturesResponse, error) { - return nil, status.Errorf(codes.Unimplemented, "method GetOnlineFeaturesV2 not implemented") +func (*UnimplementedServingServiceServer) GetOnlineFeatures(context.Context, *GetOnlineFeaturesRequest) (*GetOnlineFeaturesResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetOnlineFeatures not implemented") } func RegisterServingServiceServer(s *grpc.Server, srv ServingServiceServer) { @@ -1138,20 +1144,20 @@ func _ServingService_GetFeastServingInfo_Handler(srv interface{}, ctx context.Co return interceptor(ctx, in, info, handler) } -func _ServingService_GetOnlineFeaturesV2_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { - in := new(GetOnlineFeaturesRequestV2) +func _ServingService_GetOnlineFeatures_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GetOnlineFeaturesRequest) if err := dec(in); err != nil { return nil, err } if interceptor == nil { - return srv.(ServingServiceServer).GetOnlineFeaturesV2(ctx, in) + return srv.(ServingServiceServer).GetOnlineFeatures(ctx, in) } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: "/feast.serving.ServingService/GetOnlineFeaturesV2", + FullMethod: "/feast.serving.ServingService/GetOnlineFeatures", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return srv.(ServingServiceServer).GetOnlineFeaturesV2(ctx, 
req.(*GetOnlineFeaturesRequestV2)) + return srv.(ServingServiceServer).GetOnlineFeatures(ctx, req.(*GetOnlineFeaturesRequest)) } return interceptor(ctx, in, info, handler) } @@ -1165,8 +1171,8 @@ var _ServingService_serviceDesc = grpc.ServiceDesc{ Handler: _ServingService_GetFeastServingInfo_Handler, }, { - MethodName: "GetOnlineFeaturesV2", - Handler: _ServingService_GetOnlineFeaturesV2_Handler, + MethodName: "GetOnlineFeatures", + Handler: _ServingService_GetOnlineFeatures_Handler, }, }, Streams: []grpc.StreamDesc{}, diff --git a/sdk/go/protos/feast/storage/Redis.pb.go b/sdk/go/protos/feast/storage/Redis.pb.go index 08ee629b7c0..35f38ba2a7e 100644 --- a/sdk/go/protos/feast/storage/Redis.pb.go +++ b/sdk/go/protos/feast/storage/Redis.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/storage/Redis.proto package storage diff --git a/sdk/go/protos/feast/types/Field.pb.go b/sdk/go/protos/feast/types/Field.pb.go index 73f46bb1ac3..af964f2c6ea 100644 --- a/sdk/go/protos/feast/types/Field.pb.go +++ b/sdk/go/protos/feast/types/Field.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/types/Field.proto package types diff --git a/sdk/go/protos/feast/types/Value.pb.go b/sdk/go/protos/feast/types/Value.pb.go index 9ae2806d515..79eaa160096 100644 --- a/sdk/go/protos/feast/types/Value.pb.go +++ b/sdk/go/protos/feast/types/Value.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. 
// versions: // protoc-gen-go v1.27.1 -// protoc v3.11.2 +// protoc v3.19.4 // source: feast/types/Value.proto package types diff --git a/sdk/go/request.go b/sdk/go/request.go index 360603b3a3e..94fecea01ba 100644 --- a/sdk/go/request.go +++ b/sdk/go/request.go @@ -3,6 +3,7 @@ package feast import ( "fmt" "github.com/feast-dev/feast/sdk/go/protos/feast/serving" + "github.com/feast-dev/feast/sdk/go/protos/feast/types" "strings" ) @@ -29,24 +30,44 @@ type OnlineFeaturesRequest struct { } // Builds the feast-specified request payload from the wrapper. -func (r OnlineFeaturesRequest) buildRequest() (*serving.GetOnlineFeaturesRequestV2, error) { - featureRefs, err := buildFeatureRefs(r.Features) +func (r OnlineFeaturesRequest) buildRequest() (*serving.GetOnlineFeaturesRequest, error) { + _, err := buildFeatureRefs(r.Features) if err != nil { return nil, err } + if len(r.Entities) == 0 { + return nil, fmt.Errorf("Entities must be provided") + } + + firstRow := r.Entities[0] + columnSize := len(firstRow) // build request entity rows from native entities - entityRows := make([]*serving.GetOnlineFeaturesRequestV2_EntityRow, len(r.Entities)) - for i, entity := range r.Entities { - entityRows[i] = &serving.GetOnlineFeaturesRequestV2_EntityRow{ - Fields: entity, + entityColumns := make(map[string][]*types.Value, columnSize) + for rowIdx, entityRow := range r.Entities { + for name, val := range entityRow { + if _, ok := entityColumns[name]; !ok { + entityColumns[name] = make([]*types.Value, len(r.Entities)) + } + + entityColumns[name][rowIdx] = val + } + } + + entities := make(map[string]*types.RepeatedValue, len(entityColumns)) + for column, values := range entityColumns { + entities[column] = &types.RepeatedValue{ + Val: values, } } - return &serving.GetOnlineFeaturesRequestV2{ - Features: featureRefs, - EntityRows: entityRows, - Project: r.Project, + return &serving.GetOnlineFeaturesRequest{ + Kind: &serving.GetOnlineFeaturesRequest_Features{ + Features: 
&serving.FeatureList{ + Val: r.Features, + }, + }, + Entities: entities, }, nil } @@ -84,9 +105,9 @@ func parseFeatureRef(featureRefStr string) (*serving.FeatureReferenceV2, error) // parse featuretable if specified if strings.Contains(featureRefStr, ":") { refSplit := strings.Split(featureRefStr, ":") - featureRef.FeatureTable, featureRefStr = refSplit[0], refSplit[1] + featureRef.FeatureViewName, featureRefStr = refSplit[0], refSplit[1] } - featureRef.Name = featureRefStr + featureRef.FeatureName = featureRefStr return &featureRef, nil } diff --git a/sdk/go/request_test.go b/sdk/go/request_test.go index 0e9b89d119a..9122c8ca401 100644 --- a/sdk/go/request_test.go +++ b/sdk/go/request_test.go @@ -13,7 +13,7 @@ func TestGetOnlineFeaturesRequest(t *testing.T) { tt := []struct { name string req OnlineFeaturesRequest - want *serving.GetOnlineFeaturesRequestV2 + want *serving.GetOnlineFeaturesRequest wantErr bool err error }{ @@ -30,34 +30,24 @@ func TestGetOnlineFeaturesRequest(t *testing.T) { }, Project: "driver_project", }, - want: &serving.GetOnlineFeaturesRequestV2{ - Features: []*serving.FeatureReferenceV2{ - { - FeatureTable: "driver", - Name: "driver_id", + want: &serving.GetOnlineFeaturesRequest{ + Kind: &serving.GetOnlineFeaturesRequest_Features{ + Features: &serving.FeatureList{ + Val: []string{"driver:driver_id"}, }, }, - EntityRows: []*serving.GetOnlineFeaturesRequestV2_EntityRow{ - { - Fields: map[string]*types.Value{ - "entity1": Int64Val(1), - "entity2": StrVal("bob"), + Entities: map[string]*types.RepeatedValue{ + "entity1": { + Val: []*types.Value{ + Int64Val(1), Int64Val(1), Int64Val(1), }, }, - { - Fields: map[string]*types.Value{ - "entity1": Int64Val(1), - "entity2": StrVal("annie"), - }, - }, - { - Fields: map[string]*types.Value{ - "entity1": Int64Val(1), - "entity2": StrVal("jane"), + "entity2": { + Val: []*types.Value{ + StrVal("bob"), StrVal("annie"), StrVal("jane"), }, }, }, - Project: "driver_project", }, wantErr: false, err: nil, diff 
--git a/sdk/go/response.go b/sdk/go/response.go index 7fa50761b69..cdb2cbee382 100644 --- a/sdk/go/response.go +++ b/sdk/go/response.go @@ -24,46 +24,83 @@ type OnlineFeaturesResponse struct { // Rows retrieves the result of the request as a list of Rows. func (r OnlineFeaturesResponse) Rows() []Row { - rows := make([]Row, len(r.RawResponse.FieldValues)) - for i, fieldValues := range r.RawResponse.FieldValues { - rows[i] = fieldValues.Fields + if len(r.RawResponse.Results) == 0 { + return []Row{} + } + + rowsCount := len(r.RawResponse.Results[0].Values) + rows := make([]Row, rowsCount) + for rowIdx := 0; rowIdx < rowsCount; rowIdx++ { + row := make(map[string]*types.Value) + for featureIdx := 0; featureIdx < len(r.RawResponse.Results); featureIdx++ { + row[r.RawResponse.Metadata.FeatureNames.Val[featureIdx]] = r.RawResponse.Results[featureIdx].Values[rowIdx] + } + + rows[rowIdx] = row } return rows } // Statuses retrieves field level status metadata for each row in Rows(). // Each status map returned maps status 1:1 to each returned row from Rows() -func (r OnlineFeaturesResponse) Statuses() []map[string]serving.GetOnlineFeaturesResponse_FieldStatus { - statuses := make([]map[string]serving.GetOnlineFeaturesResponse_FieldStatus, len(r.RawResponse.FieldValues)) - for i, fieldValues := range r.RawResponse.FieldValues { - statuses[i] = fieldValues.Statuses +func (r OnlineFeaturesResponse) Statuses() []map[string]serving.FieldStatus { + if len(r.RawResponse.Results) == 0 { + return []map[string]serving.FieldStatus{} + } + + rowsCount := len(r.RawResponse.Results[0].Statuses) + rows := make([]map[string]serving.FieldStatus, rowsCount) + + for rowIdx := 0; rowIdx < rowsCount; rowIdx++ { + row := make(map[string]serving.FieldStatus) + for featureIdx := 0; featureIdx < len(r.RawResponse.Results); featureIdx++ { + row[r.RawResponse.Metadata.FeatureNames.Val[featureIdx]] = r.RawResponse.Results[featureIdx].Statuses[rowIdx] + } + + rows[rowIdx] = row } - return statuses + 
return rows } // Int64Arrays retrieves the result of the request as a list of int64 slices. Any missing values will be filled // with the missing values provided. func (r OnlineFeaturesResponse) Int64Arrays(order []string, fillNa []int64) ([][]int64, error) { - rows := make([][]int64, len(r.RawResponse.FieldValues)) if len(fillNa) != len(order) { return nil, fmt.Errorf(ErrLengthMismatch, len(fillNa), len(order)) } - for i, fieldValues := range r.RawResponse.FieldValues { - rows[i] = make([]int64, len(order)) - for j, fname := range order { - value, exists := fieldValues.Fields[fname] + + if len(r.RawResponse.Results) == 0 { + return [][]int64{}, nil + } + + rowsCount := len(r.RawResponse.Results[0].Values) + rows := make([][]int64, rowsCount) + + featureNameToIdx := make(map[string]int) + + for idx, featureName := range r.RawResponse.Metadata.FeatureNames.Val { + featureNameToIdx[featureName] = idx + } + + for rowIdx := 0; rowIdx < rowsCount; rowIdx++ { + row := make([]int64, len(order)) + for idx, feature := range order { + featureIdx, exists := featureNameToIdx[feature] if !exists { - return nil, fmt.Errorf(ErrFeatureNotFound, fname) + return nil, fmt.Errorf(ErrFeatureNotFound, feature) } - valType := value.GetVal() + + valType := r.RawResponse.Results[featureIdx].Values[rowIdx].GetVal() if valType == nil { - rows[i][j] = fillNa[j] + row[idx] = fillNa[idx] } else if int64Val, ok := valType.(*types.Value_Int64Val); ok { - rows[i][j] = int64Val.Int64Val + row[idx] = int64Val.Int64Val } else { return nil, fmt.Errorf(ErrTypeMismatch, "int64") } } + + rows[rowIdx] = row } return rows, nil } @@ -71,26 +108,42 @@ func (r OnlineFeaturesResponse) Int64Arrays(order []string, fillNa []int64) ([][ // Float64Arrays retrieves the result of the request as a list of float64 slices. Any missing values will be filled // with the missing values provided. 
func (r OnlineFeaturesResponse) Float64Arrays(order []string, fillNa []float64) ([][]float64, error) { - rows := make([][]float64, len(r.RawResponse.FieldValues)) if len(fillNa) != len(order) { return nil, fmt.Errorf(ErrLengthMismatch, len(fillNa), len(order)) } - for i, records := range r.RawResponse.FieldValues { - rows[i] = make([]float64, len(order)) - for j, fname := range order { - value, exists := records.Fields[fname] + + if len(r.RawResponse.Results) == 0 { + return [][]float64{}, nil + } + + rowsCount := len(r.RawResponse.Results[0].Values) + rows := make([][]float64, rowsCount) + + featureNameToIdx := make(map[string]int) + + for idx, featureName := range r.RawResponse.Metadata.FeatureNames.Val { + featureNameToIdx[featureName] = idx + } + + for rowIdx := 0; rowIdx < rowsCount; rowIdx++ { + row := make([]float64, len(order)) + for idx, feature := range order { + featureIdx, exists := featureNameToIdx[feature] if !exists { - return nil, fmt.Errorf(ErrFeatureNotFound, fname) + return nil, fmt.Errorf(ErrFeatureNotFound, feature) } - valType := value.GetVal() + + valType := r.RawResponse.Results[featureIdx].Values[rowIdx].GetVal() if valType == nil { - rows[i][j] = fillNa[j] + row[idx] = fillNa[idx] } else if doubleVal, ok := valType.(*types.Value_DoubleVal); ok { - rows[i][j] = doubleVal.DoubleVal + row[idx] = doubleVal.DoubleVal } else { return nil, fmt.Errorf(ErrTypeMismatch, "float64") } } + + rows[rowIdx] = row } return rows, nil } diff --git a/sdk/go/response_test.go b/sdk/go/response_test.go index a6176527451..693faae7e46 100644 --- a/sdk/go/response_test.go +++ b/sdk/go/response_test.go @@ -10,28 +10,27 @@ import ( var response = OnlineFeaturesResponse{ RawResponse: &serving.GetOnlineFeaturesResponse{ - FieldValues: []*serving.GetOnlineFeaturesResponse_FieldValues{ + Results: []*serving.GetOnlineFeaturesResponse_FeatureVector{ { - Fields: map[string]*types.Value{ - "featuretable1:feature1": Int64Val(1), - "featuretable1:feature2": {}, - }, - 
Statuses: map[string]serving.GetOnlineFeaturesResponse_FieldStatus{ - "featuretable1:feature1": serving.GetOnlineFeaturesResponse_PRESENT, - "featuretable1:feature2": serving.GetOnlineFeaturesResponse_NULL_VALUE, + Values: []*types.Value{Int64Val(1), Int64Val(2)}, + Statuses: []serving.FieldStatus{ + serving.FieldStatus_PRESENT, + serving.FieldStatus_PRESENT, }, }, { - Fields: map[string]*types.Value{ - "featuretable1:feature1": Int64Val(2), - "featuretable1:feature2": Int64Val(2), - }, - Statuses: map[string]serving.GetOnlineFeaturesResponse_FieldStatus{ - "featuretable1:feature1": serving.GetOnlineFeaturesResponse_PRESENT, - "featuretable1:feature2": serving.GetOnlineFeaturesResponse_PRESENT, + Values: []*types.Value{{}, Int64Val(2)}, + Statuses: []serving.FieldStatus{ + serving.FieldStatus_NULL_VALUE, + serving.FieldStatus_PRESENT, }, }, }, + Metadata: &serving.GetOnlineFeaturesResponseMetadata{ + FeatureNames: &serving.FeatureList{ + Val: []string{"featuretable1:feature1", "featuretable1:feature2"}, + }, + }, }, } @@ -53,14 +52,14 @@ func TestOnlineFeaturesResponseToRow(t *testing.T) { func TestOnlineFeaturesResponseoToStatuses(t *testing.T) { actual := response.Statuses() - expected := []map[string]serving.GetOnlineFeaturesResponse_FieldStatus{ + expected := []map[string]serving.FieldStatus{ { - "featuretable1:feature1": serving.GetOnlineFeaturesResponse_PRESENT, - "featuretable1:feature2": serving.GetOnlineFeaturesResponse_NULL_VALUE, + "featuretable1:feature1": serving.FieldStatus_PRESENT, + "featuretable1:feature2": serving.FieldStatus_NULL_VALUE, }, { - "featuretable1:feature1": serving.GetOnlineFeaturesResponse_PRESENT, - "featuretable1:feature2": serving.GetOnlineFeaturesResponse_PRESENT, + "featuretable1:feature1": serving.FieldStatus_PRESENT, + "featuretable1:feature2": serving.FieldStatus_PRESENT, }, } if len(expected) != len(actual) { diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index eada13f9952..9f78f9d98bf 100644 --- 
a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -5,6 +5,7 @@ from feast.infra.offline_stores.bigquery_source import BigQuerySource from feast.infra.offline_stores.file_source import FileSource from feast.infra.offline_stores.redshift_source import RedshiftSource +from feast.infra.offline_stores.snowflake_source import SnowflakeSource from .data_source import KafkaSource, KinesisSource, SourceType from .entity import Entity @@ -43,4 +44,5 @@ "BigQuerySource", "FileSource", "RedshiftSource", + "SnowflakeSource", ] diff --git a/sdk/python/feast/base_feature_view.py b/sdk/python/feast/base_feature_view.py index 10f949d9a1c..b2178ec6312 100644 --- a/sdk/python/feast/base_feature_view.py +++ b/sdk/python/feast/base_feature_view.py @@ -13,7 +13,8 @@ # limitations under the License. import warnings from abc import ABC, abstractmethod -from typing import List, Type +from datetime import datetime +from typing import List, Optional, Type from google.protobuf.json_format import MessageToJson from proto import Message @@ -27,11 +28,16 @@ class BaseFeatureView(ABC): """A FeatureView defines a logical grouping of features to be served.""" + created_timestamp: Optional[datetime] + last_updated_timestamp: Optional[datetime] + @abstractmethod def __init__(self, name: str, features: List[Feature]): self._name = name self._features = features self._projection = FeatureViewProjection.from_definition(self) + self.created_timestamp: Optional[datetime] = None + self.last_updated_timestamp: Optional[datetime] = None @property def name(self) -> str: diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 186d0185efc..f6d326410a3 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -284,9 +284,8 @@ def feature_view_list(ctx: click.Context): if isinstance(feature_view, FeatureView): entities.update(feature_view.entities) elif isinstance(feature_view, OnDemandFeatureView): - for backing_fv in feature_view.inputs.values(): - if 
isinstance(backing_fv, FeatureView): - entities.update(backing_fv.entities) + for backing_fv in feature_view.input_feature_view_projections.values(): + entities.update(store.get_feature_view(backing_fv.name).entities) table.append( [ feature_view.name, @@ -478,7 +477,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List @click.option( "--template", "-t", - type=click.Choice(["local", "gcp", "aws"], case_sensitive=False), + type=click.Choice(["local", "gcp", "aws", "snowflake"], case_sensitive=False), help="Specify a template for the created project", default="local", ) diff --git a/sdk/python/feast/constants.py b/sdk/python/feast/constants.py index ff93347130d..a2fe6f15c58 100644 --- a/sdk/python/feast/constants.py +++ b/sdk/python/feast/constants.py @@ -29,6 +29,9 @@ # Environment variable for toggling usage FEAST_USAGE = "FEAST_USAGE" +# Default value for FEAST_USAGE when environment variable is not set +DEFAULT_FEAST_USAGE_VALUE = "True" + # Environment variable for the path for overwriting universal test configs FULL_REPO_CONFIGS_MODULE_ENV_NAME: str = "FULL_REPO_CONFIGS_MODULE" diff --git a/sdk/python/feast/data_source.py b/sdk/python/feast/data_source.py index b30340f0d2e..94910c6c083 100644 --- a/sdk/python/feast/data_source.py +++ b/sdk/python/feast/data_source.py @@ -360,6 +360,12 @@ def from_proto(data_source: DataSourceProto) -> Any: from feast.infra.offline_stores.redshift_source import RedshiftSource data_source_obj = RedshiftSource.from_proto(data_source) + + elif data_source.snowflake_options.table or data_source.snowflake_options.query: + from feast.infra.offline_stores.snowflake_source import SnowflakeSource + + data_source_obj = SnowflakeSource.from_proto(data_source) + elif ( data_source.kafka_options.bootstrap_servers and data_source.kafka_options.topic diff --git a/sdk/python/feast/diff/FcoDiff.py b/sdk/python/feast/diff/FcoDiff.py deleted file mode 100644 index 09f76d42f10..00000000000 --- 
a/sdk/python/feast/diff/FcoDiff.py +++ /dev/null @@ -1,102 +0,0 @@ -from dataclasses import dataclass -from enum import Enum -from typing import Any, Iterable, List, Set, Tuple, TypeVar - -from feast.base_feature_view import BaseFeatureView -from feast.entity import Entity -from feast.feature_service import FeatureService -from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto -from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto - - -@dataclass -class PropertyDiff: - property_name: str - val_existing: str - val_declared: str - - -class TransitionType(Enum): - UNKNOWN = 0 - CREATE = 1 - DELETE = 2 - UPDATE = 3 - UNCHANGED = 4 - - -@dataclass -class FcoDiff: - name: str - fco_type: str - current_fco: Any - new_fco: Any - fco_property_diffs: List[PropertyDiff] - transition_type: TransitionType - - -@dataclass -class RegistryDiff: - fco_diffs: List[FcoDiff] - - def __init__(self): - self.fco_diffs = [] - - def add_fco_diff(self, fco_diff: FcoDiff): - self.fco_diffs.append(fco_diff) - - -T = TypeVar("T", Entity, BaseFeatureView, FeatureService) - - -def tag_objects_for_keep_delete_add( - existing_objs: Iterable[T], desired_objs: Iterable[T] -) -> Tuple[Set[T], Set[T], Set[T]]: - existing_obj_names = {e.name for e in existing_objs} - desired_obj_names = {e.name for e in desired_objs} - - objs_to_add = {e for e in desired_objs if e.name not in existing_obj_names} - objs_to_keep = {e for e in desired_objs if e.name in existing_obj_names} - objs_to_delete = {e for e in existing_objs if e.name not in desired_obj_names} - - return objs_to_keep, objs_to_delete, objs_to_add - - -U = TypeVar("U", EntityProto, FeatureViewProto) - - -def tag_proto_objects_for_keep_delete_add( - existing_objs: Iterable[U], desired_objs: Iterable[U] -) -> Tuple[Iterable[U], Iterable[U], Iterable[U]]: - existing_obj_names = {e.spec.name for e in existing_objs} - desired_obj_names = {e.spec.name for e in desired_objs} - - objs_to_add = [e for e in 
desired_objs if e.spec.name not in existing_obj_names] - objs_to_keep = [e for e in desired_objs if e.spec.name in existing_obj_names] - objs_to_delete = [e for e in existing_objs if e.spec.name not in desired_obj_names] - - return objs_to_keep, objs_to_delete, objs_to_add - - -FIELDS_TO_IGNORE = {"project"} - - -def diff_between(current: U, new: U, object_type: str) -> FcoDiff: - assert current.DESCRIPTOR.full_name == new.DESCRIPTOR.full_name - property_diffs = [] - transition: TransitionType = TransitionType.UNCHANGED - if current.spec != new.spec: - for _field in current.spec.DESCRIPTOR.fields: - if _field.name in FIELDS_TO_IGNORE: - continue - if getattr(current.spec, _field.name) != getattr(new.spec, _field.name): - transition = TransitionType.UPDATE - property_diffs.append( - PropertyDiff( - _field.name, - getattr(current.spec, _field.name), - getattr(new.spec, _field.name), - ) - ) - return FcoDiff( - new.spec.name, object_type, current, new, property_diffs, transition, - ) diff --git a/sdk/python/feast/diff/infra_diff.py b/sdk/python/feast/diff/infra_diff.py new file mode 100644 index 00000000000..a09eaf39ebe --- /dev/null +++ b/sdk/python/feast/diff/infra_diff.py @@ -0,0 +1,203 @@ +from dataclasses import dataclass +from typing import Generic, Iterable, List, Tuple, TypeVar + +from feast.diff.property_diff import PropertyDiff, TransitionType +from feast.infra.infra_object import ( + DATASTORE_INFRA_OBJECT_CLASS_TYPE, + DYNAMODB_INFRA_OBJECT_CLASS_TYPE, + SQLITE_INFRA_OBJECT_CLASS_TYPE, + InfraObject, +) +from feast.protos.feast.core.DatastoreTable_pb2 import ( + DatastoreTable as DatastoreTableProto, +) +from feast.protos.feast.core.DynamoDBTable_pb2 import ( + DynamoDBTable as DynamoDBTableProto, +) +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto +from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto + +InfraObjectProto = TypeVar( + "InfraObjectProto", DatastoreTableProto, DynamoDBTableProto, 
SqliteTableProto +) + + +@dataclass +class InfraObjectDiff(Generic[InfraObjectProto]): + name: str + infra_object_type: str + current_infra_object: InfraObjectProto + new_infra_object: InfraObjectProto + infra_object_property_diffs: List[PropertyDiff] + transition_type: TransitionType + + +@dataclass +class InfraDiff: + infra_object_diffs: List[InfraObjectDiff] + + def __init__(self): + self.infra_object_diffs = [] + + def update(self): + """Apply the infrastructure changes specified in this object.""" + for infra_object_diff in self.infra_object_diffs: + if infra_object_diff.transition_type in [ + TransitionType.DELETE, + TransitionType.UPDATE, + ]: + infra_object = InfraObject.from_proto( + infra_object_diff.current_infra_object + ) + infra_object.teardown() + elif infra_object_diff.transition_type in [ + TransitionType.CREATE, + TransitionType.UPDATE, + ]: + infra_object = InfraObject.from_proto( + infra_object_diff.new_infra_object + ) + infra_object.update() + + def to_string(self): + from colorama import Fore, Style + + log_string = "" + + message_action_map = { + TransitionType.CREATE: ("Created", Fore.GREEN), + TransitionType.DELETE: ("Deleted", Fore.RED), + TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), + TransitionType.UPDATE: ("Updated", Fore.YELLOW), + } + for infra_object_diff in self.infra_object_diffs: + if infra_object_diff.transition_type == TransitionType.UNCHANGED: + continue + action, color = message_action_map[infra_object_diff.transition_type] + log_string += f"{action} {infra_object_diff.infra_object_type} {Style.BRIGHT + color}{infra_object_diff.name}{Style.RESET_ALL}\n" + if infra_object_diff.transition_type == TransitionType.UPDATE: + for _p in infra_object_diff.infra_object_property_diffs: + log_string += f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}\n" + + log_string = ( + f"{Style.BRIGHT + Fore.LIGHTBLUE_EX}No changes 
to infrastructure" + if not log_string + else log_string + ) + + return log_string + + +def tag_infra_proto_objects_for_keep_delete_add( + existing_objs: Iterable[InfraObjectProto], desired_objs: Iterable[InfraObjectProto] +) -> Tuple[ + Iterable[InfraObjectProto], Iterable[InfraObjectProto], Iterable[InfraObjectProto] +]: + existing_obj_names = {e.name for e in existing_objs} + desired_obj_names = {e.name for e in desired_objs} + + objs_to_add = [e for e in desired_objs if e.name not in existing_obj_names] + objs_to_keep = [e for e in desired_objs if e.name in existing_obj_names] + objs_to_delete = [e for e in existing_objs if e.name not in desired_obj_names] + + return objs_to_keep, objs_to_delete, objs_to_add + + +def diff_infra_protos( + current_infra_proto: InfraProto, new_infra_proto: InfraProto +) -> InfraDiff: + infra_diff = InfraDiff() + + infra_object_class_types_to_str = { + DATASTORE_INFRA_OBJECT_CLASS_TYPE: "datastore table", + DYNAMODB_INFRA_OBJECT_CLASS_TYPE: "dynamodb table", + SQLITE_INFRA_OBJECT_CLASS_TYPE: "sqlite table", + } + + for infra_object_class_type in infra_object_class_types_to_str: + current_infra_objects = get_infra_object_protos_by_type( + current_infra_proto, infra_object_class_type + ) + new_infra_objects = get_infra_object_protos_by_type( + new_infra_proto, infra_object_class_type + ) + ( + infra_objects_to_keep, + infra_objects_to_delete, + infra_objects_to_add, + ) = tag_infra_proto_objects_for_keep_delete_add( + current_infra_objects, new_infra_objects, + ) + + for e in infra_objects_to_add: + infra_diff.infra_object_diffs.append( + InfraObjectDiff( + e.name, + infra_object_class_types_to_str[infra_object_class_type], + None, + e, + [], + TransitionType.CREATE, + ) + ) + for e in infra_objects_to_delete: + infra_diff.infra_object_diffs.append( + InfraObjectDiff( + e.name, + infra_object_class_types_to_str[infra_object_class_type], + e, + None, + [], + TransitionType.DELETE, + ) + ) + for e in infra_objects_to_keep: + 
current_infra_object = [ + _e for _e in current_infra_objects if _e.name == e.name + ][0] + infra_diff.infra_object_diffs.append( + diff_between( + current_infra_object, + e, + infra_object_class_types_to_str[infra_object_class_type], + ) + ) + + return infra_diff + + +def get_infra_object_protos_by_type( + infra_proto: InfraProto, infra_object_class_type: str +) -> List[InfraObjectProto]: + return [ + InfraObject.from_infra_object_proto(infra_object).to_proto() + for infra_object in infra_proto.infra_objects + if infra_object.infra_object_class_type == infra_object_class_type + ] + + +FIELDS_TO_IGNORE = {"project"} + + +def diff_between( + current: InfraObjectProto, new: InfraObjectProto, infra_object_type: str +) -> InfraObjectDiff: + assert current.DESCRIPTOR.full_name == new.DESCRIPTOR.full_name + property_diffs = [] + transition: TransitionType = TransitionType.UNCHANGED + if current != new: + for _field in current.DESCRIPTOR.fields: + if _field.name in FIELDS_TO_IGNORE: + continue + if getattr(current, _field.name) != getattr(new, _field.name): + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name, + getattr(current, _field.name), + getattr(new, _field.name), + ) + ) + return InfraObjectDiff( + new.name, infra_object_type, current, new, property_diffs, transition, + ) diff --git a/sdk/python/feast/diff/property_diff.py b/sdk/python/feast/diff/property_diff.py new file mode 100644 index 00000000000..9136cada500 --- /dev/null +++ b/sdk/python/feast/diff/property_diff.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from enum import Enum + + +@dataclass +class PropertyDiff: + property_name: str + val_existing: str + val_declared: str + + +class TransitionType(Enum): + UNKNOWN = 0 + CREATE = 1 + DELETE = 2 + UPDATE = 3 + UNCHANGED = 4 diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py new file mode 100644 index 00000000000..1f68d3ff65c --- /dev/null +++ 
b/sdk/python/feast/diff/registry_diff.py @@ -0,0 +1,298 @@ +from dataclasses import dataclass +from typing import Any, Dict, Generic, Iterable, List, Set, Tuple, TypeVar + +from feast.base_feature_view import BaseFeatureView +from feast.diff.property_diff import PropertyDiff, TransitionType +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_view import DUMMY_ENTITY_NAME +from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto +from feast.protos.feast.core.FeatureService_pb2 import ( + FeatureService as FeatureServiceProto, +) +from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto +from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( + OnDemandFeatureView as OnDemandFeatureViewProto, +) +from feast.protos.feast.core.RequestFeatureView_pb2 import ( + RequestFeatureView as RequestFeatureViewProto, +) +from feast.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry +from feast.repo_contents import RepoContents + +FeastObject = TypeVar("FeastObject", Entity, BaseFeatureView, FeatureService) + + +@dataclass +class FeastObjectDiff(Generic[FeastObject]): + name: str + feast_object_type: FeastObjectType + current_feast_object: FeastObject + new_feast_object: FeastObject + feast_object_property_diffs: List[PropertyDiff] + transition_type: TransitionType + + +@dataclass +class RegistryDiff: + feast_object_diffs: List[FeastObjectDiff] + + def __init__(self): + self.feast_object_diffs = [] + + def add_feast_object_diff(self, feast_object_diff: FeastObjectDiff): + self.feast_object_diffs.append(feast_object_diff) + + def to_string(self): + from colorama import Fore, Style + + log_string = "" + + message_action_map = { + TransitionType.CREATE: ("Created", Fore.GREEN), + TransitionType.DELETE: ("Deleted", Fore.RED), + TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), + TransitionType.UPDATE: ("Updated", Fore.YELLOW), + } + for feast_object_diff in 
self.feast_object_diffs: + if feast_object_diff.name == DUMMY_ENTITY_NAME: + continue + if feast_object_diff.transition_type == TransitionType.UNCHANGED: + continue + action, color = message_action_map[feast_object_diff.transition_type] + log_string += f"{action} {feast_object_diff.feast_object_type.value} {Style.BRIGHT + color}{feast_object_diff.name}{Style.RESET_ALL}\n" + if feast_object_diff.transition_type == TransitionType.UPDATE: + for _p in feast_object_diff.feast_object_property_diffs: + log_string += f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}\n" + + log_string = ( + f"{Style.BRIGHT + Fore.LIGHTBLUE_EX}No changes to registry" + if not log_string + else log_string + ) + + return log_string + + +def tag_objects_for_keep_delete_update_add( + existing_objs: Iterable[FeastObject], desired_objs: Iterable[FeastObject] +) -> Tuple[Set[FeastObject], Set[FeastObject], Set[FeastObject], Set[FeastObject]]: + existing_obj_names = {e.name for e in existing_objs} + desired_obj_names = {e.name for e in desired_objs} + + objs_to_add = {e for e in desired_objs if e.name not in existing_obj_names} + objs_to_update = {e for e in desired_objs if e.name in existing_obj_names} + objs_to_keep = {e for e in existing_objs if e.name in desired_obj_names} + objs_to_delete = {e for e in existing_objs if e.name not in desired_obj_names} + + return objs_to_keep, objs_to_delete, objs_to_update, objs_to_add + + +FeastObjectProto = TypeVar( + "FeastObjectProto", + EntityProto, + FeatureViewProto, + FeatureServiceProto, + OnDemandFeatureViewProto, + RequestFeatureViewProto, +) + + +FIELDS_TO_IGNORE = {"project"} + + +def diff_registry_objects( + current: FeastObject, new: FeastObject, object_type: FeastObjectType +) -> FeastObjectDiff: + current_proto = current.to_proto() + new_proto = new.to_proto() + assert current_proto.DESCRIPTOR.full_name == new_proto.DESCRIPTOR.full_name + 
property_diffs = [] + transition: TransitionType = TransitionType.UNCHANGED + if current_proto.spec != new_proto.spec: + for _field in current_proto.spec.DESCRIPTOR.fields: + if _field.name in FIELDS_TO_IGNORE: + continue + if getattr(current_proto.spec, _field.name) != getattr( + new_proto.spec, _field.name + ): + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name, + getattr(current_proto.spec, _field.name), + getattr(new_proto.spec, _field.name), + ) + ) + return FeastObjectDiff( + name=new_proto.spec.name, + feast_object_type=object_type, + current_feast_object=current, + new_feast_object=new, + feast_object_property_diffs=property_diffs, + transition_type=transition, + ) + + +def extract_objects_for_keep_delete_update_add( + registry: Registry, current_project: str, desired_repo_contents: RepoContents, +) -> Tuple[ + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], + Dict[FeastObjectType, Set[FeastObject]], +]: + """ + Returns the objects in the registry that must be modified to achieve the desired repo state. + + Args: + registry: The registry storing the current repo state. + current_project: The Feast project whose objects should be compared. + desired_repo_contents: The desired repo state. 
+ """ + objs_to_keep = {} + objs_to_delete = {} + objs_to_update = {} + objs_to_add = {} + + registry_object_type_to_objects: Dict[ + FeastObjectType, List[Any] + ] = FeastObjectType.get_objects_from_registry(registry, current_project) + registry_object_type_to_repo_contents: Dict[ + FeastObjectType, Set[Any] + ] = FeastObjectType.get_objects_from_repo_contents(desired_repo_contents) + + for object_type in FEAST_OBJECT_TYPES: + ( + to_keep, + to_delete, + to_update, + to_add, + ) = tag_objects_for_keep_delete_update_add( + registry_object_type_to_objects[object_type], + registry_object_type_to_repo_contents[object_type], + ) + + objs_to_keep[object_type] = to_keep + objs_to_delete[object_type] = to_delete + objs_to_update[object_type] = to_update + objs_to_add[object_type] = to_add + + return objs_to_keep, objs_to_delete, objs_to_update, objs_to_add + + +def diff_between( + registry: Registry, current_project: str, desired_repo_contents: RepoContents, +) -> RegistryDiff: + """ + Returns the difference between the current and desired repo states. + + Args: + registry: The registry storing the current repo state. + current_project: The Feast project for which the diff is being computed. + desired_repo_contents: The desired repo state. 
+ """ + diff = RegistryDiff() + + ( + objs_to_keep, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add( + registry, current_project, desired_repo_contents + ) + + for object_type in FEAST_OBJECT_TYPES: + objects_to_keep = objs_to_keep[object_type] + objects_to_delete = objs_to_delete[object_type] + objects_to_update = objs_to_update[object_type] + objects_to_add = objs_to_add[object_type] + + for e in objects_to_add: + diff.add_feast_object_diff( + FeastObjectDiff( + name=e.name, + feast_object_type=object_type, + current_feast_object=None, + new_feast_object=e, + feast_object_property_diffs=[], + transition_type=TransitionType.CREATE, + ) + ) + for e in objects_to_delete: + diff.add_feast_object_diff( + FeastObjectDiff( + name=e.name, + feast_object_type=object_type, + current_feast_object=e, + new_feast_object=None, + feast_object_property_diffs=[], + transition_type=TransitionType.DELETE, + ) + ) + for e in objects_to_update: + current_obj = [_e for _e in objects_to_keep if _e.name == e.name][0] + diff.add_feast_object_diff( + diff_registry_objects(current_obj, e, object_type) + ) + + return diff + + +def apply_diff_to_registry( + registry: Registry, registry_diff: RegistryDiff, project: str, commit: bool = True +): + """ + Applies the given diff to the given Feast project in the registry. + + Args: + registry: The registry to be updated. + registry_diff: The diff to apply. + project: Feast project to be updated. + commit: Whether the change should be persisted immediately + """ + for feast_object_diff in registry_diff.feast_object_diffs: + # There is no need to delete the object on an update, since applying the new object + # will automatically delete the existing object. 
+ if feast_object_diff.transition_type == TransitionType.DELETE: + if feast_object_diff.feast_object_type == FeastObjectType.ENTITY: + registry.delete_entity( + feast_object_diff.current_feast_object.name, project, commit=False + ) + elif feast_object_diff.feast_object_type == FeastObjectType.FEATURE_SERVICE: + registry.delete_feature_service( + feast_object_diff.current_feast_object.name, project, commit=False + ) + elif feast_object_diff.feast_object_type in [ + FeastObjectType.FEATURE_VIEW, + FeastObjectType.ON_DEMAND_FEATURE_VIEW, + FeastObjectType.REQUEST_FEATURE_VIEW, + ]: + registry.delete_feature_view( + feast_object_diff.current_feast_object.name, project, commit=False, + ) + + if feast_object_diff.transition_type in [ + TransitionType.CREATE, + TransitionType.UPDATE, + ]: + if feast_object_diff.feast_object_type == FeastObjectType.ENTITY: + registry.apply_entity( + feast_object_diff.new_feast_object, project, commit=False + ) + elif feast_object_diff.feast_object_type == FeastObjectType.FEATURE_SERVICE: + registry.apply_feature_service( + feast_object_diff.new_feast_object, project, commit=False + ) + elif feast_object_diff.feast_object_type in [ + FeastObjectType.FEATURE_VIEW, + FeastObjectType.ON_DEMAND_FEATURE_VIEW, + FeastObjectType.REQUEST_FEATURE_VIEW, + ]: + registry.apply_feature_view( + feast_object_diff.new_feast_object, project, commit=False + ) + + if commit: + registry.commit() diff --git a/sdk/python/feast/dqm/__init__.py b/sdk/python/feast/dqm/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/dqm/errors.py b/sdk/python/feast/dqm/errors.py new file mode 100644 index 00000000000..c4179f72b3c --- /dev/null +++ b/sdk/python/feast/dqm/errors.py @@ -0,0 +1,13 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .profilers.profiler import ValidationReport + + +class ValidationFailed(Exception): + def __init__(self, validation_report: "ValidationReport"): + self.validation_report = 
validation_report + + @property + def report(self) -> "ValidationReport": + return self.validation_report diff --git a/sdk/python/feast/dqm/profilers/__init__.py b/sdk/python/feast/dqm/profilers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/dqm/profilers/ge_profiler.py b/sdk/python/feast/dqm/profilers/ge_profiler.py new file mode 100644 index 00000000000..f1780754de3 --- /dev/null +++ b/sdk/python/feast/dqm/profilers/ge_profiler.py @@ -0,0 +1,162 @@ +import json +from typing import Any, Callable, Dict, List + +import dill +import great_expectations as ge +import numpy as np +import pandas as pd +from great_expectations.core import ExpectationSuite +from great_expectations.dataset import PandasDataset +from great_expectations.profile.base import ProfilerTypeMapping + +from feast.dqm.profilers.profiler import ( + Profile, + Profiler, + ValidationError, + ValidationReport, +) +from feast.protos.feast.core.ValidationProfile_pb2 import ( + GEValidationProfile as GEValidationProfileProto, +) +from feast.protos.feast.core.ValidationProfile_pb2 import ( + GEValidationProfiler as GEValidationProfilerProto, +) + + +def _prepare_dataset(dataset: PandasDataset) -> PandasDataset: + dataset_copy = dataset.copy(deep=True) + + for column in dataset.columns: + if dataset.expect_column_values_to_be_in_type_list( + column, type_list=sorted(list(ProfilerTypeMapping.DATETIME_TYPE_NAMES)) + ).success: + # GE cannot parse Timestamp or other pandas datetime time + dataset_copy[column] = dataset[column].dt.strftime("%Y-%m-%dT%H:%M:%S") + + if dataset[column].dtype == np.float32: + # GE converts expectation arguments into native Python float + # This could cause error on comparison => so better to convert to double prematurely + dataset_copy[column] = dataset[column].astype(np.float64) + + return dataset_copy + + +class GEProfile(Profile): + """ + GEProfile is an implementation of abstract Profile for integration with Great Expectations. 
+ It executes validation by applying expectations from ExpectationSuite instance to a given dataset. + """ + + expectation_suite: ExpectationSuite + + def __init__(self, expectation_suite: ExpectationSuite): + self.expectation_suite = expectation_suite + + def validate(self, df: pd.DataFrame) -> "GEValidationReport": + """ + Validate provided dataframe against GE expectation suite. + 1. Pandas dataframe is converted into PandasDataset (GE type) + 2. Some fixes applied to the data to avoid crashes inside GE (see _prepare_dataset) + 3. Each expectation from ExpectationSuite instance tested against resulting dataset + + Return GEValidationReport, which parses great expectation's schema into list of generic ValidationErrors. + """ + dataset = PandasDataset(df) + + dataset = _prepare_dataset(dataset) + + results = ge.validate( + dataset, expectation_suite=self.expectation_suite, result_format="COMPLETE" + ) + return GEValidationReport(results) + + def to_proto(self): + return GEValidationProfileProto( + expectation_suite=json.dumps(self.expectation_suite.to_json_dict()).encode() + ) + + @classmethod + def from_proto(cls, proto: GEValidationProfileProto) -> "GEProfile": + return GEProfile( + expectation_suite=ExpectationSuite(**json.loads(proto.expectation_suite)) + ) + + def __repr__(self): + expectations = json.dumps( + [e.to_json_dict() for e in self.expectation_suite.expectations], indent=2 + ) + return f"" + + +class GEProfiler(Profiler): + """ + GEProfiler is an implementation of abstract Profiler for integration with Great Expectations. + It wraps around user defined profiler that should accept dataset (in a form of pandas dataframe) + and return ExpectationSuite. 
+ """ + + def __init__( + self, user_defined_profiler: Callable[[pd.DataFrame], ExpectationSuite] + ): + self.user_defined_profiler = user_defined_profiler + + def analyze_dataset(self, df: pd.DataFrame) -> Profile: + """ + Generate GEProfile with ExpectationSuite (set of expectations) + from a given pandas dataframe by applying user defined profiler. + + Some fixes are also applied to the dataset (see _prepare_dataset function) to make it compatible with GE. + + Return GEProfile + """ + dataset = PandasDataset(df) + + dataset = _prepare_dataset(dataset) + + return GEProfile(expectation_suite=self.user_defined_profiler(dataset)) + + def to_proto(self): + return GEValidationProfilerProto( + profiler=GEValidationProfilerProto.UserDefinedProfiler( + body=dill.dumps(self.user_defined_profiler, recurse=True) + ) + ) + + @classmethod + def from_proto(cls, proto: GEValidationProfilerProto) -> "GEProfiler": + return GEProfiler(user_defined_profiler=dill.loads(proto.profiler.body)) + + +class GEValidationReport(ValidationReport): + def __init__(self, validation_result: Dict[Any, Any]): + self._validation_result = validation_result + + @property + def is_success(self) -> bool: + return self._validation_result["success"] + + @property + def errors(self) -> List["ValidationError"]: + return [ + ValidationError( + check_name=res.expectation_config.expectation_type, + column_name=res.expectation_config.kwargs["column"], + check_config=res.expectation_config.kwargs, + missing_count=res["result"].get("missing_count"), + missing_percent=res["result"].get("missing_percent"), + ) + for res in self._validation_result["results"] + if not res["success"] + ] + + def __repr__(self): + failed_expectations = [ + res.to_json_dict() + for res in self._validation_result["results"] + if not res["success"] + ] + return json.dumps(failed_expectations, indent=2) + + +def ge_profiler(func): + return GEProfiler(user_defined_profiler=func) diff --git a/sdk/python/feast/dqm/profilers/profiler.py 
b/sdk/python/feast/dqm/profilers/profiler.py new file mode 100644 index 00000000000..5d2e9d36bc1 --- /dev/null +++ b/sdk/python/feast/dqm/profilers/profiler.py @@ -0,0 +1,88 @@ +import abc +from typing import Any, List, Optional + +import pandas as pd + + +class Profile: + @abc.abstractmethod + def validate(self, dataset: pd.DataFrame) -> "ValidationReport": + """ + Run set of rules / expectations from current profile against given dataset. + + Return ValidationReport + """ + ... + + @abc.abstractmethod + def to_proto(self): + ... + + @classmethod + @abc.abstractmethod + def from_proto(cls, proto) -> "Profile": + ... + + +class Profiler: + @abc.abstractmethod + def analyze_dataset(self, dataset: pd.DataFrame) -> Profile: + """ + Generate Profile object with dataset's characteristics (with rules / expectations) + from given dataset (as pandas dataframe). + """ + ... + + @abc.abstractmethod + def to_proto(self): + ... + + @classmethod + @abc.abstractmethod + def from_proto(cls, proto) -> "Profiler": + ... + + +class ValidationReport: + @property + @abc.abstractmethod + def is_success(self) -> bool: + """ + Return whether validation was successful + """ + ... + + @property + @abc.abstractmethod + def errors(self) -> List["ValidationError"]: + """ + Return list of ValidationErrors if validation failed (is_success = false) + """ + ... 
+ + +class ValidationError: + check_name: str + column_name: str + + check_config: Optional[Any] + + missing_count: Optional[int] + missing_percent: Optional[float] + + def __init__( + self, + check_name: str, + column_name: str, + check_config: Optional[Any] = None, + missing_count: Optional[int] = None, + missing_percent: Optional[float] = None, + ): + self.check_name = check_name + self.column_name = column_name + self.check_config = check_config + self.missing_count = missing_count + self.missing_percent = missing_percent + + def __repr__(self): + return f"" diff --git a/sdk/python/feast/driver_test_data.py b/sdk/python/feast/driver_test_data.py index 1c9a1dd20bc..117bfcbd9cb 100644 --- a/sdk/python/feast/driver_test_data.py +++ b/sdk/python/feast/driver_test_data.py @@ -264,3 +264,29 @@ def create_global_daily_stats_df(start_date, end_date) -> pd.DataFrame: # TODO: Remove created timestamp in order to test whether its really optional df_daily["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) return df_daily + + +def create_field_mapping_df(start_date, end_date) -> pd.DataFrame: + """ + Example df generated by this function: + | event_timestamp | column_name | created | + |------------------+-------------+------------------| + | 2021-03-17 19:00 | 99 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 22 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 7 | 2021-03-24 19:38 | + | 2021-03-17 19:00 | 45 | 2021-03-24 19:38 | + """ + size = 10 + df = pd.DataFrame() + df["column_name"] = np.random.randint(1, 100, size=size).astype(np.int32) + df[DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL] = [ + _convert_event_timestamp( + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms"), + EventTimestampType(idx % 4), + ) + for idx, dt in enumerate( + pd.date_range(start=start_date, end=end_date, periods=size) + ) + ] + df["created"] = pd.to_datetime(pd.Timestamp.now(tz=None).round("ms")) + return df diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 
f6a66bea5a0..17147f8a603 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -74,6 +74,11 @@ def __init__(self, bucket): super().__init__(f"S3 bucket {bucket} for the Feast registry can't be accessed") +class SavedDatasetNotFound(FeastObjectNotFoundException): + def __init__(self, name: str, project: str): + super().__init__(f"Saved dataset {name} does not exist in project {project}") + + class FeastProviderLoginError(Exception): """Error class that indicates a user has not authenticated with their provider.""" @@ -103,14 +108,16 @@ def __init__(self, feature_server_type: str): class FeastModuleImportError(Exception): - def __init__(self, module_name: str, module_type: str): - super().__init__(f"Could not import {module_type} module '{module_name}'") + def __init__(self, module_name: str, class_name: str): + super().__init__( + f"Could not import module '{module_name}' while attempting to load class '{class_name}'" + ) class FeastClassImportError(Exception): - def __init__(self, module_name, class_name, class_type="provider"): + def __init__(self, module_name: str, class_name: str): super().__init__( - f"Could not import {class_type} '{class_name}' from module '{module_name}'" + f"Could not import class '{class_name}' from module '{module_name}'" ) @@ -168,11 +175,10 @@ def __init__(self, online_store_class_name: str): ) -class FeastClassInvalidName(Exception): +class FeastInvalidBaseClass(Exception): def __init__(self, class_name: str, class_type: str): super().__init__( - f"Config Class '{class_name}' " - f"should end with the string `{class_type}`.'" + f"Class '{class_name}' should have `{class_type}` as a base class." ) @@ -237,6 +243,23 @@ def __init__(self, details): super().__init__(f"Redshift SQL Query failed to finish. 
Details: {details}") +class RedshiftTableNameTooLong(Exception): + def __init__(self, table_name: str): + super().__init__( + f"Redshift table names have a maximum length of 127 characters, but the table name {table_name} has length {len(table_name)} characters." + ) + + +class SnowflakeCredentialsError(Exception): + def __init__(self): + super().__init__("Snowflake Connector failed due to incorrect credentials") + + +class SnowflakeQueryError(Exception): + def __init__(self, details): + super().__init__(f"Snowflake SQL Query failed to finish. Details: {details}") + + class EntityTimestampInferenceException(Exception): def __init__(self, expected_column_name: str): super().__init__( @@ -292,3 +315,18 @@ def __init__(self, actual_class: str, expected_class: str): super().__init__( f"The registry store class was expected to be {expected_class}, but was instead {actual_class}." ) + + +class FeastInvalidInfraObjectType(Exception): + def __init__(self): + super().__init__("Could not identify the type of the InfraObject.") + + +class SnowflakeIncompleteConfig(Exception): + def __init__(self, e: KeyError): + super().__init__(f"{e} not defined in a config file or feature_store.yaml file") + + +class SnowflakeQueryUnknownError(Exception): + def __init__(self, query: str): + super().__init__(f"Snowflake query failed: {query}") diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index b813af1c63d..1f4513fa371 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -1,3 +1,5 @@ +import traceback + import click import uvicorn from fastapi import FastAPI, HTTPException, Request @@ -8,7 +10,6 @@ import feast from feast import proto_json from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesRequest -from feast.type_map import feast_value_type_to_python_type def get_app(store: "feast.FeatureStore"): @@ -41,16 +42,11 @@ def get_online_features(body=Depends(get_body)): if any(batch_size != 
num_entities for batch_size in batch_sizes): raise HTTPException(status_code=500, detail="Uneven number of columns") - entity_rows = [ - { - k: feast_value_type_to_python_type(v.val[idx]) - for k, v in request_proto.entities.items() - } - for idx in range(num_entities) - ] - - response_proto = store.get_online_features( - features, entity_rows, full_feature_names=full_feature_names + response_proto = store._get_online_features( + features, + request_proto.entities, + full_feature_names=full_feature_names, + native_entity_values=False, ).proto # Convert the Protobuf object to JSON and return it @@ -59,7 +55,7 @@ def get_online_features(body=Depends(get_body)): ) except Exception as e: # Print the original exception on the server side - logger.exception(e) + logger.exception(traceback.format_exc()) # Raise HTTPException to return the error message to the client raise HTTPException(status_code=500, detail=str(e)) diff --git a/sdk/python/feast/feature_service.py b/sdk/python/feast/feature_service.py index 9bb4fb5e5df..16815531a3b 100644 --- a/sdk/python/feast/feature_service.py +++ b/sdk/python/feast/feature_service.py @@ -30,12 +30,12 @@ class FeatureService: Services. """ - name: str - feature_view_projections: List[FeatureViewProjection] - tags: Dict[str, str] - description: Optional[str] = None - created_timestamp: Optional[datetime] = None - last_updated_timestamp: Optional[datetime] = None + _name: str + _feature_view_projections: List[FeatureViewProjection] + _tags: Dict[str, str] + _description: Optional[str] = None + _created_timestamp: Optional[datetime] = None + _last_updated_timestamp: Optional[datetime] = None @log_exceptions def __init__( @@ -51,22 +51,22 @@ def __init__( Raises: ValueError: If one of the specified features is not a valid type. 
""" - self.name = name - self.feature_view_projections = [] + self._name = name + self._feature_view_projections = [] for feature_grouping in features: if isinstance(feature_grouping, BaseFeatureView): - self.feature_view_projections.append(feature_grouping.projection) + self._feature_view_projections.append(feature_grouping.projection) else: raise ValueError( "The FeatureService {fs_name} has been provided with an invalid type" f'{type(feature_grouping)} as part of the "features" argument.)' ) - self.tags = tags or {} - self.description = description - self.created_timestamp = None - self.last_updated_timestamp = None + self._tags = tags or {} + self._description = description + self._created_timestamp = None + self._last_updated_timestamp = None def __repr__(self): items = (f"{k} = {v}" for k, v in self.__dict__.items()) @@ -93,6 +93,56 @@ def __eq__(self, other): return True + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, name: str): + self._name = name + + @property + def feature_view_projections(self) -> List[FeatureViewProjection]: + return self._feature_view_projections + + @feature_view_projections.setter + def feature_view_projections( + self, feature_view_projections: List[FeatureViewProjection] + ): + self._feature_view_projections = feature_view_projections + + @property + def tags(self) -> Dict[str, str]: + return self._tags + + @tags.setter + def tags(self, tags: Dict[str, str]): + self._tags = tags + + @property + def description(self) -> Optional[str]: + return self._description + + @description.setter + def description(self, description: str): + self._description = description + + @property + def created_timestamp(self) -> Optional[datetime]: + return self._created_timestamp + + @created_timestamp.setter + def created_timestamp(self, created_timestamp: datetime): + self._created_timestamp = created_timestamp + + @property + def last_updated_timestamp(self) -> Optional[datetime]: + return 
self._last_updated_timestamp + + @last_updated_timestamp.setter + def last_updated_timestamp(self, last_updated_timestamp: datetime): + self._last_updated_timestamp = last_updated_timestamp + @staticmethod def from_proto(feature_service_proto: FeatureServiceProto): """ diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index ce8125520ee..fcd94f9bea8 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -15,7 +15,7 @@ import itertools import os import warnings -from collections import Counter, OrderedDict, defaultdict +from collections import Counter, defaultdict from datetime import datetime from pathlib import Path from typing import ( @@ -23,8 +23,9 @@ Dict, Iterable, List, - NamedTuple, + Mapping, Optional, + Sequence, Set, Tuple, Union, @@ -33,11 +34,13 @@ import pandas as pd from colorama import Fore, Style +from google.protobuf.timestamp_pb2 import Timestamp from tqdm import tqdm from feast import feature_server, flags, flags_helper, utils from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import RegistryDiff +from feast.diff.infra_diff import InfraDiff, diff_infra_protos +from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between from feast.entity import Entity from feast.errors import ( EntityNotFoundException, @@ -60,18 +63,22 @@ update_entities_with_inferred_types_from_feature_views, update_feature_views_with_inferred_features, ) +from feast.infra.infra_object import Infra from feast.infra.provider import Provider, RetrievalJob, get_provider from feast.on_demand_feature_view import OnDemandFeatureView -from feast.online_response import OnlineResponse, _infer_online_entity_rows -from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.online_response import OnlineResponse +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto from feast.protos.feast.serving.ServingService_pb2 import ( - 
GetOnlineFeaturesRequestV2, + FieldStatus, GetOnlineFeaturesResponse, ) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import RepeatedValue, Value from feast.registry import Registry from feast.repo_config import RepoConfig, load_repo_config +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView +from feast.saved_dataset import SavedDataset, SavedDatasetStorage from feast.type_map import python_values_to_proto_values from feast.usage import log_exceptions, log_exceptions_and_usage, set_usage_attribute from feast.value_type import ValueType @@ -80,31 +87,6 @@ warnings.simplefilter("once", DeprecationWarning) -class RepoContents(NamedTuple): - feature_views: Set[FeatureView] - on_demand_feature_views: Set[OnDemandFeatureView] - request_feature_views: Set[RequestFeatureView] - entities: Set[Entity] - feature_services: Set[FeatureService] - - def to_registry_proto(self) -> RegistryProto: - registry_proto = RegistryProto() - registry_proto.entities.extend([e.to_proto() for e in self.entities]) - registry_proto.feature_views.extend( - [fv.to_proto() for fv in self.feature_views] - ) - registry_proto.on_demand_feature_views.extend( - [fv.to_proto() for fv in self.on_demand_feature_views] - ) - registry_proto.request_feature_views.extend( - [fv.to_proto() for fv in self.request_feature_views] - ) - registry_proto.feature_services.extend( - [fs.to_proto() for fs in self.feature_services] - ) - return registry_proto - - class FeatureStore: """ A FeatureStore object is used to define, create, and retrieve features. 
@@ -143,6 +125,7 @@ def __init__( registry_config = self.config.get_registry_config() self._registry = Registry(registry_config, repo_path=self.repo_path) + self._registry._initialize_registry() self._provider = get_provider(self.config, self.repo_path) @log_exceptions @@ -263,14 +246,18 @@ def _list_feature_views( return feature_views @log_exceptions_and_usage - def list_on_demand_feature_views(self) -> List[OnDemandFeatureView]: + def list_on_demand_feature_views( + self, allow_cache: bool = False + ) -> List[OnDemandFeatureView]: """ Retrieves the list of on demand feature views from the registry. Returns: A list of on demand feature views. """ - return self._registry.list_on_demand_feature_views(self.project) + return self._registry.list_on_demand_feature_views( + self.project, allow_cache=allow_cache + ) @log_exceptions_and_usage def get_entity(self, name: str) -> Entity: @@ -403,8 +390,58 @@ def _get_features( _feature_refs = _features return _feature_refs + def _should_use_plan(self): + """Returns True if _plan and _apply_diffs should be used, False otherwise.""" + # Currently only the local provider supports _plan and _apply_diffs. 
+ return self.config.provider == "local" + + def _validate_all_feature_views( + self, + views_to_update: List[FeatureView], + odfvs_to_update: List[OnDemandFeatureView], + request_views_to_update: List[RequestFeatureView], + ): + """Validates all feature views.""" + if ( + not flags_helper.enable_on_demand_feature_views(self.config) + and len(odfvs_to_update) > 0 + ): + raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) + + set_usage_attribute("odfv", bool(odfvs_to_update)) + + _validate_feature_views( + [*views_to_update, *odfvs_to_update, *request_views_to_update] + ) + + def _make_inferences( + self, + entities_to_update: List[Entity], + views_to_update: List[FeatureView], + odfvs_to_update: List[OnDemandFeatureView], + ): + """Makes inferences for entities, feature views, and odfvs.""" + update_entities_with_inferred_types_from_feature_views( + entities_to_update, views_to_update, self.config + ) + + update_data_sources_with_inferred_event_timestamp_col( + [view.batch_source for view in views_to_update], self.config + ) + + # New feature views may reference previously applied entities. + entities = self._list_entities() + update_feature_views_with_inferred_features( + views_to_update, entities + entities_to_update, self.config + ) + + for odfv in odfvs_to_update: + odfv.infer_features() + @log_exceptions_and_usage - def plan(self, desired_repo_objects: RepoContents) -> RegistryDiff: + def _plan( + self, desired_repo_contents: RepoContents + ) -> Tuple[RegistryDiff, InfraDiff, Infra]: """Dry-run registering objects to metadata store. The plan method dry-runs registering one or more definitions (e.g., Entity, FeatureView), and produces @@ -439,18 +476,57 @@ def plan(self, desired_repo_objects: RepoContents) -> RegistryDiff: ... ttl=timedelta(seconds=86400 * 1), ... batch_source=driver_hourly_stats, ... 
) - >>> diff = fs.plan(RepoContents({driver_hourly_stats_view}, set(), set(), {driver}, set())) # register entity and feature view + >>> registry_diff, infra_diff, new_infra = fs._plan(RepoContents({driver_hourly_stats_view}, set(), set(), {driver}, set())) # register entity and feature view """ + # Validate and run inference on all the objects to be registered. + self._validate_all_feature_views( + list(desired_repo_contents.feature_views), + list(desired_repo_contents.on_demand_feature_views), + list(desired_repo_contents.request_feature_views), + ) + self._make_inferences( + list(desired_repo_contents.entities), + list(desired_repo_contents.feature_views), + list(desired_repo_contents.on_demand_feature_views), + ) + + # Compute the desired difference between the current objects in the registry and + # the desired repo state. + registry_diff = diff_between( + self._registry, self.project, desired_repo_contents + ) - current_registry_proto = ( - self._registry.cached_registry_proto.__deepcopy__() + # Compute the desired difference between the current infra, as stored in the registry, + # and the desired infra. + self._registry.refresh() + current_infra_proto = ( + self._registry.cached_registry_proto.infra.__deepcopy__() if self._registry.cached_registry_proto - else RegistryProto() + else InfraProto() ) + desired_registry_proto = desired_repo_contents.to_registry_proto() + new_infra = self._provider.plan_infra(self.config, desired_registry_proto) + new_infra_proto = new_infra.to_proto() + infra_diff = diff_infra_protos(current_infra_proto, new_infra_proto) - desired_registry_proto = desired_repo_objects.to_registry_proto() - diffs = Registry.diff_between(current_registry_proto, desired_registry_proto) - return diffs + return (registry_diff, infra_diff, new_infra) + + @log_exceptions_and_usage + def _apply_diffs( + self, registry_diff: RegistryDiff, infra_diff: InfraDiff, new_infra: Infra + ): + """Applies the given diffs to the metadata store and infrastructure. 
+ + Args: + registry_diff: The diff between the current registry and the desired registry. + infra_diff: The diff between the current infra and the desired infra. + new_infra: The desired infra. + """ + infra_diff.update() + apply_diff_to_registry( + self._registry, registry_diff, self.project, commit=False + ) + self._registry.update_infra(new_infra, self.project, commit=True) @log_exceptions_and_usage def apply( @@ -483,7 +559,7 @@ def apply( ] ] = None, partial: bool = True, - ) -> RegistryDiff: + ): """Register objects to metadata store and update related infrastructure. The apply method registers one or more definitions (e.g., Entity, FeatureView) and registers or updates these @@ -519,7 +595,7 @@ def apply( ... ttl=timedelta(seconds=86400 * 1), ... batch_source=driver_hourly_stats, ... ) - >>> diff = fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view + >>> fs.apply([driver_hourly_stats_view, driver]) # register entity and feature view """ # TODO: Add locking if not isinstance(objects, Iterable): @@ -529,12 +605,6 @@ def apply( if not objects_to_delete: objects_to_delete = [] - current_registry_proto = ( - self._registry.cached_registry_proto.__deepcopy__() - if self._registry.cached_registry_proto - else RegistryProto() - ) - # Separate all objects into entities, feature services, and different feature view types. entities_to_update = [ob for ob in objects if isinstance(ob, Entity)] views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)] @@ -549,34 +619,11 @@ def apply( ) + len(odfvs_to_update) + len(services_to_update) != len(objects): raise ValueError("Unknown object type provided as part of apply() call") - # Validate all types of feature views. 
- if ( - not flags_helper.enable_on_demand_feature_views(self.config) - and len(odfvs_to_update) > 0 - ): - raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) - - set_usage_attribute("odfv", bool(odfvs_to_update)) - - _validate_feature_views( - [*views_to_update, *odfvs_to_update, *request_views_to_update] - ) - - # Make inferences - update_entities_with_inferred_types_from_feature_views( - entities_to_update, views_to_update, self.config - ) - - update_data_sources_with_inferred_event_timestamp_col( - [view.batch_source for view in views_to_update], self.config - ) - - update_feature_views_with_inferred_features( - views_to_update, entities_to_update, self.config + # Validate all feature views and make inferences. + self._validate_all_feature_views( + views_to_update, odfvs_to_update, request_views_to_update ) - - for odfv in odfvs_to_update: - odfv.infer_features() + self._make_inferences(entities_to_update, views_to_update, odfvs_to_update) # Handle all entityless feature views by using DUMMY_ENTITY as a placeholder entity. 
entities_to_update.append(DUMMY_ENTITY) @@ -632,22 +679,6 @@ def apply( service.name, project=self.project, commit=False ) - new_registry_proto = ( - self._registry.cached_registry_proto - if self._registry.cached_registry_proto - else RegistryProto() - ) - - diffs = Registry.diff_between(current_registry_proto, new_registry_proto) - - entities_to_update = [ob for ob in objects if isinstance(ob, Entity)] - views_to_update = [ob for ob in objects if isinstance(ob, FeatureView)] - - entities_to_delete = [ob for ob in objects_to_delete if isinstance(ob, Entity)] - views_to_delete = [ - ob for ob in objects_to_delete if isinstance(ob, FeatureView) - ] - self._get_provider().update_infra( project=self.project, tables_to_delete=views_to_delete if not partial else [], @@ -659,8 +690,6 @@ def apply( self._registry.commit() - return diffs - @log_exceptions_and_usage def teardown(self): """Tears down all local and cloud resources for the feature store.""" @@ -796,6 +825,93 @@ def get_historical_features( return job + @log_exceptions_and_usage + def create_saved_dataset( + self, + from_: RetrievalJob, + name: str, + storage: SavedDatasetStorage, + tags: Optional[Dict[str, str]] = None, + ) -> SavedDataset: + """ + Execute provided retrieval job and persist its outcome in given storage. + Storage type (eg, BigQuery or Redshift) must be the same as globally configured offline store. + After data successfully persisted saved dataset object with dataset metadata is committed to the registry. + Name for the saved dataset should be unique within project, since it's possible to overwrite previously stored dataset + with the same name. + + Returns: + SavedDataset object with attached RetrievalJob + + Raises: + ValueError if given retrieval job doesn't have metadata + """ + warnings.warn( + "Saving dataset is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. 
" + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) + + if not from_.metadata: + raise ValueError( + "RetrievalJob must contains metadata. " + "Use RetrievalJob produced by get_historical_features" + ) + + dataset = SavedDataset( + name=name, + features=from_.metadata.features, + join_keys=from_.metadata.keys, + full_feature_names=from_.full_feature_names, + storage=storage, + tags=tags, + ) + + dataset.min_event_timestamp = from_.metadata.min_event_timestamp + dataset.max_event_timestamp = from_.metadata.max_event_timestamp + + from_.persist(storage) + + self._registry.apply_saved_dataset(dataset, self.project, commit=True) + + return dataset.with_retrieval_job( + self._get_provider().retrieve_saved_dataset( + config=self.config, dataset=dataset + ) + ) + + @log_exceptions_and_usage + def get_saved_dataset(self, name: str) -> SavedDataset: + """ + Find a saved dataset in the registry by provided name and + create a retrieval job to pull whole dataset from storage (offline store). + + If dataset couldn't be found by provided name SavedDatasetNotFound exception will be raised. + + Data will be retrieved from globally configured offline store. + + Returns: + SavedDataset with RetrievalJob attached + + Raises: + SavedDatasetNotFound + """ + warnings.warn( + "Retrieving datasets is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) + + dataset = self._registry.get_saved_dataset(name, self.project) + provider = self._get_provider() + + retrieval_job = provider.retrieve_saved_dataset( + config=self.config, dataset=dataset + ) + return dataset.with_retrieval_job(retrieval_job) + @log_exceptions_and_usage def materialize_incremental( self, end_date: datetime, feature_views: Optional[List[str]] = None, @@ -1048,6 +1164,30 @@ def get_online_features( ... 
) >>> online_response_dict = online_response.to_dict() """ + columnar: Dict[str, List[Any]] = {k: [] for k in entity_rows[0].keys()} + for entity_row in entity_rows: + for key, value in entity_row.items(): + try: + columnar[key].append(value) + except KeyError as e: + raise ValueError("All entity_rows must have the same keys.") from e + + return self._get_online_features( + features=features, + entity_values=columnar, + full_feature_names=full_feature_names, + native_entity_values=True, + ) + + def _get_online_features( + self, + features: Union[List[str], FeatureService], + entity_values: Mapping[ + str, Union[Sequence[Any], Sequence[Value], RepeatedValue] + ], + full_feature_names: bool = False, + native_entity_values: bool = True, + ): _feature_refs = self._get_features(features, allow_cache=True) ( requested_feature_views, @@ -1057,6 +1197,29 @@ def get_online_features( features=features, allow_cache=True, hide_dummy_entity=False ) + entity_name_to_join_key_map, entity_type_map = self._get_entity_maps( + requested_feature_views + ) + + # Extract Sequence from RepeatedValue Protobuf. + entity_value_lists: Dict[str, Union[List[Any], List[Value]]] = { + k: list(v) if isinstance(v, Sequence) else list(v.val) + for k, v in entity_values.items() + } + + entity_proto_values: Dict[str, List[Value]] + if native_entity_values: + # Convert values to Protobuf once. + entity_proto_values = { + k: python_values_to_proto_values( + v, entity_type_map.get(k, ValueType.UNKNOWN) + ) + for k, v in entity_value_lists.items() + } + else: + entity_proto_values = entity_value_lists + + num_rows = _validate_entity_values(entity_proto_values) _validate_feature_refs(_feature_refs, full_feature_names) ( grouped_refs, @@ -1072,19 +1235,132 @@ def get_online_features( set_usage_attribute("odfv", bool(grouped_odfv_refs)) set_usage_attribute("request_fv", bool(grouped_request_fv_refs)) + # All requested features should be present in the result. 
+ requested_result_row_names = { + feat_ref.replace(":", "__") for feat_ref in _feature_refs + } + if not full_feature_names: + requested_result_row_names = { + name.rpartition("__")[-1] for name in requested_result_row_names + } + feature_views = list(view for view, _ in grouped_refs) + + needed_request_data, needed_request_fv_features = self.get_needed_request_data( + grouped_odfv_refs, grouped_request_fv_refs + ) + + join_key_values: Dict[str, List[Value]] = {} + request_data_features: Dict[str, List[Value]] = {} + # Entity rows may be either entities or request data. + for entity_name, values in entity_proto_values.items(): + # Found request data + if ( + entity_name in needed_request_data + or entity_name in needed_request_fv_features + ): + if entity_name in needed_request_fv_features: + # If the data was requested as a feature then + # make sure it appears in the result. + requested_result_row_names.add(entity_name) + request_data_features[entity_name] = values + else: + try: + join_key = entity_name_to_join_key_map[entity_name] + except KeyError: + raise EntityNotFoundException(entity_name, self.project) + # All join keys should be returned in the result. + requested_result_row_names.add(join_key) + join_key_values[join_key] = values + + self.ensure_request_data_values_exist( + needed_request_data, needed_request_fv_features, request_data_features + ) + + # Populate online features response proto with join keys and request data features + online_features_response = GetOnlineFeaturesResponse( + results=[GetOnlineFeaturesResponse.FeatureVector() for _ in range(num_rows)] + ) + self._populate_result_rows_from_columnar( + online_features_response=online_features_response, + data=dict(**join_key_values, **request_data_features), + ) + + # Add the Entityless case after populating result rows to avoid having to remove + # it later. 
entityless_case = DUMMY_ENTITY_NAME in [ entity_name for feature_view in feature_views for entity_name in feature_view.entities ] + if entityless_case: + join_key_values[DUMMY_ENTITY_ID] = python_values_to_proto_values( + [DUMMY_ENTITY_VAL] * num_rows, DUMMY_ENTITY.value_type + ) provider = self._get_provider() + for table, requested_features in grouped_refs: + # Get the correct set of entity values with the correct join keys. + table_entity_values, idxs = self._get_unique_entities( + table, join_key_values, entity_name_to_join_key_map, + ) + + # Fetch feature data for the minimum set of Entities. + feature_data = self._read_from_online_store( + table_entity_values, provider, requested_features, table, + ) + + # Populate the result_rows with the Features from the OnlineStore inplace. + self._populate_response_from_feature_data( + feature_data, + idxs, + online_features_response, + full_feature_names, + requested_features, + table, + ) + + if grouped_odfv_refs: + self._augment_response_with_on_demand_transforms( + online_features_response, + _feature_refs, + requested_on_demand_feature_views, + full_feature_names, + ) + + self._drop_unneeded_columns( + online_features_response, requested_result_row_names + ) + return OnlineResponse(online_features_response) + + @staticmethod + def _get_columnar_entity_values( + rowise: Optional[List[Dict[str, Any]]], columnar: Optional[Dict[str, List[Any]]] + ) -> Dict[str, List[Any]]: + if (rowise is None and columnar is None) or ( + rowise is not None and columnar is not None + ): + raise ValueError( + "Exactly one of `columnar_entity_values` and `rowise_entity_values` must be set." + ) + + if rowise is not None: + # Convert entity_rows from rowise to columnar. 
+ res = defaultdict(list) + for entity_row in rowise: + for key, value in entity_row.items(): + res[key].append(value) + return res + return cast(Dict[str, List[Any]], columnar) + + def _get_entity_maps(self, feature_views): entities = self._list_entities(allow_cache=True, hide_dummy_entity=False) entity_name_to_join_key_map: Dict[str, str] = {} + entity_type_map: Dict[str, ValueType] = {} for entity in entities: entity_name_to_join_key_map[entity.name] = entity.join_key - for feature_view in requested_feature_views: + entity_type_map[entity.name] = entity.value_type + for feature_view in feature_views: for entity_name in feature_view.entities: entity = self._registry.get_entity( entity_name, self.project, allow_cache=True @@ -1098,147 +1374,51 @@ def get_online_features( entity.join_key, entity.join_key ) entity_name_to_join_key_map[entity_name] = join_key + entity_type_map[join_key] = entity.value_type + return entity_name_to_join_key_map, entity_type_map - needed_request_data, needed_request_fv_features = self.get_needed_request_data( - grouped_odfv_refs, grouped_request_fv_refs - ) - - join_key_rows = [] - request_data_features: Dict[str, List[Any]] = {} - # Entity rows may be either entities or request data. 
- for row in entity_rows: - join_key_row = {} - for entity_name, entity_value in row.items(): - # Found request data - if ( - entity_name in needed_request_data - or entity_name in needed_request_fv_features - ): - if entity_name not in request_data_features: - request_data_features[entity_name] = [] - request_data_features[entity_name].append(entity_value) - continue - try: - join_key = entity_name_to_join_key_map[entity_name] - except KeyError: - raise EntityNotFoundException(entity_name, self.project) - join_key_row[join_key] = entity_value - if entityless_case: - join_key_row[DUMMY_ENTITY_ID] = DUMMY_ENTITY_VAL - if len(join_key_row) > 0: - # May be empty if this entity row was request data - join_key_rows.append(join_key_row) - - self.ensure_request_data_values_exist( - needed_request_data, needed_request_fv_features, request_data_features - ) - - entity_row_proto_list = _infer_online_entity_rows(join_key_rows) - - union_of_entity_keys: List[EntityKeyProto] = [] - result_rows: List[GetOnlineFeaturesResponse.FieldValues] = [] - - for entity_row_proto in entity_row_proto_list: - # Create a list of entity keys to filter down for each feature view at lookup time. - union_of_entity_keys.append(_entity_row_to_key(entity_row_proto)) - # Also create entity values to append to the result - result_rows.append(_entity_row_to_field_values(entity_row_proto)) - - for table, requested_features in grouped_refs: - table_join_keys = [ - entity_name_to_join_key_map[entity_name] - for entity_name in table.entities - ] - self._populate_result_rows_from_feature_view( - table_join_keys, - full_feature_names, - provider, - requested_features, - result_rows, - table, - union_of_entity_keys, - ) + @staticmethod + def _get_table_entity_values( + table: FeatureView, + entity_name_to_join_key_map: Dict[str, str], + join_key_proto_values: Dict[str, List[Value]], + ) -> Dict[str, List[Value]]: + # The correct join_keys expected by the OnlineStore for this Feature View. 
+ table_join_keys = [ + entity_name_to_join_key_map[entity_name] for entity_name in table.entities + ] - requested_result_row_names = self._get_requested_result_fields( - result_rows, needed_request_fv_features - ) - self._populate_odfv_dependencies( - entity_name_to_join_key_map, - full_feature_names, - grouped_odfv_refs, - provider, - request_data_features, - result_rows, - union_of_entity_keys, - ) + # If the FeatureView has a Projection then the join keys may be aliased. + alias_to_join_key_map = {v: k for k, v in table.projection.join_key_map.items()} - self._augment_response_with_on_demand_transforms( - _feature_refs, - requested_result_row_names, - requested_on_demand_feature_views, - full_feature_names, - result_rows, - ) - return OnlineResponse(GetOnlineFeaturesResponse(field_values=result_rows)) + # Subset to columns which are relevant to this FeatureView and + # give them the correct names. + entity_values = { + alias_to_join_key_map.get(k, k): v + for k, v in join_key_proto_values.items() + if alias_to_join_key_map.get(k, k) in table_join_keys + } + return entity_values - def _get_requested_result_fields( - self, - result_rows: List[GetOnlineFeaturesResponse.FieldValues], - needed_request_fv_features: Set[str], + @staticmethod + def _populate_result_rows_from_columnar( + online_features_response: GetOnlineFeaturesResponse, + data: Dict[str, List[Value]], ): - # Get requested feature values so we can drop odfv dependencies that aren't requested - requested_result_row_names: Set[str] = set() - for result_row in result_rows: - for feature_name in result_row.fields.keys(): - requested_result_row_names.add(feature_name) - # Request feature view values are also request data features that should be in the - # final output - requested_result_row_names.update(needed_request_fv_features) - return requested_result_row_names - - def _populate_odfv_dependencies( - self, - entity_name_to_join_key_map: Dict[str, str], - full_feature_names: bool, - grouped_odfv_refs: 
List[Tuple[OnDemandFeatureView, List[str]]], - provider: Provider, - request_data_features: Dict[str, List[Any]], - result_rows: List[GetOnlineFeaturesResponse.FieldValues], - union_of_entity_keys: List[EntityKeyProto], - ): - # Add more feature values to the existing result rows for the request data features - for feature_name, feature_values in request_data_features.items(): - proto_values = python_values_to_proto_values( - feature_values, ValueType.UNKNOWN - ) + timestamp = Timestamp() # Only initialize this timestamp once. + # Add more values to the existing result rows + for feature_name, feature_values in data.items(): - for row_idx, proto_value in enumerate(proto_values): - result_row = result_rows[row_idx] - result_row.fields[feature_name].CopyFrom(proto_value) - result_row.statuses[ - feature_name - ] = GetOnlineFeaturesResponse.FieldStatus.PRESENT - - # Add data if odfv requests specific feature views as dependencies - if len(grouped_odfv_refs) > 0: - for odfv, _ in grouped_odfv_refs: - for fv in odfv.input_feature_views.values(): - table_join_keys = [ - entity_name_to_join_key_map[entity_name] - for entity_name in fv.entities - ] - self._populate_result_rows_from_feature_view( - table_join_keys, - full_feature_names, - provider, - [feature.name for feature in fv.features], - result_rows, - fv, - union_of_entity_keys, - ) + online_features_response.metadata.feature_names.val.append(feature_name) + + for row_idx, proto_value in enumerate(feature_values): + result_row = online_features_response.results[row_idx] + result_row.values.append(proto_value) + result_row.statuses.append(FieldStatus.PRESENT) + result_row.event_timestamps.append(timestamp) + @staticmethod def get_needed_request_data( - self, grouped_odfv_refs: List[Tuple[OnDemandFeatureView, List[str]]], grouped_request_fv_refs: List[Tuple[RequestFeatureView, List[str]]], ) -> Tuple[Set[str], Set[str]]: @@ -1252,8 +1432,8 @@ def get_needed_request_data( needed_request_fv_features.add(feature.name) 
return needed_request_data, needed_request_fv_features + @staticmethod def ensure_request_data_values_exist( - self, needed_request_data: Set[str], needed_request_fv_features: Set[str], request_data_features: Dict[str, List[Any]], @@ -1272,82 +1452,176 @@ def ensure_request_data_values_exist( feature_names=missing_features ) - def _populate_result_rows_from_feature_view( + def _get_unique_entities( self, - table_join_keys: List[str], - full_feature_names: bool, + table: FeatureView, + join_key_values: Dict[str, List[Value]], + entity_name_to_join_key_map: Dict[str, str], + ) -> Tuple[Tuple[Dict[str, Value], ...], Tuple[List[int], ...]]: + """ Return the set of unique composite Entities for a Feature View and the indexes at which they appear. + + This method allows us to query the OnlineStore for data we need only once + rather than requesting and processing data for the same combination of + Entities multiple times. + """ + # Get the correct set of entity values with the correct join keys. + table_entity_values = self._get_table_entity_values( + table, entity_name_to_join_key_map, join_key_values, + ) + + # Convert back to rowise. + keys = table_entity_values.keys() + # Sort the rowise data to allow for grouping but keep original index. This lambda is + # sufficient as Entity types cannot be complex (ie. lists). + rowise = list(enumerate(zip(*table_entity_values.values()))) + rowise.sort( + key=lambda row: tuple(getattr(x, x.WhichOneof("val")) for x in row[1]) + ) + + # Identify unique entities and the indexes at which they occur. + unique_entities: Tuple[Dict[str, Value], ...] + indexes: Tuple[List[int], ...] 
+ unique_entities, indexes = tuple( + zip( + *[ + (dict(zip(keys, k)), [_[0] for _ in g]) + for k, g in itertools.groupby(rowise, key=lambda x: x[1]) + ] + ) + ) + return unique_entities, indexes + + def _read_from_online_store( + self, + entity_rows: Iterable[Mapping[str, Value]], provider: Provider, requested_features: List[str], - result_rows: List[GetOnlineFeaturesResponse.FieldValues], table: FeatureView, - union_of_entity_keys: List[EntityKeyProto], - ): - entity_keys = _get_table_entity_keys( - table, union_of_entity_keys, table_join_keys - ) + ) -> List[Tuple[List[Timestamp], List["FieldStatus.ValueType"], List[Value]]]: + """ Read and process data from the OnlineStore for a given FeatureView. + + This method guarantees that the order of the data in each element of the + List returned is the same as the order of `requested_features`. + + This method assumes that `provider.online_read` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + """ + # Instantiate one EntityKeyProto per Entity. + entity_key_protos = [ + EntityKeyProto(join_keys=row.keys(), entity_values=row.values()) + for row in entity_rows + ] + + # Fetch data for Entities. read_rows = provider.online_read( config=self.config, table=table, - entity_keys=entity_keys, + entity_keys=entity_key_protos, requested_features=requested_features, ) - # Each row is a set of features for a given entity key - for row_idx, read_row in enumerate(read_rows): - row_ts, feature_data = read_row - result_row = result_rows[row_idx] + # Each row is a set of features for a given entity key. We only need to convert + # the data to Protobuf once. 
+ row_ts_proto = Timestamp() + null_value = Value() + read_row_protos = [] + for read_row in read_rows: + row_ts, feature_data = read_row + if row_ts is not None: + row_ts_proto.FromDatetime(row_ts) + event_timestamps = [row_ts_proto] * len(requested_features) if feature_data is None: - for feature_name in requested_features: - feature_ref = ( - f"{table.projection.name_to_use()}__{feature_name}" - if full_feature_names - else feature_name - ) - result_row.statuses[ - feature_ref - ] = GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND + statuses = [FieldStatus.NOT_FOUND] * len(requested_features) + values = [null_value] * len(requested_features) else: - for feature_name in feature_data: - feature_ref = ( - f"{table.projection.name_to_use()}__{feature_name}" - if full_feature_names - else feature_name - ) - if feature_name in requested_features: - result_row.fields[feature_ref].CopyFrom( - feature_data[feature_name] - ) - result_row.statuses[ - feature_ref - ] = GetOnlineFeaturesResponse.FieldStatus.PRESENT + statuses = [] + values = [] + for feature_name in requested_features: + # Make sure order of data is the same as requested_features. + if feature_name not in feature_data: + statuses.append(FieldStatus.NOT_FOUND) + values.append(null_value) + else: + statuses.append(FieldStatus.PRESENT) + values.append(feature_data[feature_name]) + read_row_protos.append((event_timestamps, statuses, values)) + return read_row_protos + + @staticmethod + def _populate_response_from_feature_data( + feature_data: Iterable[ + Tuple[ + Iterable[Timestamp], Iterable["FieldStatus.ValueType"], Iterable[Value] + ] + ], + indexes: Iterable[Iterable[int]], + online_features_response: GetOnlineFeaturesResponse, + full_feature_names: bool, + requested_features: Iterable[str], + table: FeatureView, + ): + """ Populate the GetOnlineFeaturesResponse with feature data. 
+ + This method assumes that `_read_from_online_store` returns data for each + combination of Entities in `entity_rows` in the same order as they + are provided. + Args: + feature_data: A list of data in Protobuf form which was retrieved from the OnlineStore. + indexes: A list of indexes which should be the same length as `feature_data`. Each list + of indexes corresponds to a set of result rows in `online_features_response`. + online_features_response: The object to populate. + full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, + changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to + "customer_fv__daily_transactions"). + requested_features: The names of the features in `feature_data`. This should be ordered in the same way as the + data in `feature_data`. + table: The FeatureView that `feature_data` was retrieved from. + """ + # Add the feature names to the response. + requested_feature_refs = [ + f"{table.projection.name_to_use()}__{feature_name}" + if full_feature_names + else feature_name + for feature_name in requested_features + ] + online_features_response.metadata.feature_names.val.extend( + requested_feature_refs + ) + + # Populate the result with data fetched from the OnlineStore + # which is guaranteed to be aligned with `requested_features`. 
+ for feature_row, dest_idxs in zip(feature_data, indexes): + event_timestamps, statuses, values = feature_row + for dest_idx in dest_idxs: + result_row = online_features_response.results[dest_idx] + result_row.event_timestamps.extend(event_timestamps) + result_row.statuses.extend(statuses) + result_row.values.extend(values) + + @staticmethod def _augment_response_with_on_demand_transforms( - self, + online_features_response: GetOnlineFeaturesResponse, feature_refs: List[str], - requested_result_row_names: Set[str], requested_on_demand_feature_views: List[OnDemandFeatureView], full_feature_names: bool, - result_rows: List[GetOnlineFeaturesResponse.FieldValues], ): """Computes on demand feature values and adds them to the result rows. - Assumes that 'result_rows' already contains the necessary request data and input feature + Assumes that 'online_features_response' already contains the necessary request data and input feature views for the on demand feature views. Unneeded feature values such as request data and - unrequested input feature views will be removed from 'result_rows'. + unrequested input feature views will be removed from 'online_features_response'. Args: + online_features_response: Protobuf object to populate feature_refs: List of all feature references to be returned. - requested_result_row_names: Fields from 'result_rows' that have been requested, and - therefore should not be dropped. requested_on_demand_feature_views: List of all odfvs that have been requested. full_feature_names: A boolean that provides the option to add the feature view prefixes to the feature names, changing them from the format "feature" to "feature_view__feature" (e.g., "daily_transactions" changes to "customer_fv__daily_transactions"). result_rows: List of result rows to be augmented with on demand feature values. 
""" - if len(requested_on_demand_feature_views) == 0: - return - requested_odfv_map = { odfv.name: odfv for odfv in requested_on_demand_feature_views } @@ -1363,9 +1637,7 @@ def _augment_response_with_on_demand_transforms( else feature_name ) - initial_response = OnlineResponse( - GetOnlineFeaturesResponse(field_values=result_rows) - ) + initial_response = OnlineResponse(online_features_response) initial_response_df = initial_response.to_df() # Apply on demand transformations and augment the result rows @@ -1379,36 +1651,56 @@ def _augment_response_with_on_demand_transforms( f for f in transformed_features_df.columns if f in _feature_refs ] - proto_values_by_column = { - feature: python_values_to_proto_values( + proto_values = [ + python_values_to_proto_values( transformed_features_df[feature].values, ValueType.UNKNOWN ) for feature in selected_subset - } + ] - for row_idx in range(len(result_rows)): - result_row = result_rows[row_idx] + odfv_result_names |= set(selected_subset) - for transformed_feature in selected_subset: - odfv_result_names.add(transformed_feature) - result_row.fields[transformed_feature].CopyFrom( - proto_values_by_column[transformed_feature][row_idx] - ) - result_row.statuses[ - transformed_feature - ] = GetOnlineFeaturesResponse.FieldStatus.PRESENT + online_features_response.metadata.feature_names.val.extend(selected_subset) + for row_idx in range(len(online_features_response.results)): + result_row = online_features_response.results[row_idx] + for feature_idx, transformed_feature in enumerate(selected_subset): + result_row.values.append(proto_values[feature_idx][row_idx]) + result_row.statuses.append(FieldStatus.PRESENT) + result_row.event_timestamps.append(Timestamp()) + + @staticmethod + def _drop_unneeded_columns( + online_features_response: GetOnlineFeaturesResponse, + requested_result_row_names: Set[str], + ): + """ + Unneeded feature values such as request data and unrequested input feature views will + be removed from 
'online_features_response'. + + Args: + online_features_response: Protobuf object to populate + requested_result_row_names: Fields from 'result_rows' that have been requested, and + therefore should not be dropped. + """ # Drop values that aren't needed - unneeded_features = [ - val - for val in result_rows[0].fields - if val not in requested_result_row_names and val not in odfv_result_names + unneeded_feature_indices = [ + idx + for idx, val in enumerate( + online_features_response.metadata.feature_names.val + ) + if val not in requested_result_row_names ] - for row_idx in range(len(result_rows)): - result_row = result_rows[row_idx] - for unneeded_feature in unneeded_features: - result_row.fields.pop(unneeded_feature) - result_row.statuses.pop(unneeded_feature) + + for idx in reversed(unneeded_feature_indices): + del online_features_response.metadata.feature_names.val[idx] + + for row_idx in range(len(online_features_response.results)): + result_row = online_features_response.results[row_idx] + for idx in reversed(unneeded_feature_indices): + del result_row.values[idx] + del result_row.statuses[idx] + del result_row.event_timestamps[idx] def _get_feature_views_to_use( self, @@ -1451,9 +1743,13 @@ def _get_feature_views_to_use( request_fvs[fv_name].with_projection(copy.copy(projection)) ) elif fv_name in od_fvs: - od_fvs_to_use.append( - od_fvs[fv_name].with_projection(copy.copy(projection)) - ) + odfv = od_fvs[fv_name].with_projection(copy.copy(projection)) + od_fvs_to_use.append(odfv) + # Let's make sure to include any FVs which the ODFV requires Features from. 
+ for projection in odfv.input_feature_view_projections.values(): + fv = fvs[projection.name].with_projection(copy.copy(projection)) + if fv not in fvs_to_use: + fvs_to_use.append(fv) else: raise ValueError( f"The provided feature service {features.name} contains a reference to a feature view" @@ -1473,9 +1769,6 @@ def _get_feature_views_to_use( @log_exceptions_and_usage def serve(self, host: str, port: int, no_access_log: bool) -> None: """Start the feature consumption server locally on a given port.""" - if not flags_helper.enable_python_feature_server(self.config): - raise ExperimentalFeatureNotEnabled(flags.FLAG_PYTHON_FEATURE_SERVER_NAME) - feature_server.start_server(self, host, port, no_access_log) @log_exceptions_and_usage @@ -1486,8 +1779,6 @@ def get_feature_server_endpoint(self) -> Optional[str]: @log_exceptions_and_usage def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" - if not flags_helper.enable_python_feature_server(self.config): - raise ExperimentalFeatureNotEnabled(flags.FLAG_PYTHON_FEATURE_SERVER_NAME) if not flags_helper.enable_on_demand_feature_views(self.config): raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) @@ -1496,20 +1787,11 @@ def serve_transformations(self, port: int) -> None: transformation_server.start_server(self, port) -def _entity_row_to_key(row: GetOnlineFeaturesRequestV2.EntityRow) -> EntityKeyProto: - names, values = zip(*row.fields.items()) - return EntityKeyProto(join_keys=names, entity_values=values) - - -def _entity_row_to_field_values( - row: GetOnlineFeaturesRequestV2.EntityRow, -) -> GetOnlineFeaturesResponse.FieldValues: - result = GetOnlineFeaturesResponse.FieldValues() - for k in row.fields: - result.fields[k].CopyFrom(row.fields[k]) - result.statuses[k] = GetOnlineFeaturesResponse.FieldStatus.PRESENT - - return result +def _validate_entity_values(join_key_values: Dict[str, List[Value]]): + set_of_row_lengths = {len(v) 
for v in join_key_values.values()} + if len(set_of_row_lengths) > 1: + raise ValueError("All entity rows must have the same columns.") + return set_of_row_lengths.pop() def _validate_feature_refs(feature_refs: List[str], full_feature_names: bool = False): @@ -1565,21 +1847,27 @@ def _group_feature_refs( } # view name to feature names - views_features = defaultdict(list) - request_views_features = defaultdict(list) + views_features = defaultdict(set) + request_views_features = defaultdict(set) request_view_refs = set() # on demand view name to feature names - on_demand_view_features = defaultdict(list) + on_demand_view_features = defaultdict(set) for ref in features: view_name, feat_name = ref.split(":") if view_name in view_index: - views_features[view_name].append(feat_name) + views_features[view_name].add(feat_name) elif view_name in on_demand_view_index: - on_demand_view_features[view_name].append(feat_name) + on_demand_view_features[view_name].add(feat_name) + # Let's also add in any FV Feature dependencies here. 
+ for input_fv_projection in on_demand_view_index[ + view_name + ].input_feature_view_projections.values(): + for input_feat in input_fv_projection.features: + views_features[input_fv_projection.name].add(input_feat.name) elif view_name in request_view_index: - request_views_features[view_name].append(feat_name) + request_views_features[view_name].add(feat_name) request_view_refs.add(ref) else: raise FeatureViewNotFoundException(view_name) @@ -1589,54 +1877,14 @@ def _group_feature_refs( request_fvs_result: List[Tuple[RequestFeatureView, List[str]]] = [] for view_name, feature_names in views_features.items(): - fvs_result.append((view_index[view_name], feature_names)) + fvs_result.append((view_index[view_name], list(feature_names))) for view_name, feature_names in request_views_features.items(): - request_fvs_result.append((request_view_index[view_name], feature_names)) + request_fvs_result.append((request_view_index[view_name], list(feature_names))) for view_name, feature_names in on_demand_view_features.items(): - odfvs_result.append((on_demand_view_index[view_name], feature_names)) + odfvs_result.append((on_demand_view_index[view_name], list(feature_names))) return fvs_result, odfvs_result, request_fvs_result, request_view_refs -def _get_table_entity_keys( - table: FeatureView, entity_keys: List[EntityKeyProto], table_join_keys: List[str] -) -> List[EntityKeyProto]: - reverse_join_key_map = { - alias: original for original, alias in table.projection.join_key_map.items() - } - required_entities = OrderedDict.fromkeys(sorted(table_join_keys)) - entity_key_protos = [] - for entity_key in entity_keys: - required_entities_to_values = required_entities.copy() - for i in range(len(entity_key.join_keys)): - entity_name = reverse_join_key_map.get( - entity_key.join_keys[i], entity_key.join_keys[i] - ) - entity_value = entity_key.entity_values[i] - - if entity_name in required_entities_to_values: - if required_entities_to_values[entity_name] is not None: - raise 
ValueError( - f"Duplicate entity keys detected. Table {table.name} expects {table_join_keys}. The entity " - f"{entity_name} was provided at least twice" - ) - required_entities_to_values[entity_name] = entity_value - - entity_names = [] - entity_values = [] - for entity_name, entity_value in required_entities_to_values.items(): - if entity_value is None: - raise ValueError( - f"Table {table.name} expects entity field {table_join_keys}. No entity value was found for " - f"{entity_name}" - ) - entity_names.append(entity_name) - entity_values.append(entity_value) - entity_key_protos.append( - EntityKeyProto(join_keys=entity_names, entity_values=entity_values) - ) - return entity_key_protos - - def _print_materialization_log( start_date, end_date, num_feature_views: int, online_store: str ): diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index ee22ae12663..2c1d0675d4a 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -44,7 +44,7 @@ DUMMY_ENTITY_NAME = "__dummy" DUMMY_ENTITY_VAL = "" DUMMY_ENTITY = Entity( - name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.INT32, + name=DUMMY_ENTITY_NAME, join_key=DUMMY_ENTITY_ID, value_type=ValueType.STRING, ) @@ -74,9 +74,7 @@ class FeatureView(BaseFeatureView): online: bool input: DataSource batch_source: DataSource - stream_source: Optional[DataSource] = None - created_timestamp: Optional[datetime] = None - last_updated_timestamp: Optional[datetime] = None + stream_source: Optional[DataSource] materialization_intervals: List[Tuple[datetime, datetime]] @log_exceptions @@ -137,9 +135,6 @@ def __init__( self.materialization_intervals = [] - self.created_timestamp: Optional[datetime] = None - self.last_updated_timestamp: Optional[datetime] = None - # Note: Python requires redefining hash in child classes that override __eq__ def __hash__(self): return super().__hash__() diff --git a/sdk/python/feast/flags.py b/sdk/python/feast/flags.py index 
5c6357ec26f..a1ca0c3b736 100644 --- a/sdk/python/feast/flags.py +++ b/sdk/python/feast/flags.py @@ -1,6 +1,5 @@ FLAG_ALPHA_FEATURES_NAME = "alpha_features" FLAG_ON_DEMAND_TRANSFORM_NAME = "on_demand_transforms" -FLAG_PYTHON_FEATURE_SERVER_NAME = "python_feature_server" FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME = "aws_lambda_feature_server" FLAG_DIRECT_INGEST_TO_ONLINE_STORE = "direct_ingest_to_online_store" ENV_FLAG_IS_TEST = "IS_TEST" @@ -8,7 +7,6 @@ FLAG_NAMES = { FLAG_ALPHA_FEATURES_NAME, FLAG_ON_DEMAND_TRANSFORM_NAME, - FLAG_PYTHON_FEATURE_SERVER_NAME, FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME, FLAG_DIRECT_INGEST_TO_ONLINE_STORE, } diff --git a/sdk/python/feast/flags_helper.py b/sdk/python/feast/flags_helper.py index 89784d6ecca..89905e7d36a 100644 --- a/sdk/python/feast/flags_helper.py +++ b/sdk/python/feast/flags_helper.py @@ -35,10 +35,6 @@ def enable_on_demand_feature_views(repo_config: RepoConfig) -> bool: return feature_flag_enabled(repo_config, flags.FLAG_ON_DEMAND_TRANSFORM_NAME) -def enable_python_feature_server(repo_config: RepoConfig) -> bool: - return feature_flag_enabled(repo_config, flags.FLAG_PYTHON_FEATURE_SERVER_NAME) - - def enable_aws_lambda_feature_server(repo_config: RepoConfig) -> bool: return feature_flag_enabled(repo_config, flags.FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME) diff --git a/sdk/python/feast/importer.py b/sdk/python/feast/importer.py index 5dcd7c71c12..bbd592101a6 100644 --- a/sdk/python/feast/importer.py +++ b/sdk/python/feast/importer.py @@ -1,28 +1,47 @@ import importlib -from feast import errors +from feast.errors import ( + FeastClassImportError, + FeastInvalidBaseClass, + FeastModuleImportError, +) -def get_class_from_type(module_name: str, class_name: str, class_type: str): - if not class_name.endswith(class_type): - raise errors.FeastClassInvalidName(class_name, class_type) +def import_class(module_name: str, class_name: str, class_type: str = None): + """ + Dynamically loads and returns a class from a module. 
- # Try importing the module that contains the custom provider + Args: + module_name: The name of the module. + class_name: The name of the class. + class_type: Optional name of a base class of the class. + + Raises: + FeastInvalidBaseClass: If the class name does not end with the specified suffix. + FeastModuleImportError: If the module cannot be imported. + FeastClassImportError: If the class cannot be imported. + """ + # Try importing the module. try: module = importlib.import_module(module_name) except Exception as e: # The original exception can be anything - either module not found, # or any other kind of error happening during the module import time. # So we should include the original error as well in the stack trace. - raise errors.FeastModuleImportError(module_name, class_type) from e + raise FeastModuleImportError(module_name, class_name) from e - # Try getting the provider class definition + # Try getting the class. try: _class = getattr(module, class_name) except AttributeError: # This can only be one type of error, when class_name attribute does not exist in the module # So we don't have to include the original exception here - raise errors.FeastClassImportError( - module_name, class_name, class_type=class_type - ) from None + raise FeastClassImportError(module_name, class_name) from None + + # Check if the class is a subclass of the base class. 
+ if class_type and not any( + base_class.__name__ == class_type for base_class in _class.mro() + ): + raise FeastInvalidBaseClass(class_name, class_type) + return _class diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 39a77264bcb..ce8fa919f13 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -1,7 +1,14 @@ import re from typing import List -from feast import BigQuerySource, Entity, Feature, FileSource, RedshiftSource +from feast import ( + BigQuerySource, + Entity, + Feature, + FileSource, + RedshiftSource, + SnowflakeSource, +) from feast.data_source import DataSource from feast.errors import RegistryInferenceFailure from feast.feature_view import FeatureView @@ -13,7 +20,12 @@ def update_entities_with_inferred_types_from_feature_views( entities: List[Entity], feature_views: List[FeatureView], config: RepoConfig ) -> None: """ - Infer entity value type by examining schema of feature view batch sources + Infers the types of the entities by examining the schemas of feature view batch sources. + + Args: + entities: The entities to be updated. + feature_views: A list containing feature views associated with the entities. + config: The config for the current feature store. 
""" incomplete_entities = { entity.name: entity @@ -78,6 +90,8 @@ def update_data_sources_with_inferred_event_timestamp_col( ts_column_type_regex_pattern = "TIMESTAMP|DATETIME" elif isinstance(data_source, RedshiftSource): ts_column_type_regex_pattern = "TIMESTAMP[A-Z]*" + elif isinstance(data_source, SnowflakeSource): + ts_column_type_regex_pattern = "TIMESTAMP_[A-Z]*" else: raise RegistryInferenceFailure( "DataSource", @@ -87,8 +101,10 @@ def update_data_sources_with_inferred_event_timestamp_col( """, ) # for informing the type checker - assert isinstance(data_source, FileSource) or isinstance( - data_source, BigQuerySource + assert ( + isinstance(data_source, FileSource) + or isinstance(data_source, BigQuerySource) + or isinstance(data_source, SnowflakeSource) ) # loop through table columns to find singular match @@ -127,6 +143,11 @@ def update_feature_views_with_inferred_features( Infers the set of features associated to each FeatureView and updates the FeatureView with those features. Inference occurs through considering each column of the underlying data source as a feature except columns that are associated with the data source's timestamp columns and the FeatureView's entity columns. + + Args: + fvs: The feature views to be updated. + entities: A list containing entities associated with the feature views. + config: The config for the current feature store. 
""" entity_name_to_join_key_map = {entity.name: entity.join_key for entity in entities} diff --git a/sdk/python/feast/infra/aws.py b/sdk/python/feast/infra/aws.py index 735b2f62e72..104e20388a2 100644 --- a/sdk/python/feast/infra/aws.py +++ b/sdk/python/feast/infra/aws.py @@ -62,14 +62,16 @@ def update_infra( entities_to_keep: Sequence[Entity], partial: bool, ): - self.online_store.update( - config=self.repo_config, - tables_to_delete=tables_to_delete, - tables_to_keep=tables_to_keep, - entities_to_keep=entities_to_keep, - entities_to_delete=entities_to_delete, - partial=partial, - ) + # Call update only if there is an online store + if self.online_store: + self.online_store.update( + config=self.repo_config, + tables_to_delete=tables_to_delete, + tables_to_keep=tables_to_keep, + entities_to_keep=entities_to_keep, + entities_to_delete=entities_to_delete, + partial=partial, + ) if self.repo_config.feature_server and self.repo_config.feature_server.enabled: if not enable_aws_lambda_feature_server(self.repo_config): @@ -194,7 +196,8 @@ def _deploy_feature_server(self, project: str, image_uri: str): def teardown_infra( self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], ) -> None: - self.online_store.teardown(self.repo_config, tables, entities) + if self.online_store: + self.online_store.teardown(self.repo_config, tables, entities) if ( self.repo_config.feature_server is not None diff --git a/sdk/python/feast/infra/feature_servers/aws_lambda/__init__.py b/sdk/python/feast/infra/feature_servers/aws_lambda/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/feature_servers/gcp_cloudrun/__init__.py b/sdk/python/feast/infra/feature_servers/gcp_cloudrun/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/infra_object.py b/sdk/python/feast/infra/infra_object.py index f1eda19581e..f21016dea54 100644 --- a/sdk/python/feast/infra/infra_object.py +++ 
b/sdk/python/feast/infra/infra_object.py @@ -15,9 +15,21 @@ from dataclasses import dataclass, field from typing import Any, List -from feast.importer import get_class_from_type +from feast.errors import FeastInvalidInfraObjectType +from feast.importer import import_class +from feast.protos.feast.core.DatastoreTable_pb2 import ( + DatastoreTable as DatastoreTableProto, +) +from feast.protos.feast.core.DynamoDBTable_pb2 import ( + DynamoDBTable as DynamoDBTableProto, +) from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto from feast.protos.feast.core.InfraObject_pb2 import InfraObject as InfraObjectProto +from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto + +DATASTORE_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.datastore.DatastoreTable" +DYNAMODB_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.dynamodb.DynamoDBTable" +SQLITE_INFRA_OBJECT_CLASS_TYPE = "feast.infra.online_stores.sqlite.SqliteTable" class InfraObject(ABC): @@ -26,13 +38,18 @@ class InfraObject(ABC): """ @abstractmethod - def to_proto(self) -> InfraObjectProto: + def to_infra_object_proto(self) -> InfraObjectProto: + """Converts an InfraObject to its protobuf representation, wrapped in an InfraObjectProto.""" + pass + + @abstractmethod + def to_proto(self) -> Any: """Converts an InfraObject to its protobuf representation.""" pass @staticmethod @abstractmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: """ Returns an InfraObject created from a protobuf representation. @@ -40,15 +57,38 @@ def from_proto(infra_object_proto: InfraObjectProto) -> Any: infra_object_proto: A protobuf representation of an InfraObject. Raises: - ValueError: The type of InfraObject could not be identified. + FeastInvalidInfraObjectType: The type of InfraObject could not be identified. 
""" if infra_object_proto.infra_object_class_type: cls = _get_infra_object_class_from_type( infra_object_proto.infra_object_class_type ) - return cls.from_proto(infra_object_proto) + return cls.from_infra_object_proto(infra_object_proto) - raise ValueError("Could not identify the type of the InfraObject.") + raise FeastInvalidInfraObjectType() + + @staticmethod + def from_proto(infra_object_proto: Any) -> Any: + """ + Converts a protobuf representation of a subclass to an object of that subclass. + + Args: + infra_object_proto: A protobuf representation of an InfraObject. + + Raises: + FeastInvalidInfraObjectType: The type of InfraObject could not be identified. + """ + if isinstance(infra_object_proto, DatastoreTableProto): + infra_object_class_type = DATASTORE_INFRA_OBJECT_CLASS_TYPE + elif isinstance(infra_object_proto, DynamoDBTableProto): + infra_object_class_type = DYNAMODB_INFRA_OBJECT_CLASS_TYPE + elif isinstance(infra_object_proto, SqliteTableProto): + infra_object_class_type = SQLITE_INFRA_OBJECT_CLASS_TYPE + else: + raise FeastInvalidInfraObjectType() + + cls = _get_infra_object_class_from_type(infra_object_class_type) + return cls.from_proto(infra_object_proto) @abstractmethod def update(self): @@ -85,7 +125,7 @@ def to_proto(self) -> InfraProto: """ infra_proto = InfraProto() for infra_object in self.infra_objects: - infra_object_proto = infra_object.to_proto() + infra_object_proto = infra_object.to_infra_object_proto() infra_proto.infra_objects.append(infra_object_proto) return infra_proto @@ -97,7 +137,7 @@ def from_proto(cls, infra_proto: InfraProto): """ infra = cls() cls.infra_objects += [ - InfraObject.from_proto(infra_object_proto) + InfraObject.from_infra_object_proto(infra_object_proto) for infra_object_proto in infra_proto.infra_objects ] @@ -106,4 +146,4 @@ def from_proto(cls, infra_proto: InfraProto): def _get_infra_object_class_from_type(infra_object_class_type: str): module_name, infra_object_class_name = 
infra_object_class_type.rsplit(".", 1) - return get_class_from_type(module_name, infra_object_class_name, "Object") + return import_class(module_name, infra_object_class_name) diff --git a/sdk/python/feast/infra/local.py b/sdk/python/feast/infra/local.py index 31c46cf2823..c5a15c8a91b 100644 --- a/sdk/python/feast/infra/local.py +++ b/sdk/python/feast/infra/local.py @@ -1,12 +1,13 @@ import uuid from datetime import datetime from pathlib import Path +from typing import List -from feast.feature_view import FeatureView +from feast.infra.infra_object import Infra, InfraObject from feast.infra.passthrough_provider import PassthroughProvider from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.registry_store import RegistryStore -from feast.repo_config import RegistryConfig +from feast.repo_config import RegistryConfig, RepoConfig from feast.usage import log_exceptions_and_usage @@ -15,11 +16,16 @@ class LocalProvider(PassthroughProvider): This class only exists for backwards compatibility. 
""" - pass - - -def _table_id(project: str, table: FeatureView) -> str: - return f"{project}_{table.name}" + def plan_infra( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> Infra: + infra = Infra() + if self.online_store: + infra_objects: List[InfraObject] = self.online_store.plan( + config, desired_registry_proto + ) + infra.infra_objects += infra_objects + return infra class LocalRegistryStore(RegistryStore): diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 34dde7aa7b9..44e62d6ad1a 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -30,19 +30,24 @@ ) from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView from feast.infra.offline_stores import offline_utils -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.registry import Registry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from ...saved_dataset import SavedDatasetStorage from ...usage import log_exceptions_and_usage -from .bigquery_source import BigQuerySource +from .bigquery_source import BigQuerySource, SavedDatasetBigQueryStorage try: from google.api_core.exceptions import NotFound from google.auth.exceptions import DefaultCredentialsError from google.cloud import bigquery - from google.cloud.bigquery import Client + from google.cloud.bigquery import Client, Table except ImportError as e: from feast.errors import FeastExtrasDependencyImportError @@ -119,6 +124,36 @@ def pull_latest_from_table_or_query( query=query, client=client, config=config, full_feature_names=False, ) + @staticmethod + @log_exceptions_and_usage(offline_store="bigquery") + def pull_all_from_table_or_query( + config: 
RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, BigQuerySource) + from_expression = data_source.get_table_query_string() + + client = _get_bigquery_client( + project=config.offline_store.project_id, + location=config.offline_store.location, + ) + field_string = ", ".join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE {event_timestamp_column} BETWEEN TIMESTAMP('{start_date}') AND TIMESTAMP('{end_date}') + """ + return BigQueryRetrievalJob( + query=query, client=client, config=config, full_feature_names=False, + ) + @staticmethod @log_exceptions_and_usage(offline_store="bigquery") def get_historical_features( @@ -147,16 +182,22 @@ def get_historical_features( config.offline_store.location, ) + entity_schema = _get_entity_schema(client=client, entity_df=entity_df,) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, client, + ) + @contextlib.contextmanager def query_generator() -> Iterator[str]: - entity_schema = _upload_entity_df_and_get_entity_schema( + _upload_entity_df( client=client, table_name=table_reference, entity_df=entity_df, ) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema - ) - expected_join_keys = offline_utils.get_expected_join_keys( project, feature_views, registry ) @@ -165,10 +206,6 @@ def query_generator() -> Iterator[str]: entity_schema, expected_join_keys, entity_df_event_timestamp_col ) - entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( - entity_df, entity_df_event_timestamp_col, client, table_reference, - ) - # 
Build a query context containing all information required to template the BigQuery SQL query query_context = offline_utils.get_feature_view_query_context( feature_refs, @@ -203,6 +240,12 @@ def query_generator() -> Iterator[str]: on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) @@ -214,6 +257,7 @@ def __init__( config: RepoConfig, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): if not isinstance(query, str): self._query_generator = query @@ -231,6 +275,7 @@ def query_generator() -> Iterator[str]: self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -310,6 +355,17 @@ def _execute_query( block_until_done(client=self.client, bq_job=bq_job, timeout=timeout) return bq_job + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetBigQueryStorage) + + self.to_bigquery( + bigquery.QueryJobConfig(destination=storage.bigquery_options.table_ref) + ) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + def block_until_done( client: Client, @@ -380,34 +436,45 @@ def _get_table_reference_for_new_entity( return f"{dataset_project}.{dataset_name}.{table_name}" -def _upload_entity_df_and_get_entity_schema( +def _upload_entity_df( client: Client, table_name: str, entity_df: Union[pd.DataFrame, str], -) -> Dict[str, np.dtype]: +) -> Table: """Uploads a Pandas entity dataframe into a BigQuery table and returns the resulting table""" if isinstance(entity_df, str): job = client.query(f"CREATE TABLE 
{table_name} AS ({entity_df})") - block_until_done(client, job) - - limited_entity_df = ( - client.query(f"SELECT * FROM {table_name} LIMIT 1").result().to_dataframe() - ) - entity_schema = dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) elif isinstance(entity_df, pd.DataFrame): - # Drop the index so that we dont have unnecessary columns + # Drop the index so that we don't have unnecessary columns entity_df.reset_index(drop=True, inplace=True) job = client.load_table_from_dataframe(entity_df, table_name) - block_until_done(client, job) - entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) else: raise InvalidEntityType(type(entity_df)) + block_until_done(client, job) + # Ensure that the table expires after some time table = client.get_table(table=table_name) table.expires = datetime.utcnow() + timedelta(minutes=30) client.update_table(table, ["expires"]) + return table + + +def _get_entity_schema( + client: Client, entity_df: Union[pd.DataFrame, str] +) -> Dict[str, np.dtype]: + if isinstance(entity_df, str): + entity_df_sample = ( + client.query(f"SELECT * FROM ({entity_df}) LIMIT 1").result().to_dataframe() + ) + + entity_schema = dict(zip(entity_df_sample.columns, entity_df_sample.dtypes)) + elif isinstance(entity_df, pd.DataFrame): + entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) + else: + raise InvalidEntityType(type(entity_df)) + return entity_schema @@ -415,11 +482,11 @@ def _get_entity_df_event_timestamp_range( entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str, client: Client, - table_name: str, ) -> Tuple[datetime, datetime]: if type(entity_df) is str: job = client.query( - f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max FROM {table_name}" + f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max " + f"FROM ({entity_df})" ) res = next(job.result()) entity_df_event_timestamp_range = ( @@ -435,8 +502,8 @@ 
def _get_entity_df_event_timestamp_range( entity_df_event_timestamp, utc=True ) entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min(), - entity_df_event_timestamp.max(), + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), ) else: raise InvalidEntityType(type(entity_df)) @@ -491,7 +558,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] ,CAST({{entity_df_event_timestamp_col}} AS STRING) AS {{featureview.name}}__entity_row_unique_id {% endif %} {% endfor %} - FROM {{ left_table_query_string }} + FROM `{{ left_table_query_string }}` ), {% for featureview in featureviews %} @@ -531,7 +598,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} {% for feature in featureview.features %} - {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}{% if loop.last %}{% else %}, {% endif %} + {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM {{ featureview.table_subquery }} WHERE {{ featureview.event_timestamp_column }} <= '{{ featureview.max_event_timestamp }}' @@ -632,7 +699,7 @@ def _get_bigquery_client(project: Optional[str] = None, location: Optional[str] SELECT {{featureview.name}}__entity_row_unique_id {% for feature in featureview.features %} - ,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %} + ,{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ 
featureview.field_mapping.get(feature, feature) }}{% endif %} {% endfor %} FROM {{ featureview.name }}__cleaned ) USING ({{featureview.name}}__entity_row_unique_id) diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index a5c1afa3e02..f97f687b0f6 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -1,10 +1,14 @@ -from typing import Callable, Dict, Iterable, Optional, Tuple +from typing import Callable, Dict, Iterable, List, Optional, Tuple from feast import type_map from feast.data_source import DataSource from feast.errors import DataSourceNotFoundException from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -119,18 +123,20 @@ def get_table_column_names_and_types( client = bigquery.Client() if self.table_ref is not None: - table_schema = client.get_table(self.table_ref).schema - if not isinstance(table_schema[0], bigquery.schema.SchemaField): + schema = client.get_table(self.table_ref).schema + if not isinstance(schema[0], bigquery.schema.SchemaField): raise TypeError("Could not parse BigQuery table schema.") - - name_type_pairs = [(field.name, field.field_type) for field in table_schema] else: bq_columns_query = f"SELECT * FROM ({self.query}) LIMIT 1" queryRes = client.query(bq_columns_query).result() - name_type_pairs = [ - (schema_field.name, schema_field.field_type) - for schema_field in queryRes.schema - ] + schema = queryRes.schema + + name_type_pairs: List[Tuple[str, str]] = [] + for field in schema: + bq_type_as_str = field.field_type + if field.mode == "REPEATED": + bq_type_as_str = "ARRAY<" + bq_type_as_str + ">" + 
name_type_pairs.append((field.name, bq_type_as_str)) return name_type_pairs @@ -204,3 +210,28 @@ def to_proto(self) -> DataSourceProto.BigQueryOptions: ) return bigquery_options_proto + + +class SavedDatasetBigQueryStorage(SavedDatasetStorage): + _proto_attr_name = "bigquery_storage" + + bigquery_options: BigQueryOptions + + def __init__(self, table_ref: str): + self.bigquery_options = BigQueryOptions(table_ref=table_ref, query=None) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + return SavedDatasetBigQueryStorage( + table_ref=BigQueryOptions.from_proto( + storage_proto.bigquery_storage + ).table_ref + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + bigquery_storage=self.bigquery_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return BigQuerySource(table_ref=self.bigquery_options.table_ref) diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 723e9eb5335..a49ce643d0b 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Callable, List, Optional, Union +from typing import Callable, List, Optional, Tuple, Union import pandas as pd import pyarrow @@ -10,7 +10,12 @@ from feast.data_source import DataSource from feast.errors import FeastJoinKeysDuringMaterialization from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.file_source import SavedDatasetFileStorage +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) from feast.infra.offline_stores.offline_utils import ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, ) @@ -20,6 +25,7 @@ ) from feast.registry import Registry from 
feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.usage import log_exceptions_and_usage @@ -36,6 +42,7 @@ def __init__( evaluation_function: Callable, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): """Initialize a lazy historical retrieval job""" @@ -45,6 +52,7 @@ def __init__( self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -66,6 +74,27 @@ def _to_arrow_internal(self): df = self.evaluation_function() return pyarrow.Table.from_pandas(df) + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetFileStorage) + + filesystem, path = FileSource.create_filesystem_and_path( + storage.file_options.file_url, storage.file_options.s3_endpoint_override, + ) + + if path.endswith(".parquet"): + pyarrow.parquet.write_table( + self.to_arrow(), where=path, filesystem=filesystem + ) + else: + # otherwise assume destination is directory + pyarrow.parquet.write_to_dataset( + self.to_arrow(), root_path=path, filesystem=filesystem + ) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + class FileOfflineStore(OfflineStore): @staticmethod @@ -106,6 +135,10 @@ def get_historical_features( registry.list_on_demand_feature_views(config.project), ) + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col + ) + # Create lazy function that is only called from the RetrievalJob object def evaluate_historical_retrieval(): @@ -266,6 +299,12 @@ def evaluate_historical_retrieval(): on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(set(entity_df.columns) - 
{entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) return job @@ -337,3 +376,46 @@ def evaluate_offline_job(): return FileRetrievalJob( evaluation_function=evaluate_offline_job, full_feature_names=False, ) + + @staticmethod + @log_exceptions_and_usage(offline_store="file") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + return FileOfflineStore.pull_latest_from_table_or_query( + config=config, + data_source=data_source, + join_key_columns=join_key_columns + + [event_timestamp_column], # avoid deduplication + feature_name_columns=feature_name_columns, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=None, + start_date=start_date, + end_date=end_date, + ) + + +def _get_entity_df_event_timestamp_range( + entity_df: Union[pd.DataFrame, str], entity_df_event_timestamp_col: str, +) -> Tuple[datetime, datetime]: + if not isinstance(entity_df, pd.DataFrame): + raise ValueError( + f"Please provide an entity_df of type {type(pd.DataFrame)} instead of type {type(entity_df)}" + ) + + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime(entity_df_event_timestamp, utc=True) + + return ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) diff --git a/sdk/python/feast/infra/offline_stores/file_source.py b/sdk/python/feast/infra/offline_stores/file_source.py index 31eb5f037f0..7d52110985a 100644 --- a/sdk/python/feast/infra/offline_stores/file_source.py +++ b/sdk/python/feast/infra/offline_stores/file_source.py @@ -5,10 +5,14 @@ from pyarrow.parquet 
import ParquetFile from feast import type_map -from feast.data_format import FileFormat +from feast.data_format import FileFormat, ParquetFormat from feast.data_source import DataSource from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -260,3 +264,40 @@ def to_proto(self) -> DataSourceProto.FileOptions: ) return file_options_proto + + +class SavedDatasetFileStorage(SavedDatasetStorage): + _proto_attr_name = "file_storage" + + file_options: FileOptions + + def __init__( + self, + path: str, + file_format: FileFormat = ParquetFormat(), + s3_endpoint_override: Optional[str] = None, + ): + self.file_options = FileOptions( + file_url=path, + file_format=file_format, + s3_endpoint_override=s3_endpoint_override, + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + file_options = FileOptions.from_proto(storage_proto.file_storage) + return SavedDatasetFileStorage( + path=file_options.file_url, + file_format=file_options.file_format, + s3_endpoint_override=file_options.s3_endpoint_override, + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto(file_storage=self.file_options.to_proto()) + + def to_data_source(self) -> DataSource: + return FileSource( + path=self.file_options.file_url, + file_format=self.file_options.file_format, + s3_endpoint_override=self.file_options.s3_endpoint_override, + ) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 0ba81971543..1e5fe573774 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -11,18 +11,46 @@ # WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings from abc import ABC, abstractmethod from datetime import datetime -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import pandas as pd import pyarrow from feast.data_source import DataSource +from feast.dqm.errors import ValidationFailed from feast.feature_view import FeatureView from feast.on_demand_feature_view import OnDemandFeatureView from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage + +if TYPE_CHECKING: + from feast.saved_dataset import ValidationReference + + +class RetrievalMetadata: + min_event_timestamp: Optional[datetime] + max_event_timestamp: Optional[datetime] + + # List of feature references + features: List[str] + # List of entity keys + ODFV inputs + keys: List[str] + + def __init__( + self, + features: List[str], + keys: List[str], + min_event_timestamp: Optional[datetime] = None, + max_event_timestamp: Optional[datetime] = None, + ): + self.features = features + self.keys = keys + self.min_event_timestamp = min_event_timestamp + self.max_event_timestamp = max_event_timestamp class RetrievalJob(ABC): @@ -38,17 +66,37 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: pass - def to_df(self) -> pd.DataFrame: - """Return dataset as Pandas DataFrame synchronously including on demand transforms""" + def to_df( + self, validation_reference: Optional["ValidationReference"] = None + ) -> pd.DataFrame: + """ + Return dataset as Pandas DataFrame synchronously including on demand transforms + Args: + validation_reference: If provided resulting dataset will be validated against this reference profile. 
+ """ features_df = self._to_df_internal() - if not self.on_demand_feature_views: - return features_df - # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs - for odfv in self.on_demand_feature_views: - features_df = features_df.join( - odfv.get_transformed_features_df(features_df, self.full_feature_names,) + if self.on_demand_feature_views: + # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs + for odfv in self.on_demand_feature_views: + features_df = features_df.join( + odfv.get_transformed_features_df( + features_df, self.full_feature_names, + ) + ) + + if validation_reference: + warnings.warn( + "Dataset validation is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, ) + + validation_result = validation_reference.profile.validate(features_df) + if not validation_result.is_success: + raise ValidationFailed(validation_result) + return features_df @abstractmethod @@ -61,18 +109,57 @@ def _to_arrow_internal(self) -> pyarrow.Table: """Return dataset as pyarrow Table synchronously""" pass - def to_arrow(self) -> pyarrow.Table: - """Return dataset as pyarrow Table synchronously""" - if not self.on_demand_feature_views: + def to_arrow( + self, validation_reference: Optional["ValidationReference"] = None + ) -> pyarrow.Table: + """ + Return dataset as pyarrow Table synchronously + Args: + validation_reference: If provided resulting dataset will be validated against this reference profile. 
+ + """ + if not self.on_demand_feature_views and not validation_reference: return self._to_arrow_internal() features_df = self._to_df_internal() - for odfv in self.on_demand_feature_views: - features_df = features_df.join( - odfv.get_transformed_features_df(features_df, self.full_feature_names,) + if self.on_demand_feature_views: + for odfv in self.on_demand_feature_views: + features_df = features_df.join( + odfv.get_transformed_features_df( + features_df, self.full_feature_names, + ) + ) + + if validation_reference: + warnings.warn( + "Dataset validation is an experimental feature. " + "This API is unstable and it could and most probably will be changed in the future. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, ) + + validation_result = validation_reference.profile.validate(features_df) + if not validation_result.is_success: + raise ValidationFailed(validation_result) + return pyarrow.Table.from_pandas(features_df) + @abstractmethod + def persist(self, storage: SavedDatasetStorage): + """ + Run the retrieval and persist the results in the same offline store used for read. + """ + pass + + @property + @abstractmethod + def metadata(self) -> Optional[RetrievalMetadata]: + """ + Return metadata information about retrieval. + Should be available even before materializing the dataset itself. 
+ """ + pass + class OfflineStore(ABC): """ @@ -111,3 +198,21 @@ def get_historical_features( full_feature_names: bool = False, ) -> RetrievalJob: pass + + @staticmethod + @abstractmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + """ + Note that join_key_columns, feature_name_columns, event_timestamp_column, and created_timestamp_column + have all already been mapped to column names of the source table and those column names are the values passed + into this function. + """ + pass diff --git a/sdk/python/feast/infra/offline_stores/offline_utils.py b/sdk/python/feast/infra/offline_stores/offline_utils.py index 6debe14ca00..eaf4925266d 100644 --- a/sdk/python/feast/infra/offline_stores/offline_utils.py +++ b/sdk/python/feast/infra/offline_stores/offline_utils.py @@ -1,4 +1,3 @@ -import importlib import uuid from dataclasses import asdict, dataclass from datetime import datetime, timedelta @@ -12,11 +11,10 @@ import feast from feast.errors import ( EntityTimestampInferenceException, - FeastClassImportError, FeastEntityDFMissingColumnsError, - FeastModuleImportError, ) from feast.feature_view import FeatureView +from feast.importer import import_class from feast.infra.offline_stores.offline_store import OfflineStore from feast.infra.provider import _get_requested_feature_views_to_features_dict from feast.registry import Registry @@ -87,6 +85,7 @@ class FeatureViewQueryContext: ttl: int entities: List[str] features: List[str] # feature reference format + field_mapping: Dict[str, str] event_timestamp_column: str created_timestamp_column: Optional[str] table_subquery: str @@ -146,7 +145,10 @@ def get_feature_view_query_context( name=feature_view.projection.name_to_use(), ttl=ttl_seconds, entities=join_keys, - features=features, + features=[ + 
reverse_field_mapping.get(feature, feature) for feature in features + ], + field_mapping=feature_view.input.field_mapping, event_timestamp_column=reverse_field_mapping.get( event_timestamp_column, event_timestamp_column ), @@ -177,7 +179,11 @@ def build_point_in_time_query( final_output_feature_names = list(entity_df_columns) final_output_feature_names.extend( [ - (f"{fv.name}__{feature}" if full_feature_names else feature) + ( + f"{fv.name}__{fv.field_mapping.get(feature, feature)}" + if full_feature_names + else fv.field_mapping.get(feature, feature) + ) for fv in feature_view_query_contexts for feature in fv.features ] @@ -204,27 +210,10 @@ def get_temp_entity_table_name() -> str: return "feast_entity_df_" + uuid.uuid4().hex -def get_offline_store_from_config(offline_store_config: Any,) -> OfflineStore: - """Get the offline store from offline store config""" - +def get_offline_store_from_config(offline_store_config: Any) -> OfflineStore: + """Creates an offline store corresponding to the given offline store config.""" module_name = offline_store_config.__module__ qualified_name = type(offline_store_config).__name__ - store_class_name = qualified_name.replace("Config", "") - try: - module = importlib.import_module(module_name) - except Exception as e: - # The original exception can be anything - either module not found, - # or any other kind of error happening during the module import time. - # So we should include the original error as well in the stack trace. 
- raise FeastModuleImportError(module_name, "OfflineStore") from e - - # Try getting the provider class definition - try: - offline_store_class = getattr(module, store_class_name) - except AttributeError: - # This can only be one type of error, when class_name attribute does not exist in the module - # So we don't have to include the original exception here - raise FeastClassImportError( - module_name, store_class_name, class_type="OfflineStore" - ) from None + class_name = qualified_name.replace("Config", "") + offline_store_class = import_class(module_name, class_name, "OfflineStore") return offline_store_class() diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index df363967d6e..3efd45bc741 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -25,10 +25,16 @@ from feast.errors import InvalidEntityType from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView from feast.infra.offline_stores import offline_utils -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.redshift_source import SavedDatasetRedshiftStorage from feast.infra.utils import aws_utils from feast.registry import Registry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.usage import log_exceptions_and_usage @@ -117,6 +123,46 @@ def pull_latest_from_table_or_query( full_feature_names=False, ) + @staticmethod + @log_exceptions_and_usage(offline_store="redshift") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> 
RetrievalJob: + assert isinstance(data_source, RedshiftSource) + from_expression = data_source.get_table_query_string() + + field_string = ", ".join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + + redshift_client = aws_utils.get_redshift_data_client( + config.offline_store.region + ) + s3_resource = aws_utils.get_s3_resource(config.offline_store.region) + + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE {event_timestamp_column} BETWEEN TIMESTAMP '{start_date}' AND TIMESTAMP '{end_date}' + """ + + return RedshiftRetrievalJob( + query=query, + redshift_client=redshift_client, + s3_resource=s3_resource, + config=config, + full_feature_names=False, + ) + @staticmethod @log_exceptions_and_usage(offline_store="redshift") def get_historical_features( @@ -135,18 +181,26 @@ def get_historical_features( ) s3_resource = aws_utils.get_s3_resource(config.offline_store.region) + entity_schema = _get_entity_schema( + entity_df, redshift_client, config, s3_resource + ) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, redshift_client, config, + ) + @contextlib.contextmanager def query_generator() -> Iterator[str]: table_name = offline_utils.get_temp_entity_table_name() - entity_schema = _upload_entity_df_and_get_entity_schema( + _upload_entity_df( entity_df, redshift_client, config, s3_resource, table_name ) - entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( - entity_schema - ) - expected_join_keys = offline_utils.get_expected_join_keys( project, feature_views, registry ) @@ -155,14 +209,6 @@ def query_generator() -> Iterator[str]: entity_schema, expected_join_keys, entity_df_event_timestamp_col ) - entity_df_event_timestamp_range = 
_get_entity_df_event_timestamp_range( - entity_df, - entity_df_event_timestamp_col, - redshift_client, - config, - table_name, - ) - # Build a query context containing all information required to template the Redshift SQL query query_context = offline_utils.get_feature_view_query_context( feature_refs, @@ -203,6 +249,12 @@ def query_generator() -> Iterator[str]: on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( feature_refs, project, registry ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), ) @@ -215,6 +267,7 @@ def __init__( config: RepoConfig, full_feature_names: bool, on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, ): """Initialize RedshiftRetrievalJob object. @@ -248,6 +301,7 @@ def query_generator() -> Iterator[str]: self._on_demand_feature_views = ( on_demand_feature_views if on_demand_feature_views else [] ) + self._metadata = metadata @property def full_feature_names(self) -> bool: @@ -334,17 +388,24 @@ def to_redshift(self, table_name: str) -> None: query, ) + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetRedshiftStorage) + self.to_redshift(table_name=storage.redshift_options.table) -def _upload_entity_df_and_get_entity_schema( + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + +def _upload_entity_df( entity_df: Union[pd.DataFrame, str], redshift_client, config: RepoConfig, s3_resource, table_name: str, -) -> Dict[str, np.dtype]: +): if isinstance(entity_df, pd.DataFrame): # If the entity_df is a pandas dataframe, upload it to Redshift - # and construct the schema from the original entity_df dataframe aws_utils.upload_df_to_redshift( redshift_client, config.offline_store.cluster_id, @@ -356,10 
+417,8 @@ def _upload_entity_df_and_get_entity_schema( table_name, entity_df, ) - return dict(zip(entity_df.columns, entity_df.dtypes)) elif isinstance(entity_df, str): - # If the entity_df is a string (SQL query), create a Redshift table out of it, - # get pandas dataframe consisting of 1 row (LIMIT 1) and generate the schema out of it + # If the entity_df is a string (SQL query), create a Redshift table out of it aws_utils.execute_redshift_statement( redshift_client, config.offline_store.cluster_id, @@ -367,14 +426,29 @@ def _upload_entity_df_and_get_entity_schema( config.offline_store.user, f"CREATE TABLE {table_name} AS ({entity_df})", ) - limited_entity_df = RedshiftRetrievalJob( - f"SELECT * FROM {table_name} LIMIT 1", + else: + raise InvalidEntityType(type(entity_df)) + + +def _get_entity_schema( + entity_df: Union[pd.DataFrame, str], + redshift_client, + config: RepoConfig, + s3_resource, +) -> Dict[str, np.dtype]: + if isinstance(entity_df, pd.DataFrame): + return dict(zip(entity_df.columns, entity_df.dtypes)) + + elif isinstance(entity_df, str): + # get pandas dataframe consisting of 1 row (LIMIT 1) and generate the schema out of it + entity_df_sample = RedshiftRetrievalJob( + f"SELECT * FROM ({entity_df}) LIMIT 1", redshift_client, s3_resource, config, full_feature_names=False, ).to_df() - return dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) + return dict(zip(entity_df_sample.columns, entity_df_sample.dtypes)) else: raise InvalidEntityType(type(entity_df)) @@ -384,7 +458,6 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp_col: str, redshift_client, config: RepoConfig, - table_name: str, ) -> Tuple[datetime, datetime]: if isinstance(entity_df, pd.DataFrame): entity_df_event_timestamp = entity_df.loc[ @@ -395,8 +468,8 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp, utc=True ) entity_df_event_timestamp_range = ( - entity_df_event_timestamp.min(), - entity_df_event_timestamp.max(), + 
entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), ) elif isinstance(entity_df, str): # If the entity_df is a string (SQL query), determine range @@ -406,7 +479,8 @@ def _get_entity_df_event_timestamp_range( config.offline_store.cluster_id, config.offline_store.database, config.offline_store.user, - f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max FROM {table_name}", + f"SELECT MIN({entity_df_event_timestamp_col}) AS min, MAX({entity_df_event_timestamp_col}) AS max " + f"FROM ({entity_df})", ) res = aws_utils.get_redshift_statement_result(redshift_client, statement_id)[ "Records" @@ -489,7 +563,7 @@ def _get_entity_df_event_timestamp_range( {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} {% for feature in featureview.features %} - {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}{% if loop.last %}{% else %}, {% endif %} + {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM {{ featureview.table_subquery }} WHERE {{ featureview.event_timestamp_column }} <= '{{ featureview.max_event_timestamp }}' @@ -590,7 +664,7 @@ def _get_entity_df_event_timestamp_range( SELECT {{featureview.name}}__entity_row_unique_id {% for feature in featureview.features %} - ,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %} + ,{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %} {% endfor %} 
FROM {{ featureview.name }}__cleaned ) USING ({{featureview.name}}__entity_row_unique_id) diff --git a/sdk/python/feast/infra/offline_stores/redshift_source.py b/sdk/python/feast/infra/offline_stores/redshift_source.py index e7e88a54ef7..949f1c9221c 100644 --- a/sdk/python/feast/infra/offline_stores/redshift_source.py +++ b/sdk/python/feast/infra/offline_stores/redshift_source.py @@ -4,7 +4,11 @@ from feast.data_source import DataSource from feast.errors import DataSourceNotFoundException, RedshiftCredentialsError from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage from feast.value_type import ValueType @@ -269,3 +273,29 @@ def to_proto(self) -> DataSourceProto.RedshiftOptions: ) return redshift_options_proto + + +class SavedDatasetRedshiftStorage(SavedDatasetStorage): + _proto_attr_name = "redshift_storage" + + redshift_options: RedshiftOptions + + def __init__(self, table_ref: str): + self.redshift_options = RedshiftOptions( + table=table_ref, schema=None, query=None + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + + return SavedDatasetRedshiftStorage( + table_ref=RedshiftOptions.from_proto(storage_proto.redshift_storage).table + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + redshift_storage=self.redshift_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return RedshiftSource(table=self.redshift_options.table) diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py new file mode 100644 index 00000000000..ee8cd71ce05 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -0,0 +1,632 @@ +import contextlib +import os +from datetime import 
datetime +from pathlib import Path +from typing import ( + Callable, + ContextManager, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) + +import numpy as np +import pandas as pd +import pyarrow as pa +from pydantic import Field +from pydantic.typing import Literal +from pytz import utc + +from feast import OnDemandFeatureView +from feast.data_source import DataSource +from feast.errors import InvalidEntityType +from feast.feature_view import DUMMY_ENTITY_ID, DUMMY_ENTITY_VAL, FeatureView +from feast.infra.offline_stores import offline_utils +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.snowflake_source import ( + SavedDatasetSnowflakeStorage, + SnowflakeSource, +) +from feast.infra.utils.snowflake_utils import ( + execute_snowflake_statement, + get_snowflake_conn, + write_pandas, +) +from feast.registry import Registry +from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.usage import log_exceptions_and_usage + +try: + from snowflake.connector import SnowflakeConnection +except ImportError as e: + from feast.errors import FeastExtrasDependencyImportError + + raise FeastExtrasDependencyImportError("snowflake", str(e)) + + +class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): + """ Offline store config for Snowflake """ + + type: Literal["snowflake.offline"] = "snowflake.offline" + """ Offline store type selector""" + + config_path: Optional[str] = ( + Path(os.environ["HOME"]) / ".snowsql/config" + ).__str__() + """ Snowflake config path -- absolute path required (Cant use ~)""" + + account: Optional[str] = None + """ Snowflake deployment identifier -- drop .snowflakecomputing.com""" + + user: Optional[str] = None + """ Snowflake user name """ + + password: Optional[str] = None + """ Snowflake password """ + + role: Optional[str] = None + """ Snowflake role name""" + + 
warehouse: Optional[str] = None + """ Snowflake warehouse name """ + + database: Optional[str] = None + """ Snowflake database name """ + + schema_: Optional[str] = Field("PUBLIC", alias="schema") + """ Snowflake schema name """ + + class Config: + allow_population_by_field_name = True + + +class SnowflakeOfflineStore(OfflineStore): + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, SnowflakeSource) + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + from_expression = ( + data_source.get_table_query_string() + ) # returns schema.table as a string + + if join_key_columns: + partition_by_join_key_string = '"' + '", "'.join(join_key_columns) + '"' + partition_by_join_key_string = ( + "PARTITION BY " + partition_by_join_key_string + ) + else: + partition_by_join_key_string = "" + + timestamp_columns = [event_timestamp_column] + if created_timestamp_column: + timestamp_columns.append(created_timestamp_column) + + timestamp_desc_string = '"' + '" DESC, "'.join(timestamp_columns) + '" DESC' + field_string = ( + '"' + + '", "'.join(join_key_columns + feature_name_columns + timestamp_columns) + + '"' + ) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + query = f""" + SELECT + {field_string} + {f''', TRIM({repr(DUMMY_ENTITY_VAL)}::VARIANT,'"') AS "{DUMMY_ENTITY_ID}"''' if not join_key_columns else ""} + FROM ( + SELECT {field_string}, + ROW_NUMBER() OVER({partition_by_join_key_string} ORDER BY {timestamp_desc_string}) AS "_feast_row" + FROM {from_expression} + WHERE "{event_timestamp_column}" BETWEEN TO_TIMESTAMP_NTZ({start_date.timestamp()}) AND TO_TIMESTAMP_NTZ({end_date.timestamp()}) + ) + 
WHERE "_feast_row" = 1 + """ + + return SnowflakeRetrievalJob( + query=query, + snowflake_conn=snowflake_conn, + config=config, + full_feature_names=False, + on_demand_feature_views=None, + ) + + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + event_timestamp_column: str, + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, SnowflakeSource) + from_expression = data_source.get_table_query_string() + + field_string = ( + '"' + + '", "'.join( + join_key_columns + feature_name_columns + [event_timestamp_column] + ) + + '"' + ) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + start_date = start_date.astimezone(tz=utc) + end_date = end_date.astimezone(tz=utc) + + query = f""" + SELECT {field_string} + FROM {from_expression} + WHERE "{event_timestamp_column}" BETWEEN TIMESTAMP '{start_date}' AND TIMESTAMP '{end_date}' + """ + + return SnowflakeRetrievalJob( + query=query, + snowflake_conn=snowflake_conn, + config=config, + full_feature_names=False, + ) + + @staticmethod + @log_exceptions_and_usage(offline_store="snowflake") + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: Registry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + entity_schema = _get_entity_schema(entity_df, snowflake_conn, config) + + entity_df_event_timestamp_col = offline_utils.infer_event_timestamp_from_entity_df( + entity_schema + ) + + entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range( + entity_df, entity_df_event_timestamp_col, snowflake_conn, + ) + + 
@contextlib.contextmanager + def query_generator() -> Iterator[str]: + + table_name = offline_utils.get_temp_entity_table_name() + + _upload_entity_df(entity_df, snowflake_conn, config, table_name) + + expected_join_keys = offline_utils.get_expected_join_keys( + project, feature_views, registry + ) + + offline_utils.assert_expected_columns_in_entity_df( + entity_schema, expected_join_keys, entity_df_event_timestamp_col + ) + + # Build a query context containing all information required to template the Snowflake SQL query + query_context = offline_utils.get_feature_view_query_context( + feature_refs, + feature_views, + registry, + project, + entity_df_event_timestamp_range, + ) + + query_context = _fix_entity_selections_identifiers(query_context) + + # Generate the Snowflake SQL query from the query context + query = offline_utils.build_point_in_time_query( + query_context, + left_table_query_string=table_name, + entity_df_event_timestamp_col=entity_df_event_timestamp_col, + entity_df_columns=entity_schema.keys(), + query_template=MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN, + full_feature_names=full_feature_names, + ) + + yield query + + return SnowflakeRetrievalJob( + query=query_generator, + snowflake_conn=snowflake_conn, + config=config, + full_feature_names=full_feature_names, + on_demand_feature_views=OnDemandFeatureView.get_requested_odfvs( + feature_refs, project, registry + ), + metadata=RetrievalMetadata( + features=feature_refs, + keys=list(entity_schema.keys() - {entity_df_event_timestamp_col}), + min_event_timestamp=entity_df_event_timestamp_range[0], + max_event_timestamp=entity_df_event_timestamp_range[1], + ), + ) + + +class SnowflakeRetrievalJob(RetrievalJob): + def __init__( + self, + query: Union[str, Callable[[], ContextManager[str]]], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, + full_feature_names: bool, + on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, + metadata: Optional[RetrievalMetadata] = None, + ): + + 
if not isinstance(query, str): + self._query_generator = query + else: + + @contextlib.contextmanager + def query_generator() -> Iterator[str]: + assert isinstance(query, str) + yield query + + self._query_generator = query_generator + + self.snowflake_conn = snowflake_conn + self.config = config + self._full_feature_names = full_feature_names + self._on_demand_feature_views = ( + on_demand_feature_views if on_demand_feature_views else [] + ) + self._metadata = metadata + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> Optional[List[OnDemandFeatureView]]: + return self._on_demand_feature_views + + def _to_df_internal(self) -> pd.DataFrame: + with self._query_generator() as query: + + df = execute_snowflake_statement( + self.snowflake_conn, query + ).fetch_pandas_all() + + return df + + def _to_arrow_internal(self) -> pa.Table: + with self._query_generator() as query: + + pa_table = execute_snowflake_statement( + self.snowflake_conn, query + ).fetch_arrow_all() + + if pa_table: + + return pa_table + else: + empty_result = execute_snowflake_statement(self.snowflake_conn, query) + + return pa.Table.from_pandas( + pd.DataFrame(columns=[md.name for md in empty_result.description]) + ) + + def to_snowflake(self, table_name: str) -> None: + """ Save dataset as a new Snowflake table """ + if self.on_demand_feature_views is not None: + transformed_df = self.to_df() + + write_pandas( + self.snowflake_conn, transformed_df, table_name, auto_create_table=True + ) + + return None + + with self._query_generator() as query: + query = f'CREATE TABLE IF NOT EXISTS "{table_name}" AS ({query});\n' + + execute_snowflake_statement(self.snowflake_conn, query) + + def to_sql(self) -> str: + """ + Returns the SQL query that will be executed in Snowflake to build the historical feature table. 
+ """ + with self._query_generator() as query: + return query + + def to_arrow_chunks(self, arrow_options: Optional[Dict] = None) -> Optional[List]: + with self._query_generator() as query: + + arrow_batches = execute_snowflake_statement( + self.snowflake_conn, query + ).get_result_batches() + + return arrow_batches + + def persist(self, storage: SavedDatasetStorage): + assert isinstance(storage, SavedDatasetSnowflakeStorage) + self.to_snowflake(table_name=storage.snowflake_options.table) + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + +def _get_entity_schema( + entity_df: Union[pd.DataFrame, str], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, +) -> Dict[str, np.dtype]: + + if isinstance(entity_df, pd.DataFrame): + + return dict(zip(entity_df.columns, entity_df.dtypes)) + + else: + + query = f"SELECT * FROM ({entity_df}) LIMIT 1" + limited_entity_df = execute_snowflake_statement( + snowflake_conn, query + ).fetch_pandas_all() + + return dict(zip(limited_entity_df.columns, limited_entity_df.dtypes)) + + +def _upload_entity_df( + entity_df: Union[pd.DataFrame, str], + snowflake_conn: SnowflakeConnection, + config: RepoConfig, + table_name: str, +) -> None: + + if isinstance(entity_df, pd.DataFrame): + # Write the data from the DataFrame to the table + write_pandas( + snowflake_conn, + entity_df, + table_name, + auto_create_table=True, + create_temp_table=True, + ) + + return None + elif isinstance(entity_df, str): + # If the entity_df is a string (SQL query), create a Snowflake table out of it, + query = f'CREATE TEMPORARY TABLE "{table_name}" AS ({entity_df})' + execute_snowflake_statement(snowflake_conn, query) + + return None + else: + raise InvalidEntityType(type(entity_df)) + + +def _fix_entity_selections_identifiers(query_context) -> list: + + for i, qc in enumerate(query_context): + for j, es in enumerate(qc.entity_selections): + query_context[i].entity_selections[j] = f'"{es}"'.replace(" AS ", '" 
AS "') + + return query_context + + +def _get_entity_df_event_timestamp_range( + entity_df: Union[pd.DataFrame, str], + entity_df_event_timestamp_col: str, + snowflake_conn: SnowflakeConnection, +) -> Tuple[datetime, datetime]: + if isinstance(entity_df, pd.DataFrame): + entity_df_event_timestamp = entity_df.loc[ + :, entity_df_event_timestamp_col + ].infer_objects() + if pd.api.types.is_string_dtype(entity_df_event_timestamp): + entity_df_event_timestamp = pd.to_datetime( + entity_df_event_timestamp, utc=True + ) + entity_df_event_timestamp_range = ( + entity_df_event_timestamp.min().to_pydatetime(), + entity_df_event_timestamp.max().to_pydatetime(), + ) + elif isinstance(entity_df, str): + # If the entity_df is a string (SQL query), determine range + # from table + query = f'SELECT MIN("{entity_df_event_timestamp_col}") AS "min_value", MAX("{entity_df_event_timestamp_col}") AS "max_value" FROM ({entity_df})' + results = execute_snowflake_statement(snowflake_conn, query).fetchall() + + entity_df_event_timestamp_range = cast(Tuple[datetime, datetime], results[0]) + else: + raise InvalidEntityType(type(entity_df)) + + return entity_df_event_timestamp_range + + +MULTIPLE_FEATURE_VIEW_POINT_IN_TIME_JOIN = """ +/* + Compute a deterministic hash for the `left_table_query_string` that will be used throughout + all the logic as the field to GROUP BY the data +*/ +WITH "entity_dataframe" AS ( + SELECT *, + "{{entity_df_event_timestamp_col}}" AS "entity_timestamp" + {% for featureview in featureviews %} + {% if featureview.entities %} + ,( + {% for entity in featureview.entities %} + CAST("{{entity}}" AS VARCHAR) || + {% endfor %} + CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR) + ) AS "{{featureview.name}}__entity_row_unique_id" + {% else %} + ,CAST("{{entity_df_event_timestamp_col}}" AS VARCHAR) AS "{{featureview.name}}__entity_row_unique_id" + {% endif %} + {% endfor %} + FROM "{{ left_table_query_string }}" +), + +{% for featureview in featureviews %} + +"{{ 
featureview.name }}__entity_dataframe" AS ( + SELECT + {{ featureview.entities | map('tojson') | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + "entity_timestamp", + "{{featureview.name}}__entity_row_unique_id" + FROM "entity_dataframe" + GROUP BY + {{ featureview.entities | map('tojson') | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + "entity_timestamp", + "{{featureview.name}}__entity_row_unique_id" +), + +/* + This query template performs the point-in-time correctness join for a single feature set table + to the provided entity table. + + 1. We first join the current feature_view to the entity dataframe that has been passed. + This JOIN has the following logic: + - For each row of the entity dataframe, only keep the rows where the `event_timestamp_column` + is less than the one provided in the entity dataframe + - If there a TTL for the current feature_view, also keep the rows where the `event_timestamp_column` + is higher the the one provided minus the TTL + - For each row, Join on the entity key and retrieve the `entity_row_unique_id` that has been + computed previously + + The output of this CTE will contain all the necessary information and already filtered out most + of the data that is not relevant. 
+*/ + +"{{ featureview.name }}__subquery" AS ( + SELECT + "{{ featureview.event_timestamp_column }}" as "event_timestamp", + {{'"' ~ featureview.created_timestamp_column ~ '" as "created_timestamp",' if featureview.created_timestamp_column else '' }} + {{featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} + {% for feature in featureview.features %} + "{{ feature }}" as {% if full_feature_names %}"{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}"{% else %}"{{ featureview.field_mapping.get(feature, feature) }}"{% endif %}{% if loop.last %}{% else %}, {% endif %} + {% endfor %} + FROM {{ featureview.table_subquery }} + WHERE "{{ featureview.event_timestamp_column }}" <= '{{ featureview.max_event_timestamp }}' + {% if featureview.ttl == 0 %}{% else %} + AND "{{ featureview.event_timestamp_column }}" >= '{{ featureview.min_event_timestamp }}' + {% endif %} +), + +"{{ featureview.name }}__base" AS ( + SELECT + "subquery".*, + "entity_dataframe"."entity_timestamp", + "entity_dataframe"."{{featureview.name}}__entity_row_unique_id" + FROM "{{ featureview.name }}__subquery" AS "subquery" + INNER JOIN "{{ featureview.name }}__entity_dataframe" AS "entity_dataframe" + ON TRUE + AND "subquery"."event_timestamp" <= "entity_dataframe"."entity_timestamp" + + {% if featureview.ttl == 0 %}{% else %} + AND "subquery"."event_timestamp" >= TIMESTAMPADD(second,-{{ featureview.ttl }},"entity_dataframe"."entity_timestamp") + {% endif %} + + {% for entity in featureview.entities %} + AND "subquery"."{{ entity }}" = "entity_dataframe"."{{ entity }}" + {% endfor %} +), + +/* + 2. If the `created_timestamp_column` has been set, we need to + deduplicate the data first. This is done by calculating the + `MAX(created_at_timestamp)` for each event_timestamp. 
+ We then join the data on the next CTE +*/ +{% if featureview.created_timestamp_column %} +"{{ featureview.name }}__dedup" AS ( + SELECT + "{{featureview.name}}__entity_row_unique_id", + "event_timestamp", + MAX("created_timestamp") AS "created_timestamp" + FROM "{{ featureview.name }}__base" + GROUP BY "{{featureview.name}}__entity_row_unique_id", "event_timestamp" +), +{% endif %} + +/* + 3. The data has been filtered during the first CTE "*__base" + Thus we only need to compute the latest timestamp of each feature. +*/ +"{{ featureview.name }}__latest" AS ( + SELECT + "event_timestamp", + {% if featureview.created_timestamp_column %}"created_timestamp",{% endif %} + "{{featureview.name}}__entity_row_unique_id" + FROM + ( + SELECT *, + ROW_NUMBER() OVER( + PARTITION BY "{{featureview.name}}__entity_row_unique_id" + ORDER BY "event_timestamp" DESC{% if featureview.created_timestamp_column %},"created_timestamp" DESC{% endif %} + ) AS "row_number" + FROM "{{ featureview.name }}__base" + {% if featureview.created_timestamp_column %} + INNER JOIN "{{ featureview.name }}__dedup" + USING ("{{featureview.name}}__entity_row_unique_id", "event_timestamp", "created_timestamp") + {% endif %} + ) + WHERE "row_number" = 1 +), + +/* + 4. Once we know the latest value of each feature for a given timestamp, + we can join again the data back to the original "base" dataset +*/ +"{{ featureview.name }}__cleaned" AS ( + SELECT "base".* + FROM "{{ featureview.name }}__base" AS "base" + INNER JOIN "{{ featureview.name }}__latest" + USING( + "{{featureview.name}}__entity_row_unique_id", + "event_timestamp" + {% if featureview.created_timestamp_column %} + ,"created_timestamp" + {% endif %} + ) +){% if loop.last %}{% else %}, {% endif %} + + +{% endfor %} +/* + Joins the outputs of multiple time travel joins to a single table. + The entity_dataframe dataset being our source of truth here. 
+ */ + +SELECT "{{ final_output_feature_names | join('", "')}}" +FROM "entity_dataframe" +{% for featureview in featureviews %} +LEFT JOIN ( + SELECT + "{{featureview.name}}__entity_row_unique_id" + {% for feature in featureview.features %} + ,{% if full_feature_names %}"{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}"{% else %}"{{ featureview.field_mapping.get(feature, feature) }}"{% endif %} + {% endfor %} + FROM "{{ featureview.name }}__cleaned" +) "{{ featureview.name }}__cleaned" USING ("{{featureview.name}}__entity_row_unique_id") +{% endfor %} +""" diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py new file mode 100644 index 00000000000..b5d50be0f4d --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -0,0 +1,315 @@ +from typing import Callable, Dict, Iterable, Optional, Tuple + +from feast import type_map +from feast.data_source import DataSource +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) +from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.value_type import ValueType + + +class SnowflakeSource(DataSource): + def __init__( + self, + database: Optional[str] = None, + schema: Optional[str] = None, + table: Optional[str] = None, + query: Optional[str] = None, + event_timestamp_column: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + field_mapping: Optional[Dict[str, str]] = None, + date_partition_column: Optional[str] = "", + ): + """ + Creates a SnowflakeSource object. + + Args: + database (optional): Snowflake database where the features are stored. + schema (optional): Snowflake schema in which the table is located. + table (optional): Snowflake table where the features are stored. 
+ event_timestamp_column (optional): Event timestamp column used for point in + time joins of feature values. + query (optional): The query to be executed to obtain the features. + created_timestamp_column (optional): Timestamp column indicating when the + row was created, used for deduplicating rows. + field_mapping (optional): A dictionary mapping of column names in this data + source to column names in a feature table or view. + date_partition_column (optional): Timestamp column used for partitioning. + + """ + super().__init__( + event_timestamp_column, + created_timestamp_column, + field_mapping, + date_partition_column, + ) + + # The default Snowflake schema is named "PUBLIC". + _schema = "PUBLIC" if (database and table and not schema) else schema + + self._snowflake_options = SnowflakeOptions( + database=database, schema=_schema, table=table, query=query + ) + + @staticmethod + def from_proto(data_source: DataSourceProto): + """ + Creates a SnowflakeSource from a protobuf representation of a SnowflakeSource. + + Args: + data_source: A protobuf representation of a SnowflakeSource + + Returns: + A SnowflakeSource object based on the data_source protobuf. + """ + return SnowflakeSource( + field_mapping=dict(data_source.field_mapping), + database=data_source.snowflake_options.database, + schema=data_source.snowflake_options.schema, + table=data_source.snowflake_options.table, + event_timestamp_column=data_source.event_timestamp_column, + created_timestamp_column=data_source.created_timestamp_column, + date_partition_column=data_source.date_partition_column, + query=data_source.snowflake_options.query, + ) + + def __eq__(self, other): + if not isinstance(other, SnowflakeSource): + raise TypeError( + "Comparisons should only involve SnowflakeSource class objects." 
+ ) + + return ( + self.snowflake_options.database == other.snowflake_options.database + and self.snowflake_options.schema == other.snowflake_options.schema + and self.snowflake_options.table == other.snowflake_options.table + and self.snowflake_options.query == other.snowflake_options.query + and self.event_timestamp_column == other.event_timestamp_column + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @property + def database(self): + """Returns the database of this snowflake source.""" + return self._snowflake_options.database + + @property + def schema(self): + """Returns the schema of this snowflake source.""" + return self._snowflake_options.schema + + @property + def table(self): + """Returns the table of this snowflake source.""" + return self._snowflake_options.table + + @property + def query(self): + """Returns the snowflake options of this snowflake source.""" + return self._snowflake_options.query + + @property + def snowflake_options(self): + """Returns the snowflake options of this snowflake source.""" + return self._snowflake_options + + @snowflake_options.setter + def snowflake_options(self, _snowflake_options): + """Sets the snowflake options of this snowflake source.""" + self._snowflake_options = _snowflake_options + + def to_proto(self) -> DataSourceProto: + """ + Converts a SnowflakeSource object to its protobuf representation. + + Returns: + A DataSourceProto object. 
+ """ + data_source_proto = DataSourceProto( + type=DataSourceProto.BATCH_SNOWFLAKE, + field_mapping=self.field_mapping, + snowflake_options=self.snowflake_options.to_proto(), + ) + + data_source_proto.event_timestamp_column = self.event_timestamp_column + data_source_proto.created_timestamp_column = self.created_timestamp_column + data_source_proto.date_partition_column = self.date_partition_column + + return data_source_proto + + def validate(self, config: RepoConfig): + # As long as the query gets successfully executed, or the table exists, + # the data source is validated. We don't need the results though. + self.get_table_column_names_and_types(config) + + def get_table_query_string(self) -> str: + """Returns a string that can directly be used to reference this table in SQL.""" + if self.database and self.table: + return f'"{self.database}"."{self.schema}"."{self.table}"' + elif self.table: + return f'"{self.table}"' + else: + return f"({self.query})" + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return type_map.snowflake_python_type_to_feast_value_type + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + """ + Returns a mapping of column names to types for this snowflake source. 
+ + Args: + config: A RepoConfig describing the feature repo + """ + + from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig + from feast.infra.utils.snowflake_utils import ( + execute_snowflake_statement, + get_snowflake_conn, + ) + + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + snowflake_conn = get_snowflake_conn(config.offline_store) + + if self.database and self.table: + query = f'SELECT * FROM "{self.database}"."{self.schema}"."{self.table}" LIMIT 1' + elif self.table: + query = f'SELECT * FROM "{self.table}" LIMIT 1' + else: + query = f"SELECT * FROM ({self.query}) LIMIT 1" + + result = execute_snowflake_statement(snowflake_conn, query).fetch_pandas_all() + + if not result.empty: + metadata = result.dtypes.apply(str) + return list(zip(metadata.index, metadata)) + else: + raise ValueError("The following source:\n" + query + "\n ... is empty") + + +class SnowflakeOptions: + """ + DataSource snowflake options used to source features from snowflake query. 
+ """ + + def __init__( + self, + database: Optional[str], + schema: Optional[str], + table: Optional[str], + query: Optional[str], + ): + self._database = database + self._schema = schema + self._table = table + self._query = query + + @property + def query(self): + """Returns the snowflake SQL query referenced by this source.""" + return self._query + + @query.setter + def query(self, query): + """Sets the snowflake SQL query referenced by this source.""" + self._query = query + + @property + def database(self): + """Returns the database name of this snowflake table.""" + return self._database + + @database.setter + def database(self, database): + """Sets the database ref of this snowflake table.""" + self._database = database + + @property + def schema(self): + """Returns the schema name of this snowflake table.""" + return self._schema + + @schema.setter + def schema(self, schema): + """Sets the schema of this snowflake table.""" + self._schema = schema + + @property + def table(self): + """Returns the table name of this snowflake table.""" + return self._table + + @table.setter + def table(self, table): + """Sets the table ref of this snowflake table.""" + self._table = table + + @classmethod + def from_proto(cls, snowflake_options_proto: DataSourceProto.SnowflakeOptions): + """ + Creates a SnowflakeOptions from a protobuf representation of a snowflake option. + + Args: + snowflake_options_proto: A protobuf representation of a DataSource + + Returns: + A SnowflakeOptions object based on the snowflake_options protobuf. + """ + snowflake_options = cls( + database=snowflake_options_proto.database, + schema=snowflake_options_proto.schema, + table=snowflake_options_proto.table, + query=snowflake_options_proto.query, + ) + + return snowflake_options + + def to_proto(self) -> DataSourceProto.SnowflakeOptions: + """ + Converts an SnowflakeOptionsProto object to its protobuf representation. + + Returns: + A SnowflakeOptionsProto protobuf. 
+ """ + snowflake_options_proto = DataSourceProto.SnowflakeOptions( + database=self.database, + schema=self.schema, + table=self.table, + query=self.query, + ) + + return snowflake_options_proto + + +class SavedDatasetSnowflakeStorage(SavedDatasetStorage): + _proto_attr_name = "snowflake_storage" + + snowflake_options: SnowflakeOptions + + def __init__(self, table_ref: str): + self.snowflake_options = SnowflakeOptions( + database=None, schema=None, table=table_ref, query=None + ) + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> SavedDatasetStorage: + + return SavedDatasetSnowflakeStorage( + table_ref=SnowflakeOptions.from_proto(storage_proto.snowflake_storage).table + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto( + snowflake_storage=self.snowflake_options.to_proto() + ) + + def to_data_source(self) -> DataSource: + return SnowflakeSource(table=self.snowflake_options.table) diff --git a/sdk/python/feast/infra/online_stores/datastore.py b/sdk/python/feast/infra/online_stores/datastore.py index 0442eda1220..5a8d4b71803 100644 --- a/sdk/python/feast/infra/online_stores/datastore.py +++ b/sdk/python/feast/infra/online_stores/datastore.py @@ -12,16 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import itertools +import logging from datetime import datetime from multiprocessing.pool import ThreadPool +from queue import Queue +from threading import Lock, Thread from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple from pydantic import PositiveInt, StrictStr from pydantic.typing import Literal from feast import Entity, utils +from feast.errors import FeastProviderLoginError from feast.feature_view import FeatureView -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import DATASTORE_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.DatastoreTable_pb2 import ( @@ -33,12 +37,14 @@ from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.usage import log_exceptions_and_usage, tracing_span +LOGGER = logging.getLogger(__name__) + try: from google.auth.exceptions import DefaultCredentialsError from google.cloud import datastore from google.cloud.datastore.client import Key except ImportError as e: - from feast.errors import FeastExtrasDependencyImportError, FeastProviderLoginError + from feast.errors import FeastExtrasDependencyImportError raise FeastExtrasDependencyImportError("gcp", str(e)) @@ -196,18 +202,18 @@ def _write_minibatch( key=key, exclude_from_indexes=("created_ts", "event_ts", "values") ) - entity.update( - dict( - key=entity_key.SerializeToString(), - values={k: v.SerializeToString() for k, v in features.items()}, - event_ts=utils.make_tzaware(timestamp), - created_ts=( - utils.make_tzaware(created_ts) - if created_ts is not None - else None - ), - ) + content_entity = datastore.Entity( + exclude_from_indexes=tuple(features.keys()) + ) + for k, v in features.items(): + content_entity[k] = v.SerializeToString() + entity["key"] = entity_key.SerializeToString() + entity["values"] = content_entity + entity["event_ts"] = 
utils.make_tzaware(timestamp) + entity["created_ts"] = ( + utils.make_tzaware(created_ts) if created_ts is not None else None ) + entities.append(entity) with client.transaction(): client.put_multi(entities) @@ -262,15 +268,46 @@ def online_read( def _delete_all_values(client, key): """ Delete all data under the key path in datastore. + + Creates and uses a queue of lists of entity keys, which are batch deleted + by multiple threads. """ + + class AtomicCounter(object): + # for tracking how many deletions have already occurred; not used outside this method + def __init__(self): + self.value = 0 + self.lock = Lock() + + def increment(self): + with self.lock: + self.value += 1 + + BATCH_SIZE = 500 # Dec 2021: delete_multi has a max size of 500: https://cloud.google.com/datastore/docs/concepts/limits + NUM_THREADS = 3 + deletion_queue = Queue() + status_info_counter = AtomicCounter() + + def worker(shared_counter): + while True: + client.delete_multi(deletion_queue.get()) + shared_counter.increment() + LOGGER.debug( + f"batch deletions completed: {shared_counter.value} ({shared_counter.value * BATCH_SIZE} total entries) & outstanding queue size: {deletion_queue.qsize()}" + ) + deletion_queue.task_done() + + for _ in range(NUM_THREADS): + Thread(target=worker, args=(status_info_counter,), daemon=True).start() + + query = client.query(kind="Row", ancestor=key) while True: - query = client.query(kind="Row", ancestor=key) - entities = list(query.fetch(limit=1000)) + entities = list(query.fetch(limit=BATCH_SIZE)) if not entities: - return + break + deletion_queue.put([entity.key for entity in entities]) - for entity in entities: - client.delete(entity.key) + deletion_queue.join() def _initialize_client( @@ -296,14 +333,12 @@ class DatastoreTable(InfraObject): name: The name of the table. project_id (optional): The GCP project id. namespace (optional): Datastore namespace. - client: Datastore client. 
""" project: str name: str project_id: Optional[str] namespace: Optional[str] - client: datastore.Client def __init__( self, @@ -316,51 +351,70 @@ def __init__( self.name = name self.project_id = project_id self.namespace = namespace - self.client = _initialize_client(self.project_id, self.namespace) - def to_proto(self) -> InfraObjectProto: + def to_infra_object_proto(self) -> InfraObjectProto: + datastore_table_proto = self.to_proto() + return InfraObjectProto( + infra_object_class_type=DATASTORE_INFRA_OBJECT_CLASS_TYPE, + datastore_table=datastore_table_proto, + ) + + def to_proto(self) -> Any: datastore_table_proto = DatastoreTableProto() datastore_table_proto.project = self.project datastore_table_proto.name = self.name if self.project_id: - datastore_table_proto.project_id.FromString(bytes(self.project_id, "utf-8")) + datastore_table_proto.project_id.value = self.project_id if self.namespace: - datastore_table_proto.namespace.FromString(bytes(self.namespace, "utf-8")) - - return InfraObjectProto( - infra_object_class_type="feast.infra.online_stores.datastore.DatastoreTable", - datastore_table=datastore_table_proto, - ) + datastore_table_proto.namespace.value = self.namespace + return datastore_table_proto @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: datastore_table = DatastoreTable( project=infra_object_proto.datastore_table.project, name=infra_object_proto.datastore_table.name, ) + # Distinguish between null and empty string, since project_id and namespace are StringValues. 
if infra_object_proto.datastore_table.HasField("project_id"): datastore_table.project_id = ( - infra_object_proto.datastore_table.project_id.SerializeToString() - ).decode("utf-8") + infra_object_proto.datastore_table.project_id.value + ) if infra_object_proto.datastore_table.HasField("namespace"): datastore_table.namespace = ( - infra_object_proto.datastore_table.namespace.SerializeToString() - ).decode("utf-8") + infra_object_proto.datastore_table.namespace.value + ) + + return datastore_table + + @staticmethod + def from_proto(datastore_table_proto: DatastoreTableProto) -> Any: + datastore_table = DatastoreTable( + project=datastore_table_proto.project, name=datastore_table_proto.name, + ) + + # Distinguish between null and empty string, since project_id and namespace are StringValues. + if datastore_table_proto.HasField("project_id"): + datastore_table.project_id = datastore_table_proto.project_id.value + if datastore_table_proto.HasField("namespace"): + datastore_table.namespace = datastore_table_proto.namespace.value return datastore_table def update(self): - key = self.client.key("Project", self.project, "Table", self.name) + client = _initialize_client(self.project_id, self.namespace) + key = client.key("Project", self.project, "Table", self.name) entity = datastore.Entity( key=key, exclude_from_indexes=("created_ts", "event_ts", "values") ) entity.update({"created_ts": datetime.utcnow()}) - self.client.put(entity) + client.put(entity) def teardown(self): - key = self.client.key("Project", self.project, "Table", self.name) - _delete_all_values(self.client, key) + client = _initialize_client(self.project_id, self.namespace) + key = client.key("Project", self.project, "Table", self.name) + _delete_all_values(client, key) # Delete the table metadata datastore entity - self.client.delete(key) + client.delete(key) diff --git a/sdk/python/feast/infra/online_stores/dynamodb.py b/sdk/python/feast/infra/online_stores/dynamodb.py index 377e10c3081..46592bf2a3d 100644 
--- a/sdk/python/feast/infra/online_stores/dynamodb.py +++ b/sdk/python/feast/infra/online_stores/dynamodb.py @@ -19,7 +19,7 @@ from pydantic.typing import Literal from feast import Entity, FeatureView, utils -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import DYNAMODB_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.online_stores.helpers import compute_entity_id from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.DynamoDBTable_pb2 import ( @@ -173,7 +173,7 @@ def online_read( val = ValueProto() val.ParseFromString(value_bin.value) res[feature_name] = val - result.append((value["event_ts"], res)) + result.append((datetime.fromisoformat(value["event_ts"]), res)) else: result.append((None, None)) return result @@ -234,23 +234,32 @@ def __init__(self, name: str, region: str): self.name = name self.region = region - def to_proto(self) -> InfraObjectProto: - dynamodb_table_proto = DynamoDBTableProto() - dynamodb_table_proto.name = self.name - dynamodb_table_proto.region = self.region - + def to_infra_object_proto(self) -> InfraObjectProto: + dynamodb_table_proto = self.to_proto() return InfraObjectProto( - infra_object_class_type="feast.infra.online_stores.dynamodb.DynamoDBTable", + infra_object_class_type=DYNAMODB_INFRA_OBJECT_CLASS_TYPE, dynamodb_table=dynamodb_table_proto, ) + def to_proto(self) -> Any: + dynamodb_table_proto = DynamoDBTableProto() + dynamodb_table_proto.name = self.name + dynamodb_table_proto.region = self.region + return dynamodb_table_proto + @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: return DynamoDBTable( name=infra_object_proto.dynamodb_table.name, region=infra_object_proto.dynamodb_table.region, ) + @staticmethod + def from_proto(dynamodb_table_proto: DynamoDBTableProto) -> Any: + return DynamoDBTable( + name=dynamodb_table_proto.name, 
region=dynamodb_table_proto.region, + ) + def update(self): dynamodb_client = _initialize_dynamodb_client(region=self.region) dynamodb_resource = _initialize_dynamodb_resource(region=self.region) diff --git a/sdk/python/feast/infra/online_stores/helpers.py b/sdk/python/feast/infra/online_stores/helpers.py index 5e01ddb263f..b206c08b7c4 100644 --- a/sdk/python/feast/infra/online_stores/helpers.py +++ b/sdk/python/feast/infra/online_stores/helpers.py @@ -1,10 +1,9 @@ -import importlib import struct from typing import Any, List import mmh3 -from feast import errors +from feast.importer import import_class from feast.infra.key_encoding_utils import ( serialize_entity_key, serialize_entity_key_prefix, @@ -13,29 +12,12 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto -def get_online_store_from_config(online_store_config: Any,) -> OnlineStore: - """Get the online store from online store config""" - +def get_online_store_from_config(online_store_config: Any) -> OnlineStore: + """Creates an online store corresponding to the given online store config.""" module_name = online_store_config.__module__ qualified_name = type(online_store_config).__name__ - store_class_name = qualified_name.replace("Config", "") - try: - module = importlib.import_module(module_name) - except Exception as e: - # The original exception can be anything - either module not found, - # or any other kind of error happening during the module import time. - # So we should include the original error as well in the stack trace. 
- raise errors.FeastModuleImportError(module_name, "OnlineStore") from e - - # Try getting the provider class definition - try: - online_store_class = getattr(module, store_class_name) - except AttributeError: - # This can only be one type of error, when class_name attribute does not exist in the module - # So we don't have to include the original exception here - raise errors.FeastClassImportError( - module_name, store_class_name, class_type="OnlineStore" - ) from None + class_name = qualified_name.replace("Config", "") + online_store_class = import_class(module_name, class_name, "OnlineStore") return online_store_class() diff --git a/sdk/python/feast/infra/online_stores/online_store.py b/sdk/python/feast/infra/online_stores/online_store.py index b2aa1e46d04..1f177996dea 100644 --- a/sdk/python/feast/infra/online_stores/online_store.py +++ b/sdk/python/feast/infra/online_stores/online_store.py @@ -18,6 +18,8 @@ from feast import Entity from feast.feature_view import FeatureView +from feast.infra.infra_object import InfraObject +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig @@ -92,6 +94,18 @@ def update( ): ... + def plan( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> List[InfraObject]: + """ + Returns the set of InfraObjects required to support the desired registry. + + Args: + config: The RepoConfig for the current FeatureStore. + desired_registry_proto: The desired registry, in proto form. 
+ """ + return [] + @abstractmethod def teardown( self, diff --git a/sdk/python/feast/infra/online_stores/redis.py b/sdk/python/feast/infra/online_stores/redis.py index 9f203393432..752ed7d009d 100644 --- a/sdk/python/feast/infra/online_stores/redis.py +++ b/sdk/python/feast/infra/online_stores/redis.py @@ -41,7 +41,7 @@ try: from redis import Redis - from rediscluster import RedisCluster + from redis.cluster import RedisCluster except ImportError as e: from feast.errors import FeastExtrasDependencyImportError @@ -72,11 +72,11 @@ class RedisOnlineStoreConfig(FeastConfigBaseModel): class RedisOnlineStore(OnlineStore): _client: Optional[Union[Redis, RedisCluster]] = None - def delete_table_values(self, config: RepoConfig, table: FeatureView): + def delete_entity_values(self, config: RepoConfig, join_keys: List[str]): client = self._get_client(config.online_store) deleted_count = 0 pipeline = client.pipeline() - prefix = _redis_key_prefix(table.entities) + prefix = _redis_key_prefix(join_keys) for _k in client.scan_iter( b"".join([prefix, b"*", config.project.encode("utf8")]) @@ -85,7 +85,7 @@ def delete_table_values(self, config: RepoConfig, table: FeatureView): deleted_count += 1 pipeline.execute() - logger.debug(f"Deleted {deleted_count} keys for {table.name}") + logger.debug(f"Deleted {deleted_count} rows for entity {', '.join(join_keys)}") @log_exceptions_and_usage(online_store="redis") def update( @@ -98,10 +98,16 @@ def update( partial: bool, ): """ - We delete the keys in redis for tables/views being removed. + Look for join_keys (list of entities) that are not in use anymore + (usually this happens when the last feature view that was using specific compound key is deleted) + and remove all features attached to this "join_keys". 
""" - for table in tables_to_delete: - self.delete_table_values(config, table) + join_keys_to_keep = set(tuple(table.entities) for table in tables_to_keep) + + join_keys_to_delete = set(tuple(table.entities) for table in tables_to_delete) + + for join_keys in join_keys_to_delete - join_keys_to_keep: + self.delete_entity_values(config, list(join_keys)) def teardown( self, @@ -112,8 +118,10 @@ def teardown( """ We delete the keys in redis for tables/views being removed. """ - for table in tables: - self.delete_table_values(config, table) + join_keys_to_delete = set(tuple(table.entities) for table in tables) + + for join_keys in join_keys_to_delete: + self.delete_entity_values(config, list(join_keys)) @staticmethod def _parse_connection_string(connection_string: str): @@ -277,13 +285,13 @@ def _get_features_for_entity( res_ts = Timestamp() ts_val = res_val.pop(f"_ts:{feature_view}") if ts_val: - res_ts.ParseFromString(ts_val) + res_ts.ParseFromString(bytes(ts_val)) res = {} for feature_name, val_bin in res_val.items(): val = ValueProto() if val_bin: - val.ParseFromString(val_bin) + val.ParseFromString(bytes(val_bin)) res[feature_name] = val if not res: diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 206e2eb0d50..1e7ecf1024a 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -23,10 +23,11 @@ from feast import Entity from feast.feature_view import FeatureView -from feast.infra.infra_object import InfraObject +from feast.infra.infra_object import SQLITE_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.key_encoding_utils import serialize_entity_key from feast.infra.online_stores.online_store import OnlineStore from feast.protos.feast.core.InfraObject_pb2 import InfraObject as InfraObjectProto +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as 
SqliteTableProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto @@ -199,6 +200,21 @@ def update( for table in tables_to_delete: conn.execute(f"DROP TABLE IF EXISTS {_table_id(project, table)}") + @log_exceptions_and_usage(online_store="sqlite") + def plan( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> List[InfraObject]: + project = config.project + + infra_objects: List[InfraObject] = [ + SqliteTable( + path=self._get_db_path(config), + name=_table_id(project, FeatureView.from_proto(view)), + ) + for view in desired_registry_proto.feature_views + ] + return infra_objects + def teardown( self, config: RepoConfig, @@ -241,23 +257,30 @@ def __init__(self, path: str, name: str): self.name = name self.conn = _initialize_conn(path) - def to_proto(self) -> InfraObjectProto: - sqlite_table_proto = SqliteTableProto() - sqlite_table_proto.path = self.path - sqlite_table_proto.name = self.name - + def to_infra_object_proto(self) -> InfraObjectProto: + sqlite_table_proto = self.to_proto() return InfraObjectProto( - infra_object_class_type="feast.infra.online_store.sqlite.SqliteTable", + infra_object_class_type=SQLITE_INFRA_OBJECT_CLASS_TYPE, sqlite_table=sqlite_table_proto, ) + def to_proto(self) -> Any: + sqlite_table_proto = SqliteTableProto() + sqlite_table_proto.path = self.path + sqlite_table_proto.name = self.name + return sqlite_table_proto + @staticmethod - def from_proto(infra_object_proto: InfraObjectProto) -> Any: + def from_infra_object_proto(infra_object_proto: InfraObjectProto) -> Any: return SqliteTable( path=infra_object_proto.sqlite_table.path, name=infra_object_proto.sqlite_table.name, ) + @staticmethod + def from_proto(sqlite_table_proto: SqliteTableProto) -> Any: + return SqliteTable(path=sqlite_table_proto.path, name=sqlite_table_proto.name,) + def update(self): self.conn.execute( f"CREATE TABLE IF NOT EXISTS {self.name} (entity_key 
BLOB, feature_name TEXT, value BLOB, event_ts timestamp, created_ts timestamp, PRIMARY KEY(entity_key, feature_name))" diff --git a/sdk/python/feast/infra/passthrough_provider.py b/sdk/python/feast/infra/passthrough_provider.py index b42f5b0daf7..3468b9dc927 100644 --- a/sdk/python/feast/infra/passthrough_provider.py +++ b/sdk/python/feast/infra/passthrough_provider.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timedelta from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union import pandas @@ -20,7 +20,9 @@ from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDataset from feast.usage import RatioSampler, log_exceptions_and_usage, set_usage_attribute +from feast.utils import make_tzaware DEFAULT_BATCH_SIZE = 10_000 @@ -35,7 +37,11 @@ def __init__(self, config: RepoConfig): self.repo_config = config self.offline_store = get_offline_store_from_config(config.offline_store) - self.online_store = get_online_store_from_config(config.online_store) + self.online_store = ( + get_online_store_from_config(config.online_store) + if config.online_store + else None + ) def update_infra( self, @@ -47,20 +53,24 @@ def update_infra( partial: bool, ): set_usage_attribute("provider", self.__class__.__name__) - self.online_store.update( - config=self.repo_config, - tables_to_delete=tables_to_delete, - tables_to_keep=tables_to_keep, - entities_to_keep=entities_to_keep, - entities_to_delete=entities_to_delete, - partial=partial, - ) + + # Call update only if there is an online store + if self.online_store: + self.online_store.update( + config=self.repo_config, + tables_to_delete=tables_to_delete, + tables_to_keep=tables_to_keep, + entities_to_keep=entities_to_keep, + entities_to_delete=entities_to_delete, + partial=partial, + ) def teardown_infra( self, project: str, tables: Sequence[FeatureView], 
entities: Sequence[Entity], ) -> None: set_usage_attribute("provider", self.__class__.__name__) - self.online_store.teardown(self.repo_config, tables, entities) + if self.online_store: + self.online_store.teardown(self.repo_config, tables, entities) def online_write_batch( self, @@ -72,7 +82,8 @@ def online_write_batch( progress: Optional[Callable[[int], Any]], ) -> None: set_usage_attribute("provider", self.__class__.__name__) - self.online_store.online_write_batch(config, table, data, progress) + if self.online_store: + self.online_store.online_write_batch(config, table, data, progress) @log_exceptions_and_usage(sampler=RatioSampler(ratio=0.001)) def online_read( @@ -81,12 +92,13 @@ def online_read( table: FeatureView, entity_keys: List[EntityKeyProto], requested_features: List[str] = None, - ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + ) -> List: set_usage_attribute("provider", self.__class__.__name__) - result = self.online_store.online_read( - config, table, entity_keys, requested_features - ) - + result = [] + if self.online_store: + result = self.online_store.online_read( + config, table, entity_keys, requested_features + ) return result def ingest_df( @@ -98,7 +110,7 @@ def ingest_df( if feature_view.batch_source.field_mapping is not None: table = _run_field_mapping(table, feature_view.batch_source.field_mapping) - join_keys = [entity.join_key for entity in entities] + join_keys = {entity.join_key: entity.value_type for entity in entities} rows_to_write = _convert_arrow_to_proto(table, feature_view, join_keys) self.online_write_batch( @@ -144,7 +156,7 @@ def materialize_single_feature_view( if feature_view.batch_source.field_mapping is not None: table = _run_field_mapping(table, feature_view.batch_source.field_mapping) - join_keys = [entity.join_key for entity in entities] + join_keys = {entity.join_key: entity.value_type for entity in entities} with tqdm_builder(table.num_rows) as pbar: for batch in 
table.to_batches(DEFAULT_BATCH_SIZE): @@ -177,4 +189,28 @@ def get_historical_features( project=project, full_feature_names=full_feature_names, ) + return job + + def retrieve_saved_dataset( + self, config: RepoConfig, dataset: SavedDataset + ) -> RetrievalJob: + set_usage_attribute("provider", self.__class__.__name__) + + feature_name_columns = [ + ref.replace(":", "__") if dataset.full_feature_names else ref.split(":")[1] + for ref in dataset.features + ] + + # ToDo: replace hardcoded value + event_ts_column = "event_timestamp" + + return self.offline_store.pull_all_from_table_or_query( + config=config, + data_source=dataset.storage.to_data_source(), + join_key_columns=dataset.join_keys, + feature_name_columns=feature_name_columns, + event_timestamp_column=event_ts_column, + start_date=make_tzaware(dataset.min_event_timestamp), # type: ignore + end_date=make_tzaware(dataset.max_event_timestamp + timedelta(seconds=1)), # type: ignore + ) diff --git a/sdk/python/feast/infra/provider.py b/sdk/python/feast/infra/provider.py index 00591725fcc..a53030b74f9 100644 --- a/sdk/python/feast/infra/provider.py +++ b/sdk/python/feast/infra/provider.py @@ -8,15 +8,19 @@ import pyarrow from tqdm import tqdm -from feast import errors, importer +from feast import errors from feast.entity import Entity from feast.feature_view import DUMMY_ENTITY_ID, FeatureView +from feast.importer import import_class +from feast.infra.infra_object import Infra from feast.infra.offline_stores.offline_store import RetrievalJob from feast.on_demand_feature_view import OnDemandFeatureView +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDataset from feast.type_map import python_values_to_proto_values from 
feast.value_type import ValueType @@ -60,6 +64,18 @@ def update_infra( """ ... + def plan_infra( + self, config: RepoConfig, desired_registry_proto: RegistryProto + ) -> Infra: + """ + Returns the Infra required to support the desired registry. + + Args: + config: The RepoConfig for the current FeatureStore. + desired_registry_proto: The desired registry, in proto form. + """ + return Infra() + @abc.abstractmethod def teardown_infra( self, project: str, tables: Sequence[FeatureView], entities: Sequence[Entity], @@ -154,6 +170,21 @@ def online_read( """ ... + @abc.abstractmethod + def retrieve_saved_dataset( + self, config: RepoConfig, dataset: SavedDataset + ) -> RetrievalJob: + """ + Read saved dataset from offline store. + All parameters for retrieval (like path, datetime boundaries, column names for both keys and features, etc) + are determined from SavedDataset object. + + Returns: + RetrievalJob object, which is lazy wrapper for actual query performed under the hood. + + """ + ... + def get_feature_server_endpoint(self) -> Optional[str]: """Returns endpoint for the feature server, if it exists.""" return None @@ -172,7 +203,7 @@ def get_provider(config: RepoConfig, repo_path: Path) -> Provider: # For example, provider 'foo.bar.MyProvider' will be parsed into 'foo.bar' and 'MyProvider' module_name, class_name = provider.rsplit(".", 1) - cls = importer.get_class_from_type(module_name, class_name, "Provider") + cls = import_class(module_name, class_name, "Provider") return cls(config) @@ -300,21 +331,21 @@ def _coerce_datetime(ts): def _convert_arrow_to_proto( table: Union[pyarrow.Table, pyarrow.RecordBatch], feature_view: FeatureView, - join_keys: List[str], + join_keys: Dict[str, ValueType], ) -> List[Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]]]: # Avoid ChunkedArrays which guarentees `zero_copy_only` availiable. 
if isinstance(table, pyarrow.Table): table = table.to_batches()[0] - columns = [(f.name, f.dtype) for f in feature_view.features] + [ - (key, ValueType.UNKNOWN) for key in join_keys - ] + columns = [(f.name, f.dtype) for f in feature_view.features] + list( + join_keys.items() + ) proto_values_by_column = { column: python_values_to_proto_values( - table.column(column).to_numpy(zero_copy_only=False), dtype + table.column(column).to_numpy(zero_copy_only=False), value_type ) - for column, dtype in columns + for column, value_type in columns } entity_keys = [ diff --git a/sdk/python/feast/infra/utils/aws_utils.py b/sdk/python/feast/infra/utils/aws_utils.py index 6211c75e375..b25454ca6ad 100644 --- a/sdk/python/feast/infra/utils/aws_utils.py +++ b/sdk/python/feast/infra/utils/aws_utils.py @@ -15,7 +15,11 @@ wait_exponential, ) -from feast.errors import RedshiftCredentialsError, RedshiftQueryError +from feast.errors import ( + RedshiftCredentialsError, + RedshiftQueryError, + RedshiftTableNameTooLong, +) from feast.type_map import pa_to_redshift_value_type try: @@ -28,6 +32,9 @@ raise FeastExtrasDependencyImportError("aws", str(e)) +REDSHIFT_TABLE_NAME_MAX_LENGTH = 127 + + def get_redshift_data_client(aws_region: str): """ Get the Redshift Data API Service client for the given AWS region. @@ -184,7 +191,7 @@ def upload_df_to_redshift( iam_role: str, table_name: str, df: pd.DataFrame, -) -> None: +): """Uploads a Pandas DataFrame to Redshift as a new table. The caller is responsible for deleting the table when no longer necessary. @@ -208,9 +215,12 @@ def upload_df_to_redshift( table_name: The name of the new Redshift table where we copy the dataframe df: The Pandas DataFrame to upload - Returns: None - + Raises: + RedshiftTableNameTooLong: The specified table name is too long. 
""" + if len(table_name) > REDSHIFT_TABLE_NAME_MAX_LENGTH: + raise RedshiftTableNameTooLong(table_name) + bucket, key = get_bucket_and_key(s3_path) # Drop the index so that we dont have unnecessary columns diff --git a/sdk/python/feast/infra/utils/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake_utils.py new file mode 100644 index 00000000000..f280cfa218b --- /dev/null +++ b/sdk/python/feast/infra/utils/snowflake_utils.py @@ -0,0 +1,279 @@ +import configparser +import os +import random +import string +from logging import getLogger +from tempfile import TemporaryDirectory +from typing import Dict, Iterator, List, Optional, Tuple, cast + +import pandas as pd +import snowflake.connector +from snowflake.connector import ProgrammingError, SnowflakeConnection +from snowflake.connector.cursor import SnowflakeCursor +from tenacity import ( + retry, + retry_if_exception_type, + stop_after_attempt, + wait_exponential, +) + +from feast.errors import SnowflakeIncompleteConfig, SnowflakeQueryUnknownError + +getLogger("snowflake.connector.cursor").disabled = True +getLogger("snowflake.connector.connection").disabled = True +getLogger("snowflake.connector.network").disabled = True +logger = getLogger(__name__) + + +def execute_snowflake_statement(conn: SnowflakeConnection, query) -> SnowflakeCursor: + cursor = conn.cursor().execute(query) + if cursor is None: + raise SnowflakeQueryUnknownError(query) + return cursor + + +def get_snowflake_conn(config, autocommit=True) -> SnowflakeConnection: + if config.type == "snowflake.offline": + config_header = "connections.feast_offline_store" + + config = dict(config) + + # read config file + config_reader = configparser.ConfigParser() + config_reader.read([config["config_path"]]) + if config_reader.has_section(config_header): + kwargs = dict(config_reader[config_header]) + else: + kwargs = {} + + kwargs.update((k, v) for k, v in config.items() if v is not None) + + try: + conn = snowflake.connector.connect( + 
account=kwargs["account"], + user=kwargs["user"], + password=kwargs["password"], + role=f'''"{kwargs['role']}"''', + warehouse=f'''"{kwargs['warehouse']}"''', + database=f'''"{kwargs['database']}"''', + schema=f'''"{kwargs['schema_']}"''', + application="feast", + autocommit=autocommit, + ) + + return conn + except KeyError as e: + raise SnowflakeIncompleteConfig(e) + + +# TO DO -- sfc-gh-madkins +# Remove dependency on write_pandas function by falling back to native snowflake python connector +# Current issue is datetime[ns] types are read incorrectly in Snowflake, need to coerce to datetime[ns, UTC] +def write_pandas( + conn: SnowflakeConnection, + df: pd.DataFrame, + table_name: str, + database: Optional[str] = None, + schema: Optional[str] = None, + chunk_size: Optional[int] = None, + compression: str = "gzip", + on_error: str = "abort_statement", + parallel: int = 4, + quote_identifiers: bool = True, + auto_create_table: bool = False, + create_temp_table: bool = False, +): + """Allows users to most efficiently write back a pandas DataFrame to Snowflake. + + It works by dumping the DataFrame into Parquet files, uploading them and finally copying their data into the table. + + Returns whether all files were ingested correctly, number of chunks uploaded, and number of rows ingested + with all of the COPY INTO command's output for debugging purposes. + + Example usage: + import pandas + from snowflake.connector.pandas_tools import write_pandas + + df = pandas.DataFrame([('Mark', 10), ('Luke', 20)], columns=['name', 'balance']) + success, nchunks, nrows, _ = write_pandas(cnx, df, 'customers') + + Args: + conn: Connection to be used to communicate with Snowflake. + df: Dataframe we'd like to write back. + table_name: Table name where we want to insert into. + database: Database schema and table is in, if not provided the default one will be used (Default value = None). + schema: Schema table is in, if not provided the default one will be used (Default value = None). 
+ chunk_size: Number of elements to be inserted once, if not provided all elements will be dumped once + (Default value = None). + compression: The compression used on the Parquet files, can only be gzip, or snappy. Gzip gives supposedly a + better compression, while snappy is faster. Use whichever is more appropriate (Default value = 'gzip'). + on_error: Action to take when COPY INTO statements fail, default follows documentation at: + https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#copy-options-copyoptions + (Default value = 'abort_statement'). + parallel: Number of threads to be used when uploading chunks, default follows documentation at: + https://docs.snowflake.com/en/sql-reference/sql/put.html#optional-parameters (Default value = 4). + quote_identifiers: By default, identifiers, specifically database, schema, table and column names + (from df.columns) will be quoted. If set to False, identifiers are passed on to Snowflake without quoting. + I.e. identifiers will be coerced to uppercase by Snowflake. (Default value = True) + auto_create_table: When true, will automatically create a table with corresponding columns for each column in + the passed in DataFrame. 
The table will not be created if it already exists + create_temp_table: Will make the auto-created table as a temporary table + """ + if database is not None and schema is None: + raise ProgrammingError( + "Schema has to be provided to write_pandas when a database is provided" + ) + # This dictionary maps the compression algorithm to Snowflake put copy into command type + # https://docs.snowflake.com/en/sql-reference/sql/copy-into-table.html#type-parquet + compression_map = {"gzip": "auto", "snappy": "snappy"} + if compression not in compression_map.keys(): + raise ProgrammingError( + "Invalid compression '{}', only acceptable values are: {}".format( + compression, compression_map.keys() + ) + ) + if quote_identifiers: + location = ( + (('"' + database + '".') if database else "") + + (('"' + schema + '".') if schema else "") + + ('"' + table_name + '"') + ) + else: + location = ( + (database + "." if database else "") + + (schema + "." if schema else "") + + (table_name) + ) + if chunk_size is None: + chunk_size = len(df) + cursor: SnowflakeCursor = conn.cursor() + stage_name = create_temporary_sfc_stage(cursor) + + with TemporaryDirectory() as tmp_folder: + for i, chunk in chunk_helper(df, chunk_size): + chunk_path = os.path.join(tmp_folder, "file{}.txt".format(i)) + # Dump chunk into parquet file + chunk.to_parquet( + chunk_path, + compression=compression, + use_deprecated_int96_timestamps=True, + ) + # Upload parquet file + upload_sql = ( + "PUT /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + "'file://{path}' @\"{stage_name}\" PARALLEL={parallel}" + ).format( + path=chunk_path.replace("\\", "\\\\").replace("'", "\\'"), + stage_name=stage_name, + parallel=parallel, + ) + logger.debug(f"uploading files with '{upload_sql}'") + cursor.execute(upload_sql, _is_internal=True) + # Remove chunk file + os.remove(chunk_path) + if quote_identifiers: + columns = '"' + '","'.join(list(df.columns)) + '"' + else: + columns = ",".join(list(df.columns)) + + if 
auto_create_table: + file_format_name = create_file_format(compression, compression_map, cursor) + infer_schema_sql = f"SELECT COLUMN_NAME, TYPE FROM table(infer_schema(location=>'@\"{stage_name}\"', file_format=>'{file_format_name}'))" + logger.debug(f"inferring schema with '{infer_schema_sql}'") + result_cursor = cursor.execute(infer_schema_sql, _is_internal=True) + if result_cursor is None: + raise SnowflakeQueryUnknownError(infer_schema_sql) + result = cast(List[Tuple[str, str]], result_cursor.fetchall()) + column_type_mapping: Dict[str, str] = dict(result) + # Infer schema can return the columns out of order depending on the chunking we do when uploading + # so we have to iterate through the dataframe columns to make sure we create the table with its + # columns in order + quote = '"' if quote_identifiers else "" + create_table_columns = ", ".join( + [f"{quote}{c}{quote} {column_type_mapping[c]}" for c in df.columns] + ) + create_table_sql = ( + f"CREATE {'TEMP ' if create_temp_table else ''}TABLE IF NOT EXISTS {location} " + f"({create_table_columns})" + f" /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + ) + logger.debug(f"auto creating table with '{create_table_sql}'") + cursor.execute(create_table_sql, _is_internal=True) + drop_file_format_sql = f"DROP FILE FORMAT IF EXISTS {file_format_name}" + logger.debug(f"dropping file format with '{drop_file_format_sql}'") + cursor.execute(drop_file_format_sql, _is_internal=True) + + # in Snowflake, all parquet data is stored in a single column, $1, so we must select columns explicitly + # see (https://docs.snowflake.com/en/user-guide/script-data-load-transform-parquet.html) + if quote_identifiers: + parquet_columns = "$1:" + ",$1:".join(f'"{c}"' for c in df.columns) + else: + parquet_columns = "$1:" + ",$1:".join(df.columns) + copy_into_sql = ( + "COPY INTO {location} /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + "({columns}) " + 'FROM (SELECT {parquet_columns} FROM 
@"{stage_name}") ' + "FILE_FORMAT=(TYPE=PARQUET COMPRESSION={compression}) " + "PURGE=TRUE ON_ERROR={on_error}" + ).format( + location=location, + columns=columns, + parquet_columns=parquet_columns, + stage_name=stage_name, + compression=compression_map[compression], + on_error=on_error, + ) + logger.debug("copying into with '{}'".format(copy_into_sql)) + # Snowflake returns the original cursor if the query execution succeeded. + result_cursor = cursor.execute(copy_into_sql, _is_internal=True) + if result_cursor is None: + raise SnowflakeQueryUnknownError(copy_into_sql) + result_cursor.close() + + +@retry( + wait=wait_exponential(multiplier=1, max=4), + retry=retry_if_exception_type(ProgrammingError), + stop=stop_after_attempt(5), + reraise=True, +) +def create_file_format( + compression: str, compression_map: Dict[str, str], cursor: SnowflakeCursor +) -> str: + file_format_name = ( + '"' + "".join(random.choice(string.ascii_lowercase) for _ in range(5)) + '"' + ) + file_format_sql = ( + f"CREATE FILE FORMAT {file_format_name} " + f"/* Python:snowflake.connector.pandas_tools.write_pandas() */ " + f"TYPE=PARQUET COMPRESSION={compression_map[compression]}" + ) + logger.debug(f"creating file format with '{file_format_sql}'") + cursor.execute(file_format_sql, _is_internal=True) + return file_format_name + + +@retry( + wait=wait_exponential(multiplier=1, max=4), + retry=retry_if_exception_type(ProgrammingError), + stop=stop_after_attempt(5), + reraise=True, +) +def create_temporary_sfc_stage(cursor: SnowflakeCursor) -> str: + stage_name = "".join(random.choice(string.ascii_lowercase) for _ in range(5)) + create_stage_sql = ( + "create temporary stage /* Python:snowflake.connector.pandas_tools.write_pandas() */ " + '"{stage_name}"' + ).format(stage_name=stage_name) + logger.debug(f"creating stage with '{create_stage_sql}'") + result_cursor = cursor.execute(create_stage_sql, _is_internal=True) + if result_cursor is None: + raise 
SnowflakeQueryUnknownError(create_stage_sql) + result_cursor.fetchall() + return stage_name + + +def chunk_helper(lst: pd.DataFrame, n: int) -> Iterator[Tuple[int, pd.DataFrame]]: + """Helper generator to chunk a sequence efficiently with current index like if enumerate was called on sequence.""" + for i in range(0, len(lst), n): + yield int(i / n), lst[i : i + n] diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index 86eece9de96..04b7f33cc66 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -6,10 +6,9 @@ import dill import pandas as pd -from feast import errors from feast.base_feature_view import BaseFeatureView from feast.data_source import RequestDataSource -from feast.errors import RegistryInferenceFailure +from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError from feast.feature import Feature from feast.feature_view import FeatureView from feast.feature_view_projection import FeatureViewProjection @@ -17,6 +16,7 @@ OnDemandFeatureView as OnDemandFeatureViewProto, ) from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( + OnDemandFeatureViewMeta, OnDemandFeatureViewSpec, OnDemandInput, ) @@ -44,8 +44,7 @@ class OnDemandFeatureView(BaseFeatureView): """ # TODO(adchia): remove inputs from proto and declaration - inputs: Dict[str, Union[FeatureView, RequestDataSource]] - input_feature_views: Dict[str, FeatureView] + input_feature_view_projections: Dict[str, FeatureViewProjection] input_request_data_sources: Dict[str, RequestDataSource] udf: MethodType @@ -54,21 +53,22 @@ def __init__( self, name: str, features: List[Feature], - inputs: Dict[str, Union[FeatureView, RequestDataSource]], + inputs: Dict[str, Union[FeatureView, FeatureViewProjection, RequestDataSource]], udf: MethodType, ): """ Creates an OnDemandFeatureView object. 
""" super().__init__(name, features) - self.inputs = inputs - self.input_feature_views = {} - self.input_request_data_sources = {} + self.input_feature_view_projections: Dict[str, FeatureViewProjection] = {} + self.input_request_data_sources: Dict[str, RequestDataSource] = {} for input_ref, odfv_input in inputs.items(): if isinstance(odfv_input, RequestDataSource): self.input_request_data_sources[input_ref] = odfv_input + elif isinstance(odfv_input, FeatureViewProjection): + self.input_feature_view_projections[input_ref] = odfv_input else: - self.input_feature_views[input_ref] = odfv_input + self.input_feature_view_projections[input_ref] = odfv_input.projection self.udf = udf @@ -78,11 +78,37 @@ def proto_class(self) -> Type[OnDemandFeatureViewProto]: def __copy__(self): fv = OnDemandFeatureView( - name=self.name, features=self.features, inputs=self.inputs, udf=self.udf + name=self.name, + features=self.features, + inputs=dict( + **self.input_feature_view_projections, **self.input_request_data_sources + ), + udf=self.udf, ) fv.projection = copy.copy(self.projection) return fv + def __eq__(self, other): + if not super().__eq__(other): + return False + + if ( + not self.input_feature_view_projections + == other.input_feature_view_projections + ): + return False + + if not self.input_request_data_sources == other.input_request_data_sources: + return False + + if not self.udf.__code__.co_code == other.udf.__code__.co_code: + return False + + return True + + def __hash__(self): + return super().__hash__() + def to_proto(self) -> OnDemandFeatureViewProto: """ Converts an on demand feature view object to its protobuf representation. @@ -90,9 +116,16 @@ def to_proto(self) -> OnDemandFeatureViewProto: Returns: A OnDemandFeatureViewProto protobuf. 
""" + meta = OnDemandFeatureViewMeta() + if self.created_timestamp: + meta.created_timestamp.FromDatetime(self.created_timestamp) + if self.last_updated_timestamp: + meta.last_updated_timestamp.FromDatetime(self.last_updated_timestamp) inputs = {} - for input_ref, fv in self.input_feature_views.items(): - inputs[input_ref] = OnDemandInput(feature_view=fv.to_proto()) + for input_ref, fv_projection in self.input_feature_view_projections.items(): + inputs[input_ref] = OnDemandInput( + feature_view_projection=fv_projection.to_proto() + ) for input_ref, request_data_source in self.input_request_data_sources.items(): inputs[input_ref] = OnDemandInput( request_data_source=request_data_source.to_proto() @@ -107,7 +140,7 @@ def to_proto(self) -> OnDemandFeatureViewProto: ), ) - return OnDemandFeatureViewProto(spec=spec) + return OnDemandFeatureViewProto(spec=spec, meta=meta) @classmethod def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): @@ -128,6 +161,10 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): if on_demand_input.WhichOneof("input") == "feature_view": inputs[input_name] = FeatureView.from_proto( on_demand_input.feature_view + ).projection + elif on_demand_input.WhichOneof("input") == "feature_view_projection": + inputs[input_name] = FeatureViewProjection.from_proto( + on_demand_input.feature_view_projection ) else: inputs[input_name] = RequestDataSource.from_proto( @@ -155,6 +192,15 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): on_demand_feature_view_obj ) + if on_demand_feature_view_proto.meta.HasField("created_timestamp"): + on_demand_feature_view_obj.created_timestamp = ( + on_demand_feature_view_proto.meta.created_timestamp.ToDatetime() + ) + if on_demand_feature_view_proto.meta.HasField("last_updated_timestamp"): + on_demand_feature_view_obj.last_updated_timestamp = ( + on_demand_feature_view_proto.meta.last_updated_timestamp.ToDatetime() + ) + return 
on_demand_feature_view_obj def get_request_data_schema(self) -> Dict[str, ValueType]: @@ -168,9 +214,9 @@ def get_transformed_features_df( ) -> pd.DataFrame: # Apply on demand transformations columns_to_cleanup = [] - for input_fv in self.input_feature_views.values(): - for feature in input_fv.features: - full_feature_ref = f"{input_fv.name}__{feature.name}" + for input_fv_projection in self.input_feature_view_projections.values(): + for feature in input_fv_projection.features: + full_feature_ref = f"{input_fv_projection.name}__{feature.name}" if full_feature_ref in df_with_features.keys(): # Make sure the partial feature name is always present df_with_features[feature.name] = df_with_features[full_feature_ref] @@ -209,10 +255,12 @@ def infer_features(self): RegistryInferenceFailure: The set of features could not be inferred. """ df = pd.DataFrame() - for feature_view in self.input_feature_views.values(): - for feature in feature_view.features: + for feature_view_projection in self.input_feature_view_projections.values(): + for feature in feature_view_projection.features: dtype = feast_value_type_to_pandas_type(feature.dtype) - df[f"{feature_view.name}__{feature.name}"] = pd.Series(dtype=dtype) + df[f"{feature_view_projection.name}__{feature.name}"] = pd.Series( + dtype=dtype + ) df[f"{feature.name}"] = pd.Series(dtype=dtype) for request_data in self.input_request_data_sources.values(): for feature_name, feature_type in request_data.schema.items(): @@ -233,7 +281,7 @@ def infer_features(self): if specified_features not in inferred_features: missing_features.append(specified_features) if missing_features: - raise errors.SpecifiedFeaturesNotPresentError( + raise SpecifiedFeaturesNotPresentError( [f.name for f in missing_features], self.name ) else: diff --git a/sdk/python/feast/online_response.py b/sdk/python/feast/online_response.py index 359e216165f..bb69c6b9d95 100644 --- a/sdk/python/feast/online_response.py +++ b/sdk/python/feast/online_response.py @@ -12,24 
+12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from collections import defaultdict -from typing import Any, Dict, List, cast +from typing import Any, Dict, List import pandas as pd from feast.feature_view import DUMMY_ENTITY_ID -from feast.protos.feast.serving.ServingService_pb2 import ( - GetOnlineFeaturesRequestV2, - GetOnlineFeaturesResponse, -) -from feast.protos.feast.types.Value_pb2 import Value as Value -from feast.type_map import ( - _proto_value_to_value_type, - _python_value_to_proto_value, - feast_value_type_to_python_type, - python_values_to_feast_value_type, -) -from feast.value_type import ValueType +from feast.protos.feast.serving.ServingService_pb2 import GetOnlineFeaturesResponse +from feast.type_map import feast_value_type_to_python_type class OnlineResponse: @@ -46,33 +35,30 @@ def __init__(self, online_response_proto: GetOnlineFeaturesResponse): """ self.proto = online_response_proto # Delete DUMMY_ENTITY_ID from proto if it exists - for item in self.proto.field_values: - if DUMMY_ENTITY_ID in item.statuses: - del item.statuses[DUMMY_ENTITY_ID] - if DUMMY_ENTITY_ID in item.fields: - del item.fields[DUMMY_ENTITY_ID] - - @property - def field_values(self): - """ - Getter for GetOnlineResponse's field_values. - """ - return self.proto.field_values + for idx, val in enumerate(self.proto.metadata.feature_names.val): + if val == DUMMY_ENTITY_ID: + del self.proto.metadata.feature_names.val[idx] + for result in self.proto.results: + del result.values[idx] + del result.statuses[idx] + del result.event_timestamps[idx] + break def to_dict(self) -> Dict[str, Any]: """ Converts GetOnlineFeaturesResponse features into a dictionary form. 
""" - features_dict: Dict[str, List[Any]] = { - k: list() for row in self.field_values for k, _ in row.statuses.items() - } + response: Dict[str, List[Any]] = {} - for row in self.field_values: - for feature in features_dict.keys(): - native_type_value = feast_value_type_to_python_type(row.fields[feature]) - features_dict[feature].append(native_type_value) + for result in self.proto.results: + for idx, feature_ref in enumerate(self.proto.metadata.feature_names.val): + native_type_value = feast_value_type_to_python_type(result.values[idx]) + if feature_ref not in response: + response[feature_ref] = [native_type_value] + else: + response[feature_ref].append(native_type_value) - return features_dict + return response def to_df(self) -> pd.DataFrame: """ @@ -80,65 +66,3 @@ def to_df(self) -> pd.DataFrame: """ return pd.DataFrame(self.to_dict()) - - -def _infer_online_entity_rows( - entity_rows: List[Dict[str, Any]] -) -> List[GetOnlineFeaturesRequestV2.EntityRow]: - """ - Builds a list of EntityRow protos from Python native type format passed by user. - - Args: - entity_rows: A list of dictionaries where each key-value is an entity-name, entity-value pair. - Returns: - A list of EntityRow protos parsed from args. - """ - - entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows) - entity_row_list = [] - entity_type_map: Dict[str, ValueType] = dict() - entity_python_values_map = defaultdict(list) - - # Flatten keys-value dicts into lists for type inference - for entity in entity_rows_dicts: - for key, value in entity.items(): - if isinstance(value, Value): - inferred_type = _proto_value_to_value_type(value) - # If any ProtoValues were present their types must all be the same - if key in entity_type_map and entity_type_map.get(key) != inferred_type: - raise TypeError( - f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed." 
- ) - entity_type_map[key] = inferred_type - else: - entity_python_values_map[key].append(value) - - # Loop over all entities to infer dtype first in case of empty lists or nulls - for key, values in entity_python_values_map.items(): - inferred_type = python_values_to_feast_value_type(key, values) - - # If any ProtoValues were present their types must match the inferred type - if key in entity_type_map and entity_type_map.get(key) != inferred_type: - raise TypeError( - f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed." - ) - - entity_type_map[key] = inferred_type - - for entity in entity_rows_dicts: - fields = {} - for key, value in entity.items(): - if key not in entity_type_map: - raise ValueError( - f"field {key} cannot have all null values for type inference." - ) - - if isinstance(value, Value): - proto_value = value - else: - proto_value = _python_value_to_proto_value( - entity_type_map[key], [value] - )[0] - fields[key] = proto_value - entity_row_list.append(GetOnlineFeaturesRequestV2.EntityRow(fields=fields)) - return entity_row_list diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index 57eae83ac59..07c4c59b012 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -14,24 +14,17 @@ import logging from collections import defaultdict from datetime import datetime, timedelta +from enum import Enum from pathlib import Path from threading import Lock -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set from urllib.parse import urlparse from google.protobuf.internal.containers import RepeatedCompositeFieldContainer from google.protobuf.json_format import MessageToDict from proto import Message -from feast import importer from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import ( - FcoDiff, - RegistryDiff, - TransitionType, - diff_between, - tag_proto_objects_for_keep_delete_add, -) from 
feast.entity import Entity from feast.errors import ( ConflictingFeatureViewNames, @@ -39,14 +32,19 @@ FeatureServiceNotFoundException, FeatureViewNotFoundException, OnDemandFeatureViewNotFoundException, + SavedDatasetNotFound, ) from feast.feature_service import FeatureService from feast.feature_view import FeatureView +from feast.importer import import_class +from feast.infra.infra_object import Infra from feast.on_demand_feature_view import OnDemandFeatureView from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.registry_store import NoopRegistryStore from feast.repo_config import RegistryConfig +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView +from feast.saved_dataset import SavedDataset REGISTRY_SCHEMA_VERSION = "1" @@ -64,6 +62,48 @@ "": "LocalRegistryStore", } + +class FeastObjectType(Enum): + ENTITY = "entity" + FEATURE_VIEW = "feature view" + ON_DEMAND_FEATURE_VIEW = "on demand feature view" + REQUEST_FEATURE_VIEW = "request feature view" + FEATURE_SERVICE = "feature service" + + @staticmethod + def get_objects_from_registry( + registry: "Registry", project: str + ) -> Dict["FeastObjectType", List[Any]]: + return { + FeastObjectType.ENTITY: registry.list_entities(project=project), + FeastObjectType.FEATURE_VIEW: registry.list_feature_views(project=project), + FeastObjectType.ON_DEMAND_FEATURE_VIEW: registry.list_on_demand_feature_views( + project=project + ), + FeastObjectType.REQUEST_FEATURE_VIEW: registry.list_request_feature_views( + project=project + ), + FeastObjectType.FEATURE_SERVICE: registry.list_feature_services( + project=project + ), + } + + @staticmethod + def get_objects_from_repo_contents( + repo_contents: RepoContents, + ) -> Dict["FeastObjectType", Set[Any]]: + return { + FeastObjectType.ENTITY: repo_contents.entities, + FeastObjectType.FEATURE_VIEW: repo_contents.feature_views, + FeastObjectType.ON_DEMAND_FEATURE_VIEW: 
repo_contents.on_demand_feature_views, + FeastObjectType.REQUEST_FEATURE_VIEW: repo_contents.request_feature_views, + FeastObjectType.FEATURE_SERVICE: repo_contents.feature_services, + } + + +FEAST_OBJECT_TYPES = [feast_object_type for feast_object_type in FeastObjectType] + + logger = logging.getLogger(__name__) @@ -74,9 +114,7 @@ def get_registry_store_class_from_type(registry_store_type: str): registry_store_type = REGISTRY_STORE_CLASS_FOR_TYPE[registry_store_type] module_name, registry_store_class_name = registry_store_type.rsplit(".", 1) - return importer.get_class_from_type( - module_name, registry_store_class_name, "RegistryStore" - ) + return import_class(module_name, registry_store_class_name, "RegistryStore") def get_registry_store_class_from_scheme(registry_path: str): @@ -144,75 +182,6 @@ def clone(self) -> "Registry": new_registry._registry_store = NoopRegistryStore() return new_registry - # TODO(achals): This method needs to be filled out and used in the feast plan/apply methods. 
- @staticmethod - def diff_between( - current_registry: RegistryProto, new_registry: RegistryProto - ) -> RegistryDiff: - diff = RegistryDiff() - - attribute_to_object_type_str = { - "entities": "entity", - "feature_views": "feature view", - "feature_tables": "feature table", - "on_demand_feature_views": "on demand feature view", - "request_feature_views": "request feature view", - "feature_services": "feature service", - } - - for object_type in [ - "entities", - "feature_views", - "feature_tables", - "on_demand_feature_views", - "request_feature_views", - "feature_services", - ]: - ( - objects_to_keep, - objects_to_delete, - objects_to_add, - ) = tag_proto_objects_for_keep_delete_add( - getattr(current_registry, object_type), - getattr(new_registry, object_type), - ) - - for e in objects_to_add: - diff.add_fco_diff( - FcoDiff( - e.spec.name, - attribute_to_object_type_str[object_type], - None, - e, - [], - TransitionType.CREATE, - ) - ) - for e in objects_to_delete: - diff.add_fco_diff( - FcoDiff( - e.spec.name, - attribute_to_object_type_str[object_type], - e, - None, - [], - TransitionType.DELETE, - ) - ) - for e in objects_to_keep: - current_obj_proto = [ - _e - for _e in getattr(current_registry, object_type) - if _e.spec.name == e.spec.name - ][0] - diff.add_fco_diff( - diff_between( - current_obj_proto, e, attribute_to_object_type_str[object_type] - ) - ) - - return diff - def _initialize_registry(self): """Explicitly initializes the registry with an empty proto if it doesn't exist.""" try: @@ -222,6 +191,36 @@ def _initialize_registry(self): registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION self._registry_store.update_registry_proto(registry_proto) + def update_infra(self, infra: Infra, project: str, commit: bool = True): + """ + Updates the stored Infra object. + + Args: + infra: The new Infra object to be stored. 
+ project: Feast project that the Infra object refers to + commit: Whether the change should be persisted immediately + """ + self._prepare_registry_for_changes() + assert self.cached_registry_proto + + self.cached_registry_proto.infra.CopyFrom(infra.to_proto()) + if commit: + self.commit() + + def get_infra(self, project: str, allow_cache: bool = False) -> Infra: + """ + Retrieves the stored Infra object. + + Args: + project: Feast project that the Infra object refers to + allow_cache: Whether to allow returning this entity from a cached registry + + Returns: + The stored Infra object. + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) + return Infra.from_proto(registry_proto.infra) + def apply_entity(self, entity: Entity, project: str, commit: bool = True): """ Registers a single entity with Feast @@ -232,6 +231,12 @@ def apply_entity(self, entity: Entity, project: str, commit: bool = True): commit: Whether the change should be persisted immediately """ entity.is_valid() + + now = datetime.utcnow() + if not entity.created_timestamp: + entity._created_timestamp = now + entity._last_updated_timestamp = now + entity_proto = entity.to_proto() entity_proto.spec.project = project self._prepare_registry_for_changes() @@ -279,6 +284,11 @@ def apply_feature_service( feature_service: A feature service that will be registered project: Feast project that this entity belongs to """ + now = datetime.utcnow() + if not feature_service.created_timestamp: + feature_service.created_timestamp = now + feature_service.last_updated_timestamp = now + feature_service_proto = feature_service.to_proto() feature_service_proto.spec.project = project @@ -374,6 +384,12 @@ def apply_feature_view( commit: Whether the change should be persisted immediately """ feature_view.ensure_valid() + + now = datetime.utcnow() + if not feature_view.created_timestamp: + feature_view.created_timestamp = now + feature_view.last_updated_timestamp = now + feature_view_proto = 
feature_view.to_proto() feature_view_proto.spec.project = project self._prepare_registry_for_changes() @@ -497,6 +513,7 @@ def apply_materialization( existing_feature_view.materialization_intervals.append( (start_date, end_date) ) + existing_feature_view.last_updated_timestamp = datetime.utcnow() feature_view_proto = existing_feature_view.to_proto() feature_view_proto.spec.project = project del self.cached_registry_proto.feature_views[idx] @@ -634,6 +651,18 @@ def delete_feature_view(self, name: str, project: str, commit: bool = True): self.commit() return + for idx, existing_on_demand_feature_view_proto in enumerate( + self.cached_registry_proto.on_demand_feature_views + ): + if ( + existing_on_demand_feature_view_proto.spec.name == name + and existing_on_demand_feature_view_proto.spec.project == project + ): + del self.cached_registry_proto.on_demand_feature_views[idx] + if commit: + self.commit() + return + raise FeatureViewNotFoundException(name, project) def delete_entity(self, name: str, project: str, commit: bool = True): @@ -662,6 +691,85 @@ def delete_entity(self, name: str, project: str, commit: bool = True): raise EntityNotFoundException(name, project) + def apply_saved_dataset( + self, saved_dataset: SavedDataset, project: str, commit: bool = True + ): + """ + Registers a single entity with Feast + + Args: + saved_dataset: SavedDataset that will be added / updated to registry + project: Feast project that this dataset belongs to + commit: Whether the change should be persisted immediately + """ + now = datetime.utcnow() + if not saved_dataset.created_timestamp: + saved_dataset.created_timestamp = now + saved_dataset.last_updated_timestamp = now + + saved_dataset_proto = saved_dataset.to_proto() + saved_dataset_proto.spec.project = project + self._prepare_registry_for_changes() + assert self.cached_registry_proto + + for idx, existing_saved_dataset_proto in enumerate( + self.cached_registry_proto.saved_datasets + ): + if ( + 
existing_saved_dataset_proto.spec.name == saved_dataset_proto.spec.name + and existing_saved_dataset_proto.spec.project == project + ): + del self.cached_registry_proto.saved_datasets[idx] + break + + self.cached_registry_proto.saved_datasets.append(saved_dataset_proto) + if commit: + self.commit() + + def get_saved_dataset( + self, name: str, project: str, allow_cache: bool = False + ) -> SavedDataset: + """ + Retrieves a saved dataset. + + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry + + Returns: + Returns either the specified SavedDataset, or raises an exception if + none is found + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) + for saved_dataset in registry_proto.saved_datasets: + if ( + saved_dataset.spec.name == name + and saved_dataset.spec.project == project + ): + return SavedDataset.from_proto(saved_dataset) + raise SavedDatasetNotFound(name, project=project) + + def list_saved_datasets( + self, project: str, allow_cache: bool = False + ) -> List[SavedDataset]: + """ + Retrieves a list of all saved datasets in specified project + + Args: + project: Feast project + allow_cache: Whether to allow returning this dataset from a cached registry + + Returns: + Returns the list of SavedDatasets + """ + registry_proto = self._get_registry_proto(allow_cache=allow_cache) + return [ + SavedDataset.from_proto(saved_dataset) + for saved_dataset in registry_proto.saved_datasets + if saved_dataset.spec.project == project + ] + def commit(self): """Commits the state of the registry cache to the remote registry store.""" if self.cached_registry_proto: @@ -716,6 +824,12 @@ def to_dict(self, project: str) -> Dict[str, List[Any]]: registry_dict["requestFeatureViews"].append( MessageToDict(request_feature_view.to_proto()) ) + for saved_dataset in sorted( + self.list_saved_datasets(project=project), key=lambda item: item.name + ): + 
registry_dict["savedDatasets"].append( + MessageToDict(saved_dataset.to_proto()) + ) return registry_dict def _prepare_registry_for_changes(self): @@ -726,7 +840,7 @@ def _prepare_registry_for_changes(self): registry_proto = RegistryProto() registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION self.cached_registry_proto = registry_proto - self.cached_registry_proto_created = datetime.now() + self.cached_registry_proto_created = datetime.utcnow() return self.cached_registry_proto def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: @@ -745,7 +859,7 @@ def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: self.cached_registry_proto_ttl.total_seconds() > 0 # 0 ttl means infinity and ( - datetime.now() + datetime.utcnow() > ( self.cached_registry_proto_created + self.cached_registry_proto_ttl @@ -759,7 +873,7 @@ def _get_registry_proto(self, allow_cache: bool = False) -> RegistryProto: registry_proto = self._registry_store.get_registry_proto() self.cached_registry_proto = registry_proto - self.cached_registry_proto_created = datetime.now() + self.cached_registry_proto_created = datetime.utcnow() return registry_proto diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 70e64c845c5..3f32d18b80b 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -20,7 +20,7 @@ FeastFeatureServerTypeSetError, FeastProviderNotSetError, ) -from feast.importer import get_class_from_type +from feast.importer import import_class from feast.usage import log_exceptions # These dict exists so that: @@ -31,12 +31,14 @@ "datastore": "feast.infra.online_stores.datastore.DatastoreOnlineStore", "redis": "feast.infra.online_stores.redis.RedisOnlineStore", "dynamodb": "feast.infra.online_stores.dynamodb.DynamoDBOnlineStore", + "snowflake.online": "feast.infra.online_stores.snowflake.SnowflakeOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { "file": 
"feast.infra.offline_stores.file.FileOfflineStore", "bigquery": "feast.infra.offline_stores.bigquery.BigQueryOfflineStore", "redshift": "feast.infra.offline_stores.redshift.RedshiftOfflineStore", + "snowflake.offline": "feast.infra.offline_stores.snowflake.SnowflakeOfflineStore", } FEATURE_SERVER_CONFIG_CLASS_FOR_TYPE = { @@ -152,8 +154,12 @@ def _validate_online_store_config(cls, values): if "online_store" not in values: values["online_store"] = dict() - # Skip if we aren't creating the configuration from a dict + # Skip if we aren't creating the configuration from a dict or online store is null or it is a string like "None" or "null" if not isinstance(values["online_store"], Dict): + if isinstance(values["online_store"], str) and values[ + "online_store" + ].lower() in {"none", "null"}: + values["online_store"] = None return values # Make sure that the provider configuration is set. We need it to set the defaults @@ -302,7 +308,7 @@ def __repr__(self) -> str: def get_data_source_class_from_type(data_source_type: str): module_name, config_class_name = data_source_type.rsplit(".", 1) - return get_class_from_type(module_name, config_class_name, "Source") + return import_class(module_name, config_class_name, "DataSource") def get_online_config_from_type(online_store_type: str): @@ -313,7 +319,7 @@ def get_online_config_from_type(online_store_type: str): module_name, online_store_class_type = online_store_type.rsplit(".", 1) config_class_name = f"{online_store_class_type}Config" - return get_class_from_type(module_name, config_class_name, config_class_name) + return import_class(module_name, config_class_name, config_class_name) def get_offline_config_from_type(offline_store_type: str): @@ -324,7 +330,7 @@ def get_offline_config_from_type(offline_store_type: str): module_name, offline_store_class_type = offline_store_type.rsplit(".", 1) config_class_name = f"{offline_store_class_type}Config" - return get_class_from_type(module_name, config_class_name, 
config_class_name) + return import_class(module_name, config_class_name, config_class_name) def get_feature_server_config_from_type(feature_server_type: str): @@ -334,7 +340,7 @@ def get_feature_server_config_from_type(feature_server_type: str): feature_server_type = FEATURE_SERVER_CONFIG_CLASS_FOR_TYPE[feature_server_type] module_name, config_class_name = feature_server_type.rsplit(".", 1) - return get_class_from_type(module_name, config_class_name, config_class_name) + return import_class(module_name, config_class_name, config_class_name) def load_repo_config(repo_path: Path) -> RepoConfig: diff --git a/sdk/python/feast/repo_contents.py b/sdk/python/feast/repo_contents.py new file mode 100644 index 00000000000..9190af11eec --- /dev/null +++ b/sdk/python/feast/repo_contents.py @@ -0,0 +1,50 @@ +# Copyright 2022 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import NamedTuple, Set + +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_view import FeatureView +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.request_feature_view import RequestFeatureView + + +class RepoContents(NamedTuple): + """ + Represents the objects in a Feast feature repo. 
+ """ + + feature_views: Set[FeatureView] + on_demand_feature_views: Set[OnDemandFeatureView] + request_feature_views: Set[RequestFeatureView] + entities: Set[Entity] + feature_services: Set[FeatureService] + + def to_registry_proto(self) -> RegistryProto: + registry_proto = RegistryProto() + registry_proto.entities.extend([e.to_proto() for e in self.entities]) + registry_proto.feature_views.extend( + [fv.to_proto() for fv in self.feature_views] + ) + registry_proto.on_demand_feature_views.extend( + [fv.to_proto() for fv in self.on_demand_feature_views] + ) + registry_proto.request_feature_views.extend( + [fv.to_proto() for fv in self.request_feature_views] + ) + registry_proto.feature_services.extend( + [fs.to_proto() for fs in self.feature_services] + ) + return registry_proto diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 9299a36123f..8a3a202c6dd 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -5,22 +5,23 @@ import re import sys from importlib.abc import Loader +from importlib.machinery import ModuleSpec from pathlib import Path -from typing import List, Set, Union, cast +from typing import List, Set, Union import click from click.exceptions import BadParameter -from feast.base_feature_view import BaseFeatureView -from feast.diff.FcoDiff import TransitionType, tag_objects_for_keep_delete_add +from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add from feast.entity import Entity from feast.feature_service import FeatureService -from feast.feature_store import FeatureStore, RepoContents -from feast.feature_view import DUMMY_ENTITY, DUMMY_ENTITY_NAME, FeatureView +from feast.feature_store import FeatureStore +from feast.feature_view import DUMMY_ENTITY, FeatureView from feast.names import adjectives, animals from feast.on_demand_feature_view import OnDemandFeatureView -from feast.registry import Registry +from feast.registry import 
FEAST_OBJECT_TYPES, FeastObjectType, Registry from feast.repo_config import RepoConfig +from feast.repo_contents import RepoContents from feast.request_feature_view import RequestFeatureView from feast.usage import log_exceptions_and_usage @@ -78,7 +79,11 @@ def get_repo_files(repo_root: Path) -> List[Path]: ignore_files = get_ignore_files(repo_root, ignore_paths) # List all Python files in the root directory (recursively) - repo_files = {p.resolve() for p in repo_root.glob("**/*.py") if p.is_file()} + repo_files = { + p.resolve() + for p in repo_root.glob("**/*.py") + if p.is_file() and "__init__.py" != p.name + } # Ignore all files that match any of the ignore paths in .feastignore repo_files -= ignore_files @@ -127,20 +132,9 @@ def plan(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool) for data_source in data_sources: data_source.validate(store.config) - diff = store.plan(repo) - views_to_delete = [ - v - for v in diff.fco_diffs - if v.fco_type == "feature view" and v.transition_type == TransitionType.DELETE - ] - views_to_keep = [ - v - for v in diff.fco_diffs - if v.fco_type == "feature view" - and v.transition_type in {TransitionType.CREATE, TransitionType.UNCHANGED} - ] - - log_cli_output(diff, views_to_delete, views_to_keep) + registry_diff, infra_diff, _ = store._plan(repo) + click.echo(registry_diff.to_string()) + click.echo(infra_diff.to_string()) def _prepare_registry_and_repo(repo_config, repo_path): @@ -153,102 +147,64 @@ def _prepare_registry_and_repo(repo_config, repo_path): ) sys.exit(1) registry = store.registry - registry._initialize_registry() sys.dont_write_bytecode = True repo = parse_repo(repo_path) return project, registry, repo, store def extract_objects_for_apply_delete(project, registry, repo): - ( - entities_to_keep, - entities_to_delete, - entities_to_add, - ) = tag_objects_for_keep_delete_add( - set(registry.list_entities(project=project)), repo.entities - ) # TODO(achals): This code path should be refactored to 
handle added & kept entities separately. - entities_to_keep = set(entities_to_keep).union(entities_to_add) - views = tag_objects_for_keep_delete_add( - set(registry.list_feature_views(project=project)), repo.feature_views - ) - views_to_keep, views_to_delete, views_to_add = ( - cast(Set[FeatureView], views[0]), - cast(Set[FeatureView], views[1]), - cast(Set[FeatureView], views[2]), - ) - request_views = tag_objects_for_keep_delete_add( - set(registry.list_request_feature_views(project=project)), - repo.request_feature_views, - ) - request_views_to_keep: Set[RequestFeatureView] - request_views_to_delete: Set[RequestFeatureView] - request_views_to_add: Set[RequestFeatureView] - request_views_to_keep, request_views_to_delete, request_views_to_add = ( - cast(Set[RequestFeatureView], request_views[0]), - cast(Set[RequestFeatureView], request_views[1]), - cast(Set[RequestFeatureView], request_views[2]), - ) - base_views_to_keep: Set[Union[RequestFeatureView, FeatureView]] = { - *views_to_keep, - *views_to_add, - *request_views_to_keep, - *request_views_to_add, - } - base_views_to_delete: Set[Union[RequestFeatureView, FeatureView]] = { - *views_to_delete, - *request_views_to_delete, - } - odfvs = tag_objects_for_keep_delete_add( - set(registry.list_on_demand_feature_views(project=project)), - repo.on_demand_feature_views, - ) - odfvs_to_keep, odfvs_to_delete, odfvs_to_add = ( - cast(Set[OnDemandFeatureView], odfvs[0]), - cast(Set[OnDemandFeatureView], odfvs[1]), - cast(Set[OnDemandFeatureView], odfvs[2]), - ) - odfvs_to_keep = odfvs_to_keep.union(odfvs_to_add) ( - services_to_keep, - services_to_delete, - services_to_add, - ) = tag_objects_for_keep_delete_add( - set(registry.list_feature_services(project=project)), repo.feature_services - ) - services_to_keep = services_to_keep.union(services_to_add) - sys.dont_write_bytecode = False - # Apply all changes to the registry and infrastructure. 
+ _, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add(registry, project, repo) + all_to_apply: List[ - Union[Entity, BaseFeatureView, FeatureService, OnDemandFeatureView] + Union[ + Entity, FeatureView, RequestFeatureView, OnDemandFeatureView, FeatureService + ] ] = [] - all_to_apply.extend(entities_to_keep) - all_to_apply.extend(base_views_to_keep) - all_to_apply.extend(services_to_keep) - all_to_apply.extend(odfvs_to_keep) + for object_type in FEAST_OBJECT_TYPES: + to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) + all_to_apply.extend(to_apply) + all_to_delete: List[ - Union[Entity, BaseFeatureView, FeatureService, OnDemandFeatureView] + Union[ + Entity, FeatureView, RequestFeatureView, OnDemandFeatureView, FeatureService + ] ] = [] - all_to_delete.extend(entities_to_delete) - all_to_delete.extend(base_views_to_delete) - all_to_delete.extend(services_to_delete) - all_to_delete.extend(odfvs_to_delete) + for object_type in FEAST_OBJECT_TYPES: + all_to_delete.extend(objs_to_delete[object_type]) - return all_to_apply, all_to_delete, views_to_delete, views_to_keep - - -@log_exceptions_and_usage -def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool): + return ( + all_to_apply, + all_to_delete, + set( + objs_to_add[FeastObjectType.FEATURE_VIEW].union( + objs_to_update[FeastObjectType.FEATURE_VIEW] + ) + ), + objs_to_delete[FeastObjectType.FEATURE_VIEW], + ) - os.chdir(repo_path) - project, registry, repo, store = _prepare_registry_and_repo(repo_config, repo_path) +def apply_total_with_repo_instance( + store: FeatureStore, + project: str, + registry: Registry, + repo: RepoContents, + skip_source_validation: bool, +): if not skip_source_validation: data_sources = [t.batch_source for t in repo.feature_views] # Make sure the data source used by this feature view is supported by Feast for data_source in data_sources: data_source.validate(store.config) + 
registry_diff, infra_diff, new_infra = store._plan(repo) + # For each object in the registry, determine whether it should be kept or deleted. ( all_to_apply, @@ -257,49 +213,41 @@ def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation views_to_keep, ) = extract_objects_for_apply_delete(project, registry, repo) - diff = store.apply(all_to_apply, objects_to_delete=all_to_delete, partial=False) + click.echo(registry_diff.to_string()) - log_cli_output(diff, views_to_delete, views_to_keep) + if store._should_use_plan(): + store._apply_diffs(registry_diff, infra_diff, new_infra) + click.echo(infra_diff.to_string()) + else: + store.apply(all_to_apply, objects_to_delete=all_to_delete, partial=False) + log_infra_changes(views_to_keep, views_to_delete) -def log_cli_output(diff, views_to_delete, views_to_keep): +def log_infra_changes( + views_to_keep: List[FeatureView], views_to_delete: List[FeatureView] +): from colorama import Fore, Style - message_action_map = { - TransitionType.CREATE: ("Created", Fore.GREEN), - TransitionType.DELETE: ("Deleted", Fore.RED), - TransitionType.UNCHANGED: ("Unchanged", Fore.LIGHTBLUE_EX), - TransitionType.UPDATE: ("Updated", Fore.YELLOW), - } - for fco_diff in diff.fco_diffs: - if fco_diff.name == DUMMY_ENTITY_NAME: - continue - action, color = message_action_map[fco_diff.transition_type] - click.echo( - f"{action} {fco_diff.fco_type} {Style.BRIGHT + color}{fco_diff.name}{Style.RESET_ALL}" - ) - if fco_diff.transition_type == TransitionType.UPDATE: - for _p in fco_diff.fco_property_diffs: - click.echo( - f"\t{_p.property_name}: {Style.BRIGHT + color}{_p.val_existing}{Style.RESET_ALL} -> {Style.BRIGHT + Fore.LIGHTGREEN_EX}{_p.val_declared}{Style.RESET_ALL}" - ) - - views_to_keep_in_infra = [ - view for view in views_to_keep if isinstance(view, FeatureView) - ] - for name in [view.name for view in views_to_keep_in_infra]: + for view in views_to_keep: click.echo( - f"Deploying infrastructure for {Style.BRIGHT + 
Fore.GREEN}{name}{Style.RESET_ALL}" + f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" ) - views_to_delete_from_infra = [ - view for view in views_to_delete if isinstance(view, FeatureView) - ] - for name in [view.name for view in views_to_delete_from_infra]: + for view in views_to_delete: click.echo( - f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{name}{Style.RESET_ALL}" + f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" ) +@log_exceptions_and_usage +def apply_total(repo_config: RepoConfig, repo_path: Path, skip_source_validation: bool): + + os.chdir(repo_path) + project, registry, repo, store = _prepare_registry_and_repo(repo_config, repo_path) + apply_total_with_repo_instance( + store, project, registry, repo, skip_source_validation + ) + + @log_exceptions_and_usage def teardown(repo_config: RepoConfig, repo_path: Path): # Cannot pass in both repo_path and repo_config to FeatureStore. @@ -375,6 +323,7 @@ def init_repo(repo_name: str, template: str): import importlib.util spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path)) + assert isinstance(spec, ModuleSpec) bootstrap = importlib.util.module_from_spec(spec) assert isinstance(spec.loader, Loader) spec.loader.exec_module(bootstrap) diff --git a/sdk/python/feast/saved_dataset.py b/sdk/python/feast/saved_dataset.py new file mode 100644 index 00000000000..75b6d2c199f --- /dev/null +++ b/sdk/python/feast/saved_dataset.py @@ -0,0 +1,209 @@ +from abc import abstractmethod +from datetime import datetime +from typing import TYPE_CHECKING, Dict, List, Optional, Type, cast + +import pandas as pd +import pyarrow +from google.protobuf.json_format import MessageToJson + +from feast.data_source import DataSource +from feast.dqm.profilers.profiler import Profile, Profiler +from feast.protos.feast.core.SavedDataset_pb2 import SavedDataset as SavedDatasetProto +from feast.protos.feast.core.SavedDataset_pb2 import 
SavedDatasetMeta, SavedDatasetSpec +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) + +if TYPE_CHECKING: + from feast.infra.offline_stores.offline_store import RetrievalJob + + +class _StorageRegistry(type): + classes_by_proto_attr_name: Dict[str, Type["SavedDatasetStorage"]] = {} + + def __new__(cls, name, bases, dct): + kls = type.__new__(cls, name, bases, dct) + if dct.get("_proto_attr_name"): + cls.classes_by_proto_attr_name[dct["_proto_attr_name"]] = kls + return kls + + +class SavedDatasetStorage(metaclass=_StorageRegistry): + _proto_attr_name: str + + @staticmethod + def from_proto(storage_proto: SavedDatasetStorageProto) -> "SavedDatasetStorage": + proto_attr_name = cast(str, storage_proto.WhichOneof("kind")) + return _StorageRegistry.classes_by_proto_attr_name[proto_attr_name].from_proto( + storage_proto + ) + + @abstractmethod + def to_proto(self) -> SavedDatasetStorageProto: + ... + + @abstractmethod + def to_data_source(self) -> DataSource: + ... 
+ + +class SavedDataset: + name: str + features: List[str] + join_keys: List[str] + full_feature_names: bool + storage: SavedDatasetStorage + tags: Dict[str, str] + + created_timestamp: Optional[datetime] = None + last_updated_timestamp: Optional[datetime] = None + + min_event_timestamp: Optional[datetime] = None + max_event_timestamp: Optional[datetime] = None + + _retrieval_job: Optional["RetrievalJob"] = None + + def __init__( + self, + name: str, + features: List[str], + join_keys: List[str], + storage: SavedDatasetStorage, + full_feature_names: bool = False, + tags: Optional[Dict[str, str]] = None, + ): + self.name = name + self.features = features + self.join_keys = join_keys + self.storage = storage + self.full_feature_names = full_feature_names + self.tags = tags or {} + + self._retrieval_job = None + + def __repr__(self): + items = (f"{k} = {v}" for k, v in self.__dict__.items()) + return f"<{self.__class__.__name__}({', '.join(items)})>" + + def __str__(self): + return str(MessageToJson(self.to_proto())) + + def __hash__(self): + return hash((id(self), self.name)) + + def __eq__(self, other): + if not isinstance(other, SavedDataset): + raise TypeError( + "Comparisons should only involve FeatureService class objects." + ) + if self.name != other.name: + return False + + if sorted(self.features) != sorted(other.features): + return False + + return True + + @staticmethod + def from_proto(saved_dataset_proto: SavedDatasetProto): + """ + Converts a SavedDatasetProto to a SavedDataset object. + + Args: + saved_dataset_proto: A protobuf representation of a SavedDataset. 
+ """ + ds = SavedDataset( + name=saved_dataset_proto.spec.name, + features=list(saved_dataset_proto.spec.features), + join_keys=list(saved_dataset_proto.spec.join_keys), + full_feature_names=saved_dataset_proto.spec.full_feature_names, + storage=SavedDatasetStorage.from_proto(saved_dataset_proto.spec.storage), + tags=dict(saved_dataset_proto.spec.tags.items()), + ) + + if saved_dataset_proto.meta.HasField("created_timestamp"): + ds.created_timestamp = ( + saved_dataset_proto.meta.created_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("last_updated_timestamp"): + ds.last_updated_timestamp = ( + saved_dataset_proto.meta.last_updated_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("min_event_timestamp"): + ds.min_event_timestamp = ( + saved_dataset_proto.meta.min_event_timestamp.ToDatetime() + ) + if saved_dataset_proto.meta.HasField("max_event_timestamp"): + ds.max_event_timestamp = ( + saved_dataset_proto.meta.max_event_timestamp.ToDatetime() + ) + + return ds + + def to_proto(self) -> SavedDatasetProto: + """ + Converts a SavedDataset to its protobuf representation. + + Returns: + A SavedDatasetProto protobuf. 
+ """ + meta = SavedDatasetMeta() + if self.created_timestamp: + meta.created_timestamp.FromDatetime(self.created_timestamp) + if self.min_event_timestamp: + meta.min_event_timestamp.FromDatetime(self.min_event_timestamp) + if self.max_event_timestamp: + meta.max_event_timestamp.FromDatetime(self.max_event_timestamp) + + spec = SavedDatasetSpec( + name=self.name, + features=self.features, + join_keys=self.join_keys, + full_feature_names=self.full_feature_names, + storage=self.storage.to_proto(), + tags=self.tags, + ) + + feature_service_proto = SavedDatasetProto(spec=spec, meta=meta) + return feature_service_proto + + def with_retrieval_job(self, retrieval_job: "RetrievalJob") -> "SavedDataset": + self._retrieval_job = retrieval_job + return self + + def to_df(self) -> pd.DataFrame: + if not self._retrieval_job: + raise RuntimeError( + "To load this dataset use FeatureStore.get_saved_dataset() " + "instead of instantiating it directly." + ) + + return self._retrieval_job.to_df() + + def to_arrow(self) -> pyarrow.Table: + if not self._retrieval_job: + raise RuntimeError( + "To load this dataset use FeatureStore.get_saved_dataset() " + "instead of instantiating it directly." 
+ ) + + return self._retrieval_job.to_arrow() + + def as_reference(self, profiler: "Profiler") -> "ValidationReference": + return ValidationReference(profiler=profiler, dataset=self) + + def get_profile(self, profiler: Profiler) -> Profile: + return profiler.analyze_dataset(self.to_df()) + + +class ValidationReference: + dataset: SavedDataset + profiler: Profiler + + def __init__(self, dataset: SavedDataset, profiler: Profiler): + self.dataset = dataset + self.profiler = profiler + + @property + def profile(self) -> Profile: + return self.profiler.analyze_dataset(self.dataset.to_df()) diff --git a/sdk/python/feast/templates/aws/__init__.py b/sdk/python/feast/templates/aws/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/templates/gcp/__init__.py b/sdk/python/feast/templates/gcp/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/templates/local/__init__.py b/sdk/python/feast/templates/local/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/templates/snowflake/bootstrap.py b/sdk/python/feast/templates/snowflake/bootstrap.py new file mode 100644 index 00000000000..3712651a5d9 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/bootstrap.py @@ -0,0 +1,91 @@ +import click +import snowflake.connector + +from feast.infra.utils.snowflake_utils import write_pandas + + +def bootstrap(): + # Bootstrap() will automatically be called from the init_repo() during `feast init` + + import pathlib + from datetime import datetime, timedelta + + from feast.driver_test_data import create_driver_hourly_stats_df + + repo_path = pathlib.Path(__file__).parent.absolute() + config_file = repo_path / "feature_store.yaml" + + project_name = str(repo_path)[str(repo_path).rfind("/") + 1 :] + + end_date = datetime.now().replace(microsecond=0, second=0, minute=0) + start_date = end_date - timedelta(days=15) + + driver_entities = [1001, 1002, 1003, 1004, 1005] + 
driver_df = create_driver_hourly_stats_df(driver_entities, start_date, end_date) + + repo_path = pathlib.Path(__file__).parent.absolute() + data_path = repo_path / "data" + data_path.mkdir(exist_ok=True) + driver_stats_path = data_path / "driver_stats.parquet" + driver_df.to_parquet(path=str(driver_stats_path), allow_truncated_timestamps=True) + + snowflake_deployment_url = click.prompt( + "Snowflake Deployment URL (exclude .snowflakecomputing.com):" + ) + snowflake_user = click.prompt("Snowflake User Name:") + snowflake_password = click.prompt("Snowflake Password:", hide_input=True) + snowflake_role = click.prompt("Snowflake Role Name (Case Sensitive):") + snowflake_warehouse = click.prompt("Snowflake Warehouse Name (Case Sensitive):") + snowflake_database = click.prompt("Snowflake Database Name (Case Sensitive):") + + if click.confirm( + f'Should I upload example data to Snowflake (overwriting "{project_name}_feast_driver_hourly_stats" table)?', + default=True, + ): + + conn = snowflake.connector.connect( + account=snowflake_deployment_url, + user=snowflake_user, + password=snowflake_password, + role=snowflake_role, + warehouse=snowflake_warehouse, + application="feast", + ) + + cur = conn.cursor() + cur.execute(f'CREATE DATABASE IF NOT EXISTS "{snowflake_database}"') + cur.execute(f'USE DATABASE "{snowflake_database}"') + cur.execute('CREATE SCHEMA IF NOT EXISTS "PUBLIC"') + cur.execute('USE SCHEMA "PUBLIC"') + cur.execute(f'DROP TABLE IF EXISTS "{project_name}_feast_driver_hourly_stats"') + write_pandas( + conn, + driver_df, + f"{project_name}_feast_driver_hourly_stats", + auto_create_table=True, + ) + conn.close() + + repo_path = pathlib.Path(__file__).parent.absolute() + config_file = repo_path / "feature_store.yaml" + + replace_str_in_file( + config_file, "SNOWFLAKE_DEPLOYMENT_URL", snowflake_deployment_url + ) + replace_str_in_file(config_file, "SNOWFLAKE_USER", snowflake_user) + replace_str_in_file(config_file, "SNOWFLAKE_PASSWORD", snowflake_password) + 
replace_str_in_file(config_file, "SNOWFLAKE_ROLE", snowflake_role) + replace_str_in_file(config_file, "SNOWFLAKE_WAREHOUSE", snowflake_warehouse) + replace_str_in_file(config_file, "SNOWFLAKE_DATABASE", snowflake_database) + + +def replace_str_in_file(file_path, match_str, sub_str): + with open(file_path, "r") as f: + contents = f.read() + contents = contents.replace(match_str, sub_str) + with open(file_path, "wt") as f: + f.write(contents) + + +if __name__ == "__main__": + bootstrap() diff --git a/sdk/python/feast/templates/snowflake/driver_repo.py b/sdk/python/feast/templates/snowflake/driver_repo.py new file mode 100644 index 00000000000..a63c6cb5030 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/driver_repo.py @@ -0,0 +1,64 @@ +from datetime import timedelta + +import yaml + +from feast import Entity, Feature, FeatureView, SnowflakeSource, ValueType + +# Define an entity for the driver. Entities can be thought of as primary keys used to +# retrieve features. Entities are also used to join multiple tables/views during the +# construction of feature vectors +driver = Entity( + # Name of the entity. Must be unique within a project + name="driver_id", + # The join key of an entity describes the storage level field/column on which + # features can be looked up. The join key is also used to join feature + # tables/views when building feature vectors + join_key="driver_id", +) + +# Indicates a data source from which feature values can be retrieved. Sources are queried when building training +# datasets or materializing features into an online store. 
+project_name = yaml.safe_load(open("feature_store.yaml"))["project"] + +driver_stats_source = SnowflakeSource( + # The Snowflake table where features can be found + database=yaml.safe_load(open("feature_store.yaml"))["offline_store"]["database"], + table=f"{project_name}_feast_driver_hourly_stats", + # The event timestamp is used for point-in-time joins and for ensuring only + # features within the TTL are returned + event_timestamp_column="event_timestamp", + # The (optional) created timestamp is used to ensure there are no duplicate + # feature rows in the offline store or when building training datasets + created_timestamp_column="created", +) + +# Feature views are a grouping based on how features are stored in either the +# online or offline store. +driver_stats_fv = FeatureView( + # The unique name of this feature view. Two feature views in a single + # project cannot have the same name + name="driver_hourly_stats", + # The list of entities specifies the keys required for joining or looking + # up features from this feature view. The reference provided in this field + # correspond to the name of a defined entity (or entities) + entities=["driver_id"], + # The timedelta is the maximum age that each feature value may have + # relative to its lookup time. For historical features (used in training), + # TTL is relative to each timestamp provided in the entity dataframe. 
+ # TTL also allows for eviction of keys from online stores and limits the + # amount of historical scanning required for historical feature values + # during retrieval + ttl=timedelta(weeks=52), + # The list of features defined below act as a schema to both define features + # for both materialization of features into a store, and are used as references + # during retrieval for building a training dataset or serving features + features=[ + Feature(name="conv_rate", dtype=ValueType.FLOAT), + Feature(name="acc_rate", dtype=ValueType.FLOAT), + Feature(name="avg_daily_trips", dtype=ValueType.INT64), + ], + # Batch sources are used to find feature values. In the case of this feature + # view we will query a source table on Redshift for driver statistics + # features + batch_source=driver_stats_source, +) diff --git a/sdk/python/feast/templates/snowflake/feature_store.yaml b/sdk/python/feast/templates/snowflake/feature_store.yaml new file mode 100644 index 00000000000..9757ea2ead0 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/feature_store.yaml @@ -0,0 +1,11 @@ +project: my_project +registry: registry.db +provider: local +offline_store: + type: snowflake.offline + account: SNOWFLAKE_DEPLOYMENT_URL + user: SNOWFLAKE_USER + password: SNOWFLAKE_PASSWORD + role: SNOWFLAKE_ROLE + warehouse: SNOWFLAKE_WAREHOUSE + database: SNOWFLAKE_DATABASE diff --git a/sdk/python/feast/templates/snowflake/test.py b/sdk/python/feast/templates/snowflake/test.py new file mode 100644 index 00000000000..32aa6380d51 --- /dev/null +++ b/sdk/python/feast/templates/snowflake/test.py @@ -0,0 +1,65 @@ +from datetime import datetime, timedelta + +import pandas as pd +from driver_repo import driver, driver_stats_fv + +from feast import FeatureStore + + +def main(): + pd.set_option("display.max_columns", None) + pd.set_option("display.width", 1000) + + # Load the feature store from the current path + fs = FeatureStore(repo_path=".") + + # Deploy the feature store to Snowflake + 
print("Deploying feature store to Snowflake...") + fs.apply([driver, driver_stats_fv]) + + # Select features + features = ["driver_hourly_stats:conv_rate", "driver_hourly_stats:acc_rate"] + + # Create an entity dataframe. This is the dataframe that will be enriched with historical features + entity_df = pd.DataFrame( + { + "event_timestamp": [ + pd.Timestamp(dt, unit="ms", tz="UTC").round("ms") + for dt in pd.date_range( + start=datetime.now() - timedelta(days=3), + end=datetime.now(), + periods=3, + ) + ], + "driver_id": [1001, 1002, 1003], + } + ) + + print("Retrieving training data...") + + # Retrieve historical features by joining the entity dataframe to the Snowflake table source + training_df = fs.get_historical_features( + features=features, entity_df=entity_df + ).to_df() + + print() + print(training_df) + + print() + print("Loading features into the online store...") + fs.materialize_incremental(end_date=datetime.now()) + + print() + print("Retrieving online features...") + + # Retrieve features from the online store + online_features = fs.get_online_features( + features=features, entity_rows=[{"driver_id": 1001}, {"driver_id": 1002}], + ).to_dict() + + print() + print(pd.DataFrame.from_dict(online_features)) + + +if __name__ == "__main__": + main() diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 969ca658625..15ddd147391 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -12,14 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import re -from datetime import datetime -from typing import Any, Dict, List, Optional, Set, Sized, Tuple, Type +from datetime import datetime, timezone +from typing import ( + Any, + Dict, + List, + Optional, + Sequence, + Set, + Sized, + Tuple, + Type, + Union, + cast, +) import numpy as np import pandas as pd import pyarrow -from google.protobuf.pyext.cpp_message import GeneratedProtocolMessageType from google.protobuf.timestamp_pb2 import Timestamp from feast.protos.feast.types.Value_pb2 import ( @@ -32,7 +42,7 @@ StringList, ) from feast.protos.feast.types.Value_pb2 import Value as ProtoValue -from feast.value_type import ValueType +from feast.value_type import ListType, ValueType def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: @@ -50,8 +60,17 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: if val_attr is None: return None val = getattr(field_value_proto, val_attr) + + # If it's a _LIST type extract the list. if hasattr(val, "val"): val = list(val.val) + + # Convert UNIX_TIMESTAMP values to `datetime` + if val_attr == "unix_timestamp_list_val": + val = [datetime.fromtimestamp(v, tz=timezone.utc) for v in val] + elif val_attr == "unix_timestamp_val": + val = datetime.fromtimestamp(val, tz=timezone.utc) + return val @@ -97,6 +116,7 @@ def python_type_to_feast_value_type( type_map = { "int": ValueType.INT64, "str": ValueType.STRING, + "string": ValueType.STRING, # pandas.StringDtype "float": ValueType.DOUBLE, "bytes": ValueType.BYTES, "float64": ValueType.DOUBLE, @@ -105,6 +125,8 @@ def python_type_to_feast_value_type( "uint64": ValueType.INT64, "int32": ValueType.INT32, "uint32": ValueType.INT32, + "int16": ValueType.INT32, + "uint16": ValueType.INT32, "uint8": ValueType.INT32, "int8": ValueType.INT32, "bool": ValueType.BOOL, @@ -119,48 +141,50 @@ def python_type_to_feast_value_type( if type_name in type_map: return type_map[type_name] - if type_name == "ndarray" or isinstance(value, list): - if recurse: 
- - # Convert to list type - list_items = pd.core.series.Series(value) - - # This is the final type which we infer from the list - common_item_value_type = None - for item in list_items: - if isinstance(item, ProtoValue): - current_item_value_type: ValueType = _proto_value_to_value_type( - item - ) - else: - # Get the type from the current item, only one level deep - current_item_value_type = python_type_to_feast_value_type( - name=name, value=item, recurse=False - ) - # Validate whether the type stays consistent - if ( - common_item_value_type - and not common_item_value_type == current_item_value_type - ): - raise ValueError( - f"List value type for field {name} is inconsistent. " - f"{common_item_value_type} different from " - f"{current_item_value_type}." - ) - common_item_value_type = current_item_value_type - if common_item_value_type is None: - return ValueType.UNKNOWN - return ValueType[common_item_value_type.name + "_LIST"] - else: - assert value + if isinstance(value, np.ndarray) and str(value.dtype) in type_map: + item_type = type_map[str(value.dtype)] + return ValueType[item_type.name + "_LIST"] + + if isinstance(value, (list, np.ndarray)): + # if the value's type is "ndarray" and we couldn't infer from "value.dtype" + # this is most probably array of "object", + # so we need to iterate over objects and try to infer type of each item + if not recurse: raise ValueError( - f"Value type for field {name} is {value.dtype.__str__()} but " + f"Value type for field {name} is {type(value)} but " f"recursion is not allowed. Array types can only be one level " f"deep." 
) - assert value - return type_map[value.dtype.__str__()] + # This is the final type which we infer from the list + common_item_value_type = None + for item in value: + if isinstance(item, ProtoValue): + current_item_value_type: ValueType = _proto_value_to_value_type(item) + else: + # Get the type from the current item, only one level deep + current_item_value_type = python_type_to_feast_value_type( + name=name, value=item, recurse=False + ) + # Validate whether the type stays consistent + if ( + common_item_value_type + and not common_item_value_type == current_item_value_type + ): + raise ValueError( + f"List value type for field {name} is inconsistent. " + f"{common_item_value_type} different from " + f"{current_item_value_type}." + ) + common_item_value_type = current_item_value_type + if common_item_value_type is None: + return ValueType.UNKNOWN + return ValueType[common_item_value_type.name + "_LIST"] + + raise ValueError( + f"Value with native type {type_name} " + f"cannot be converted into Feast value type" + ) def python_values_to_feast_value_type( @@ -195,7 +219,7 @@ def _type_err(item, dtype): PYTHON_LIST_VALUE_TYPE_TO_PROTO_VALUE: Dict[ - ValueType, Tuple[GeneratedProtocolMessageType, str, List[Type]] + ValueType, Tuple[ListType, str, List[Type]] ] = { ValueType.FLOAT_LIST: ( FloatList, @@ -212,7 +236,7 @@ def _type_err(item, dtype): ValueType.UNIX_TIMESTAMP_LIST: ( Int64List, "int64_list_val", - [np.int64, np.int32, int], + [np.datetime64, np.int64, np.int32, int, datetime, Timestamp], ), ValueType.STRING_LIST: (StringList, "string_list_val", [np.str_, str]), ValueType.BOOL_LIST: (BoolList, "bool_list_val", [np.bool_, bool]), @@ -238,6 +262,28 @@ def _type_err(item, dtype): } +def _python_datetime_to_int_timestamp( + values: Sequence[Any], +) -> Sequence[Union[int, np.int_]]: + # Fast path for Numpy array. 
+ if isinstance(values, np.ndarray) and isinstance(values.dtype, np.datetime64): + if values.ndim != 1: + raise ValueError("Only 1 dimensional arrays are supported.") + return cast(Sequence[np.int_], values.astype("datetime64[s]").astype(np.int_)) + + int_timestamps = [] + for value in values: + if isinstance(value, datetime): + int_timestamps.append(int(value.timestamp())) + elif isinstance(value, Timestamp): + int_timestamps.append(int(value.ToSeconds())) + elif isinstance(value, np.datetime64): + int_timestamps.append(value.astype("datetime64[s]").astype(np.int_)) + else: + int_timestamps.append(int(value)) + return int_timestamps + + def _python_value_to_proto_value( feast_value_type: ValueType, values: List[Any] ) -> List[ProtoValue]: @@ -272,8 +318,18 @@ def _python_value_to_proto_value( ) raise _type_err(first_invalid, valid_types[0]) + if feast_value_type == ValueType.UNIX_TIMESTAMP_LIST: + int_timestamps_lists = ( + _python_datetime_to_int_timestamp(value) for value in values + ) + return [ + # ProtoValue does actually accept `np.int_` but the typing complains. + ProtoValue(unix_timestamp_list_val=Int64List(val=ts)) # type: ignore + for ts in int_timestamps_lists + ] + return [ - ProtoValue(**{field_name: proto_type(val=value)}) + ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore if value is not None else ProtoValue() for value in values @@ -282,15 +338,9 @@ def _python_value_to_proto_value( # Handle scalar types below else: if feast_value_type == ValueType.UNIX_TIMESTAMP: - if isinstance(sample, datetime): - return [ - ProtoValue(int64_val=int(value.timestamp())) for value in values - ] - elif isinstance(sample, Timestamp): - return [ - ProtoValue(int64_val=int(value.ToSeconds())) for value in values - ] - return [ProtoValue(int64_val=int(value)) for value in values] + int_timestamps = _python_datetime_to_int_timestamp(values) + # ProtoValue does actually accept `np.int_` but the typing complains. 
+ return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] # type: ignore if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE: ( @@ -365,30 +415,38 @@ def _proto_value_to_value_type(proto_value: ProtoValue) -> ValueType: def pa_to_feast_value_type(pa_type_as_str: str) -> ValueType: - if re.match(r"^timestamp", pa_type_as_str): - return ValueType.INT64 + is_list = False + if pa_type_as_str.startswith("list", "") - type_map = { - "int32": ValueType.INT32, - "int64": ValueType.INT64, - "double": ValueType.DOUBLE, - "float": ValueType.FLOAT, - "string": ValueType.STRING, - "binary": ValueType.BYTES, - "bool": ValueType.BOOL, - "list": ValueType.INT32_LIST, - "list": ValueType.INT64_LIST, - "list": ValueType.DOUBLE_LIST, - "list": ValueType.FLOAT_LIST, - "list": ValueType.STRING_LIST, - "list": ValueType.BYTES_LIST, - "list": ValueType.BOOL_LIST, - "null": ValueType.NULL, - } - return type_map[pa_type_as_str] + if pa_type_as_str.startswith("timestamp"): + value_type = ValueType.UNIX_TIMESTAMP + else: + type_map = { + "int32": ValueType.INT32, + "int64": ValueType.INT64, + "double": ValueType.DOUBLE, + "float": ValueType.FLOAT, + "string": ValueType.STRING, + "binary": ValueType.BYTES, + "bool": ValueType.BOOL, + "null": ValueType.NULL, + } + value_type = type_map[pa_type_as_str] + + if is_list: + value_type = ValueType[value_type.name + "_LIST"] + + return value_type def bq_to_feast_value_type(bq_type_as_str: str) -> ValueType: + is_list = False + if bq_type_as_str.startswith("ARRAY<"): + is_list = True + bq_type_as_str = bq_type_as_str[6:-1] + type_map: Dict[str, ValueType] = { "DATETIME": ValueType.UNIX_TIMESTAMP, "TIMESTAMP": ValueType.UNIX_TIMESTAMP, @@ -400,15 +458,14 @@ def bq_to_feast_value_type(bq_type_as_str: str) -> ValueType: "BYTES": ValueType.BYTES, "BOOL": ValueType.BOOL, "BOOLEAN": ValueType.BOOL, # legacy sql data type - "ARRAY": ValueType.INT64_LIST, - "ARRAY": ValueType.DOUBLE_LIST, - "ARRAY": ValueType.STRING_LIST, - "ARRAY": 
ValueType.BYTES_LIST, - "ARRAY": ValueType.BOOL_LIST, "NULL": ValueType.NULL, } - return type_map[bq_type_as_str] + value_type = type_map[bq_type_as_str] + if is_list: + value_type = ValueType[value_type.name + "_LIST"] + + return value_type def redshift_to_feast_value_type(redshift_type_as_str: str) -> ValueType: @@ -431,6 +488,28 @@ def redshift_to_feast_value_type(redshift_type_as_str: str) -> ValueType: return type_map[redshift_type_as_str.lower()] +def snowflake_python_type_to_feast_value_type( + snowflake_python_type_as_str: str, +) -> ValueType: + + type_map = { + "str": ValueType.STRING, + "float64": ValueType.DOUBLE, + "int64": ValueType.INT64, + "uint64": ValueType.INT64, + "int32": ValueType.INT32, + "uint32": ValueType.INT32, + "int16": ValueType.INT32, + "uint16": ValueType.INT32, + "uint8": ValueType.INT32, + "int8": ValueType.INT32, + "datetime64[ns]": ValueType.UNIX_TIMESTAMP, + "object": ValueType.UNKNOWN, + } + + return type_map[snowflake_python_type_as_str.lower()] + + def pa_to_redshift_value_type(pa_type: pyarrow.DataType) -> str: # PyArrow types: https://arrow.apache.org/docs/python/api/datatypes.html # Redshift type: https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html diff --git a/sdk/python/feast/usage.py b/sdk/python/feast/usage.py index 1a2bf2e2907..6a6a7146ce7 100644 --- a/sdk/python/feast/usage.py +++ b/sdk/python/feast/usage.py @@ -29,7 +29,7 @@ import requests -from feast.constants import FEAST_USAGE +from feast.constants import DEFAULT_FEAST_USAGE_VALUE, FEAST_USAGE from feast.version import get_version USAGE_ENDPOINT = "https://usage.feast.dev" @@ -37,7 +37,7 @@ _logger = logging.getLogger(__name__) _executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) -_is_enabled = os.getenv(FEAST_USAGE, default="True") == "True" +_is_enabled = os.getenv(FEAST_USAGE, default=DEFAULT_FEAST_USAGE_VALUE) == "True" _constant_attributes = { "session_id": str(uuid.uuid4()), diff --git a/sdk/python/feast/value_type.py 
b/sdk/python/feast/value_type.py index 3d1817421a2..1904baf7bbb 100644 --- a/sdk/python/feast/value_type.py +++ b/sdk/python/feast/value_type.py @@ -12,6 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. import enum +from typing import Type, Union + +from feast.protos.feast.types.Value_pb2 import ( + BoolList, + BytesList, + DoubleList, + FloatList, + Int32List, + Int64List, + StringList, +) class ValueType(enum.Enum): @@ -37,3 +48,14 @@ class ValueType(enum.Enum): BOOL_LIST = 17 UNIX_TIMESTAMP_LIST = 18 NULL = 19 + + +ListType = Union[ + Type[BoolList], + Type[BytesList], + Type[DoubleList], + Type[FloatList], + Type[Int32List], + Type[Int64List], + Type[StringList], +] diff --git a/sdk/python/requirements/py3.7-ci-requirements.txt b/sdk/python/requirements/py3.7-ci-requirements.txt index 017652873a8..d5f654e515c 100644 --- a/sdk/python/requirements/py3.7-ci-requirements.txt +++ b/sdk/python/requirements/py3.7-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.7-ci-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,19 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations +anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python assertpy==1.1 # via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 # via aiohttp asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -40,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -53,15 +67,21 @@ 
azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython +backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -75,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -103,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -119,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist -fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 
+fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -153,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -162,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 # via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -198,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # google-api-core @@ -206,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -220,48 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 -httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 +identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx importlib-metadata==4.2.0 # via - # backports.entry-points-selectable + # click # flake8 + # great-expectations # jsonschema # moto # pep517 # 
pluggy # pre-commit # pytest + # redis # virtualenv importlib-resources==5.4.0 # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -269,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 +msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -295,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect 
-mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 +pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 +pluggy==1.0.0 # via pytest -portalocker==1.7.1 +portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -353,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore -protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -365,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -372,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark 
pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -383,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -416,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -426,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -441,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) -regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -467,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # 
google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -495,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -505,7 +679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -523,39 +699,94 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 +toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # 
matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via # aiohttp # anyio + # argon2-cffi # asgiref # async-timeout + # great-expectations + # h11 # importlib-metadata + # jsonschema # libcst # mypy # pydantic @@ -565,38 +796,50 @@ typing-extensions==4.0.1 # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.6.0 +zipp==3.7.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.7-requirements.txt b/sdk/python/requirements/py3.7-requirements.txt index b2473f1c70c..c2ad63fdea3 100644 --- 
a/sdk/python/requirements/py3.7-requirements.txt +++ b/sdk/python/requirements/py3.7-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.7-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,38 +41,40 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 +httptools==0.3.0 # via uvicorn idna==3.3 # via # anyio # requests -importlib-metadata==4.8.2 - # via jsonschema +importlib-metadata==4.10.1 + # via + # click + # jsonschema importlib-resources==5.4.0 # via jsonschema jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow -pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -80,7 +82,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -96,11 +98,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth 
-pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -112,7 +114,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -125,13 +127,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -141,13 +143,15 @@ typing-extensions==4.0.1 # via # anyio # asgiref + # h11 # importlib-metadata + # jsonschema # pydantic # starlette # uvicorn -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn @@ -155,7 +159,7 @@ watchgod==0.7 # via uvicorn websockets==10.1 # via uvicorn -zipp==3.6.0 +zipp==3.7.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index a2df153c01e..7a94294c956 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.8-ci-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,17 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations +anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python assertpy==1.1 
# via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 + # via aiohttp +asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -38,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -51,15 +67,21 @@ azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython +backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -73,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -101,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -117,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist 
-fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 +fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -151,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -160,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 # via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -196,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # google-api-core @@ -204,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -218,37 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 
-httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 +identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx +importlib-metadata==4.2.0 + # via + # click + # flake8 + # great-expectations + # jsonschema + # moto + # pep517 + # pluggy + # pre-commit + # pytest + # redis + # virtualenv importlib-resources==5.4.0 # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -256,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 
+msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -282,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect -mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 +pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 +pluggy==1.0.0 # via pytest -portalocker==1.7.1 +portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -340,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore 
-protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -352,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -359,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -370,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -403,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -413,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -428,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via 
redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) -regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -454,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -482,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -492,7 +679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -510,74 +699,151 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 
+toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via + # aiohttp + # anyio + # argon2-cffi + # asgiref # async-timeout + # great-expectations + # h11 + # importlib-metadata + # jsonschema # libcst # mypy # pydantic + # starlette # typing-inspect + # uvicorn + # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated 
+ # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp -zipp==3.6.0 - # via importlib-resources +zipp==3.7.0 + # via + # importlib-metadata + # importlib-resources + # pep517 # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index e6887dea556..90b42760132 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.8-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,15 +41,15 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 +httptools==0.3.0 # via uvicorn idna==3.3 # via @@ -59,18 +59,18 @@ importlib-resources==5.4.0 # via jsonschema jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow 
-pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -78,7 +78,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -94,11 +94,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -110,7 +110,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -123,13 +123,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -137,9 +137,9 @@ tqdm==4.62.3 # via feast (setup.py) typing-extensions==4.0.1 # via pydantic -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn @@ -147,7 +147,7 @@ watchgod==0.7 # via uvicorn websockets==10.1 # via uvicorn -zipp==3.6.0 +zipp==3.7.0 # via importlib-resources # The following packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index aa4cee54e46..1421d7e3c31 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -4,7 +4,7 @@ # # pip-compile --extra=ci --output-file=requirements/py3.9-ci-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata adal==1.2.7 # via @@ -20,17 +20,33 @@ aiosignal==1.2.0 # via aiohttp alabaster==0.7.12 # via sphinx -anyio==3.4.0 +altair==4.2.0 + # via great-expectations 
+anyio==3.5.0 # via starlette appdirs==1.4.4 # via black -asgiref==3.4.1 +appnope==0.1.2 + # via + # ipykernel + # ipython +argon2-cffi==21.3.0 + # via notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +asgiref==3.5.0 # via uvicorn +asn1crypto==1.4.0 + # via + # oscrypto + # snowflake-connector-python assertpy==1.1 # via feast (setup.py) -async-timeout==4.0.1 +async-timeout==4.0.2 + # via aiohttp +asynctest==0.13.0 # via aiohttp -attrs==21.2.0 +attrs==21.4.0 # via # aiohttp # black @@ -38,7 +54,7 @@ attrs==21.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.21.0 +azure-core==1.21.1 # via # adlfs # azure-identity @@ -51,15 +67,21 @@ azure-storage-blob==12.9.0 # via adlfs babel==2.9.1 # via sphinx -backports.entry-points-selectable==1.1.1 - # via virtualenv +backcall==0.2.0 + # via ipython +backports.zoneinfo==0.2.1 + # via + # pytz-deprecation-shim + # tzlocal black==19.10b0 # via feast (setup.py) -boto3==1.17.112 +bleach==4.1.0 + # via nbconvert +boto3==1.20.46 # via # feast (setup.py) # moto -botocore==1.20.112 +botocore==1.23.46 # via # boto3 # moto @@ -73,25 +95,30 @@ certifi==2021.10.8 # minio # msrest # requests + # snowflake-connector-python cffi==1.15.0 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography + # snowflake-connector-python cfgv==3.3.1 # via pre-commit -charset-normalizer==2.0.8 +charset-normalizer==2.0.11 # via # aiohttp # requests -click==7.1.2 + # snowflake-connector-python +click==8.0.3 # via # black # feast (setup.py) + # great-expectations # pip-tools # uvicorn colorama==0.4.4 # via feast (setup.py) -coverage[toml]==6.2 +coverage[toml]==6.3 # via pytest-cov cryptography==3.3.2 # via @@ -101,13 +128,24 @@ cryptography==3.3.2 # feast (setup.py) # moto # msal -decorator==5.1.0 - # via gcsfs + # pyjwt + # pyopenssl + # snowflake-connector-python +debugpy==1.5.1 + # via ipykernel +decorator==5.1.1 + # via + # gcsfs + # ipython +defusedxml==0.7.1 + # via nbconvert +deprecated==1.2.13 + # via redis 
deprecation==2.1.0 # via testcontainers dill==0.3.4 # via feast (setup.py) -distlib==0.3.3 +distlib==0.3.4 # via virtualenv docker==5.0.3 # via @@ -117,31 +155,36 @@ docutils==0.17.1 # via # sphinx # sphinx-rtd-theme +entrypoints==0.3 + # via + # altair + # jupyter-client + # nbconvert execnet==1.9.0 # via pytest-xdist -fastapi==0.70.0 +fastapi==0.73.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -filelock==3.4.0 +filelock==3.4.2 # via virtualenv firebase-admin==4.5.2 # via feast (setup.py) flake8==4.0.1 # via feast (setup.py) -frozenlist==1.2.0 +frozenlist==1.3.0 # via # aiohttp # aiosignal -fsspec==2021.11.1 +fsspec==2022.1.0 # via # adlfs # gcsfs -gcsfs==2021.11.1 +gcsfs==2022.1.0 # via feast (setup.py) -google-api-core[grpc]==1.31.4 +google-api-core[grpc]==1.31.5 # via # feast (setup.py) # firebase-admin @@ -151,7 +194,7 @@ google-api-core[grpc]==1.31.4 # google-cloud-core # google-cloud-datastore # google-cloud-firestore -google-api-python-client==2.32.0 +google-api-python-client==2.36.0 # via firebase-admin google-auth==1.35.0 # via @@ -160,16 +203,17 @@ google-auth==1.35.0 # google-api-python-client # google-auth-httplib2 # google-auth-oauthlib + # google-cloud-core # google-cloud-storage google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==0.4.6 # via gcsfs -google-cloud-bigquery==2.31.0 +google-cloud-bigquery==2.32.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.10.1 +google-cloud-bigquery-storage==2.11.0 # via feast (setup.py) -google-cloud-core==1.4.4 +google-cloud-core==1.7.2 # via # feast (setup.py) # google-cloud-bigquery @@ -196,7 +240,9 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +great-expectations==0.14.4 + # via feast (setup.py) +grpcio==1.43.0 # via # feast (setup.py) # google-api-core @@ -204,13 +250,13 @@ grpcio==1.42.0 # grpcio-reflection # grpcio-testing # grpcio-tools 
-grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) grpcio-testing==1.34.0 # via feast (setup.py) grpcio-tools==1.34.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn hiredis==2.0.0 # via feast (setup.py) @@ -218,35 +264,95 @@ httplib2==0.20.2 # via # google-api-python-client # google-auth-httplib2 -httptools==0.2.0 +httptools==0.3.0 # via uvicorn -identify==2.4.0 +identify==2.4.7 # via pre-commit idna==3.3 # via # anyio # requests + # snowflake-connector-python # yarl imagesize==1.3.0 # via sphinx +importlib-metadata==4.2.0 + # via + # click + # flake8 + # great-expectations + # jsonschema + # moto + # pep517 + # pluggy + # pre-commit + # pytest + # redis + # virtualenv +importlib-resources==5.4.0 + # via jsonschema iniconfig==1.1.1 # via pytest -isodate==0.6.0 +ipykernel==6.7.0 + # via + # ipywidgets + # notebook +ipython==7.31.1 + # via + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # ipywidgets + # nbformat + # notebook +ipywidgets==7.6.5 + # via great-expectations +isodate==0.6.1 # via msrest isort==5.10.1 # via feast (setup.py) +jedi==0.18.1 + # via ipython jinja2==3.0.3 # via + # altair # feast (setup.py) + # great-expectations # moto + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via # boto3 # botocore -jsonschema==4.2.1 - # via feast (setup.py) -libcst==0.3.23 +jsonpatch==1.32 + # via great-expectations +jsonpointer==2.2 + # via jsonpatch +jsonschema==4.4.0 + # via + # altair + # feast (setup.py) + # great-expectations + # nbformat +jupyter-client==7.1.2 + # via + # ipykernel + # nbclient + # notebook +jupyter-core==4.9.1 + # via + # jupyter-client + # nbconvert + # nbformat + # notebook +jupyterlab-pygments==0.1.2 + # via nbconvert +jupyterlab-widgets==1.0.2 + # via ipywidgets +libcst==0.4.1 # via # google-cloud-bigquery-storage # google-cloud-datastore @@ -254,23 +360,29 @@ markupsafe==2.0.1 # via # jinja2 # moto +matplotlib-inline==0.1.3 + # via + # ipykernel + # ipython mccabe==0.6.1 # 
via flake8 minio==7.1.0 # via feast (setup.py) +mistune==0.8.4 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) -more-itertools==8.12.0 - # via pytest -moto==2.2.17 +moto==3.0.2 # via feast (setup.py) msal==1.16.0 # via # azure-identity # msal-extensions -msal-extensions==0.3.0 +msal-extensions==0.3.1 # via azure-identity msgpack==1.0.3 # via cachecontrol @@ -280,57 +392,97 @@ msrest==0.6.21 # msrestazure msrestazure==0.6.4 # via adlfs -multidict==5.2.0 +multidict==6.0.2 # via # aiohttp # yarl -mypy==0.790 +mypy==0.931 # via feast (setup.py) mypy-extensions==0.4.3 # via # mypy # typing-inspect -mypy-protobuf==1.24 - # via feast (setup.py) +mypy-protobuf==3.1.0 + # via feast (setup.py) +nbclient==0.5.10 + # via nbconvert +nbconvert==6.4.1 + # via notebook +nbformat==5.1.3 + # via + # ipywidgets + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.4 + # via + # ipykernel + # jupyter-client + # nbclient + # notebook nodeenv==1.6.0 # via pre-commit -numpy==1.21.4 +notebook==6.4.8 + # via widgetsnbextension +numpy==1.21.5 # via + # altair + # great-expectations # pandas # pandavro # pyarrow -oauthlib==3.1.1 + # scipy +oauthlib==3.2.0 # via requests-oauthlib +oscrypto==1.2.1 + # via snowflake-connector-python packaging==21.3 # via + # bleach # deprecation # google-api-core # google-cloud-bigquery # google-cloud-firestore # pytest + # redis # sphinx -pandas==1.3.4 +pandas==1.3.5 # via + # altair # feast (setup.py) + # great-expectations # pandavro + # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert +parso==0.8.3 + # via jedi pathspec==0.9.0 # via black pbr==5.8.0 # via mock pep517==0.12.0 # via pip-tools +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython pip-tools==6.4.0 # via feast (setup.py) -platformdirs==2.4.0 +platformdirs==2.4.1 # via virtualenv -pluggy==0.13.1 +pluggy==1.0.0 # via pytest -portalocker==1.7.1 
+portalocker==2.3.2 # via msal-extensions -pre-commit==2.16.0 +pre-commit==2.17.0 # via feast (setup.py) +prometheus-client==0.13.1 + # via notebook +prompt-toolkit==3.0.26 + # via ipython proto-plus==1.19.6 # via # feast (setup.py) @@ -338,7 +490,7 @@ proto-plus==1.19.6 # google-cloud-bigquery-storage # google-cloud-datastore # google-cloud-firestore -protobuf==3.19.1 +protobuf==3.19.4 # via # feast (setup.py) # google-api-core @@ -350,6 +502,10 @@ protobuf==3.19.1 # mypy-protobuf # proto-plus # tensorflow-metadata +ptyprocess==0.7.0 + # via + # pexpect + # terminado py==1.11.0 # via # pytest @@ -357,7 +513,9 @@ py==1.11.0 py-cpuinfo==8.0.0 # via pytest-benchmark pyarrow==6.0.1 - # via feast (setup.py) + # via + # feast (setup.py) + # snowflake-connector-python pyasn1==0.4.8 # via # pyasn1-modules @@ -368,25 +526,35 @@ pycodestyle==2.8.0 # via flake8 pycparser==2.21 # via cffi -pydantic==1.8.2 +pycryptodomex==3.14.0 + # via snowflake-connector-python +pydantic==1.9.0 # via # fastapi # feast (setup.py) pyflakes==2.4.0 # via flake8 -pygments==2.10.0 - # via sphinx +pygments==2.11.2 + # via + # ipython + # jupyterlab-pygments + # nbconvert + # sphinx pyjwt[crypto]==2.3.0 # via # adal # msal -pyparsing==3.0.6 + # snowflake-connector-python +pyopenssl==21.0.0 + # via snowflake-connector-python +pyparsing==2.4.7 # via + # great-expectations # httplib2 # packaging -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema -pytest==6.0.0 +pytest==6.2.5 # via # feast (setup.py) # pytest-benchmark @@ -401,7 +569,7 @@ pytest-benchmark==3.4.1 # via feast (setup.py) pytest-cov==3.0.0 # via feast (setup.py) -pytest-forked==1.3.0 +pytest-forked==1.4.0 # via pytest-xdist pytest-lazy-fixture==0.6.3 # via feast (setup.py) @@ -411,13 +579,15 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==2.4.0 +pytest-xdist==2.5.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal # botocore # google-cloud-bigquery + # 
great-expectations + # jupyter-client # moto # pandas python-dotenv==0.19.2 @@ -426,21 +596,27 @@ pytz==2021.3 # via # babel # google-api-core + # great-expectations # moto # pandas + # snowflake-connector-python +pytz-deprecation-shim==0.1.0.post0 + # via tzlocal pyyaml==6.0 # via # feast (setup.py) # libcst # pre-commit # uvicorn -redis==3.5.3 - # via redis-py-cluster -redis-py-cluster==2.1.2 +pyzmq==22.3.0 + # via + # jupyter-client + # notebook +redis==4.1.2 # via feast (setup.py) -regex==2021.11.10 +regex==2022.1.18 # via black -requests==2.26.0 +requests==2.27.1 # via # adal # adlfs @@ -452,27 +628,38 @@ requests==2.26.0 # google-api-core # google-cloud-bigquery # google-cloud-storage + # great-expectations # moto # msal # msrest # requests-oauthlib # responses + # snowflake-connector-python # sphinx -requests-oauthlib==1.3.0 +requests-oauthlib==1.3.1 # via # google-auth-oauthlib # msrest -responses==0.16.0 +responses==0.17.0 # via moto rsa==4.8 # via google-auth -s3transfer==0.4.2 +ruamel.yaml==0.17.17 + # via great-expectations +ruamel.yaml.clib==0.2.6 + # via ruamel.yaml +s3transfer==0.5.0 # via boto3 +scipy==1.7.3 + # via great-expectations +send2trash==1.8.0 + # via notebook six==1.16.0 # via # absl-py # azure-core # azure-identity + # bleach # cryptography # google-api-core # google-auth @@ -480,9 +667,11 @@ six==1.16.0 # google-cloud-core # google-resumable-media # grpcio + # isodate # mock # msrestazure # pandavro + # pyopenssl # python-dateutil # responses # virtualenv @@ -490,7 +679,9 @@ sniffio==1.2.0 # via anyio snowballstemmer==2.2.0 # via sphinx -sphinx==4.3.1 +snowflake-connector-python[pandas]==2.7.3 + # via feast (setup.py) +sphinx==4.3.2 # via # feast (setup.py) # sphinx-rtd-theme @@ -508,72 +699,151 @@ sphinxcontrib-qthelp==1.0.3 # via sphinx sphinxcontrib-serializinghtml==1.1.5 # via sphinx -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) 
-tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) +termcolor==1.1.0 + # via great-expectations +terminado==0.13.1 + # via notebook testcontainers==3.4.2 # via feast (setup.py) +testpath==0.5.0 + # via nbconvert toml==0.10.2 # via # black # feast (setup.py) # pre-commit # pytest -tomli==1.2.2 +tomli==2.0.0 # via # coverage + # mypy # pep517 +toolz==0.11.2 + # via altair +tornado==6.1 + # via + # ipykernel + # jupyter-client + # notebook + # terminado tqdm==4.62.3 - # via feast (setup.py) -typed-ast==1.4.3 + # via + # feast (setup.py) + # great-expectations +traitlets==5.1.1 + # via + # ipykernel + # ipython + # ipywidgets + # jupyter-client + # jupyter-core + # matplotlib-inline + # nbclient + # nbconvert + # nbformat + # notebook +typed-ast==1.5.2 # via # black # mypy +types-futures==3.3.8 + # via types-protobuf +types-protobuf==3.19.7 + # via + # feast (setup.py) + # mypy-protobuf +types-python-dateutil==2.8.9 + # via feast (setup.py) +types-pytz==2021.3.4 + # via feast (setup.py) +types-pyyaml==6.0.4 + # via feast (setup.py) +types-redis==4.1.13 + # via feast (setup.py) +types-requests==2.27.8 + # via feast (setup.py) +types-setuptools==57.4.8 + # via feast (setup.py) +types-tabulate==0.8.5 + # via feast (setup.py) +types-urllib3==1.26.8 + # via types-requests typing-extensions==4.0.1 # via + # aiohttp + # anyio + # argon2-cffi + # asgiref # async-timeout + # great-expectations + # h11 + # importlib-metadata + # jsonschema # libcst # mypy # pydantic + # starlette # typing-inspect + # uvicorn + # yarl typing-inspect==0.7.1 # via libcst +tzdata==2021.5 + # via pytz-deprecation-shim +tzlocal==4.1 + # via great-expectations uritemplate==4.1.1 # via google-api-python-client -urllib3==1.26.7 +urllib3==1.26.8 # via # botocore # feast (setup.py) # minio # requests # responses -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.1 # via feast (setup.py) uvloop==0.16.0 # via uvicorn -virtualenv==20.10.0 +virtualenv==20.13.0 # via pre-commit 
watchgod==0.7 # via uvicorn -websocket-client==1.2.1 +wcwidth==0.2.5 + # via prompt-toolkit +webencodings==0.5.1 + # via bleach +websocket-client==1.2.3 # via docker websockets==10.1 # via uvicorn werkzeug==2.0.2 # via moto -wheel==0.37.0 +wheel==0.37.1 # via pip-tools +widgetsnbextension==3.5.2 + # via ipywidgets wrapt==1.13.3 - # via testcontainers + # via + # deprecated + # testcontainers xmltodict==0.12.0 # via moto yarl==1.7.2 # via aiohttp +zipp==3.7.0 + # via + # importlib-metadata + # importlib-resources + # pep517 # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 4cb45fd8098..8db9fd4b14f 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -4,21 +4,21 @@ # # pip-compile --output-file=requirements/py3.9-requirements.txt # -absl-py==0.12.0 +absl-py==1.0.0 # via tensorflow-metadata -anyio==3.4.0 +anyio==3.5.0 # via starlette asgiref==3.4.1 # via uvicorn -attrs==21.2.0 +attrs==21.4.0 # via jsonschema cachetools==4.2.4 # via google-auth certifi==2021.10.8 # via requests -charset-normalizer==2.0.8 +charset-normalizer==2.0.10 # via requests -click==7.1.2 +click==8.0.3 # via # feast (setup.py) # uvicorn @@ -26,13 +26,13 @@ colorama==0.4.4 # via feast (setup.py) dill==0.3.4 # via feast (setup.py) -fastapi==0.70.0 +fastapi==0.72.0 # via feast (setup.py) -fastavro==1.4.7 +fastavro==1.4.9 # via # feast (setup.py) # pandavro -google-api-core==2.2.2 +google-api-core==2.4.0 # via feast (setup.py) google-auth==2.3.3 # via google-api-core @@ -41,15 +41,15 @@ googleapis-common-protos==1.52.0 # feast (setup.py) # google-api-core # tensorflow-metadata -grpcio==1.42.0 +grpcio==1.43.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.42.0 +grpcio-reflection==1.43.0 # via feast (setup.py) -h11==0.12.0 +h11==0.13.0 # via uvicorn -httptools==0.2.0 
+httptools==0.3.0 # via uvicorn idna==3.3 # via @@ -57,18 +57,18 @@ idna==3.3 # requests jinja2==3.0.3 # via feast (setup.py) -jsonschema==4.2.1 +jsonschema==4.4.0 # via feast (setup.py) markupsafe==2.0.1 # via jinja2 mmh3==3.0.0 # via feast (setup.py) -numpy==1.21.4 +numpy==1.21.5 # via # pandas # pandavro # pyarrow -pandas==1.3.4 +pandas==1.3.5 # via # feast (setup.py) # pandavro @@ -76,7 +76,7 @@ pandavro==1.5.2 # via feast (setup.py) proto-plus==1.19.6 # via feast (setup.py) -protobuf==3.19.1 +protobuf==3.19.3 # via # feast (setup.py) # google-api-core @@ -92,11 +92,11 @@ pyasn1==0.4.8 # rsa pyasn1-modules==0.2.8 # via google-auth -pydantic==1.8.2 +pydantic==1.9.0 # via # fastapi # feast (setup.py) -pyrsistent==0.18.0 +pyrsistent==0.18.1 # via jsonschema python-dateutil==2.8.2 # via pandas @@ -108,7 +108,7 @@ pyyaml==6.0 # via # feast (setup.py) # uvicorn -requests==2.26.0 +requests==2.27.1 # via google-api-core rsa==4.8 # via google-auth @@ -121,13 +121,13 @@ six==1.16.0 # python-dateutil sniffio==1.2.0 # via anyio -starlette==0.16.0 +starlette==0.17.1 # via fastapi tabulate==0.8.9 # via feast (setup.py) tenacity==8.0.1 # via feast (setup.py) -tensorflow-metadata==1.5.0 +tensorflow-metadata==1.6.0 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -135,9 +135,9 @@ tqdm==4.62.3 # via feast (setup.py) typing-extensions==4.0.1 # via pydantic -urllib3==1.26.7 +urllib3==1.26.8 # via requests -uvicorn[standard]==0.15.0 +uvicorn[standard]==0.17.0 # via feast (setup.py) uvloop==0.16.0 # via uvicorn diff --git a/sdk/python/setup.py b/sdk/python/setup.py index e797a1216ca..7535987f833 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -40,7 +40,7 @@ REQUIRES_PYTHON = ">=3.7.0" REQUIRED = [ - "Click==7.*", + "Click==8.*", "colorama>=0.3.9", "dill==0.3.*", "fastavro>=1.1.0", @@ -77,7 +77,7 @@ ] REDIS_REQUIRED = [ - "redis-py-cluster==2.1.2", + "redis>=4.1.0", "hiredis>=2.0.0", ] @@ -86,40 +86,63 @@ "docker>=5.0.2", ] -CI_REQUIRED = [ - 
"cryptography==3.3.2", - "flake8", - "black==19.10b0", - "isort>=5", - "grpcio-tools==1.34.0", - "grpcio-testing==1.34.0", - "minio==7.1.0", - "mock==2.0.0", - "moto", - "mypy==0.790", - "mypy-protobuf==1.24", - "avro==1.10.0", - "gcsfs", - "urllib3>=1.25.4", - "pytest==6.0.0", - "pytest-cov", - "pytest-xdist", - "pytest-benchmark>=3.4.1", - "pytest-lazy-fixture==0.6.3", - "pytest-timeout==1.4.2", - "pytest-ordering==0.6.*", - "pytest-mock==1.10.4", - "Sphinx!=4.0.0", - "sphinx-rtd-theme", - "testcontainers==3.4.2", - "adlfs==0.5.9", - "firebase-admin==4.5.2", - "pre-commit", - "assertpy==1.1", - "pip-tools", -] + GCP_REQUIRED + REDIS_REQUIRED + AWS_REQUIRED - -DEV_REQUIRED = ["mypy-protobuf==1.*", "grpcio-testing==1.*"] + CI_REQUIRED +SNOWFLAKE_REQUIRED = [ + "snowflake-connector-python[pandas]>=2.7.3", +] + +GE_REQUIRED = [ + "great_expectations>=0.14.0,<0.15.0" +] + +CI_REQUIRED = ( + [ + "cryptography==3.3.2", + "flake8", + "black==19.10b0", + "isort>=5", + "grpcio-tools==1.34.0", + "grpcio-testing==1.34.0", + "minio==7.1.0", + "mock==2.0.0", + "moto", + "mypy==0.931", + "mypy-protobuf==3.1.0", + "avro==1.10.0", + "gcsfs", + "urllib3>=1.25.4", + "pytest>=6.0.0", + "pytest-cov", + "pytest-xdist", + "pytest-benchmark>=3.4.1", + "pytest-lazy-fixture==0.6.3", + "pytest-timeout==1.4.2", + "pytest-ordering==0.6.*", + "pytest-mock==1.10.4", + "Sphinx!=4.0.0,<4.4.0", + "sphinx-rtd-theme", + "testcontainers==3.4.2", + "adlfs==0.5.9", + "firebase-admin==4.5.2", + "pre-commit", + "assertpy==1.1", + "pip-tools", + "types-protobuf", + "types-python-dateutil", + "types-pytz", + "types-PyYAML", + "types-redis", + "types-requests", + "types-setuptools", + "types-tabulate", + ] + + GCP_REQUIRED + + REDIS_REQUIRED + + AWS_REQUIRED + + SNOWFLAKE_REQUIRED + + GE_REQUIRED +) + +DEV_REQUIRED = ["mypy-protobuf>=3.1.0", "grpcio-testing==1.*"] + CI_REQUIRED # Get git repo root directory repo_root = str(pathlib.Path(__file__).resolve().parent.parent.parent) @@ -218,6 +241,8 @@ def 
run(self): "gcp": GCP_REQUIRED, "aws": AWS_REQUIRED, "redis": REDIS_REQUIRED, + "snowflake": SNOWFLAKE_REQUIRED, + "ge": GE_REQUIRED, }, include_package_data=True, license="Apache", @@ -231,7 +256,7 @@ def run(self): ], entry_points={"console_scripts": ["feast=feast.cli:cli"]}, use_scm_version=use_scm_version, - setup_requires=["setuptools_scm", "grpcio", "grpcio-tools==1.34.0", "mypy-protobuf==1.*", "sphinx!=4.0.0"], + setup_requires=["setuptools_scm", "grpcio", "grpcio-tools==1.34.0", "mypy-protobuf==3.1.0", "sphinx!=4.0.0"], package_data={ "": [ "protos/feast/**/*.proto", diff --git a/sdk/python/tests/__init__.py b/sdk/python/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 61e591f2373..49f32379a3b 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -13,7 +13,9 @@ # limitations under the License. import logging import multiprocessing +import time from datetime import datetime, timedelta +from multiprocessing import Process from sys import platform from typing import List @@ -21,6 +23,7 @@ import pytest from _pytest.nodes import Item +from feast import FeatureStore from tests.data.data_creator import create_dataset from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, @@ -137,23 +140,41 @@ def simple_dataset_2() -> pd.DataFrame: return pd.DataFrame.from_dict(data) +def start_test_local_server(repo_path: str, port: int): + fs = FeatureStore(repo_path) + fs.serve("localhost", port, no_access_log=True) + + @pytest.fixture( params=FULL_REPO_CONFIGS, scope="session", ids=[str(c) for c in FULL_REPO_CONFIGS] ) -def environment(request): - e = construct_test_environment(request.param) +def environment(request, worker_id: str): + e = construct_test_environment(request.param, worker_id=worker_id) + proc = Process( + target=start_test_local_server, + args=(e.feature_store.repo_path, 
e.get_local_server_port()), + daemon=True, + ) + if e.python_feature_server and e.test_repo_config.provider == "local": + proc.start() + # Wait for server to start + time.sleep(3) def cleanup(): e.feature_store.teardown() + if proc.is_alive(): + proc.kill() request.addfinalizer(cleanup) + return e @pytest.fixture() def local_redis_environment(request, worker_id): - - e = construct_test_environment(IntegrationTestRepoConfig(online_store=REDIS_CONFIG)) + e = construct_test_environment( + IntegrationTestRepoConfig(online_store=REDIS_CONFIG), worker_id=worker_id + ) def cleanup(): e.feature_store.teardown() diff --git a/sdk/python/tests/data/__init__.py b/sdk/python/tests/data/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/tests/data/data_creator.py b/sdk/python/tests/data/data_creator.py index e5355b40bbc..e08597b67b2 100644 --- a/sdk/python/tests/data/data_creator.py +++ b/sdk/python/tests/data/data_creator.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import List +from typing import Dict, List, Optional import pandas as pd from pytz import timezone, utc @@ -38,7 +38,7 @@ def create_dataset( def get_entities_for_value_type(value_type: ValueType) -> List: - value_type_map = { + value_type_map: Dict[ValueType, List] = { ValueType.INT32: [1, 2, 1, 3, 3], ValueType.INT64: [1, 2, 1, 3, 3], ValueType.FLOAT: [1.0, 2.0, 1.0, 3.0, 3.0], @@ -48,18 +48,25 @@ def get_entities_for_value_type(value_type: ValueType) -> List: def get_feature_values_for_dtype( - dtype: str, is_list: bool, has_empty_list: bool + dtype: Optional[str], is_list: bool, has_empty_list: bool ) -> List: if dtype is None: return [0.1, None, 0.3, 4, 5] # TODO(adchia): for int columns, consider having a better error when dealing with None values (pandas int dfs can't # have na) - dtype_map = { + dtype_map: Dict[str, List] = { "int32": [1, 2, 3, 4, 5], "int64": [1, 2, 3, 4, 5], "float": [1.0, None, 3.0, 4.0, 5.0], "string": ["1", None, "3", "4", 
"5"], "bool": [True, None, False, True, False], + "datetime": [ + datetime(1980, 1, 1), + None, + datetime(1981, 1, 1), + datetime(1982, 1, 1), + datetime(1982, 1, 1), + ], } non_list_val = dtype_map[dtype] if is_list: diff --git a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py index e4c7abed0f4..84d57bf0381 100644 --- a/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py +++ b/sdk/python/tests/example_repos/example_feature_repo_with_duplicated_featureview_names.py @@ -10,7 +10,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - input=driver_hourly_stats, + batch_source=driver_hourly_stats, ttl=Duration(seconds=10), tags={}, ) @@ -19,7 +19,7 @@ name="driver_hourly_stats", # Intentionally use the same FeatureView name entities=["driver_id"], online=False, - input=driver_hourly_stats, + batch_source=driver_hourly_stats, ttl=Duration(seconds=10), tags={}, ) diff --git a/sdk/python/tests/foo_provider.py b/sdk/python/tests/foo_provider.py index 8e9254cd3d0..1d4ce7d6cb6 100644 --- a/sdk/python/tests/foo_provider.py +++ b/sdk/python/tests/foo_provider.py @@ -10,6 +10,7 @@ from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.registry import Registry +from feast.saved_dataset import SavedDataset class FooProvider(Provider): @@ -75,3 +76,6 @@ def online_read( requested_features: List[str] = None, ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: pass + + def retrieve_saved_dataset(self, config: RepoConfig, dataset: SavedDataset): + pass diff --git a/sdk/python/tests/integration/e2e/test_usage_e2e.py b/sdk/python/tests/integration/e2e/test_usage_e2e.py index f55fbce55cf..0bae9730632 100644 --- 
a/sdk/python/tests/integration/e2e/test_usage_e2e.py +++ b/sdk/python/tests/integration/e2e/test_usage_e2e.py @@ -66,10 +66,16 @@ def test_usage_on(dummy_exporter, enabling_toggle): test_feature_store.apply([entity]) - assert len(dummy_exporter) == 1 + assert len(dummy_exporter) == 3 assert { - "entrypoint": "feast.feature_store.FeatureStore.apply" + "entrypoint": "feast.infra.local.LocalRegistryStore.get_registry_proto" }.items() <= dummy_exporter[0].items() + assert { + "entrypoint": "feast.infra.local.LocalRegistryStore.update_registry_proto" + }.items() <= dummy_exporter[1].items() + assert { + "entrypoint": "feast.feature_store.FeatureStore.apply" + }.items() <= dummy_exporter[2].items() @pytest.mark.integration diff --git a/sdk/python/tests/integration/e2e/test_validation.py b/sdk/python/tests/integration/e2e/test_validation.py new file mode 100644 index 00000000000..2bd1e3cbbc9 --- /dev/null +++ b/sdk/python/tests/integration/e2e/test_validation.py @@ -0,0 +1,134 @@ +import pandas as pd +import pytest +from great_expectations.core import ExpectationSuite +from great_expectations.dataset import PandasDataset + +from feast.dqm.errors import ValidationFailed +from feast.dqm.profilers.ge_profiler import ge_profiler +from tests.integration.feature_repos.repo_configuration import ( + construct_universal_feature_views, +) +from tests.integration.feature_repos.universal.entities import ( + customer, + driver, + location, +) + +_features = [ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", +] + + +@ge_profiler +def configurable_profiler(dataset: PandasDataset) -> ExpectationSuite: + from great_expectations.profile.user_configurable_profiler import ( + UserConfigurableProfiler, + ) + + return UserConfigurableProfiler( + profile_dataset=dataset, + excluded_expectations=[ + 
"expect_table_columns_to_match_ordered_list", + "expect_table_row_count_to_be_between", + ], + value_set_threshold="few", + ).build_suite() + + +@ge_profiler +def profiler_with_unrealistic_expectations(dataset: PandasDataset) -> ExpectationSuite: + # need to create dataframe with corrupted data first + df = pd.DataFrame() + df["current_balance"] = [-100] + df["avg_passenger_count"] = [0] + + other_ds = PandasDataset(df) + other_ds.expect_column_max_to_be_between("current_balance", -1000, -100) + other_ds.expect_column_values_to_be_in_set("avg_passenger_count", value_set={0}) + + # this should pass + other_ds.expect_column_min_to_be_between("avg_passenger_count", 0, 1000) + + return other_ds.get_expectation_suite() + + +@pytest.mark.integration +@pytest.mark.universal +def test_historical_retrieval_with_validation(environment, universal_data_sources): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + reference_job = store.get_historical_features( + entity_df=entity_df, features=_features, + ) + + store.create_saved_dataset( + from_=reference_job, + name="my_training_dataset", + storage=environment.data_source_creator.create_saved_dataset_destination(), + ) + + job = store.get_historical_features(entity_df=entity_df, features=_features,) + + # if validation pass there will be no exceptions on this point + job.to_df( + validation_reference=store.get_saved_dataset( + "my_training_dataset" + ).as_reference(profiler=configurable_profiler) + ) + + +@pytest.mark.integration +@pytest.mark.universal +def test_historical_retrieval_fails_on_validation(environment, universal_data_sources): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources 
+ feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + reference_job = store.get_historical_features( + entity_df=entity_df, features=_features, + ) + + store.create_saved_dataset( + from_=reference_job, + name="my_other_dataset", + storage=environment.data_source_creator.create_saved_dataset_destination(), + ) + + job = store.get_historical_features(entity_df=entity_df, features=_features,) + + with pytest.raises(ValidationFailed) as exc_info: + job.to_df( + validation_reference=store.get_saved_dataset( + "my_other_dataset" + ).as_reference(profiler=profiler_with_unrealistic_expectations) + ) + + failed_expectations = exc_info.value.report.errors + assert len(failed_expectations) == 2 + + assert failed_expectations[0].check_name == "expect_column_max_to_be_between" + assert failed_expectations[0].column_name == "current_balance" + + assert failed_expectations[1].check_name == "expect_column_values_to_be_in_set" + assert failed_expectations[1].column_name == "avg_passenger_count" diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 6dedfb63b24..a9953d5977e 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -1,20 +1,22 @@ import importlib import json import os +import re import tempfile import uuid from dataclasses import dataclass, field from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import pandas as pd import yaml -from feast import FeatureStore, FeatureView, RepoConfig, driver_test_data +from feast import FeatureStore, FeatureView, driver_test_data from 
feast.constants import FULL_REPO_CONFIGS_MODULE_ENV_NAME from feast.data_source import DataSource from feast.errors import FeastModuleImportError +from feast.repo_config import RegistryConfig, RepoConfig from tests.integration.feature_repos.integration_test_repo_config import ( IntegrationTestRepoConfig, ) @@ -27,12 +29,16 @@ from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, ) +from tests.integration.feature_repos.universal.data_sources.snowflake import ( + SnowflakeDataSourceCreator, +) from tests.integration.feature_repos.universal.feature_views import ( conv_rate_plus_100_feature_view, create_conv_rate_request_data_source, create_customer_daily_profile_feature_view, create_driver_age_request_feature_view, create_driver_hourly_stats_feature_view, + create_field_mapping_feature_view, create_global_stats_feature_view, create_location_stats_feature_view, create_order_feature_view, @@ -51,6 +57,7 @@ DEFAULT_FULL_REPO_CONFIGS: List[IntegrationTestRepoConfig] = [ # Local configurations IntegrationTestRepoConfig(), + IntegrationTestRepoConfig(python_feature_server=True), ] if os.getenv("FEAST_IS_LOCAL_TEST", "False") != "True": DEFAULT_FULL_REPO_CONFIGS.extend( @@ -79,6 +86,12 @@ offline_store_creator=RedshiftDataSourceCreator, online_store=REDIS_CONFIG, ), + # Snowflake configurations + IntegrationTestRepoConfig( + provider="aws", # no list features, no feature server + offline_store_creator=SnowflakeDataSourceCreator, + online_store=REDIS_CONFIG, + ), ] ) full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME) @@ -123,6 +136,7 @@ def construct_universal_datasets( order_count=20, ) global_df = driver_test_data.create_global_daily_stats_df(start_time, end_time) + field_mapping_df = driver_test_data.create_field_mapping_df(start_time, end_time) entity_df = orders_df[ [ "customer_id", @@ -140,6 +154,7 @@ def construct_universal_datasets( "location": location_df, "orders": orders_df, "global": 
global_df, + "field_mapping": field_mapping_df, "entity": entity_df, } @@ -177,12 +192,20 @@ def construct_universal_data_sources( event_timestamp_column="event_timestamp", created_timestamp_column="created", ) + field_mapping_ds = data_source_creator.create_data_source( + datasets["field_mapping"], + destination_name="field_mapping", + event_timestamp_column="event_timestamp", + created_timestamp_column="created", + field_mapping={"column_name": "feature_name"}, + ) return { "customer": customer_ds, "driver": driver_ds, "location": location_ds, "orders": orders_ds, "global": global_ds, + "field_mapping": field_mapping_ds, } @@ -207,6 +230,9 @@ def construct_universal_feature_views( "driver_age_request_fv": create_driver_age_request_feature_view(), "order": create_order_feature_view(data_sources["orders"]), "location": create_location_stats_feature_view(data_sources["location"]), + "field_mapping": create_field_mapping_feature_view( + data_sources["field_mapping"] + ), } @@ -217,6 +243,7 @@ class Environment: feature_store: FeatureStore data_source_creator: DataSourceCreator python_feature_server: bool + worker_id: str end_date: datetime = field( default=datetime.utcnow().replace(microsecond=0, second=0, minute=0) @@ -225,21 +252,36 @@ class Environment: def __post_init__(self): self.start_date: datetime = self.end_date - timedelta(days=3) + def get_feature_server_endpoint(self) -> str: + if self.python_feature_server and self.test_repo_config.provider == "local": + return f"http://localhost:{self.get_local_server_port()}" + return self.feature_store.get_feature_server_endpoint() + + def get_local_server_port(self) -> int: + # Heuristic when running with xdist to extract unique ports for each worker + parsed_worker_id = re.findall("gw(\\d+)", self.worker_id) + if len(parsed_worker_id) != 0: + worker_id_num = int(parsed_worker_id[0]) + else: + worker_id_num = 0 + return 6566 + worker_id_num + def table_name_from_data_source(ds: DataSource) -> Optional[str]: if 
hasattr(ds, "table_ref"): - return ds.table_ref + return ds.table_ref # type: ignore elif hasattr(ds, "table"): - return ds.table + return ds.table # type: ignore return None def construct_test_environment( test_repo_config: IntegrationTestRepoConfig, test_suite_name: str = "integration_test", + worker_id: str = "worker_id", ) -> Environment: - _uuid = str(uuid.uuid4()).replace("-", "")[:8] + _uuid = str(uuid.uuid4()).replace("-", "")[:6] run_id = os.getenv("GITHUB_RUN_ID", default=None) run_id = f"gh_run_{run_id}_{_uuid}" if run_id else _uuid @@ -254,7 +296,7 @@ def construct_test_environment( repo_dir_name = tempfile.mkdtemp() - if test_repo_config.python_feature_server: + if test_repo_config.python_feature_server and test_repo_config.provider == "aws": from feast.infra.feature_servers.aws_lambda.config import ( AwsLambdaFeatureServerConfig, ) @@ -264,10 +306,15 @@ def construct_test_environment( execution_role_name="arn:aws:iam::402087665549:role/lambda_execution_role", ) - registry = f"s3://feast-integration-tests/registries/{project}/registry.db" + registry = ( + f"s3://feast-integration-tests/registries/{project}/registry.db" + ) # type: Union[str, RegistryConfig] else: + # Note: even if it's a local feature server, the repo config does not have this configured feature_server = None - registry = str(Path(repo_dir_name) / "registry.db") + registry = RegistryConfig( + path=str(Path(repo_dir_name) / "registry.db"), cache_ttl_seconds=1, + ) config = RepoConfig( registry=registry, @@ -293,6 +340,7 @@ def construct_test_environment( feature_store=fs, data_source_creator=offline_creator, python_feature_server=test_repo_config.python_feature_server, + worker_id=worker_id, ) return environment diff --git a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py index e0d6983bf15..dcefa29df1e 100644 --- 
a/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_source_creator.py @@ -5,6 +5,7 @@ from feast.data_source import DataSource from feast.repo_config import FeastConfigBaseModel +from feast.saved_dataset import SavedDatasetStorage class DataSourceCreator(ABC): @@ -40,6 +41,10 @@ def create_data_source( def create_offline_store_config(self) -> FeastConfigBaseModel: ... + @abstractmethod + def create_saved_dataset_destination(self) -> SavedDatasetStorage: + ... + @abstractmethod def teardown(self): ... diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py index 766c31150e1..e0ac2050ea5 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/bigquery.py @@ -1,4 +1,5 @@ -from typing import Dict, Optional +import uuid +from typing import Dict, List, Optional import pandas as pd from google.cloud import bigquery @@ -7,6 +8,7 @@ from feast import BigQuerySource from feast.data_source import DataSource from feast.infra.offline_stores.bigquery import BigQueryOfflineStoreConfig +from feast.infra.offline_stores.bigquery_source import SavedDatasetBigQueryStorage from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) @@ -21,7 +23,7 @@ def __init__(self, project_name: str): self.gcp_project = self.client.project self.dataset_id = f"{self.gcp_project}.{project_name}" - self.tables = [] + self.tables: List[str] = [] def create_dataset(self): if not self.dataset: @@ -50,7 +52,7 @@ def create_offline_store_config(self): def create_data_source( self, df: pd.DataFrame, - destination_name: Optional[str] = None, + destination_name: str, event_timestamp_column="ts", created_timestamp_column="created_ts", field_mapping: Dict[str, str] = 
None, @@ -79,5 +81,11 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetBigQueryStorage: + table = self.get_prefixed_table_name( + f"persisted_{str(uuid.uuid4()).replace('-', '_')}" + ) + return SavedDatasetBigQueryStorage(table_ref=table) + def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.client.project}.{self.project_name}.{suffix}" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 0d402b23149..baa3db6afc1 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -1,4 +1,5 @@ import tempfile +import uuid from typing import Any, Dict, List, Optional import pandas as pd @@ -10,6 +11,7 @@ from feast.data_format import ParquetFormat from feast.data_source import DataSource from feast.infra.offline_stores.file import FileOfflineStoreConfig +from feast.infra.offline_stores.file_source import SavedDatasetFileStorage from feast.repo_config import FeastConfigBaseModel from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, @@ -50,6 +52,12 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetFileStorage: + d = tempfile.mkdtemp(prefix=self.project_name) + return SavedDatasetFileStorage( + path=d, file_format=ParquetFormat(), s3_endpoint_override=None + ) + def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}.{suffix}" @@ -127,6 +135,16 @@ def create_data_source( s3_endpoint_override=f"http://{host}:{port}", ) + def create_saved_dataset_destination(self) -> SavedDatasetFileStorage: + port = self.minio.get_exposed_port("9000") + host = self.minio.get_container_host_ip() + + return 
SavedDatasetFileStorage( + path=f"s3://{self.bucket}/persisted/{str(uuid.uuid4())}", + file_format=ParquetFormat(), + s3_endpoint_override=f"http://{host}:{port}", + ) + def get_prefixed_table_name(self, suffix: str) -> str: return f"{suffix}" diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py index 88780f07a07..49b31263cf9 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/redshift.py @@ -1,10 +1,12 @@ -from typing import Dict, Optional +import uuid +from typing import Dict, List, Optional import pandas as pd from feast import RedshiftSource from feast.data_source import DataSource from feast.infra.offline_stores.redshift import RedshiftOfflineStoreConfig +from feast.infra.offline_stores.redshift_source import SavedDatasetRedshiftStorage from feast.infra.utils import aws_utils from feast.repo_config import FeastConfigBaseModel from tests.integration.feature_repos.universal.data_source_creator import ( @@ -14,7 +16,7 @@ class RedshiftDataSourceCreator(DataSourceCreator): - tables = [] + tables: List[str] = [] def __init__(self, project_name: str): super().__init__() @@ -65,6 +67,14 @@ def create_data_source( field_mapping=field_mapping or {"ts_1": "ts"}, ) + def create_saved_dataset_destination(self) -> SavedDatasetRedshiftStorage: + table = self.get_prefixed_table_name( + f"persisted_ds_{str(uuid.uuid4()).replace('-', '_')}" + ) + self.tables.append(table) + + return SavedDatasetRedshiftStorage(table_ref=table) + def create_offline_store_config(self) -> FeastConfigBaseModel: return self.offline_store_config diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py new file mode 100644 index 00000000000..1ecae0317bf --- 
/dev/null +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -0,0 +1,81 @@ +import os +import uuid +from typing import Dict, List, Optional + +import pandas as pd + +from feast import SnowflakeSource +from feast.data_source import DataSource +from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig +from feast.infra.offline_stores.snowflake_source import SavedDatasetSnowflakeStorage +from feast.infra.utils.snowflake_utils import get_snowflake_conn, write_pandas +from feast.repo_config import FeastConfigBaseModel +from tests.integration.feature_repos.universal.data_source_creator import ( + DataSourceCreator, +) + + +class SnowflakeDataSourceCreator(DataSourceCreator): + + tables: List[str] = [] + + def __init__(self, project_name: str): + super().__init__() + self.project_name = project_name + self.offline_store_config = SnowflakeOfflineStoreConfig( + type="snowflake.offline", + account=os.environ["SNOWFLAKE_CI_DEPLOYMENT"], + user=os.environ["SNOWFLAKE_CI_USER"], + password=os.environ["SNOWFLAKE_CI_PASSWORD"], + role=os.environ["SNOWFLAKE_CI_ROLE"], + warehouse=os.environ["SNOWFLAKE_CI_WAREHOUSE"], + database="FEAST", + ) + + def create_data_source( + self, + df: pd.DataFrame, + destination_name: str, + suffix: Optional[str] = None, + event_timestamp_column="ts", + created_timestamp_column="created_ts", + field_mapping: Dict[str, str] = None, + ) -> DataSource: + + snowflake_conn = get_snowflake_conn(self.offline_store_config) + + destination_name = self.get_prefixed_table_name(destination_name) + + write_pandas(snowflake_conn, df, destination_name, auto_create_table=True) + + self.tables.append(destination_name) + + return SnowflakeSource( + table=destination_name, + event_timestamp_column=event_timestamp_column, + created_timestamp_column=created_timestamp_column, + date_partition_column="", + field_mapping=field_mapping or {"ts_1": "ts"}, + ) + + def create_saved_dataset_destination(self) -> 
SavedDatasetSnowflakeStorage: + table = self.get_prefixed_table_name( + f"persisted_ds_{str(uuid.uuid4()).replace('-', '_')}" + ) + self.tables.append(table) + + return SavedDatasetSnowflakeStorage(table_ref=table) + + def create_offline_store_config(self) -> FeastConfigBaseModel: + return self.offline_store_config + + def get_prefixed_table_name(self, suffix: str) -> str: + return f"{self.project_name}_{suffix}" + + def teardown(self): + snowflake_conn = get_snowflake_conn(self.offline_store_config) + + with snowflake_conn as conn: + cur = conn.cursor() + for table in self.tables: + cur.execute(f'DROP TABLE IF EXISTS "{table}"') diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 3d19212f485..b0dc34197f3 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -20,7 +20,7 @@ def driver_feature_view( entities=["driver"], features=None if infer_features else [Feature("value", value_type)], ttl=timedelta(days=5), - input=data_source, + batch_source=data_source, ) @@ -35,7 +35,7 @@ def global_feature_view( entities=[], features=None if infer_features else [Feature("entityless_value", value_type)], ttl=timedelta(days=5), - input=data_source, + batch_source=data_source, ) @@ -217,3 +217,13 @@ def create_location_stats_feature_view(source, infer_features: bool = False): ttl=timedelta(days=2), ) return location_stats_feature_view + + +def create_field_mapping_feature_view(source): + return FeatureView( + name="field_mapping", + entities=[], + features=[Feature(name="feature_name", dtype=ValueType.INT32)], + batch_source=source, + ttl=timedelta(days=2), + ) diff --git a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index dad14ac5aad..4a396c7e4d8 
100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pytest -from pandas.testing import assert_frame_equal +from pandas.testing import assert_frame_equal as pd_assert_frame_equal from pytz import utc from feast import utils @@ -26,6 +26,9 @@ construct_universal_feature_views, table_name_from_data_source, ) +from tests.integration.feature_repos.universal.data_sources.snowflake import ( + SnowflakeDataSourceCreator, +) from tests.integration.feature_repos.universal.entities import ( customer, driver, @@ -55,7 +58,7 @@ def find_asof_record( filter_keys = filter_keys or [] filter_values = filter_values or [] assert len(filter_keys) == len(filter_values) - found_record = {} + found_record: Dict[str, Any] = {} for record in records: if ( all( @@ -82,6 +85,8 @@ def get_expected_training_df( location_fv: FeatureView, global_df: pd.DataFrame, global_fv: FeatureView, + field_mapping_df: pd.DataFrame, + field_mapping_fv: FeatureView, entity_df: pd.DataFrame, event_timestamp: str, full_feature_names: bool = False, @@ -102,6 +107,10 @@ def get_expected_training_df( global_records = convert_timestamp_records_to_utc( global_df.to_dict("records"), global_fv.batch_source.event_timestamp_column ) + field_mapping_records = convert_timestamp_records_to_utc( + field_mapping_df.to_dict("records"), + field_mapping_fv.batch_source.event_timestamp_column, + ) entity_rows = convert_timestamp_records_to_utc( entity_df.to_dict("records"), event_timestamp ) @@ -156,6 +165,13 @@ def get_expected_training_df( ts_end=order_record[event_timestamp], ) + field_mapping_record = find_asof_record( + field_mapping_records, + ts_key=field_mapping_fv.batch_source.event_timestamp_column, + ts_start=order_record[event_timestamp] - field_mapping_fv.ttl, + ts_end=order_record[event_timestamp], + ) + entity_row.update( { 
( @@ -197,6 +213,16 @@ def get_expected_training_df( } ) + # get field_mapping_record by column name, but label by feature name + entity_row.update( + { + ( + f"field_mapping__{feature}" if full_feature_names else feature + ): field_mapping_record.get(column, None) + for (column, feature) in field_mapping_fv.input.field_mapping.items() + } + ) + # Convert records back to pandas dataframe expected_df = pd.DataFrame(entity_rows) @@ -213,6 +239,7 @@ def get_expected_training_df( "customer_profile__current_balance": "float32", "customer_profile__avg_passenger_count": "float32", "global_stats__avg_ride_length": "float32", + "field_mapping__feature_name": "int32", } else: expected_column_types = { @@ -221,6 +248,7 @@ def get_expected_training_df( "current_balance": "float32", "avg_passenger_count": "float32", "avg_ride_length": "float32", + "feature_name": "int32", } for col, typ in expected_column_types.items(): @@ -239,9 +267,10 @@ def get_expected_training_df( .round() .astype(pd.Int32Dtype()) ) - expected_df[ - response_feature_name("conv_rate_plus_val_to_add", full_feature_names) - ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) + if "val_to_add" in expected_df.columns: + expected_df[ + response_feature_name("conv_rate_plus_val_to_add", full_feature_names) + ] = (expected_df[conv_feature_name] + expected_df["val_to_add"]) return expected_df @@ -255,15 +284,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n (entities, datasets, data_sources) = universal_data_sources feature_views = construct_universal_feature_views(data_sources) - customer_df, driver_df, location_df, orders_df, global_df, entity_df = ( - datasets["customer"], - datasets["driver"], - datasets["location"], - datasets["orders"], - datasets["global"], - datasets["entity"], - ) - entity_df_with_request_data = entity_df.copy(deep=True) + entity_df_with_request_data = datasets["entity"].copy(deep=True) entity_df_with_request_data["val_to_add"] = [ i for i 
in range(len(entity_df_with_request_data)) ] @@ -271,84 +292,55 @@ def test_historical_features(environment, universal_data_sources, full_feature_n i + 100 for i in range(len(entity_df_with_request_data)) ] - ( - customer_fv, - driver_fv, - driver_odfv, - location_fv, - order_fv, - global_fv, - driver_age_request_fv, - ) = ( - feature_views["customer"], - feature_views["driver"], - feature_views["driver_odfv"], - feature_views["location"], - feature_views["order"], - feature_views["global"], - feature_views["driver_age_request_fv"], - ) - feature_service = FeatureService( name="convrate_plus100", features=[ feature_views["driver"][["conv_rate"]], - driver_odfv, - driver_age_request_fv, + feature_views["driver_odfv"], + feature_views["driver_age_request_fv"], ], ) feature_service_entity_mapping = FeatureService( name="entity_mapping", features=[ - location_fv.with_name("origin").with_join_key_map( - {"location_id": "origin_id"} - ), - location_fv.with_name("destination").with_join_key_map( - {"location_id": "destination_id"} - ), + feature_views["location"] + .with_name("origin") + .with_join_key_map({"location_id": "origin_id"}), + feature_views["location"] + .with_name("destination") + .with_join_key_map({"location_id": "destination_id"}), ], ) - feast_objects = [] - feast_objects.extend( + store.apply( [ - customer_fv, - driver_fv, - driver_odfv, - location_fv, - order_fv, - global_fv, - driver_age_request_fv, driver(), customer(), location(), feature_service, feature_service_entity_mapping, + *feature_views.values(), ] ) - store.apply(feast_objects) - - entity_df_query = None - orders_table = table_name_from_data_source(data_sources["orders"]) - if orders_table: - entity_df_query = f"SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp FROM {orders_table}" event_timestamp = ( DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL - if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in orders_df.columns + if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in 
datasets["orders"].columns else "e_ts" ) full_expected_df = get_expected_training_df( - customer_df, - customer_fv, - driver_df, - driver_fv, - orders_df, - order_fv, - location_df, - location_fv, - global_df, - global_fv, + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], entity_df_with_request_data, event_timestamp, full_feature_names, @@ -359,76 +351,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n columns=["origin__temperature", "destination__temperature"], ) - if entity_df_query: - job_from_sql = store.get_historical_features( - entity_df=entity_df_query, - features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - "customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - "order:order_is_success", - "global_stats:num_rides", - "global_stats:avg_ride_length", - ], - full_feature_names=full_feature_names, - ) - - start_time = datetime.utcnow() - actual_df_from_sql_entities = job_from_sql.to_df() - end_time = datetime.utcnow() - print( - str(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'") - ) - - # Not requesting the on demand transform with an entity_df query (can't add request data in them) - expected_df_query = expected_df.drop( - columns=[ - response_feature_name("conv_rate_plus_100", full_feature_names), - response_feature_name("conv_rate_plus_100_rounded", full_feature_names), - response_feature_name("conv_rate_plus_val_to_add", full_feature_names), - "val_to_add", - "driver_age", - ] - ) - assert sorted(expected_df_query.columns) == sorted( - actual_df_from_sql_entities.columns - ) - - actual_df_from_sql_entities = ( - 
actual_df_from_sql_entities[expected_df_query.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - expected_df_query = ( - expected_df_query.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .drop_duplicates() - .reset_index(drop=True) - ) - - assert_frame_equal( - actual_df_from_sql_entities, expected_df_query, check_dtype=False, - ) - - table_from_sql_entities = job_from_sql.to_arrow() - df_from_sql_entities = ( - table_from_sql_entities.to_pandas()[expected_df_query.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - - for col in df_from_sql_entities.columns: - expected_df_query[col] = expected_df_query[col].astype( - df_from_sql_entities[col].dtype - ) - - assert_frame_equal(expected_df_query, df_from_sql_entities) - job_from_df = store.get_historical_features( entity_df=entity_df_with_request_data, features=[ @@ -444,6 +366,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n "global_stats:num_rides", "global_stats:avg_ride_length", "driver_age:driver_age", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) @@ -456,23 +379,12 @@ def test_historical_features(environment, universal_data_sources, full_feature_n print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df_from_df_entities.columns) - expected_df: pd.DataFrame = ( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .drop_duplicates() - .reset_index(drop=True) - ) - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) - assert_frame_equal( - expected_df, 
actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) + assert_feature_service_correctness( store, feature_service, @@ -489,26 +401,33 @@ def test_historical_features(environment, universal_data_sources, full_feature_n full_expected_df, event_timestamp, ) - table_from_df_entities: pd.DataFrame = job_from_df.to_arrow().to_pandas() - columns_expected_in_table = expected_df.columns.tolist() - - table_from_df_entities = ( - table_from_df_entities[columns_expected_in_table] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) + assert_frame_equal( + expected_df, + table_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) - assert_frame_equal(actual_df_from_df_entities, table_from_df_entities) + + +@pytest.mark.integration +@pytest.mark.universal +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_with_missing_request_data( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (_, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) # If request data is missing that's needed for on demand transform, throw an error with pytest.raises(RequestDataNotFoundInEntityDfException): store.get_historical_features( - entity_df=entity_df, + entity_df=datasets["entity"], features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", "customer_profile:current_balance", "customer_profile:avg_passenger_count", "customer_profile:lifetime_trip_count", @@ -516,27 +435,204 @@ def test_historical_features(environment, universal_data_sources, full_feature_n "conv_rate_plus_100:conv_rate_plus_val_to_add", "global_stats:num_rides", 
"global_stats:avg_ride_length", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) + # If request data is missing that's needed for a request feature view, throw an error with pytest.raises(RequestDataNotFoundInEntityDfException): store.get_historical_features( - entity_df=entity_df, + entity_df=datasets["entity"], features=[ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", "customer_profile:current_balance", "customer_profile:avg_passenger_count", "customer_profile:lifetime_trip_count", "driver_age:driver_age", "global_stats:num_rides", "global_stats:avg_ride_length", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) +@pytest.mark.integration +@pytest.mark.universal +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_with_entities_from_query( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + orders_table = table_name_from_data_source(data_sources["orders"]) + if not orders_table: + raise pytest.skip("Offline source is not sql-based") + + if ( + environment.test_repo_config.offline_store_creator.__name__ + == SnowflakeDataSourceCreator.__name__ + ): + entity_df_query = f'''SELECT "customer_id", "driver_id", "order_id", "origin_id", "destination_id", "event_timestamp" FROM "{orders_table}"''' + else: + entity_df_query = f"SELECT customer_id, driver_id, order_id, origin_id, destination_id, event_timestamp FROM {orders_table}" + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + job_from_sql = store.get_historical_features( + entity_df=entity_df_query, + features=[ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + 
"global_stats:num_rides", + "global_stats:avg_ride_length", + "field_mapping:feature_name", + ], + full_feature_names=full_feature_names, + ) + + start_time = datetime.utcnow() + actual_df_from_sql_entities = job_from_sql.to_df() + end_time = datetime.utcnow() + print(str(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'")) + + event_timestamp = ( + DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL + if DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL in datasets["orders"].columns + else "e_ts" + ) + full_expected_df = get_expected_training_df( + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], + datasets["entity"], + event_timestamp, + full_feature_names, + ) + + # Not requesting the on demand transform with an entity_df query (can't add request data in them) + expected_df_query = full_expected_df.drop( + columns=[ + response_feature_name("conv_rate_plus_100", full_feature_names), + response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + response_feature_name("avg_daily_trips", full_feature_names), + response_feature_name("conv_rate", full_feature_names), + "origin__temperature", + "destination__temperature", + ] + ) + assert_frame_equal( + expected_df_query, + actual_df_from_sql_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], + ) + + table_from_sql_entities = job_from_sql.to_arrow().to_pandas() + for col in table_from_sql_entities.columns: + expected_df_query[col] = expected_df_query[col].astype( + table_from_sql_entities[col].dtype + ) + + assert_frame_equal( + expected_df_query, + table_from_sql_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], + ) + + +@pytest.mark.integration +@pytest.mark.universal 
+@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +def test_historical_features_persisting( + environment, universal_data_sources, full_feature_names +): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + feature_views = construct_universal_feature_views(data_sources) + + store.apply([driver(), customer(), location(), *feature_views.values()]) + + entity_df = datasets["entity"].drop( + columns=["order_id", "origin_id", "destination_id"] + ) + + job = store.get_historical_features( + entity_df=entity_df, + features=[ + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", + "field_mapping:feature_name", + ], + full_feature_names=full_feature_names, + ) + + saved_dataset = store.create_saved_dataset( + from_=job, + name="saved_dataset", + storage=environment.data_source_creator.create_saved_dataset_destination(), + tags={"env": "test"}, + ) + + event_timestamp = DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL + expected_df = get_expected_training_df( + datasets["customer"], + feature_views["customer"], + datasets["driver"], + feature_views["driver"], + datasets["orders"], + feature_views["order"], + datasets["location"], + feature_views["location"], + datasets["global"], + feature_views["global"], + datasets["field_mapping"], + feature_views["field_mapping"], + entity_df, + event_timestamp, + full_feature_names, + ).drop( + columns=[ + response_feature_name("conv_rate_plus_100", full_feature_names), + response_feature_name("conv_rate_plus_100_rounded", full_feature_names), + response_feature_name("avg_daily_trips", full_feature_names), + response_feature_name("conv_rate", full_feature_names), + "origin__temperature", + "destination__temperature", + ] + ) + + assert_frame_equal( + expected_df, + saved_dataset.to_df(), + 
keys=[event_timestamp, "driver_id", "customer_id"], + ) + + assert_frame_equal( + job.to_df(), + saved_dataset.to_df(), + keys=[event_timestamp, "driver_id", "customer_id"], + ) + + @pytest.mark.integration @pytest.mark.universal def test_historical_features_from_bigquery_sources_containing_backfills(environment): @@ -630,13 +726,7 @@ def test_historical_features_from_bigquery_sources_containing_backfills(environm print(str(f"Time to execute job_from_df.to_df() = '{(end_time - start_time)}'\n")) assert sorted(expected_df.columns) == sorted(actual_df.columns) - assert_frame_equal( - expected_df.sort_values(by=["driver_id"]).reset_index(drop=True), - actual_df[expected_df.columns] - .sort_values(by=["driver_id"]) - .reset_index(drop=True), - check_dtype=False, - ) + assert_frame_equal(expected_df, actual_df, keys=["driver_id"]) def response_feature_name(feature: str, full_feature_names: bool) -> str: @@ -669,13 +759,6 @@ def assert_feature_service_correctness( actual_df_from_df_entities = job_from_df.to_df() - expected_df: pd.DataFrame = ( - expected_df.sort_values( - by=[event_timestamp, "order_id", "driver_id", "customer_id"] - ) - .drop_duplicates() - .reset_index(drop=True) - ) expected_df = expected_df[ [ event_timestamp, @@ -687,15 +770,11 @@ def assert_feature_service_correctness( "driver_age", ] ] - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"]) - .drop_duplicates() - .reset_index(drop=True) - ) assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) @@ -736,24 +815,18 @@ def assert_feature_service_entity_mapping_correctness( "destination__temperature", ] ] - actual_df_from_df_entities = ( - actual_df_from_df_entities[expected_df.columns] - .sort_values( - by=[ - event_timestamp, - "order_id", - "driver_id", - 
"customer_id", - "origin_id", - "destination_id", - ] - ) - .drop_duplicates() - .reset_index(drop=True) - ) assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + expected_df, + actual_df_from_df_entities, + keys=[ + event_timestamp, + "order_id", + "driver_id", + "customer_id", + "origin_id", + "destination_id", + ], ) else: # using 2 of the same FeatureView without full_feature_names=True will result in collision @@ -763,3 +836,20 @@ def assert_feature_service_entity_mapping_correctness( features=feature_service, full_feature_names=full_feature_names, ) + + +def assert_frame_equal(expected_df, actual_df, keys): + expected_df: pd.DataFrame = ( + expected_df.sort_values(by=keys).drop_duplicates().reset_index(drop=True) + ) + + actual_df = ( + actual_df[expected_df.columns] + .sort_values(by=keys) + .drop_duplicates() + .reset_index(drop=True) + ) + + pd_assert_frame_equal( + expected_df, actual_df, check_dtype=False, + ) diff --git a/sdk/python/tests/integration/online_store/test_e2e_local.py b/sdk/python/tests/integration/online_store/test_e2e_local.py index dd900e90dc0..79902273440 100644 --- a/sdk/python/tests/integration/online_store/test_e2e_local.py +++ b/sdk/python/tests/integration/online_store/test_e2e_local.py @@ -40,7 +40,14 @@ def _assert_online_features( # Float features should still be floats from the online store... 
assert ( - response.field_values[0].fields["driver_hourly_stats__conv_rate"].float_val > 0 + response.proto.results[0] + .values[ + list(response.proto.metadata.feature_names.val).index( + "driver_hourly_stats__conv_rate" + ) + ] + .float_val + > 0 ) result = response.to_dict() diff --git a/sdk/python/tests/integration/online_store/test_online_retrieval.py b/sdk/python/tests/integration/online_store/test_online_retrieval.py index b94f6f1772c..265fedd2826 100644 --- a/sdk/python/tests/integration/online_store/test_online_retrieval.py +++ b/sdk/python/tests/integration/online_store/test_online_retrieval.py @@ -54,7 +54,7 @@ def test_online() -> None: ) customer_key = EntityKeyProto( - join_keys=["customer"], entity_values=[ValueProto(int64_val=5)] + join_keys=["customer"], entity_values=[ValueProto(string_val="5")] ) provider.online_write_batch( config=store.config, @@ -76,7 +76,7 @@ def test_online() -> None: customer_key = EntityKeyProto( join_keys=["customer", "driver"], - entity_values=[ValueProto(int64_val=5), ValueProto(int64_val=1)], + entity_values=[ValueProto(string_val="5"), ValueProto(int64_val=1)], ) provider.online_write_batch( config=store.config, @@ -100,7 +100,7 @@ def test_online() -> None: "customer_profile:name", "customer_driver_combined:trips", ], - entity_rows=[{"driver": 1, "customer": 5}, {"driver": 1, "customer": 5}], + entity_rows=[{"driver": 1, "customer": "5"}, {"driver": 1, "customer": 5}], full_feature_names=False, ).to_dict() @@ -108,7 +108,7 @@ def test_online() -> None: assert "avg_orders_day" in result assert "name" in result assert result["driver"] == [1, 1] - assert result["customer"] == [5, 5] + assert result["customer"] == ["5", "5"] assert result["lon"] == ["1.0", "1.0"] assert result["avg_orders_day"] == [1.0, 1.0] assert result["name"] == ["John", "John"] @@ -311,7 +311,7 @@ def test_online_to_df(): 6 6.0 foo6 60 """ customer_key = EntityKeyProto( - join_keys=["customer"], entity_values=[ValueProto(int64_val=c)] + 
join_keys=["customer"], entity_values=[ValueProto(string_val=str(c))] ) provider.online_write_batch( config=store.config, @@ -341,7 +341,7 @@ def test_online_to_df(): """ combo_keys = EntityKeyProto( join_keys=["customer", "driver"], - entity_values=[ValueProto(int64_val=c), ValueProto(int64_val=d)], + entity_values=[ValueProto(string_val=str(c)), ValueProto(int64_val=d)], ) provider.online_write_batch( config=store.config, @@ -382,7 +382,7 @@ def test_online_to_df(): """ df_dict = { "driver": driver_ids, - "customer": customer_ids, + "customer": [str(c) for c in customer_ids], "lon": [str(d * lon_multiply) for d in driver_ids], "lat": [d * lat_multiply for d in driver_ids], "avg_orders_day": [c * avg_order_day_multiply for c in customer_ids], diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index c47f2bbfd07..f483d54f6b3 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -28,6 +28,7 @@ ) from tests.integration.feature_repos.universal.feature_views import ( create_driver_hourly_stats_feature_view, + driver_feature_view, ) from tests.utils.data_source_utils import prep_file_source @@ -185,7 +186,7 @@ def _get_online_features_dict_remotely( The output should be identical to: - >>> fs.get_online_features(features=features, entity_rows=entity_rows, full_feature_names=full_feature_names).to_dict() + fs.get_online_features(features=features, entity_rows=entity_rows, full_feature_names=full_feature_names).to_dict() This makes it easy to test the remote feature server by comparing the output to the local method. 
@@ -212,11 +213,15 @@ def _get_online_features_dict_remotely( time.sleep(1) else: raise Exception("Failed to get online features from remote feature server") - keys = response["field_values"][0]["statuses"].keys() + if "metadata" not in response: + raise Exception( + f"Failed to get online features from remote feature server {response}" + ) + keys = response["metadata"]["feature_names"] # Get rid of unnecessary structure in the response, leaving list of dicts - response = [row["fields"] for row in response["field_values"]] + response = [row["values"] for row in response["results"]] # Convert list of dicts (response) into dict of lists which is the format of the return value - return {key: [row.get(key) for row in response] for key in keys} + return {key: [row[idx] for row in response] for idx, key in enumerate(keys)} def get_online_features_dict( @@ -238,8 +243,8 @@ def get_online_features_dict( assertpy.assert_that(online_features).is_not_none() dict1 = online_features.to_dict() - endpoint = environment.feature_store.get_feature_server_endpoint() - # If endpoint is None, it means that the remote feature server isn't configured + endpoint = environment.get_feature_server_endpoint() + # If endpoint is None, it means that a local / remote feature server aren't configured if endpoint is not None: dict2 = _get_online_features_dict_remotely( endpoint=endpoint, @@ -499,6 +504,79 @@ def test_online_retrieval(environment, universal_data_sources, full_feature_name ) +@pytest.mark.integration +@pytest.mark.universal +def test_online_store_cleanup(environment, universal_data_sources): + """ + Some online store implementations (like Redis) keep features from different features views + but with common entities together. + This might end up with deletion of all features attached to the entity, + when only one feature view was deletion target (see https://github.com/feast-dev/feast/issues/2150). + + Plan: + 1. Register two feature views with common entity "driver" + 2. 
Materialize data + 3. Check if features are available (via online retrieval) + 4. Delete one feature view + 5. Check that features for other are still available + 6. Delete another feature view (and create again) + 7. Verify that features for both feature view were deleted + """ + fs = environment.feature_store + entities, datasets, data_sources = universal_data_sources + driver_stats_fv = construct_universal_feature_views(data_sources)["driver"] + + df = pd.DataFrame( + { + "ts_1": [environment.end_date] * len(entities["driver"]), + "created_ts": [environment.end_date] * len(entities["driver"]), + "driver_id": entities["driver"], + "value": np.random.random(size=len(entities["driver"])), + } + ) + + ds = environment.data_source_creator.create_data_source( + df, destination_name="simple_driver_dataset" + ) + + simple_driver_fv = driver_feature_view( + data_source=ds, name="test_universal_online_simple_driver" + ) + + fs.apply([driver(), simple_driver_fv, driver_stats_fv]) + + fs.materialize( + environment.start_date - timedelta(days=1), + environment.end_date + timedelta(days=1), + ) + expected_values = df.sort_values(by="driver_id") + + features = [f"{simple_driver_fv.name}:value"] + entity_rows = [{"driver": driver_id} for driver_id in sorted(entities["driver"])] + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert np.allclose(expected_values["value"], online_features["value"]) + + fs.apply( + objects=[simple_driver_fv], objects_to_delete=[driver_stats_fv], partial=False + ) + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert np.allclose(expected_values["value"], online_features["value"]) + + fs.apply(objects=[], objects_to_delete=[simple_driver_fv], partial=False) + fs.apply([simple_driver_fv]) + + online_features = fs.get_online_features( + features=features, entity_rows=entity_rows + ).to_dict() + assert all(v is None for v in 
online_features["value"]) + + def response_feature_name(feature: str, full_feature_names: bool) -> str: if ( feature in {"current_balance", "avg_passenger_count", "lifetime_trip_count"} diff --git a/sdk/python/tests/integration/registration/test_cli.py b/sdk/python/tests/integration/registration/test_cli.py index b92dc52642d..bba12056ce8 100644 --- a/sdk/python/tests/integration/registration/test_cli.py +++ b/sdk/python/tests/integration/registration/test_cli.py @@ -1,18 +1,32 @@ +import os import tempfile import uuid from contextlib import contextmanager -from pathlib import Path, PosixPath +from pathlib import Path from textwrap import dedent +from typing import List import pytest import yaml from assertpy import assertpy from feast import FeatureStore, RepoConfig +from tests.integration.feature_repos.integration_test_repo_config import ( + IntegrationTestRepoConfig, +) from tests.integration.feature_repos.repo_configuration import FULL_REPO_CONFIGS from tests.integration.feature_repos.universal.data_source_creator import ( DataSourceCreator, ) +from tests.integration.feature_repos.universal.data_sources.bigquery import ( + BigQueryDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.file import ( + FileDataSourceCreator, +) +from tests.integration.feature_repos.universal.data_sources.redshift import ( + RedshiftDataSourceCreator, +) from tests.utils.cli_utils import CliRunner, get_example_repo from tests.utils.online_read_write_test import basic_rw_test @@ -21,15 +35,14 @@ @pytest.mark.parametrize("test_repo_config", FULL_REPO_CONFIGS) def test_universal_cli(test_repo_config) -> None: project = f"test_universal_cli_{str(uuid.uuid4()).replace('-', '')[:8]}" - runner = CliRunner() with tempfile.TemporaryDirectory() as repo_dir_name: try: + repo_path = Path(repo_dir_name) feature_store_yaml = make_feature_store_yaml( - project, test_repo_config, repo_dir_name + project, test_repo_config, repo_path ) - repo_path = Path(repo_dir_name) 
repo_config = repo_path / "feature_store.yaml" @@ -44,6 +57,12 @@ def test_universal_cli(test_repo_config) -> None: fs = FeatureStore(repo_path=str(repo_path)) registry_dict = fs.registry.to_dict(project=project) + # Save only the specs, not the metadata. + registry_specs = { + key: [fco["spec"] for fco in value] + for key, value in registry_dict.items() + } + # entity & feature view list commands should succeed result = runner.run(["entities", "list"], cwd=repo_path) assertpy.assert_that(result.returncode).is_equal_to(0) @@ -83,8 +102,12 @@ def test_universal_cli(test_repo_config) -> None: ) # Confirm that registry contents have not changed. - assertpy.assert_that(registry_dict).is_equal_to( - fs.registry.to_dict(project=project) + registry_dict = fs.registry.to_dict(project=project) + assertpy.assert_that(registry_specs).is_equal_to( + { + key: [fco["spec"] for fco in value] + for key, value in registry_dict.items() + } ) result = runner.run(["teardown"], cwd=repo_path) @@ -93,7 +116,7 @@ def test_universal_cli(test_repo_config) -> None: runner.run(["teardown"], cwd=repo_path) -def make_feature_store_yaml(project, test_repo_config, repo_dir_name: PosixPath): +def make_feature_store_yaml(project, test_repo_config, repo_dir_name: Path): offline_creator: DataSourceCreator = test_repo_config.offline_store_creator(project) offline_store_config = offline_creator.create_offline_store_config() @@ -118,6 +141,56 @@ def make_feature_store_yaml(project, test_repo_config, repo_dir_name: PosixPath) return yaml.safe_dump(config_dict) +NULLABLE_ONLINE_STORE_CONFIGS: List[IntegrationTestRepoConfig] = [ + IntegrationTestRepoConfig( + provider="local", + offline_store_creator=FileDataSourceCreator, + online_store=None, + ), +] + +if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": + NULLABLE_ONLINE_STORE_CONFIGS.extend( + [ + IntegrationTestRepoConfig( + provider="gcp", + offline_store_creator=BigQueryDataSourceCreator, + online_store=None, + ), + IntegrationTestRepoConfig( + 
provider="aws", + offline_store_creator=RedshiftDataSourceCreator, + online_store=None, + ), + ] + ) + + +@pytest.mark.integration +@pytest.mark.parametrize("test_nullable_online_store", NULLABLE_ONLINE_STORE_CONFIGS) +def test_nullable_online_store(test_nullable_online_store) -> None: + project = f"test_nullable_online_store{str(uuid.uuid4()).replace('-', '')[:8]}" + runner = CliRunner() + + with tempfile.TemporaryDirectory() as repo_dir_name: + try: + repo_path = Path(repo_dir_name) + feature_store_yaml = make_feature_store_yaml( + project, test_nullable_online_store, repo_path + ) + + repo_config = repo_path / "feature_store.yaml" + + repo_config.write_text(dedent(feature_store_yaml)) + + repo_example = repo_path / "example.py" + repo_example.write_text(get_example_repo("example_feature_repo_1.py")) + result = runner.run(["apply"], cwd=repo_path) + assertpy.assert_that(result.returncode).is_equal_to(0) + finally: + runner.run(["teardown"], cwd=repo_path) + + @contextmanager def setup_third_party_provider_repo(provider_name: str): with tempfile.TemporaryDirectory() as repo_dir_name: @@ -201,14 +274,14 @@ def test_3rd_party_providers() -> None: return_code, output = runner.run_with_output(["apply"], cwd=repo_path) assertpy.assert_that(return_code).is_equal_to(1) assertpy.assert_that(output).contains( - b"Could not import Provider module 'feast_foo'" + b"Could not import module 'feast_foo' while attempting to load class 'Provider'" ) # Check with incorrect third-party provider name (with dots) with setup_third_party_provider_repo("foo.FooProvider") as repo_path: return_code, output = runner.run_with_output(["apply"], cwd=repo_path) assertpy.assert_that(return_code).is_equal_to(1) assertpy.assert_that(output).contains( - b"Could not import Provider 'FooProvider' from module 'foo'" + b"Could not import class 'FooProvider' from module 'foo'" ) # Check with correct third-party provider name with setup_third_party_provider_repo("foo.provider.FooProvider") as repo_path: 
@@ -233,14 +306,14 @@ def test_3rd_party_registry_store() -> None: return_code, output = runner.run_with_output(["apply"], cwd=repo_path) assertpy.assert_that(return_code).is_equal_to(1) assertpy.assert_that(output).contains( - b"Could not import RegistryStore module 'feast_foo'" + b"Could not import module 'feast_foo' while attempting to load class 'RegistryStore'" ) # Check with incorrect third-party registry store name (with dots) with setup_third_party_registry_store_repo("foo.FooRegistryStore") as repo_path: return_code, output = runner.run_with_output(["apply"], cwd=repo_path) assertpy.assert_that(return_code).is_equal_to(1) assertpy.assert_that(output).contains( - b"Could not import RegistryStore 'FooRegistryStore' from module 'foo'" + b"Could not import class 'FooRegistryStore' from module 'foo'" ) # Check with correct third-party registry store name with setup_third_party_registry_store_repo( diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 14aa1e13ad5..ca5f56c435e 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -3,7 +3,7 @@ from feast import Entity, Feature, RepoConfig, ValueType from feast.data_source import RequestDataSource -from feast.errors import RegistryInferenceFailure +from feast.errors import RegistryInferenceFailure, SpecifiedFeaturesNotPresentError from feast.feature_view import FeatureView from feast.inference import ( update_data_sources_with_inferred_event_timestamp_col, @@ -86,7 +86,7 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1) ) -def test_modify_feature_views_success(): +def test_on_demand_features_type_inference(): # Create Feature Views date_request = RequestDataSource( name="date_request", schema={"some_date": ValueType.UNIX_TIMESTAMP} @@ -94,11 +94,46 @@ def test_modify_feature_views_success(): 
@on_demand_feature_view( inputs={"date_request": date_request}, - features=[Feature("output", ValueType.UNIX_TIMESTAMP)], + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("string_output", ValueType.STRING), + ], ) def test_view(features_df: pd.DataFrame) -> pd.DataFrame: data = pd.DataFrame() data["output"] = features_df["some_date"] + data["string_output"] = features_df["some_date"].astype(pd.StringDtype()) return data test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("object_output", ValueType.STRING), + ], + ) + def invalid_test_view(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + data["object_output"] = features_df["some_date"].astype(str) + return data + + with pytest.raises(ValueError, match="Value with native type object"): + invalid_test_view.infer_features() + + @on_demand_feature_view( + inputs={"date_request": date_request}, + features=[ + Feature("output", ValueType.UNIX_TIMESTAMP), + Feature("missing", ValueType.STRING), + ], + ) + def test_view_with_missing_feature(features_df: pd.DataFrame) -> pd.DataFrame: + data = pd.DataFrame() + data["output"] = features_df["some_date"] + return data + + with pytest.raises(SpecifiedFeaturesNotPresentError): + test_view_with_missing_feature.infer_features() diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index c007d56c35d..663ba55ccb3 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -1,10 +1,11 @@ import logging from dataclasses import dataclass from datetime import datetime, timedelta -from typing import List +from typing import Any, Dict, List, Tuple, Union import numpy as np import pandas as pd +import pyarrow 
as pa import pytest from feast.infra.offline_stores.offline_store import RetrievalJob @@ -28,6 +29,7 @@ def populate_test_configs(offline: bool): (ValueType.INT64, "int64"), (ValueType.STRING, "float"), (ValueType.STRING, "bool"), + (ValueType.INT32, "datetime"), ] configs: List[TypeTestConfig] = [] for test_repo_config in FULL_REPO_CONFIGS: @@ -217,7 +219,7 @@ def test_feature_get_online_features_types_match(online_types_test_fixtures): ) fs = environment.feature_store features = [fv.name + ":value"] - entity = driver(value_type=ValueType.UNKNOWN) + entity = driver(value_type=config.entity_type) fs.apply([fv, entity]) fs.materialize(environment.start_date, environment.end_date) @@ -232,6 +234,7 @@ def test_feature_get_online_features_types_match(online_types_test_fixtures): "float": float, "string": str, "bool": bool, + "datetime": datetime, } expected_dtype = feature_list_dtype_to_expected_online_response_value_type[ config.feature_dtype @@ -258,6 +261,8 @@ def create_feature_view( value_type = ValueType.FLOAT_LIST elif feature_dtype == "bool": value_type = ValueType.BOOL_LIST + elif feature_dtype == "datetime": + value_type = ValueType.UNIX_TIMESTAMP_LIST else: if feature_dtype == "int32": value_type = ValueType.INT32 @@ -267,6 +272,8 @@ def create_feature_view( value_type = ValueType.FLOAT elif feature_dtype == "bool": value_type = ValueType.BOOL + elif feature_dtype == "datetime": + value_type = ValueType.UNIX_TIMESTAMP return driver_feature_view(data_source, name=name, value_type=value_type,) @@ -281,6 +288,7 @@ def assert_expected_historical_feature_types( "float": (pd.api.types.is_float_dtype,), "string": (pd.api.types.is_string_dtype,), "bool": (pd.api.types.is_bool_dtype, pd.api.types.is_object_dtype), + "datetime": (pd.api.types.is_datetime64_any_dtype,), } dtype_checkers = feature_dtype_to_expected_historical_feature_dtype[feature_dtype] assert any( @@ -292,7 +300,9 @@ def assert_feature_list_types( provider: str, feature_dtype: str, 
historical_features_df: pd.DataFrame ): print("Asserting historical feature list types") - feature_list_dtype_to_expected_historical_feature_list_dtype = { + feature_list_dtype_to_expected_historical_feature_list_dtype: Dict[ + str, Union[type, Tuple[Union[type, Tuple[Any, ...]], ...]] + ] = { "int32": ( int, np.int64, @@ -307,6 +317,7 @@ def assert_feature_list_types( bool, np.bool_, ), # Can be `np.bool_` if from `np.array` rather that `list` + "datetime": np.datetime64, } expected_dtype = feature_list_dtype_to_expected_historical_feature_list_dtype[ feature_dtype @@ -328,22 +339,21 @@ def assert_expected_arrow_types( historical_features_arrow = historical_features.to_arrow() print(historical_features_arrow) feature_list_dtype_to_expected_historical_feature_arrow_type = { - "int32": "int64", - "int64": "int64", - "float": "double", - "string": "string", - "bool": "bool", + "int32": pa.types.is_int64, + "int64": pa.types.is_int64, + "float": pa.types.is_float64, + "string": pa.types.is_string, + "bool": pa.types.is_boolean, + "date": pa.types.is_date, + "datetime": pa.types.is_timestamp, } - arrow_type = feature_list_dtype_to_expected_historical_feature_arrow_type[ + arrow_type_checker = feature_list_dtype_to_expected_historical_feature_arrow_type[ feature_dtype ] + pa_type = historical_features_arrow.schema.field("value").type + if feature_is_list: - assert ( - str(historical_features_arrow.schema.field_by_name("value").type) - == f"list" - ) + assert pa.types.is_list(pa_type) + assert arrow_type_checker(pa_type.value_type) else: - assert ( - str(historical_features_arrow.schema.field_by_name("value").type) - == arrow_type - ) + assert arrow_type_checker(pa_type) diff --git a/sdk/python/tests/integration/scaffolding/test_repo_config.py b/sdk/python/tests/integration/scaffolding/test_repo_config.py index dfa80cb6186..3ec91c0044c 100644 --- a/sdk/python/tests/integration/scaffolding/test_repo_config.py +++ 
b/sdk/python/tests/integration/scaffolding/test_repo_config.py @@ -34,6 +34,49 @@ def _test_config(config_text, expect_error: Optional[str]): return rc +def test_nullable_online_store_aws(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: aws + online_store: null + """ + ), + expect_error="__root__ -> offline_store -> cluster_id\n" + " field required (type=value_error.missing)", + ) + + +def test_nullable_online_store_gcp(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: gcp + online_store: null + """ + ), + expect_error=None, + ) + + +def test_nullable_online_store_local(): + _test_config( + dedent( + """ + project: foo + registry: "registry.db" + provider: local + online_store: null + """ + ), + expect_error=None, + ) + + def test_local_config(): _test_config( dedent( diff --git a/sdk/python/tests/unit/diff/test_infra_diff.py b/sdk/python/tests/unit/diff/test_infra_diff.py new file mode 100644 index 00000000000..8e3d5b765f0 --- /dev/null +++ b/sdk/python/tests/unit/diff/test_infra_diff.py @@ -0,0 +1,154 @@ +from google.protobuf import wrappers_pb2 as wrappers + +from feast.diff.infra_diff import ( + diff_between, + diff_infra_protos, + tag_infra_proto_objects_for_keep_delete_add, +) +from feast.diff.property_diff import TransitionType +from feast.infra.online_stores.datastore import DatastoreTable +from feast.infra.online_stores.dynamodb import DynamoDBTable +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto + + +def test_tag_infra_proto_objects_for_keep_delete_add(): + to_delete = DynamoDBTable(name="to_delete", region="us-west-2").to_proto() + to_add = DynamoDBTable(name="to_add", region="us-west-2").to_proto() + unchanged_table = DynamoDBTable(name="unchanged", region="us-west-2").to_proto() + pre_changed = DynamoDBTable(name="table", region="us-west-2").to_proto() + post_changed = DynamoDBTable(name="table", region="us-east-2").to_proto() + + keep, delete, add = 
tag_infra_proto_objects_for_keep_delete_add( + [to_delete, unchanged_table, pre_changed], + [to_add, unchanged_table, post_changed], + ) + + assert len(list(keep)) == 2 + assert unchanged_table in keep + assert post_changed in keep + assert to_add not in keep + assert len(list(delete)) == 1 + assert to_delete in delete + assert unchanged_table not in delete + assert pre_changed not in delete + assert len(list(add)) == 1 + assert to_add in add + assert unchanged_table not in add + assert post_changed not in add + + +def test_diff_between_datastore_tables(): + pre_changed = DatastoreTable( + project="test", name="table", project_id="pre", namespace="pre" + ).to_proto() + post_changed = DatastoreTable( + project="test", name="table", project_id="post", namespace="post" + ).to_proto() + + infra_object_diff = diff_between(pre_changed, pre_changed, "datastore table") + infra_object_property_diffs = infra_object_diff.infra_object_property_diffs + assert len(infra_object_property_diffs) == 0 + + infra_object_diff = diff_between(pre_changed, post_changed, "datastore table") + infra_object_property_diffs = infra_object_diff.infra_object_property_diffs + assert len(infra_object_property_diffs) == 2 + + assert infra_object_property_diffs[0].property_name == "project_id" + assert infra_object_property_diffs[0].val_existing == wrappers.StringValue( + value="pre" + ) + assert infra_object_property_diffs[0].val_declared == wrappers.StringValue( + value="post" + ) + assert infra_object_property_diffs[1].property_name == "namespace" + assert infra_object_property_diffs[1].val_existing == wrappers.StringValue( + value="pre" + ) + assert infra_object_property_diffs[1].val_declared == wrappers.StringValue( + value="post" + ) + + +def test_diff_infra_protos(): + to_delete = DynamoDBTable(name="to_delete", region="us-west-2") + to_add = DynamoDBTable(name="to_add", region="us-west-2") + unchanged_table = DynamoDBTable(name="unchanged", region="us-west-2") + pre_changed = DatastoreTable( 
+ project="test", name="table", project_id="pre", namespace="pre" + ) + post_changed = DatastoreTable( + project="test", name="table", project_id="post", namespace="post" + ) + + infra_objects_before = [to_delete, unchanged_table, pre_changed] + infra_objects_after = [to_add, unchanged_table, post_changed] + + infra_proto_before = InfraProto() + infra_proto_before.infra_objects.extend( + [obj.to_infra_object_proto() for obj in infra_objects_before] + ) + + infra_proto_after = InfraProto() + infra_proto_after.infra_objects.extend( + [obj.to_infra_object_proto() for obj in infra_objects_after] + ) + + infra_diff = diff_infra_protos(infra_proto_before, infra_proto_after) + infra_object_diffs = infra_diff.infra_object_diffs + + # There should be one addition, one deletion, one unchanged, and one changed. + assert len(infra_object_diffs) == 4 + + additions = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.CREATE + ] + assert len(additions) == 1 + assert not additions[0].current_infra_object + assert additions[0].new_infra_object == to_add.to_proto() + assert len(additions[0].infra_object_property_diffs) == 0 + + deletions = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.DELETE + ] + assert len(deletions) == 1 + assert deletions[0].current_infra_object == to_delete.to_proto() + assert not deletions[0].new_infra_object + assert len(deletions[0].infra_object_property_diffs) == 0 + + unchanged = [ + infra_object_diff + for infra_object_diff in infra_object_diffs + if infra_object_diff.transition_type == TransitionType.UNCHANGED + ] + assert len(unchanged) == 1 + assert unchanged[0].current_infra_object == unchanged_table.to_proto() + assert unchanged[0].new_infra_object == unchanged_table.to_proto() + assert len(unchanged[0].infra_object_property_diffs) == 0 + + updates = [ + infra_object_diff + for infra_object_diff in 
infra_object_diffs + if infra_object_diff.transition_type == TransitionType.UPDATE + ] + assert len(updates) == 1 + assert updates[0].current_infra_object == pre_changed.to_proto() + assert updates[0].new_infra_object == post_changed.to_proto() + assert len(updates[0].infra_object_property_diffs) == 2 + assert updates[0].infra_object_property_diffs[0].property_name == "project_id" + assert updates[0].infra_object_property_diffs[ + 0 + ].val_existing == wrappers.StringValue(value="pre") + assert updates[0].infra_object_property_diffs[ + 0 + ].val_declared == wrappers.StringValue(value="post") + assert updates[0].infra_object_property_diffs[1].property_name == "namespace" + assert updates[0].infra_object_property_diffs[ + 1 + ].val_existing == wrappers.StringValue(value="pre") + assert updates[0].infra_object_property_diffs[ + 1 + ].val_declared == wrappers.StringValue(value="post") diff --git a/sdk/python/tests/unit/diff/test_fco_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py similarity index 56% rename from sdk/python/tests/unit/diff/test_fco_diff.py rename to sdk/python/tests/unit/diff/test_registry_diff.py index 802a6438c3c..0322ab47abf 100644 --- a/sdk/python/tests/unit/diff/test_fco_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -1,51 +1,58 @@ -from feast.diff.FcoDiff import diff_between, tag_proto_objects_for_keep_delete_add +from feast.diff.registry_diff import ( + diff_registry_objects, + tag_objects_for_keep_delete_update_add, +) from feast.feature_view import FeatureView from tests.utils.data_source_utils import prep_file_source -def test_tag_proto_objects_for_keep_delete_add(simple_dataset_1): +def test_tag_objects_for_keep_delete_update_add(simple_dataset_1): with prep_file_source( df=simple_dataset_1, event_timestamp_column="ts_1" ) as file_source: to_delete = FeatureView( name="to_delete", entities=["id"], batch_source=file_source, ttl=None, - ).to_proto() + ) unchanged_fv = FeatureView( name="fv1", entities=["id"], 
batch_source=file_source, ttl=None, - ).to_proto() + ) pre_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "before"}, - ).to_proto() + ) post_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "after"}, - ).to_proto() + ) to_add = FeatureView( name="to_add", entities=["id"], batch_source=file_source, ttl=None, - ).to_proto() + ) - keep, delete, add = tag_proto_objects_for_keep_delete_add( + keep, delete, update, add = tag_objects_for_keep_delete_update_add( [unchanged_fv, pre_changed, to_delete], [unchanged_fv, post_changed, to_add] ) assert len(list(keep)) == 2 assert unchanged_fv in keep - assert post_changed in keep - assert pre_changed not in keep + assert pre_changed in keep + assert post_changed not in keep assert len(list(delete)) == 1 assert to_delete in delete + assert len(list(update)) == 2 + assert unchanged_fv in update + assert post_changed in update + assert pre_changed not in update assert len(list(add)) == 1 assert to_add in add -def test_diff_between_feature_views(simple_dataset_1): +def test_diff_registry_objects_feature_views(simple_dataset_1): with prep_file_source( df=simple_dataset_1, event_timestamp_column="ts_1" ) as file_source: @@ -55,21 +62,29 @@ def test_diff_between_feature_views(simple_dataset_1): batch_source=file_source, ttl=None, tags={"when": "before"}, - ).to_proto() + ) post_changed = FeatureView( name="fv2", entities=["id"], batch_source=file_source, ttl=None, tags={"when": "after"}, - ).to_proto() + ) - fco_diffs = diff_between(pre_changed, pre_changed, "feature view") - assert len(fco_diffs.fco_property_diffs) == 0 + feast_object_diffs = diff_registry_objects( + pre_changed, pre_changed, "feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 0 - fco_diffs = diff_between(pre_changed, post_changed, "feature view") - assert len(fco_diffs.fco_property_diffs) == 1 + feast_object_diffs = 
diff_registry_objects( + pre_changed, post_changed, "feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 1 - assert fco_diffs.fco_property_diffs[0].property_name == "tags" - assert fco_diffs.fco_property_diffs[0].val_existing == {"when": "before"} - assert fco_diffs.fco_property_diffs[0].val_declared == {"when": "after"} + assert feast_object_diffs.feast_object_property_diffs[0].property_name == "tags" + assert feast_object_diffs.feast_object_property_diffs[0].val_existing == { + "when": "before" + } + assert feast_object_diffs.feast_object_property_diffs[0].val_declared == { + "when": "after" + } diff --git a/sdk/python/tests/unit/test_proto_json.py b/sdk/python/tests/unit/test_proto_json.py index 1b352ccb19f..6bfdbbbf91b 100644 --- a/sdk/python/tests/unit/test_proto_json.py +++ b/sdk/python/tests/unit/test_proto_json.py @@ -9,7 +9,7 @@ ) from feast.protos.feast.types.Value_pb2 import RepeatedValue -FieldValues = GetOnlineFeaturesResponse.FieldValues +FeatureVector = GetOnlineFeaturesResponse.FeatureVector @pytest.fixture(scope="module") @@ -17,70 +17,63 @@ def proto_json_patch(): proto_json.patch() -def test_feast_value(proto_json_patch): - # FieldValues contains "map fields" proto field. +def test_feature_vector_values(proto_json_patch): + # FeatureVector contains "repeated values" proto field. # We want to test that feast.types.Value can take different types in JSON # without using additional structure (e.g. 1 instead of {int64_val: 1}). 
- field_values_str = """{ - "fields": { - "a": 1, - "b": 2.0, - "c": true, - "d": "foo", - "e": [1, 2, 3], - "f": [2.0, 3.0, 4.0, null], - "g": [true, false, true], - "h": ["foo", "bar", "foobar"], - "i": null - } + feature_vector_str = """{ + "values": [ + 1, + 2.0, + true, + "foo", + [1, 2, 3], + [2.0, 3.0, 4.0, null], + [true, false, true], + ["foo", "bar", "foobar"] + ] }""" - field_values_proto = FieldValues() - Parse(field_values_str, field_values_proto) - assertpy.assert_that(field_values_proto.fields.keys()).is_equal_to( - {"a", "b", "c", "d", "e", "f", "g", "h", "i"} - ) - assertpy.assert_that(field_values_proto.fields["a"].int64_val).is_equal_to(1) - assertpy.assert_that(field_values_proto.fields["b"].double_val).is_equal_to(2.0) - assertpy.assert_that(field_values_proto.fields["c"].bool_val).is_equal_to(True) - assertpy.assert_that(field_values_proto.fields["d"].string_val).is_equal_to("foo") - assertpy.assert_that(field_values_proto.fields["e"].int64_list_val.val).is_equal_to( + feature_vector_proto = FeatureVector() + Parse(feature_vector_str, feature_vector_proto) + assertpy.assert_that(len(feature_vector_proto.values)).is_equal_to(8) + assertpy.assert_that(feature_vector_proto.values[0].int64_val).is_equal_to(1) + assertpy.assert_that(feature_vector_proto.values[1].double_val).is_equal_to(2.0) + assertpy.assert_that(feature_vector_proto.values[2].bool_val).is_equal_to(True) + assertpy.assert_that(feature_vector_proto.values[3].string_val).is_equal_to("foo") + assertpy.assert_that(feature_vector_proto.values[4].int64_list_val.val).is_equal_to( [1, 2, 3] ) # Can't directly check equality to [2.0, 3.0, 4.0, float("nan")], because float("nan") != float("nan") assertpy.assert_that( - field_values_proto.fields["f"].double_list_val.val[:3] + feature_vector_proto.values[5].double_list_val.val[:3] ).is_equal_to([2.0, 3.0, 4.0]) - assertpy.assert_that(field_values_proto.fields["f"].double_list_val.val[3]).is_nan() - 
assertpy.assert_that(field_values_proto.fields["g"].bool_list_val.val).is_equal_to( + assertpy.assert_that(feature_vector_proto.values[5].double_list_val.val[3]).is_nan() + assertpy.assert_that(feature_vector_proto.values[6].bool_list_val.val).is_equal_to( [True, False, True] ) assertpy.assert_that( - field_values_proto.fields["h"].string_list_val.val + feature_vector_proto.values[7].string_list_val.val ).is_equal_to(["foo", "bar", "foobar"]) - assertpy.assert_that(field_values_proto.fields["i"].null_val).is_equal_to(0) # Now convert protobuf back to json and check that - field_values_json = MessageToDict(field_values_proto) - assertpy.assert_that(field_values_json["fields"].keys()).is_equal_to( - {"a", "b", "c", "d", "e", "f", "g", "h", "i"} - ) - assertpy.assert_that(field_values_json["fields"]["a"]).is_equal_to(1) - assertpy.assert_that(field_values_json["fields"]["b"]).is_equal_to(2.0) - assertpy.assert_that(field_values_json["fields"]["c"]).is_equal_to(True) - assertpy.assert_that(field_values_json["fields"]["d"]).is_equal_to("foo") - assertpy.assert_that(field_values_json["fields"]["e"]).is_equal_to([1, 2, 3]) + feature_vector_json = MessageToDict(feature_vector_proto) + assertpy.assert_that(len(feature_vector_json["values"])).is_equal_to(8) + assertpy.assert_that(feature_vector_json["values"][0]).is_equal_to(1) + assertpy.assert_that(feature_vector_json["values"][1]).is_equal_to(2.0) + assertpy.assert_that(feature_vector_json["values"][2]).is_equal_to(True) + assertpy.assert_that(feature_vector_json["values"][3]).is_equal_to("foo") + assertpy.assert_that(feature_vector_json["values"][4]).is_equal_to([1, 2, 3]) # Can't directly check equality to [2.0, 3.0, 4.0, float("nan")], because float("nan") != float("nan") - assertpy.assert_that(field_values_json["fields"]["f"][:3]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][5][:3]).is_equal_to( [2.0, 3.0, 4.0] ) - assertpy.assert_that(field_values_json["fields"]["f"][3]).is_nan() - 
assertpy.assert_that(field_values_json["fields"]["g"]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][5][3]).is_nan() + assertpy.assert_that(feature_vector_json["values"][6]).is_equal_to( [True, False, True] ) - assertpy.assert_that(field_values_json["fields"]["h"]).is_equal_to( + assertpy.assert_that(feature_vector_json["values"][7]).is_equal_to( ["foo", "bar", "foobar"] ) - assertpy.assert_that(field_values_json["fields"]["i"]).is_equal_to(None) def test_feast_repeated_value(proto_json_patch): diff --git a/sdk/python/tests/unit/test_unit_feature_store.py b/sdk/python/tests/unit/test_unit_feature_store.py new file mode 100644 index 00000000000..6f9dd6acb08 --- /dev/null +++ b/sdk/python/tests/unit/test_unit_feature_store.py @@ -0,0 +1,50 @@ +from dataclasses import dataclass +from typing import Dict, List + +from feast import FeatureStore +from feast.protos.feast.types.Value_pb2 import Value + + +@dataclass +class MockFeatureViewProjection: + join_key_map: Dict[str, str] + + +@dataclass +class MockFeatureView: + name: str + entities: List[str] + projection: MockFeatureViewProjection + + +def test__get_unique_entities(): + entity_values = { + "entity_1": [Value(int64_val=1), Value(int64_val=2), Value(int64_val=1)], + "entity_2": [ + Value(string_val="1"), + Value(string_val="2"), + Value(string_val="1"), + ], + "entity_3": [Value(int64_val=8), Value(int64_val=9), Value(int64_val=10)], + } + + entity_name_to_join_key_map = {"entity_1": "entity_1", "entity_2": "entity_2"} + + fv = MockFeatureView( + name="fv_1", + entities=["entity_1", "entity_2"], + projection=MockFeatureViewProjection(join_key_map={}), + ) + + unique_entities, indexes = FeatureStore._get_unique_entities( + FeatureStore, + table=fv, + join_key_values=entity_values, + entity_name_to_join_key_map=entity_name_to_join_key_map, + ) + + assert unique_entities == ( + {"entity_1": Value(int64_val=1), "entity_2": Value(string_val="1")}, + {"entity_1": Value(int64_val=2), "entity_2": 
Value(string_val="2")}, + ) + assert indexes == ([0, 2], [1]) diff --git a/sdk/python/tests/utils/data_source_utils.py b/sdk/python/tests/utils/data_source_utils.py index 6e3d77ead0b..12870186bfc 100644 --- a/sdk/python/tests/utils/data_source_utils.py +++ b/sdk/python/tests/utils/data_source_utils.py @@ -2,6 +2,7 @@ import random import tempfile import time +from typing import Iterator from google.cloud import bigquery @@ -10,7 +11,7 @@ @contextlib.contextmanager -def prep_file_source(df, event_timestamp_column=None) -> FileSource: +def prep_file_source(df, event_timestamp_column=None) -> Iterator[FileSource]: with tempfile.NamedTemporaryFile(suffix=".parquet") as f: f.close() df.to_parquet(f.name)