diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 0e99fe8ba6..9cf635a892 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -204,7 +204,7 @@ jobs: env: COMPILE_GO: "True" run: | - pip install 'grpcio-tools==1.48.0' 'pybindgen==0.22.0' + pip install 'grpcio-tools==1.47.0' 'pybindgen==0.22.0' go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26.0 go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1.0 pip install dist/*tar.gz diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 184fdb3cb6..9eb561263c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -7,6 +7,7 @@ on: jobs: get-version: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest outputs: release_version: ${{ steps.get_release_version.outputs.release_version }} @@ -100,6 +101,7 @@ jobs: fi publish-helm-charts: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest needs: get-version env: @@ -129,6 +131,7 @@ jobs: uses: ./.github/workflows/build_wheels.yml publish-python-sdk: + if: github.repository == 'feast-dev/feast' runs-on: ubuntu-latest needs: [build_wheels] steps: @@ -142,6 +145,7 @@ jobs: password: ${{ secrets.PYPI_PASSWORD }} publish-java-sdk: + if: github.repository == 'feast-dev/feast' container: maven:3.6-jdk-11 runs-on: ubuntu-latest needs: get-version @@ -177,23 +181,3 @@ jobs: mkdir -p /root/.m2/ echo -n "$MAVEN_SETTINGS" > /root/.m2/settings.xml infra/scripts/publish-java-sdk.sh --revision ${VERSION_WITHOUT_PREFIX} --gpg-key-import-dir /root - - publish-web-ui-npm: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - with: - node-version: '17.x' - registry-url: 'https://registry.npmjs.org' - - name: Install yarn dependencies - working-directory: ./ui - run: yarn install - - name: Build yarn rollup - working-directory: ./ui - run: yarn build:lib - - name: Publish UI 
package - working-directory: ./ui - run: npm publish - env: - NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2f4d15590a..9fcbc1e052 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,6 +15,63 @@ on: type: string jobs: + + get_dry_release_versions: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ github.event.inputs.token }} + outputs: + current_version: ${{ steps.get_versions.outputs.current_version }} + next_version: ${{ steps.get_versions.outputs.next_version }} + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + persist-credentials: false + - name: Setup Node.js + uses: actions/setup-node@v2 + with: + node-version: '16' + - name: Release (Dry Run) + id: get_versions + run: | + CURRENT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep "associated with version " | sed -E 's/.* version//' | sed -E 's/ on.*//') + NEXT_VERSION=$(npx -p @semantic-release/changelog -p @semantic-release/git -p @semantic-release/exec -p semantic-release semantic-release --dry-run | grep 'The next release version is' | sed -E 's/.* ([[:digit:].]+)$/\1/') + echo ::set-output name=current_version::$CURRENT_VERSION + echo ::set-output name=next_version::$NEXT_VERSION + echo "Current version is ${CURRENT_VERSION}" + echo "Next version is ${NEXT_VERSION}" + + publish-web-ui-npm: + if: github.repository == 'feast-dev/feast' + needs: get_dry_release_versions + runs-on: ubuntu-latest + env: + # This publish is working using an NPM automation token to bypass 2FA + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + CURRENT_VERSION: ${{ needs.get_dry_release_versions.outputs.current_version }} + NEXT_VERSION: ${{ needs.get_dry_release_versions.outputs.next_version }} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v2 + with: + node-version: '17.x' + registry-url: 
'https://registry.npmjs.org' + - name: Bump file versions (temporarily for Web UI publish) + run: python ./infra/scripts/release/bump_file_versions.py ${CURRENT_VERSION} ${NEXT_VERSION} + - name: Install yarn dependencies + working-directory: ./ui + run: yarn install + - name: Build yarn rollup + working-directory: ./ui + run: yarn build:lib + - name: Publish UI package + working-directory: ./ui + run: npm publish + env: + # This publish is working using an NPM automation token to bypass 2FA + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + release: name: release runs-on: ubuntu-latest diff --git a/.releaserc.js b/.releaserc.js index 2acf9b7350..bb8b9d4b75 100644 --- a/.releaserc.js +++ b/.releaserc.js @@ -16,7 +16,7 @@ if (current_branch !== 'master') { // We have to dynamically generate all the supported branches for Feast because we use the `vA.B-branch` pattern for // maintenance branches -possible_branches = [{name: "master"}, {name: current_branch}] +const possible_branches = [{name: "master"}, {name: current_branch, range: '0.22.x'}, {name: "v0.23-branch", range: '0.23.x'},]; // Below is the configuration for semantic release module.exports = { @@ -28,18 +28,26 @@ module.exports = { "releaseRules": [ {breaking: true, release: 'minor'}, {tag: 'Breaking', release: 'minor'}, - ] + {type: '*!', release: 'minor'}, + ], + // Ensure that the "BREAKING CHANGE" notes in commit footers are parsed + "parserOpts": { + "noteKeywords": ["BREAKING CHANGE", "BREAKING CHANGES"] + } }], ["@semantic-release/exec", { // Validate the type of release we are doing "verifyReleaseCmd": "./infra/scripts/validate-release.sh ${nextRelease.type} " + current_branch, - // Bump all version files - "prepareCmd": "python ./infra/scripts/release/bump_file_versions.py ${lastRelease.version} ${nextRelease.version}" + // Bump all version files and build UI / update yarn.lock + "prepareCmd": "python ./infra/scripts/release/bump_file_versions.py ${lastRelease.version} ${nextRelease.version}; make build-ui" 
}], - "@semantic-release/release-notes-generator", + ["@semantic-release/release-notes-generator", { + // Ensure that a "Breaking Changes" section is added to the release notes + "preset": "angular" + }], // Update the changelog [ @@ -58,7 +66,8 @@ module.exports = { "CHANGELOG.md", "java/pom.xml", "infra/charts/**/*.*", - "ui/package.json" + "ui/package.json", + "sdk/python/feast/ui/yarn.lock" ], message: "chore(release): release ${nextRelease.version}\n\n${nextRelease.notes}" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 9307e4ccf3..0a61e944b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [0.22.3](https://github.com/feast-dev/feast/compare/v0.22.2...v0.22.3) (2022-08-10) + + +### Bug Fixes + +* Check if on_demand_feature_views is an empty list rather than None for snowflake provider ([#3046](https://github.com/feast-dev/feast/issues/3046)) ([67af727](https://github.com/feast-dev/feast/commit/67af727da3380634e1e9940ee5f9cb8720df1ab5)) +* Fixing Web UI, which fails for the SQL registry ([#3028](https://github.com/feast-dev/feast/issues/3028)) ([56d645c](https://github.com/feast-dev/feast/commit/56d645ca8b792a49ee87ad2b4f4c54d0bb1fdf8b)) +* More explicit error messages ([#2708](https://github.com/feast-dev/feast/issues/2708)) ([93c1c15](https://github.com/feast-dev/feast/commit/93c1c1517e7ebeaf6ded8487934eba465d7eb9d5)) +* Move gcp back to 1.47.0 since grpcio-tools 1.48.0 got yanked from pypi ([#2990](https://github.com/feast-dev/feast/issues/2990)) ([f7e44da](https://github.com/feast-dev/feast/commit/f7e44da49f92ccab0bd9c016b30de7dc28ffa796)) +* Return an empty infra object from sql registry when it doesn't exist ([#3022](https://github.com/feast-dev/feast/issues/3022)) ([329bc47](https://github.com/feast-dev/feast/commit/329bc475fb0d81a2afdce38eed8342d1446e37ed)) + ## [0.22.2](https://github.com/feast-dev/feast/compare/v0.22.1...v0.22.2) (2022-07-29) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9c25a835bd..3597ef6e66 
100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -198,6 +198,8 @@ The services with containerized replacements currently implemented are: You can run `make test-python-integration-container` to run tests against the containerized versions of dependencies. +## Feast UI +See [Feast contributing guide](ui/CONTRIBUTING.md) ## Feast Java Serving See [Java contributing guide](java/CONTRIBUTING.md) diff --git a/Makefile b/Makefile index 288da43fcd..915ac907f7 100644 --- a/Makefile +++ b/Makefile @@ -196,7 +196,7 @@ install-go-ci-dependencies: python -m pip install pybindgen==0.22.0 protobuf==3.20.1 install-protoc-dependencies: - pip install grpcio-tools==1.48.0 mypy-protobuf==3.1.0 + pip install grpcio-tools==1.47.0 mypy-protobuf==3.1.0 compile-protos-go: install-go-proto-dependencies install-protoc-dependencies python setup.py build_go_protos diff --git a/README.md b/README.md index 84de59f65e..3b11d0efa2 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,14 @@ ## Overview -Feast is an open source feature store for machine learning. Feast is the fastest path to productionizing analytic data for model training and online inference. +Feast (**Fea**ture **St**ore) is an open source feature store for machine learning. Feast is the fastest path to manage existing infrastructure to productionize analytic data for model training and online inference. + + +Feast allows ML platform teams to: + +* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (for serving pre-computed features online). +* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensure that future feature values do not leak to models during training. 
+* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another. Please see our [documentation](https://docs.feast.dev/) for more information about the project. @@ -163,6 +170,7 @@ The list below contains the functionality that contributors are planning to deve * [x] [In-memory / Pandas](https://docs.feast.dev/reference/offline-stores/file) * [x] [Custom offline store support](https://docs.feast.dev/how-to-guides/adding-a-new-offline-store) * **Online Stores** + * [x] [Snowflake](https://docs.feast.dev/reference/online-stores/snowflake) * [x] [DynamoDB](https://docs.feast.dev/reference/online-stores/dynamodb) * [x] [Redis](https://docs.feast.dev/reference/online-stores/redis) * [x] [Datastore](https://docs.feast.dev/reference/online-stores/datastore) @@ -198,6 +206,7 @@ The list below contains the functionality that contributors are planning to deve * [x] DataHub integration (see [DataHub Feast docs](https://datahubproject.io/docs/generated/ingestion/sources/feast/)) * [x] Feast Web UI (Alpha release. See [documentation](https://docs.feast.dev/reference/alpha-web-ui)) + ## πŸŽ“ Important Resources Please refer to the official documentation at [Documentation](https://docs.feast.dev/) diff --git a/docs/README.md b/docs/README.md index f8b9af3c32..1b70f8fedc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,43 +2,59 @@ ## What is Feast? -Feast (**Fea**ture **St**ore) is an operational data system for managing and serving machine learning features to models in production. Feast is able to serve feature data to models from a low-latency online store (for real-time prediction) or from an offline store (for scale-out batch scoring or model training). 
+Feast (**Fea**ture **St**ore) is a customizable operational data system that re-uses existing infrastructure to manage and serve machine learning features to realtime models. -![](assets/feast-marchitecture.png) - -## Problems Feast Solves +Feast allows ML platform teams to: -**Models need consistent access to data:** Machine Learning (ML) systems built on traditional data infrastructure are often coupled to databases, object stores, streams, and files. A result of this coupling, however, is that any change in data infrastructure may break dependent ML systems. Another challenge is that dual implementations of data retrieval for training and serving can lead to inconsistencies in data, which in turn can lead to training-serving skew. +* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction)_,_ and a battle-tested _feature server_ (for serving pre-computed features online). +* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensure that future feature values do not leak to models during training. +* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another. -Feast decouples your models from your data infrastructure by providing a single data access layer that abstracts feature storage from feature retrieval. Feast also provides a consistent means of referencing feature data for retrieval, and therefore ensures that models remain portable when moving from training to serving. 
+{% hint style="info" %} +**Note:** Feast today primarily addresses _timestamped structured data_. +{% endhint %} -**Deploying new features into production is difficult:** Many ML teams consist of members with different objectives. Data scientists, for example, aim to deploy features into production as soon as possible, while engineers want to ensure that production systems remain stable. These differing objectives can create an organizational friction that slows time-to-market for new features. +![](assets/feast-marchitecture.png) -Feast addresses this friction by providing both a centralized registry to which data scientists can publish features and a battle-hardened serving layer. Together, these enable non-engineering teams to ship features into production with minimal oversight. +## Who is Feast for? -**Models need point-in-time correct data:** ML models in production require a view of data consistent with the one on which they are trained, otherwise the accuracy of these models could be compromised. Despite this need, many data science projects suffer from inconsistencies introduced by future feature values being leaked to models during training. +Feast helps ML platform teams with DevOps experience productionize real-time models. Feast can also help these teams build towards a feature platform that improves collaboration between engineers and data scientists. -Feast solves the challenge of data leakage by providing point-in-time correct feature retrieval when exporting feature datasets for model training. + -**Features aren't reused across projects:** Different teams within an organization are often unable to reuse features across projects. The siloed nature of development and the monolithic design of end-to-end ML systems contribute to duplication of feature creation and usage across teams and projects. +Feast is likely **not** the right tool if you -Feast addresses this problem by introducing feature reuse through a centralized registry. 
This registry enables multiple teams working on different projects not only to contribute features, but also to reuse these same features. With Feast, data scientists can start new ML projects by selecting previously engineered features from a centralized registry, and are no longer required to develop new features for each project. +* are in an organization that’s just getting started with ML and is not yet sure what the business impact of ML is +* rely primarily on unstructured data +* need very low latency feature retrieval (e.g. p99 feature retrieval << 10ms) +* have a small team to support a large number of use cases -## Problems Feast does not yet solve +## What Feast is not? -**Feature engineering:** We aim for Feast to support light-weight feature engineering as part of our API. +### Feast is not -**Feature discovery:** We also aim for Feast to include a first-class user interface for exploring and discovering entities and features. +* **an** [**ETL**](https://en.wikipedia.org/wiki/Extract,\_transform,\_load) / [**ELT**](https://en.wikipedia.org/wiki/Extract,\_load,\_transform) **system:** Feast is not (and does not plan to become) a general purpose data transformation or pipelining system. Users often leverage tools like [dbt](https://www.getdbt.com) to manage upstream data transformations. +* **a data orchestration tool:** Feast does not manage or orchestrate complex workflow DAGs. It relies on upstream data pipelines to produce feature values and integrations with tools like [Airflow](https://airflow.apache.org) to make features consistently available. +* **a data warehouse:** Feast is not a replacement for your data warehouse or the source of truth for all transformed data in your organization. Rather, Feast is a light-weight downstream layer that can serve data from an existing data warehouse (or other data sources) to models in production. +* **a database:** Feast is not a database, but helps manage data stored in other systems (e.g. 
BigQuery, Snowflake, DynamoDB, Redis) to make features consistently available at training / serving time -**Feature validation:** We additionally aim for Feast to improve support for statistics generation of feature data and subsequent validation of these statistics. Current support is limited. +### Feast does not _fully_ solve -## What Feast is not +* **reproducible model training / model backtesting / experiment management**: Feast captures feature and model metadata, but does not version-control datasets / labels or manage train / test splits. Other tools like [DVC](https://dvc.org/), [MLflow](https://www.mlflow.org/), and [Kubeflow](https://www.kubeflow.org/) are better suited for this. +* **batch + streaming feature engineering**: Feast primarily processes already transformed feature values (though it offers experimental light-weight transformations). Users usually integrate Feast with upstream systems (e.g. existing ETL/ELT pipelines). [Tecton](http://tecton.ai/) is a more fully featured feature platform which addresses these needs. +* **native streaming feature integration:** Feast enables users to push streaming features, but does not pull from streaming sources or manage streaming pipelines. [Tecton](http://tecton.ai/) is a more fully featured feature platform which orchestrates end to end streaming pipelines. +* **feature sharing**: Feast has experimental functionality to enable discovery and cataloguing of feature metadata with a [Feast web UI (alpha)](https://docs.feast.dev/reference/alpha-web-ui). Feast also has community contributed plugins with [DataHub](https://datahubproject.io/docs/generated/ingestion/sources/feast/) and [Amundsen](https://github.com/amundsen-io/amundsen/blob/4a9d60176767c4d68d1cad5b093320ea22e26a49/databuilder/databuilder/extractor/feast\_extractor.py). [Tecton](http://tecton.ai/) also more robustly addresses these needs. 
+* **lineage:** Feast helps tie feature values to model versions, but is not a complete solution for capturing end-to-end lineage from raw data sources to model versions. Feast also has community contributed plugins with [DataHub](https://datahubproject.io/docs/generated/ingestion/sources/feast/) and [Amundsen](https://github.com/amundsen-io/amundsen/blob/4a9d60176767c4d68d1cad5b093320ea22e26a49/databuilder/databuilder/extractor/feast\_extractor.py). [Tecton](http://tecton.ai/) captures more end-to-end lineage by also managing feature transformations. +* **data quality / drift detection**: Feast has experimental integrations with [Great Expectations](https://greatexpectations.io/), but is not purpose built to solve data drift / data quality issues. This requires more sophisticated monitoring across data pipelines, served feature values, labels, and model versions. -[**ETL**](https://en.wikipedia.org/wiki/Extract,\_transform,\_load) **or** [**ELT**](https://en.wikipedia.org/wiki/Extract,\_load,\_transform) **system:** Feast is not (and does not plan to become) a general purpose data transformation or pipelining system. Feast plans to include a light-weight feature engineering toolkit, but we encourage teams to integrate Feast with upstream ETL/ELT systems that are specialized in transformation. +## Example use cases -**Data warehouse:** Feast is not a replacement for your data warehouse or the source of truth for all transformed data in your organization. Rather, Feast is a light-weight downstream layer that can serve data from an existing data warehouse (or other data sources) to models in production. +Many companies have used Feast to power real-world ML use cases such as: -**Data catalog:** Feast is not a general purpose data catalog for your organization. Feast is purely focused on cataloging features for use in ML pipelines or systems, and only to the extent of facilitating the reuse of features. 
+* Personalizing online recommendations by leveraging pre-computed historical user or item features. +* Online fraud detection, using features that compare against (pre-computed) historical transaction patterns +* Churn prediction (an offline model), generating feature values for all users at a fixed cadence in batch +* Credit scoring, using pre-computed historical features to compute probability of default ## How can I get started? diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 972ffa13a9..7bbcb78732 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -82,7 +82,7 @@ online_store: from datetime import timedelta -from feast import Entity, FeatureView, Field, FileSource, ValueType +from feast import Entity, FeatureService, FeatureView, Field, FileSource, ValueType from feast.types import Float32, Int64 # Read data from parquet files. Parquet is convenient for local development mode. For diff --git a/docs/how-to-guides/adding-or-reusing-tests.md b/docs/how-to-guides/adding-or-reusing-tests.md index 86c116442f..28d0cbf781 100644 --- a/docs/how-to-guides/adding-or-reusing-tests.md +++ b/docs/how-to-guides/adding-or-reusing-tests.md @@ -14,103 +14,198 @@ Let's inspect the test setup in `sdk/python/tests/integration`: ```bash $ tree - . 
β”œβ”€β”€ e2e -β”‚ └── test_universal_e2e.py +β”‚ β”œβ”€β”€ test_go_feature_server.py +β”‚ β”œβ”€β”€ test_python_feature_server.py +β”‚ β”œβ”€β”€ test_universal_e2e.py +β”‚ β”œβ”€β”€ test_usage_e2e.py +β”‚ └── test_validation.py β”œβ”€β”€ feature_repos +β”‚ β”œβ”€β”€ integration_test_repo_config.py β”‚ β”œβ”€β”€ repo_configuration.py β”‚ └── universal +β”‚ β”œβ”€β”€ catalog β”‚ β”œβ”€β”€ data_source_creator.py β”‚ β”œβ”€β”€ data_sources +β”‚ β”‚ β”œβ”€β”€ __init__.py β”‚ β”‚ β”œβ”€β”€ bigquery.py β”‚ β”‚ β”œβ”€β”€ file.py -β”‚ β”‚ └── redshift.py +β”‚ β”‚ β”œβ”€β”€ redshift.py +β”‚ β”‚ └── snowflake.py β”‚ β”œβ”€β”€ entities.py -β”‚ └── feature_views.py +β”‚ β”œβ”€β”€ feature_views.py +β”‚ β”œβ”€β”€ online_store +β”‚ β”‚ β”œβ”€β”€ __init__.py +β”‚ β”‚ β”œβ”€β”€ datastore.py +β”‚ β”‚ β”œβ”€β”€ dynamodb.py +β”‚ β”‚ β”œβ”€β”€ hbase.py +β”‚ β”‚ └── redis.py +β”‚ └── online_store_creator.py +β”œβ”€β”€ materialization +β”‚ └── test_lambda.py β”œβ”€β”€ offline_store +β”‚ β”œβ”€β”€ test_feature_logging.py +β”‚ β”œβ”€β”€ test_offline_write.py +β”‚ β”œβ”€β”€ test_push_features_to_offline_store.py β”‚ β”œβ”€β”€ test_s3_custom_endpoint.py β”‚ └── test_universal_historical_retrieval.py β”œβ”€β”€ online_store -β”‚ β”œβ”€β”€ test_e2e_local.py -β”‚ β”œβ”€β”€ test_feature_service_read.py β”‚ β”œβ”€β”€ test_online_retrieval.py +β”‚ β”œβ”€β”€ test_push_features_to_online_store.py β”‚ └── test_universal_online.py -β”œβ”€β”€ registration -β”‚ β”œβ”€β”€ test_cli.py -β”‚ β”œβ”€β”€ test_cli_apply_duplicated_featureview_names.py -β”‚ β”œβ”€β”€ test_cli_chdir.py -β”‚ β”œβ”€β”€ test_feature_service_apply.py -β”‚ β”œβ”€β”€ test_feature_store.py -β”‚ β”œβ”€β”€ test_inference.py -β”‚ β”œβ”€β”€ test_registry.py -β”‚ β”œβ”€β”€ test_universal_odfv_feature_inference.py -β”‚ └── test_universal_types.py -└── scaffolding - β”œβ”€β”€ test_init.py - β”œβ”€β”€ test_partial_apply.py - β”œβ”€β”€ test_repo_config.py - └── test_repo_operations.py - -8 directories, 27 files -``` +└── registration + β”œβ”€β”€ 
test_feature_store.py + β”œβ”€β”€ test_inference.py + β”œβ”€β”€ test_registry.py + β”œβ”€β”€ test_sql_registry.py + β”œβ”€β”€ test_universal_cli.py + β”œβ”€β”€ test_universal_odfv_feature_inference.py + └── test_universal_types.py -`feature_repos` has setup files for most tests in the test suite and pytest fixtures for other tests. These fixtures parametrize on different offline stores, online stores, etc. and thus abstract away store specific implementations so tests don't need to rewrite e.g. uploading dataframes to a specific store for setup. +``` -## Understanding an example test +* `feature_repos` has setup files for most tests in the test suite. +* `conftest.py` and some of the individual test files contain fixtures which can be used to on different offline stores, online stores, etc. and thus abstract away store specific implementations so we don't need to rewrite the same test implementation for different stores. + +## Structure of the test suite + +### What is the universal test suite? + +The universal test suite verifies that crucial Feast functions (e.g `get_historical_features`, `get_online_features` etc.) have the correct behavior for each of the different environments that Feast could be used in. These environments are combinations of an offline store, online store, and provider and the universal test suite serves to run basic functional verification against all of these different permutations. + +We use pytest [fixtures](https://docs.pytest.org/en/6.2.x/fixture.html) to accomplish this without writing excess code. + +Tests in Feast are split into integration and unit tests. + +### Is it an integration or unit test? + +* Integration tests test non local Feast behavior. Integration tests mainly involve testing of Feast components that connect to services outside of Feast(e.g connecting to gcp or aws clients). 
+ * Generally if the test requires the initialization of a feature store in an external environment in order to test (i.e using our universal test fixtures), it is probably an integration test. +* Unit tests, on the other hand, unit tests primarily test local and class level behavior that does not require spinning up an external service. If your test can be run locally without using any other services besides pytest, it is a unit test. + +### Main types of tests + +#### Integration tests + +1. E2E tests + * E2E tests test end-to-end functionality of Feast over the various codepaths (initialize a feature store, apply, and materialize). + * The main codepaths include: + * basic e2e tests for offline stores + * `test_universal_e2e.py` + * go feature server + * `test_go_feature_server.py` + * python http server + * `test_python_feature_server.py` + * usage tracking + * `test_usage_e2e.py` + * data quality monitoring feature validation + * `test_validation.py` +2. Offline and Online Store Tests + * Offline and online store tests mainly test for the offline and online retrieval functionality. + * The various specific functionalities that are tested include: + * push API tests + * `test_push_features_to_offline_store.py` + * `test_push_features_to_online_store.py` + * `test_offline_write.py` + * historical retrieval tests + * `test_universal_historical_retrieval.py` + * online retrieval tests + * `test_universal_online.py` + * data quality monitoring feature logging tests + * `test_feature_logging.py` + * online store tests + * `test_universal_online.py` +3. Registration Tests + * The registration folder contains all of the registry tests and some universal cli tests. This includes: + * CLI Apply and Materialize tests tested against on the universal test suite + * Data type inference tests + * Registry tests +4. Miscellaneous Tests + * AWS Lambda Materialization Tests (Currently do not work) + * `test_lambda.py` + +#### Unit tests + +1. 
Registry Diff Tests + * These are tests for the infrastructure and registry diff functionality that Feast uses to determine if changes to the registry or infrastructure is needed. +2. Local CLI Tests and Local Feast Tests + * These tests test all of the cli commands against the local file offline store. +3. Infrastructure Unit Tests + * DynamoDB tests with dynamo mocked out + * Repository configuration tests + * Schema inference unit tests + * Key serialization tests + * Basic provider unit tests +4. Feature Store Validation Tests + * These test mainly contain class level validation like hashing tests, protobuf and class serialization, and error and warning handling. + * Data source unit tests + * Feature service unit tests + * Feature service, feature view, and feature validation tests + * Protobuf/json tests for Feast ValueTypes + * Serialization tests + * Type mapping + * Feast types + * Serialization tests due to this [issue](https://github.com/feast-dev/feast/issues/2345) + * Feast usage tracking unit tests + +#### Docstring tests + +Docstring tests are primarily smoke tests to make sure imports and setup functions can be executed without errors. 
+ +## Understanding the test suite with an example test + +### Example test Let's look at a sample test using the universal repo: {% tabs %} -{% tab title="Python" %} +{% tab code="sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py" %} ```python @pytest.mark.integration -@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) +@pytest.mark.universal_offline_stores +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}") def test_historical_features(environment, universal_data_sources, full_feature_names): store = environment.feature_store (entities, datasets, data_sources) = universal_data_sources - feature_views = construct_universal_feature_views(data_sources) - customer_df, driver_df, orders_df, global_df, entity_df = ( - datasets["customer"], - datasets["driver"], - datasets["orders"], - datasets["global"], - datasets["entity"], - ) - # ... more test code + feature_views = construct_universal_feature_views(data_sources) - customer_fv, driver_fv, driver_odfv, order_fv, global_fv = ( - feature_views["customer"], - feature_views["driver"], - feature_views["driver_odfv"], - feature_views["order"], - feature_views["global"], - ) + entity_df_with_request_data = datasets.entity_df.copy(deep=True) + entity_df_with_request_data["val_to_add"] = [ + i for i in range(len(entity_df_with_request_data)) + ] + entity_df_with_request_data["driver_age"] = [ + i + 100 for i in range(len(entity_df_with_request_data)) + ] feature_service = FeatureService( - "convrate_plus100", + name="convrate_plus100", + features=[feature_views.driver[["conv_rate"]], feature_views.driver_odfv], + ) + feature_service_entity_mapping = FeatureService( + name="entity_mapping", features=[ - feature_views["driver"][["conv_rate"]], - feature_views["driver_odfv"] + feature_views.location.with_name("origin").with_join_key_map( + {"location_id": "origin_id"} + ), + 
feature_views.location.with_name("destination").with_join_key_map( + {"location_id": "destination_id"} + ), ], ) - feast_objects = [] - feast_objects.extend( + store.apply( [ - customer_fv, - driver_fv, - driver_odfv, - order_fv, - global_fv, driver(), customer(), + location(), feature_service, + feature_service_entity_mapping, + *feature_views.values(), ] ) - store.apply(feast_objects) # ... more test code job_from_df = store.get_historical_features( @@ -122,48 +217,85 @@ def test_historical_features(environment, universal_data_sources, full_feature_n "customer_profile:avg_passenger_count", "customer_profile:lifetime_trip_count", "conv_rate_plus_100:conv_rate_plus_100", + "conv_rate_plus_100:conv_rate_plus_100_rounded", "conv_rate_plus_100:conv_rate_plus_val_to_add", "order:order_is_success", "global_stats:num_rides", "global_stats:avg_ride_length", + "field_mapping:feature_name", ], full_feature_names=full_feature_names, ) + + if job_from_df.supports_remote_storage_export(): + files = job_from_df.to_remote_storage() + print(files) + assert len(files) > 0 # This test should be way more detailed + + start_time = datetime.utcnow() actual_df_from_df_entities = job_from_df.to_df() # ... more test code - assert_frame_equal( - expected_df, actual_df_from_df_entities, check_dtype=False, + validate_dataframes( + expected_df, + table_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], ) # ... more test code ``` {% endtab %} {% endtabs %} -The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories. This by default pulls in a standard dataset with driver and customer entities, certain feature views, and feature values. By including the environment as a parameter, the test automatically parametrizes across other offline / online store combinations. 
+* The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories and the `conftest.py` file. This by default pulls in a standard dataset with driver and customer entities (that we have pre-defined), certain feature views, and feature values. + * The `environment` fixture sets up a feature store, parametrized by the provider and the online/offline store. It allows the test to query against that feature store without needing to worry about the underlying implementation or any setup that may be involved in creating instances of these datastores. + * Each fixture creates a different integration test with its own `IntegrationTestRepoConfig` which is used by pytest to generate a unique test testing one of the different environments that require testing. + +* Feast tests also use a variety of markers: + * The `@pytest.mark.integration` marker is used to designate integration tests which will cause the test to be run when you call `make test-python-integration`. + * The `@pytest.mark.universal_offline_stores` marker will parametrize the test on all of the universal offline stores including file, redshift, bigquery and snowflake. + * The `full_feature_names` parametrization defines whether or not the test should reference features as their full feature name (fully qualified path) or just the feature name itself. + ## Writing a new test or reusing existing tests ### To add a new test to an existing test file -* Use the same function signatures as an existing test (e.g. use `environment` as an argument) to include the relevant test fixtures. -* If possible, expand an individual test instead of writing a new test, due to the cost of standing up offline / online stores. +* Use the same function signatures as an existing test (e.g. use `environment` and `universal_data_sources` as an argument) to include the relevant test fixtures. 
+* If possible, expand an individual test instead of writing a new test, due to the cost of starting up offline / online stores.
+* Use the `universal_offline_stores` and `universal_online_stores` markers to parametrize the test against different offline store and online store combinations. You can also designate specific online and offline stores to test by using the `only` parameter on the marker.
+```python
+@pytest.mark.universal_online_stores(only=["redis"])
+```

### To test a new offline / online store from a plugin repo

* Install Feast in editable mode with `pip install -e`.
* The core tests for offline / online store behavior are parametrized by the `FULL_REPO_CONFIGS` variable defined in `feature_repos/repo_configuration.py`. To overwrite this variable without modifying the Feast repo, create your own file that contains a `FULL_REPO_CONFIGS` (which will require adding a new `IntegrationTestRepoConfig` or two) and set the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file. Then the core offline / online store tests can be run with `make test-python-universal`.
* See the [custom offline store demo](https://github.com/feast-dev/feast-custom-offline-store-demo) and the [custom online store demo](https://github.com/feast-dev/feast-custom-online-store-demo) for examples.

+### What are some important things to keep in mind when adding a new offline / online store?
+
+#### Type mapping/Inference
+
+Many problems arise when implementing your data store's type conversion to interface with Feast datatypes.
+1. You will need to correctly update `inference.py` so that Feast can infer your datasource schemas
+2. You also need to update `type_map.py` so that Feast knows how to convert your datastore's types to Feast-recognized types in `feast/types.py`.
+
+#### Historical and online retrieval
+
+The most important functionality in Feast is historical and online retrieval. Most of the e2e and universal integration tests test this functionality in some way.
Making sure this functionality works also indirectly asserts that reading and writing from your datastore works as intended. + + ### To include a new offline / online store in the main Feast repo * Extend `data_source_creator.py` for your offline store. -* In `repo_configuration.py` add a new`IntegrationTestRepoConfig` or two (depending on how many online stores you want to test). +* In `repo_configuration.py` add a new `IntegrationTestRepoConfig` or two (depending on how many online stores you want to test). + * Generally, you should only need to test against sqlite. However, if you need to test against a production online store, then you can also test against Redis or dynamodb. * Run the full test suite with `make test-python-integration.` ### Including a new offline / online store in the main Feast repo from external plugins with community maintainers. -* This folder is for plugins that are officially maintained with community owners. Place the APIs in feast/infra/offline_stores/contrib/. +* This folder is for plugins that are officially maintained with community owners. Place the APIs in `feast/infra/offline_stores/contrib/`. * Extend `data_source_creator.py` for your offline store and implement the required APIs. * In `contrib_repo_configuration.py` add a new `IntegrationTestRepoConfig` (depending on how many online stores you want to test). * Run the test suite on the contrib test suite with `make test-python-contrib-universal`. @@ -171,7 +303,7 @@ The key fixtures are the `environment` and `universal_data_sources` fixtures, wh ### To include a new online store * In `repo_configuration.py` add a new config that maps to a serialized version of configuration you need in `feature_store.yaml` to setup the online store. -* In `repo_configuration.py`, add new`IntegrationTestRepoConfig` for offline stores you want to test. +* In `repo_configuration.py`, add new `IntegrationTestRepoConfig` for online stores you want to test. 
* Run the full test suite with `make test-python-integration`

### To use custom data in a new test
@@ -193,11 +325,11 @@ def your_test(environment: Environment):
    # ... run test
```

-### Running your own redis cluster for testing
+### Running your own Redis cluster for testing

-* Install redis on your computer. If you are a mac user, you should be able to `brew install redis`.
+* Install Redis on your computer. If you are a mac user, you should be able to `brew install redis`.
* Running `redis-server --help` and `redis-cli --help` should show corresponding help menus.
-* Run `cd scripts/create-cluster` and run `./create-cluster start` then `./create-cluster create` to start the server. You should see output that looks like this:
+* Run `./infra/scripts/redis-cluster.sh start` then `./infra/scripts/redis-cluster.sh create` to start the Redis cluster locally. You should see output that looks like this:
~~~~
Starting 6001
Starting 6002
@@ -206,6 +338,6 @@ Starting 6004
Starting 6005
Starting 6006
~~~~
-* You should be able to run the integration tests and have the redis cluster tests pass.
-* If you would like to run your own redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster.
-* To stop the cluster, run `./create-cluster stop` and then `./create-cluster clean`.
+* You should be able to run the integration tests and have the Redis cluster tests pass.
+* If you would like to run your own Redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster.
+* To stop the cluster, run `./infra/scripts/redis-cluster.sh stop` and then `./infra/scripts/redis-cluster.sh clean`.
diff --git a/docs/how-to-guides/running-feast-in-production.md b/docs/how-to-guides/running-feast-in-production.md index 6023c5ac66..3a9dd9b591 100644 --- a/docs/how-to-guides/running-feast-in-production.md +++ b/docs/how-to-guides/running-feast-in-production.md @@ -242,14 +242,12 @@ This service will provide an HTTP API with JSON I/O, which can be easily used wi [Read more about this feature](../reference/alpha-aws-lambda-feature-server.md) -### 4.3. Java based Feature Server deployed on Kubernetes +### 4.3. Go feature server deployed on Kubernetes -For users with very latency-sensitive and high QPS use-cases, Feast offers a high-performance Java feature server. -Besides the benefits of running on JVM, this implementation also provides a gRPC API, which guarantees good connection utilization and -small request / response body size (compared to JSON). -You will need the Feast Java SDK to retrieve features from this service. This SDK wraps all the gRPC logic for you and provides more convenient APIs. +For users with very latency-sensitive and high QPS use-cases, Feast offers a high-performance [Go feature server](../reference/feature-servers/go-feature-server.md). +It can use either HTTP or gRPC. -The Java based feature server can be deployed to Kubernetes cluster via Helm charts in a few simple steps: +The Go feature server can be deployed to a Kubernetes cluster via Helm charts in a few simple steps: 1. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) and [helm 3](https://helm.sh/) 2. Add the Feast Helm repository and download the latest charts: @@ -259,18 +257,15 @@ helm repo update ``` 3. Run Helm Install ``` -helm install feast-release feast-charts/feast \ +helm install feast-release feast-charts/feast-feature-server \ --set global.registry.path=s3://feast/registries/prod \ --set global.project= ``` -This chart will deploy two services: `feature-server` and `transformation-service`. 
-Both must have read access to the registry file on cloud storage. Both will keep a copy of the registry in their memory and periodically refresh it, so expect some delays in update propagation in exchange for better performance.
-
-#### Load balancing
-
-The next step would be to install an L7 Load Balancer (eg, [Envoy](https://www.envoyproxy.io/)) in front of the Java feature server.
-For seamless integration with Kubernetes (including services created by Feast Helm chart) we recommend using [Istio](https://istio.io/) as Envoy's orchestrator.
+This chart will deploy a single service.
+The service must have read access to the registry file on cloud storage.
+It will keep a copy of the registry in its memory and periodically refresh it, so expect some delays in update propagation in exchange for better performance.
+In order for the Go feature server to be enabled, you should set `go_feature_serving: True` in the `feature_store.yaml`.

## 5. Ingesting features from a stream source
diff --git a/docs/project/release-process.md b/docs/project/release-process.md
index e9f3295d91..7fb9c2a560 100644
--- a/docs/project/release-process.md
+++ b/docs/project/release-process.md
@@ -22,31 +22,33 @@ For Feast maintainers, these are the concrete steps for making a new release.
### Release for Python and Java SDK

1. Generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) or retrieve your saved personal access token.
-   - The personal access token should have all of the permissions under the `repo` checkbox.
+   * The personal access token should have all of the permissions under the `repo` checkbox.
2. Access the `Actions` tab on the main `feast-dev/feast` repo and find the `release` action.
3. Look for the header `This workflow has a workflow_dispatch event trigger` again and click `Run Workflow` on the right.
+ * If you are making a minor or major release, you should run it off of the master branch. + * If you are making a patch release, run it off of the corresponding minor release branch. 4. Try the dry run first with your personal access token. If this succeeds, uncheck `Dry Run` and run the release workflow. 5. All of the jobs should succeed besides the UI job which needs to be released separately. Ping a maintainer on Slack to run the UI release manually. 6. Try to install the feast release in your local environment and test out the `feast init` -> `feast apply` workflow to verify as a sanity check that the release worked correctly. ### (for minor releases) Post-release steps 1. Create a new branch based on master (i.e. v0.22-branch) and push to the main Feast repo. This will be where cherry-picks go for future patch releases and where documentation will point. -2. Write a summary of the release in the GitHub release +2. Write a summary of the release in the GitHub release 1. By default, Semantic Release will pull in messages from commits (features vs fixes, etc). But this is hard to digest still, so it helps to have a high level overview. ### Update documentation -In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U029405HFEU) in Slack for access): +In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U029405HFEU) in Slack for access): 1. Create a new space within the Feast collection -2. Go to the overflow menu on the top -> Synchronize with Git +2. Go to the overflow menu on the top -> Synchronize with Git 1. Specify GitHub as the provider - + ![](new_branch_part_1.png) 2. Configure to point to the new release branch ![](new_branch_part_2.png) 3. Publish the new page for this branch as part of the collection - + ![](new_branch_part_3.png) 4. 
Go back to the main Feast collection and go to the overflow menu -> "Customize collection" diff --git a/docs/reference/alpha-on-demand-feature-view.md b/docs/reference/alpha-on-demand-feature-view.md index eb8c4f6291..912099de98 100644 --- a/docs/reference/alpha-on-demand-feature-view.md +++ b/docs/reference/alpha-on-demand-feature-view.md @@ -2,10 +2,6 @@ **Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases. -{% hint style="info" %} -To enable this feature, run **`feast alpha enable on_demand_transforms`** -{% endhint %} - ## Overview On demand feature views allows users to use existing features and request time data \(features only available at request time\) to transform and create new features. Users define python transformation logic which is executed in both historical retrieval and online retrieval paths. diff --git a/docs/reference/feature-servers/README.md b/docs/reference/feature-servers/README.md index 301cea372c..f9a40104c3 100644 --- a/docs/reference/feature-servers/README.md +++ b/docs/reference/feature-servers/README.md @@ -2,4 +2,14 @@ Feast users can choose to retrieve features from a feature server, as opposed to through the Python SDK. 
-{% page-ref page="python-feature-server.md" %} +{% content-ref url="python-feature-server.md" %} +[python-feature-server.md](python-feature-server.md) +{% endcontent-ref %} + +{% content-ref url="go-feature-server.md" %} +[go-feature-server.md](go-feature-server.md) +{% endcontent-ref %} + +{% content-ref url="alpha-aws-lambda-feature-server.md" %} +[alpha-aws-lambda-feature-server.md](alpha-aws-lambda-feature-server.md) +{% endcontent-ref %} \ No newline at end of file diff --git a/docs/reference/alpha-aws-lambda-feature-server.md b/docs/reference/feature-servers/alpha-aws-lambda-feature-server.md similarity index 67% rename from docs/reference/alpha-aws-lambda-feature-server.md rename to docs/reference/feature-servers/alpha-aws-lambda-feature-server.md index eadcf40bb4..6950f68aa5 100644 --- a/docs/reference/alpha-aws-lambda-feature-server.md +++ b/docs/reference/feature-servers/alpha-aws-lambda-feature-server.md @@ -2,10 +2,6 @@ **Warning**: This is an _experimental_ feature. It's intended for early testing and feedback, and could change without warnings in future releases. -{% hint style="info" %} -To enable this feature, run **`feast alpha enable aws_lambda_feature_server`** -{% endhint %} - ## Overview The AWS Lambda feature server is an HTTP endpoint that serves features with JSON I/O, deployed as a Docker image through AWS Lambda and AWS API Gateway. This enables users to get features from Feast using any programming language that can make HTTP requests. A [local feature server](feature-servers/python-feature-server.md) is also available. A remote feature server on GCP Cloud Run is currently being developed. @@ -14,7 +10,7 @@ The AWS Lambda feature server is an HTTP endpoint that serves features with JSON The AWS Lambda feature server is only available to projects using the `AwsProvider` with registries on S3. It is disabled by default. 
To enable it, `feature_store.yaml` must be modified; specifically, the `enable` flag must be on and an `execution_role_name` must be specified. For example, after running `feast init -t aws`, changing the registry to be on S3, and enabling the feature server, the contents of `feature_store.yaml` should look similar to the following: -```text +``` project: dev registry: s3://feast/registries/dev provider: aws @@ -27,9 +23,6 @@ offline_store: database: feast s3_staging_location: s3://feast/redshift/tests/staging_location iam_role: arn:aws:iam::{aws_account}:role/redshift_s3_access_role -flags: - alpha_features: true - aws_lambda_feature_server: true feature_server: enabled: True execution_role_name: arn:aws:iam::{aws_account}:role/lambda_execution_role @@ -41,12 +34,12 @@ If enabled, the feature server will be deployed during `feast apply`. After it i Feast requires the following permissions in order to deploy and teardown AWS Lambda feature server: -| Permissions | Resources | -| ----------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------- | -|

lambda:CreateFunction

lambda:GetFunction

lambda:DeleteFunction

lambda:AddPermission

lambda:UpdateFunctionConfiguration

| arn:aws:lambda:\:\:function:feast-\* | -|

ecr:CreateRepository

ecr:DescribeRepositories

ecr:DeleteRepository

ecr:PutImage

ecr:DescribeImages

ecr:BatchDeleteImage

ecr:CompleteLayerUpload

ecr:UploadLayerPart

ecr:InitiateLayerUpload

ecr:BatchCheckLayerAvailability

ecr:GetDownloadUrlForLayer

ecr:GetRepositoryPolicy

ecr:SetRepositoryPolicy

ecr:GetAuthorizationToken

| \* | -|

iam:PassRole

| arn:aws:iam::\:role/ | -|

apigateway:*

|

arn:aws:apigateway:*::/apis/*/routes/*/routeresponses

arn:aws:apigateway:*::/apis/*/routes/*/routeresponses/*

arn:aws:apigateway:*::/apis/*/routes/*

arn:aws:apigateway:*::/apis/*/routes

arn:aws:apigateway:*::/apis/*/integrations

arn:aws:apigateway:*::/apis/*/stages/*/routesettings/*

arn:aws:apigateway:*::/apis/*

arn:aws:apigateway:*::/apis

| +| Permissions | Resources | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +|

lambda:CreateFunction

lambda:GetFunction

lambda:DeleteFunction

lambda:AddPermission

lambda:UpdateFunctionConfiguration

| arn:aws:lambda:\:\:function:feast-\* | +|

ecr:CreateRepository

ecr:DescribeRepositories

ecr:DeleteRepository

ecr:PutImage

ecr:DescribeImages

ecr:BatchDeleteImage

ecr:CompleteLayerUpload

ecr:UploadLayerPart

ecr:InitiateLayerUpload

ecr:BatchCheckLayerAvailability

ecr:GetDownloadUrlForLayer

ecr:GetRepositoryPolicy

ecr:SetRepositoryPolicy

ecr:GetAuthorizationToken

| \* | +|

iam:PassRole

| arn:aws:iam::\:role/ | +|

apigateway:*

|

arn:aws:apigateway:*::/apis/*/routes/*/routeresponses

arn:aws:apigateway:*::/apis/*/routes/*/routeresponses/*

arn:aws:apigateway:*::/apis/*/routes/*

arn:aws:apigateway:*::/apis/*/routes

arn:aws:apigateway:*::/apis/*/integrations

arn:aws:apigateway:*::/apis/*/stages/*/routesettings/*

arn:aws:apigateway:*::/apis/*

arn:aws:apigateway:*::/apis

| The following inline policy can be used to grant Feast the necessary permissions: diff --git a/examples/java-demo/README.md b/examples/java-demo/README.md index b908bb7625..2b1d7f75a5 100644 --- a/examples/java-demo/README.md +++ b/examples/java-demo/README.md @@ -39,9 +39,6 @@ For this tutorial, we setup Feast with Redis, using the Feast CLI to register an connection_string: localhost:6379,password=[YOUR PASSWORD] offline_store: type: file - flags: - alpha_features: true - on_demand_transforms: true ``` 4. Run `feast apply` to apply your local features to the remote registry 5. Materialize features to the online store: diff --git a/examples/java-demo/feature_repo/feature_store.yaml b/examples/java-demo/feature_repo/feature_store.yaml index 91c65b512a..03e7c5cc9c 100644 --- a/examples/java-demo/feature_repo/feature_store.yaml +++ b/examples/java-demo/feature_repo/feature_store.yaml @@ -6,6 +6,3 @@ online_store: connection_string: localhost:6379,password=[YOUR PASSWORD] offline_store: type: file -flags: - alpha_features: true - on_demand_transforms: true diff --git a/infra/charts/feast-feature-server/.helmignore b/infra/charts/feast-feature-server/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/infra/charts/feast-feature-server/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/infra/charts/feast-feature-server/Chart.yaml b/infra/charts/feast-feature-server/Chart.yaml new file mode 100644 index 0000000000..aabe071357 --- /dev/null +++ b/infra/charts/feast-feature-server/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: feast-feature-server +description: Feast Feature Server in Go or Python +type: application +version: 0.22.3 +keywords: + - machine learning + - big data + - mlops +home: https://feast.dev/ +sources: + - https://github.com/feast-dev/feast diff --git a/infra/charts/feast-feature-server/README.md b/infra/charts/feast-feature-server/README.md new file mode 100644 index 0000000000..8c215a7068 --- /dev/null +++ b/infra/charts/feast-feature-server/README.md @@ -0,0 +1,82 @@ +# feast-feature-server + +![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) + +Feast Feature Server in Go or Python + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +| ---------------------------------- | ------ | ---------------- | ----------- | +| affinity | object | `{}` | | +| fullnameOverride | string | `""` | | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `""` | | +| image.tag | string | `""` | | +| imagePullSecrets | list | `[]` | | +| livenessProbe.initialDelaySeconds | int | `30` | | +| livenessProbe.periodSeconds | int | `30` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podSecurityContext | object | `{}` | | +| readinessProbe.initialDelaySeconds | int | `20` | | +| readinessProbe.periodSeconds | int | `10` | | +| replicaCount | int | `1` | | 
+| resources | object | `{}` | |
+| securityContext | object | `{}` | |
+| service.port | int | `80` | |
+| service.type | string | `"ClusterIP"` | |
+| tolerations | list | `[]` | |
+
+----------------------------------------------
+Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)
+
+
+Docker repository and tag are required. Helm install example:
+```
+helm install feast-feature-server . --set image.repository=REPO --set image.tag=TAG
+```
+
+Deployment assumes that `feature_store.yaml` exists on docker image. Example docker image:
+```
+FROM python:3.8
+
+RUN apt update && \
+    apt install -y jq
+
+RUN pip install pip --upgrade
+
+RUN pip install feast
+
+COPY feature_store.yaml /feature_store.yaml
+```
+
+Furthermore, if you wish to use the Go feature server, then you must install the Apache Arrow C++ libraries, and your `feature_store.yaml` should include `go_feature_serving: True`.
+For more details, see the [docs](https://docs.feast.dev/reference/feature-servers/go-feature-server).
+The docker image might look like: +``` +FROM python:3.8 + +RUN apt update && \ + apt install -y jq + +RUN pip install pip --upgrade + +RUN pip install feast + +RUN apt update +RUN apt install -y -V ca-certificates lsb-release wget +RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb +RUN apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb +RUN apt update +RUN apt -y install libarrow-dev + +COPY feature_store.yaml /feature_store.yaml +``` \ No newline at end of file diff --git a/infra/charts/feast-feature-server/templates/_helpers.tpl b/infra/charts/feast-feature-server/templates/_helpers.tpl new file mode 100644 index 0000000000..19c2febd13 --- /dev/null +++ b/infra/charts/feast-feature-server/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "feast-feature-server.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "feast-feature-server.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "feast-feature-server.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "feast-feature-server.labels" -}} +helm.sh/chart: {{ include "feast-feature-server.chart" . }} +{{ include "feast-feature-server.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "feast-feature-server.selectorLabels" -}} +app.kubernetes.io/name: {{ include "feast-feature-server.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/infra/charts/feast-feature-server/templates/deployment.yaml b/infra/charts/feast-feature-server/templates/deployment.yaml new file mode 100644 index 0000000000..69cf92f6c0 --- /dev/null +++ b/infra/charts/feast-feature-server/templates/deployment.yaml @@ -0,0 +1,61 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "feast-feature-server.fullname" . }} + labels: + {{- include "feast-feature-server.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "feast-feature-server.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "feast-feature-server.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["feast", "serve", "-h", "0.0.0.0"] + ports: + - name: http + containerPort: 6566 + protocol: TCP + livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/infra/charts/feast-feature-server/templates/service.yaml b/infra/charts/feast-feature-server/templates/service.yaml new file mode 100644 index 0000000000..d6914828e4 --- /dev/null +++ b/infra/charts/feast-feature-server/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "feast-feature-server.name" . }} + labels: + {{- include "feast-feature-server.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "feast-feature-server.selectorLabels" . 
| nindent 4 }} diff --git a/infra/charts/feast-feature-server/values.yaml b/infra/charts/feast-feature-server/values.yaml new file mode 100644 index 0000000000..f62f95a757 --- /dev/null +++ b/infra/charts/feast-feature-server/values.yaml @@ -0,0 +1,57 @@ +# Default values for feast. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: "" + pullPolicy: IfNotPresent + tag: "" + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +service: + type: ClusterIP + port: 80 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 30 + +readinessProbe: + initialDelaySeconds: 20 + periodSeconds: 10 diff --git a/infra/charts/feast-python-server/Chart.yaml b/infra/charts/feast-python-server/Chart.yaml index aace4f88cc..04447304f3 100644 --- a/infra/charts/feast-python-server/Chart.yaml +++ b/infra/charts/feast-python-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-python-server description: Feast Feature Server in Python type: application -version: 0.22.2 +version: 0.22.3 keywords: - machine learning - big data diff --git a/infra/charts/feast-python-server/README.md b/infra/charts/feast-python-server/README.md index 358510e23d..232e9a7ff1 100644 --- a/infra/charts/feast-python-server/README.md +++ b/infra/charts/feast-python-server/README.md @@ -1,6 +1,6 @@ # feast-python-server -![Version: 0.22.2](https://img.shields.io/badge/Version-0.22.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Feast Feature Server in Python @@ -46,7 +46,7 @@ helm install feast-python-server . --set image.repository=REPO --set image.tag=T Deployment assumes that `feature_store.yaml` exists on docker image. 
Example docker image: ``` -FROM python:3.7 +FROM python:3.8 RUN apt update && \ apt install -y jq diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index 0ab010bb2f..12145b010a 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.22.2 +version: 0.22.3 keywords: - machine learning - big data diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 9105b0b5d1..19c81d3d4d 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -8,7 +8,7 @@ This repo contains Helm charts for Feast components that are being installed on ## Chart: Feast -Feature store for machine learning Current chart version is `0.22.2` +Feature store for machine learning Current chart version is `0.22.3` ## Installation @@ -55,8 +55,8 @@ For more details, please see: https://docs.feast.dev/how-to-guides/running-feast | Repository | Name | Version | |------------|------|---------| | https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.22.2 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.22.2 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.22.3 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.22.3 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index c53e9906ca..cb5cfa0455 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.22.2 -appVersion: v0.22.2 +version: 0.22.3 
+appVersion: v0.22.3 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index 86b5fd26af..148cfe5cbf 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.22.2](https://img.shields.io/badge/Version-0.22.2-informational?style=flat-square) ![AppVersion: v0.22.2](https://img.shields.io/badge/AppVersion-v0.22.2-informational?style=flat-square) +![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![AppVersion: v0.22.3](https://img.shields.io/badge/AppVersion-v0.22.3-informational?style=flat-square) Feast Feature Server: Online feature serving service for Feast @@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.22.2"` | Image tag | +| image.tag | string | `"0.22.3"` | Image tag | | ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | | ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | | ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml index 00d5ab400d..b12fa588d1 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.22.2 + tag: 0.22.3 # image.pullPolicy -- Image pull policy pullPolicy: 
IfNotPresent diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index 650773dd8e..7c347e1186 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.22.2 -appVersion: v0.22.2 +version: 0.22.3 +appVersion: v0.22.3 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index 395f92145d..c89fe2a811 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.22.2](https://img.shields.io/badge/Version-0.22.2-informational?style=flat-square) ![AppVersion: v0.22.2](https://img.shields.io/badge/AppVersion-v0.22.2-informational?style=flat-square) +![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![AppVersion: v0.22.3](https://img.shields.io/badge/AppVersion-v0.22.3-informational?style=flat-square) Transformation service: to compute on-demand features @@ -13,7 +13,7 @@ Transformation service: to compute on-demand features | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.22.2"` | Image tag | +| image.tag | string | `"0.22.3"` | Image tag | | nodeSelector | object | `{}` | Node labels for pod assignment | | podLabels | object | `{}` | Labels to be added to Feast Serving pods | | replicaCount | int | `1` | Number of pods that will be created | 
diff --git a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml index 555e93a306..c003b87cc2 100644 --- a/infra/charts/feast/charts/transformation-service/config/feature_store.yaml +++ b/infra/charts/feast/charts/transformation-service/config/feature_store.yaml @@ -2,7 +2,4 @@ registry: path: {{ .Values.global.registry.path }} cache_ttl_seconds: {{ .Values.global.registry.cache_ttl_seconds }} provider: local -project: {{ .Values.global.project }} -flags: - on_demand_transforms: true - alpha_features: true \ No newline at end of file +project: {{ .Values.global.project }} \ No newline at end of file diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index 830607da65..f8e52a931c 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # image.tag -- Image tag - tag: 0.22.2 + tag: 0.22.3 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index 0cc7653de2..6eb54a422f 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.22.2 + version: 0.22.3 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.22.2 + version: 0.22.3 condition: transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/infra/scripts/helm/push-helm-charts.sh 
b/infra/scripts/helm/push-helm-charts.sh index 08753adb3c..1c32ee985b 100755 --- a/infra/scripts/helm/push-helm-charts.sh +++ b/infra/scripts/helm/push-helm-charts.sh @@ -17,7 +17,9 @@ helm repo add feast-helm-chart-repo $bucket cd infra/charts helm package feast helm package feast-python-server +helm package feast-feature-server helm gcs push --public feast-${1}.tgz feast-helm-chart-repo --force helm gcs push --public feast-python-server-${1}.tgz feast-helm-chart-repo --force +helm gcs push --public feast-feature-server-${1}.tgz feast-helm-chart-repo --force rm -f ./*.tgz \ No newline at end of file diff --git a/infra/scripts/helm/validate-helm-chart-versions.sh b/infra/scripts/helm/validate-helm-chart-versions.sh index 0ba75bd744..aac79d9315 100755 --- a/infra/scripts/helm/validate-helm-chart-versions.sh +++ b/infra/scripts/helm/validate-helm-chart-versions.sh @@ -3,7 +3,7 @@ set -e # Amount of file locations that need to be bumped in unison when versions increment -UNIQUE_VERSIONS_COUNT=18 +UNIQUE_VERSIONS_COUNT=20 if [ $# -ne 1 ]; then echo "Please provide a single semver version (without a \"v\" prefix) to test the repository against, e.g 0.99.0" diff --git a/infra/scripts/create-cluster.sh b/infra/scripts/redis-cluster.sh similarity index 100% rename from infra/scripts/create-cluster.sh rename to infra/scripts/redis-cluster.sh diff --git a/infra/scripts/release/files_to_bump.txt b/infra/scripts/release/files_to_bump.txt index a1e2d29623..37f5393326 100644 --- a/infra/scripts/release/files_to_bump.txt +++ b/infra/scripts/release/files_to_bump.txt @@ -9,4 +9,7 @@ infra/charts/feast/charts/feature-server/values.yaml 8 infra/charts/feast/README.md 11 58 59 infra/charts/feast-python-server/Chart.yaml 5 infra/charts/feast-python-server/README.md 3 +infra/charts/feast-feature-server/Chart.yaml 5 +infra/charts/feast-feature-server/README.md 3 java/pom.xml 41 +ui/package.json 3 diff --git a/infra/templates/README.md.jinja2 b/infra/templates/README.md.jinja2 index 
cd6e42c1d1..6a8ebdbab7 100644 --- a/infra/templates/README.md.jinja2 +++ b/infra/templates/README.md.jinja2 @@ -16,7 +16,14 @@ ## Overview -Feast is an open source feature store for machine learning. Feast is the fastest path to productionizing analytic data for model training and online inference. +Feast (**Fea**ture **St**ore) is an open source feature store for machine learning. Feast is the fastest path to manage existing infrastructure to productionize analytic data for model training and online inference. + + +Feast allows ML platform teams to: + +* **Make features consistently available for training and serving** by managing an _offline store_ (to process historical data for scale-out batch scoring or model training), a low-latency _online store_ (to power real-time prediction), and a battle-tested _feature server_ (for serving pre-computed features online). +* **Avoid data leakage** by generating point-in-time correct feature sets so data scientists can focus on feature engineering rather than debugging error-prone dataset joining logic. This ensures that future feature values do not leak to models during training. +* **Decouple ML from data infrastructure** by providing a single data access layer that abstracts feature storage from feature retrieval, ensuring models remain portable as you move from training models to serving models, from batch models to realtime models, and from one data infra system to another. Please see our [documentation](https://docs.feast.dev/) for more information about the project. 
diff --git a/java/pom.xml b/java/pom.xml index 74e8261929..9f849ed9b5 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -38,7 +38,7 @@ - 0.22.2 + 0.22.3 https://github.com/feast-dev/feast UTF-8 diff --git a/java/serving/src/test/resources/docker-compose/feast10/feature_store.yaml b/java/serving/src/test/resources/docker-compose/feast10/feature_store.yaml index 2e6625c025..8e863caa69 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/feature_store.yaml +++ b/java/serving/src/test/resources/docker-compose/feast10/feature_store.yaml @@ -5,6 +5,3 @@ online_store: type: redis connection_string: "redis:6379,password=testpw" offline_store: {} -flags: - alpha_features: true - on_demand_transforms: true diff --git a/sdk/python/docs/source/feast.rst b/sdk/python/docs/source/feast.rst index c000ac2e2b..b1fb70a362 100644 --- a/sdk/python/docs/source/feast.rst +++ b/sdk/python/docs/source/feast.rst @@ -169,14 +169,6 @@ feast.field module :undoc-members: :show-inheritance: -feast.flags module ------------------- - -.. 
automodule:: feast.flags - :members: - :undoc-members: - :show-inheritance: - feast.flags\_helper module -------------------------- diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 91815d30fd..6b182c618c 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -25,7 +25,7 @@ from dateutil import parser from pygments import formatters, highlight, lexers -from feast import flags, flags_helper, utils +from feast import utils from feast.constants import DEFAULT_FEATURE_TRANSFORMATION_SERVER_PORT from feast.errors import FeastObjectNotFoundException, FeastProviderLoginError from feast.feature_store import FeatureStore @@ -662,113 +662,6 @@ def serve_transformations_command(ctx: click.Context, port: int): store.serve_transformations(port) -@cli.group(name="alpha") -def alpha_cmd(): - """ - Access alpha features - """ - pass - - -@alpha_cmd.command("list") -@click.pass_context -def list_alpha_features(ctx: click.Context): - """ - Lists all alpha features - """ - repo = ctx.obj["CHDIR"] - cli_check_repo(repo) - repo_path = str(repo) - store = FeatureStore(repo_path=repo_path) - - flags_to_show = flags.FLAG_NAMES.copy() - flags_to_show.remove(flags.FLAG_ALPHA_FEATURES_NAME) - print("Alpha features:") - for flag in flags_to_show: - enabled_string = ( - "enabled" - if flags_helper.feature_flag_enabled(store.config, flag) - else "disabled" - ) - print(f"{flag}: {enabled_string}") - - -@alpha_cmd.command("enable-all") -@click.pass_context -def enable_alpha_features(ctx: click.Context): - """ - Enables all alpha features - """ - repo = ctx.obj["CHDIR"] - cli_check_repo(repo) - repo_path = str(repo) - store = FeatureStore(repo_path=repo_path) - - if store.config.flags is None: - store.config.flags = {} - for flag_name in flags.FLAG_NAMES: - store.config.flags[flag_name] = True - store.config.write_to_path(Path(repo_path)) - - -@alpha_cmd.command("enable") -@click.argument("name", type=click.STRING) -@click.pass_context -def 
enable_alpha_feature(ctx: click.Context, name: str): - """ - Enables an alpha feature - """ - if name not in flags.FLAG_NAMES: - raise ValueError(f"Flag name, {name}, not valid.") - - repo = ctx.obj["CHDIR"] - cli_check_repo(repo) - repo_path = str(repo) - store = FeatureStore(repo_path=repo_path) - - if store.config.flags is None: - store.config.flags = {} - store.config.flags[flags.FLAG_ALPHA_FEATURES_NAME] = True - store.config.flags[name] = True - store.config.write_to_path(Path(repo_path)) - - -@alpha_cmd.command("disable") -@click.argument("name", type=click.STRING) -@click.pass_context -def disable_alpha_feature(ctx: click.Context, name: str): - """ - Disables an alpha feature - """ - if name not in flags.FLAG_NAMES: - raise ValueError(f"Flag name, {name}, not valid.") - - repo = ctx.obj["CHDIR"] - cli_check_repo(repo) - repo_path = str(repo) - store = FeatureStore(repo_path=repo_path) - - if store.config.flags is None or name not in store.config.flags: - return - store.config.flags[name] = False - store.config.write_to_path(Path(repo_path)) - - -@alpha_cmd.command("disable-all") -@click.pass_context -def disable_alpha_features(ctx: click.Context): - """ - Disables all alpha features - """ - repo = ctx.obj["CHDIR"] - cli_check_repo(repo) - repo_path = str(repo) - store = FeatureStore(repo_path=repo_path) - - store.config.flags = None - store.config.write_to_path(Path(repo_path)) - - @cli.command("validate") @click.option( "--feature-service", "-f", help="Specify a feature service name", diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 980dfd470f..518d648533 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -197,6 +197,13 @@ def __init__( ) +class FeastOfflineStoreInvalidName(Exception): + def __init__(self, offline_store_class_name: str): + super().__init__( + f"Offline Store Class '{offline_store_class_name}' should end with the string `OfflineStore`.'" + ) + + class FeastOnlineStoreInvalidName(Exception): 
def __init__(self, online_store_class_name: str): super().__init__( @@ -312,14 +319,6 @@ def __init__(self, feature_view_name: str): ) -class ExperimentalFeatureNotEnabled(Exception): - def __init__(self, feature_flag_name: str): - super().__init__( - f"You are attempting to use an experimental feature that is not enabled. Please run " - f"`feast alpha enable {feature_flag_name}` " - ) - - class RepoConfigPathDoesNotExist(Exception): def __init__(self): super().__init__("The repo_path attribute does not exist for the repo_config.") diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index c49ce7dc77..499bb152ed 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -40,7 +40,7 @@ from google.protobuf.timestamp_pb2 import Timestamp from tqdm import tqdm -from feast import feature_server, flags, flags_helper, ui_server, utils +from feast import feature_server, ui_server, utils from feast.base_feature_view import BaseFeatureView from feast.batch_feature_view import BatchFeatureView from feast.data_source import DataSource, PushMode @@ -50,7 +50,6 @@ from feast.entity import Entity from feast.errors import ( EntityNotFoundException, - ExperimentalFeatureNotEnabled, FeatureNameCollisionError, FeatureViewNotFoundException, RequestDataNotFoundInEntityDfException, @@ -494,8 +493,8 @@ def _get_features( ) if feature_service_from_registry != _features: warnings.warn( - "The FeatureService object that has been passed in as an argument is" - "inconsistent with the version from Registry. Potentially a newer version" + "The FeatureService object that has been passed in as an argument is " + "inconsistent with the version from the registry. Potentially a newer version " "of the FeatureService has been applied to the registry." 
) for projection in feature_service_from_registry.feature_view_projections: @@ -525,11 +524,12 @@ def _validate_all_feature_views( sfvs_to_update: List[StreamFeatureView], ): """Validates all feature views.""" - if ( - not flags_helper.enable_on_demand_feature_views(self.config) - and len(odfvs_to_update) > 0 - ): - raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) + if len(odfvs_to_update) > 0: + warnings.warn( + "On demand feature view is an experimental feature. " + "This API is stable, but the functionality does not scale well for offline retrieval", + RuntimeWarning, + ) set_usage_attribute("odfv", bool(odfvs_to_update)) @@ -2243,8 +2243,11 @@ def serve_ui( @log_exceptions_and_usage def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" - if not flags_helper.enable_on_demand_feature_views(self.config): - raise ExperimentalFeatureNotEnabled(flags.FLAG_ON_DEMAND_TRANSFORM_NAME) + warnings.warn( + "On demand feature view is an experimental feature. 
" + "This API is stable, but the functionality does not scale well for offline retrieval", + RuntimeWarning, + ) from feast import transformation_server diff --git a/sdk/python/feast/flags.py b/sdk/python/feast/flags.py deleted file mode 100644 index 26e20d81f6..0000000000 --- a/sdk/python/feast/flags.py +++ /dev/null @@ -1,10 +0,0 @@ -FLAG_ALPHA_FEATURES_NAME = "alpha_features" -FLAG_ON_DEMAND_TRANSFORM_NAME = "on_demand_transforms" -FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME = "aws_lambda_feature_server" -ENV_FLAG_IS_TEST = "IS_TEST" - -FLAG_NAMES = { - FLAG_ALPHA_FEATURES_NAME, - FLAG_ON_DEMAND_TRANSFORM_NAME, - FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME, -} diff --git a/sdk/python/feast/flags_helper.py b/sdk/python/feast/flags_helper.py index 7cf16dbf0b..4763f4a283 100644 --- a/sdk/python/feast/flags_helper.py +++ b/sdk/python/feast/flags_helper.py @@ -1,39 +1,11 @@ import os -from feast import flags -from feast.repo_config import RepoConfig +ENV_FLAG_IS_TEST = "IS_TEST" def _env_flag_enabled(name: str) -> bool: return os.getenv(name, default="False") == "True" -def feature_flag_enabled(repo_config: RepoConfig, flag_name: str) -> bool: - if is_test(): - return True - return ( - _alpha_feature_flag_enabled(repo_config) - and repo_config.flags is not None - and flag_name in repo_config.flags - and repo_config.flags[flag_name] - ) - - -def _alpha_feature_flag_enabled(repo_config: RepoConfig) -> bool: - return ( - repo_config.flags is not None - and flags.FLAG_ALPHA_FEATURES_NAME in repo_config.flags - and repo_config.flags[flags.FLAG_ALPHA_FEATURES_NAME] - ) - - def is_test() -> bool: - return _env_flag_enabled(flags.ENV_FLAG_IS_TEST) - - -def enable_on_demand_feature_views(repo_config: RepoConfig) -> bool: - return feature_flag_enabled(repo_config, flags.FLAG_ON_DEMAND_TRANSFORM_NAME) - - -def enable_aws_lambda_feature_server(repo_config: RepoConfig) -> bool: - return feature_flag_enabled(repo_config, flags.FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME) + return 
_env_flag_enabled(ENV_FLAG_IS_TEST) diff --git a/sdk/python/feast/infra/aws.py b/sdk/python/feast/infra/aws.py index 14301faf19..11eff6200d 100644 --- a/sdk/python/feast/infra/aws.py +++ b/sdk/python/feast/infra/aws.py @@ -3,6 +3,7 @@ import logging import os import uuid +import warnings from datetime import datetime from pathlib import Path from tempfile import TemporaryFile @@ -22,15 +23,12 @@ from feast.errors import ( AwsAPIGatewayDoesNotExist, AwsLambdaDoesNotExist, - ExperimentalFeatureNotEnabled, IncompatibleRegistryStoreClass, RepoConfigPathDoesNotExist, S3RegistryBucketForbiddenAccess, S3RegistryBucketNotExist, ) from feast.feature_view import FeatureView -from feast.flags import FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME -from feast.flags_helper import enable_aws_lambda_feature_server from feast.infra.feature_servers.aws_lambda.config import AwsLambdaFeatureServerConfig from feast.infra.passthrough_provider import PassthroughProvider from feast.infra.utils import aws_utils @@ -74,8 +72,11 @@ def update_infra( ) if self.repo_config.feature_server and self.repo_config.feature_server.enabled: - if not enable_aws_lambda_feature_server(self.repo_config): - raise ExperimentalFeatureNotEnabled(FLAG_AWS_LAMBDA_FEATURE_SERVER_NAME) + warnings.warn( + "AWS Lambda based feature serving is an experimental feature. " + "We do not guarantee that future changes will maintain backward compatibility.", + RuntimeWarning, + ) # Since the AWS Lambda feature server will attempt to load the registry, we # only allow the registry to be in S3. 
diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index ec06d8dce1..66787c060b 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -415,7 +415,7 @@ def _to_arrow_internal(self) -> pa.Table: def to_snowflake(self, table_name: str) -> None: """Save dataset as a new Snowflake table""" - if self.on_demand_feature_views is not None: + if self.on_demand_feature_views: transformed_df = self.to_df() write_pandas( diff --git a/sdk/python/feast/infra/registry_stores/sql.py b/sdk/python/feast/infra/registry_stores/sql.py index 2d3ac9d683..83a46f23ab 100644 --- a/sdk/python/feast/infra/registry_stores/sql.py +++ b/sdk/python/feast/infra/registry_stores/sql.py @@ -2,7 +2,7 @@ from datetime import datetime from enum import Enum from pathlib import Path -from typing import Any, List, Optional, Set, Union +from typing import Any, Callable, List, Optional, Set, Union from sqlalchemy import ( # type: ignore BigInteger, @@ -555,7 +555,7 @@ def update_infra(self, infra: Infra, project: str, commit: bool = True): ) def get_infra(self, project: str, allow_cache: bool = False) -> Infra: - return self._get_object( + infra_object = self._get_object( managed_infra, "infra_obj", project, @@ -565,6 +565,8 @@ def get_infra(self, project: str, allow_cache: bool = False) -> Infra: "infra_proto", None, ) + infra_object = infra_object or InfraProto() + return Infra.from_proto(infra_object) def apply_user_metadata( self, @@ -676,11 +678,19 @@ def commit(self): pass def _apply_object( - self, table, project: str, id_field_name, obj, proto_field_name, name=None + self, + table: Table, + project: str, + id_field_name: str, + obj: Any, + proto_field_name: str, + name: Optional[str] = None, ): self._maybe_init_project_metadata(project) - name = name or obj.name + name = name or obj.name if hasattr(obj, "name") else None + assert name, f"name needs to be provided for 
{obj}" + with self.engine.connect() as conn: update_datetime = datetime.utcnow() update_time = int(update_datetime.timestamp()) @@ -703,9 +713,16 @@ def _apply_object( ) conn.execute(update_stmt) else: + obj_proto = obj.to_proto() + + if hasattr(obj_proto, "meta") and hasattr( + obj_proto.meta, "created_timestamp" + ): + obj_proto.meta.created_timestamp.FromDatetime(update_datetime) + values = { id_field_name: name, - proto_field_name: obj.to_proto().SerializeToString(), + proto_field_name: obj_proto.SerializeToString(), "last_updated_timestamp": update_time, "project_id": project, } @@ -738,7 +755,14 @@ def _maybe_init_project_metadata(self, project): conn.execute(insert_stmt) usage.set_current_project_uuid(new_project_uuid) - def _delete_object(self, table, name, project, id_field_name, not_found_exception): + def _delete_object( + self, + table: Table, + name: str, + project: str, + id_field_name: str, + not_found_exception: Optional[Callable], + ): with self.engine.connect() as conn: stmt = delete(table).where( getattr(table.c, id_field_name) == name, table.c.project_id == project @@ -752,14 +776,14 @@ def _delete_object(self, table, name, project, id_field_name, not_found_exceptio def _get_object( self, - table, - name, - project, - proto_class, - python_class, - id_field_name, - proto_field_name, - not_found_exception, + table: Table, + name: str, + project: str, + proto_class: Any, + python_class: Any, + id_field_name: str, + proto_field_name: str, + not_found_exception: Optional[Callable], ): self._maybe_init_project_metadata(project) @@ -771,10 +795,18 @@ def _get_object( if row: _proto = proto_class.FromString(row[proto_field_name]) return python_class.from_proto(_proto) - raise not_found_exception(name, project) + if not_found_exception: + raise not_found_exception(name, project) + else: + return None def _list_objects( - self, table, project, proto_class, python_class, proto_field_name + self, + table: Table, + project: str, + proto_class: Any, + 
python_class: Any, + proto_field_name: str, ): self._maybe_init_project_metadata(project) with self.engine.connect() as conn: diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index b7cf1683dc..9103341161 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -16,10 +16,11 @@ from pydantic.error_wrappers import ErrorWrapper from pydantic.typing import Dict, Optional, Union -from feast import flags from feast.errors import ( FeastFeatureServerTypeInvalidError, FeastFeatureServerTypeSetError, + FeastOfflineStoreInvalidName, + FeastOnlineStoreInvalidName, FeastProviderNotSetError, ) from feast.importer import import_class @@ -124,7 +125,7 @@ class RepoConfig(FeastBaseModel): """ FeatureServerConfig: Feature server configuration (optional depending on provider) """ flags: Any - """ Flags: Feature flags for experimental features (optional) """ + """ Flags (deprecated field): Feature flags for experimental features """ repo_path: Optional[Path] = None @@ -217,7 +218,8 @@ def _validate_online_store_config(cls, values): return values # Make sure that the provider configuration is set. We need it to set the defaults - assert "provider" in values + if "provider" not in values: + raise FeastProviderNotSetError() # Set the default type # This is only direct reference to a provider or online store that we should have @@ -253,7 +255,8 @@ def _validate_offline_store_config(cls, values): return values # Make sure that the provider configuration is set. We need it to set the defaults - assert "provider" in values + if "provider" not in values: + raise FeastProviderNotSetError() # Set the default type if "type" not in values["offline_store"]: @@ -327,15 +330,9 @@ def _validate_flags(cls, v): if not isinstance(v, Dict): return - for flag_name, val in v.items(): - if flag_name not in flags.FLAG_NAMES: - _logger.warn( - "Unrecognized flag: %s. 
This feature may be invalid, or may refer " - "to a previously experimental feature which has graduated to production.", - flag_name, - ) - if type(val) is not bool: - raise ValueError(f"Flag value, {val}, not valid.") + _logger.warning( + "Flags are no longer necessary in Feast. Experimental features will log warnings instead." + ) return v @@ -375,8 +372,8 @@ def get_data_source_class_from_type(data_source_type: str): def get_online_config_from_type(online_store_type: str): if online_store_type in ONLINE_STORE_CLASS_FOR_TYPE: online_store_type = ONLINE_STORE_CLASS_FOR_TYPE[online_store_type] - else: - assert online_store_type.endswith("OnlineStore") + elif not online_store_type.endswith("OnlineStore"): + raise FeastOnlineStoreInvalidName(online_store_type) module_name, online_store_class_type = online_store_type.rsplit(".", 1) config_class_name = f"{online_store_class_type}Config" @@ -386,8 +383,8 @@ def get_online_config_from_type(online_store_type: str): def get_offline_config_from_type(offline_store_type: str): if offline_store_type in OFFLINE_STORE_CLASS_FOR_TYPE: offline_store_type = OFFLINE_STORE_CLASS_FOR_TYPE[offline_store_type] - else: - assert offline_store_type.endswith("OfflineStore") + elif not offline_store_type.endswith("OfflineStore"): + raise FeastOfflineStoreInvalidName(offline_store_type) module_name, offline_store_class_type = offline_store_type.rsplit(".", 1) config_class_name = f"{offline_store_class_type}Config" diff --git a/sdk/python/feast/ui/package.json b/sdk/python/feast/ui/package.json index 883c19660b..358aa2cdd2 100644 --- a/sdk/python/feast/ui/package.json +++ b/sdk/python/feast/ui/package.json @@ -6,7 +6,7 @@ "@elastic/datemath": "^5.0.3", "@elastic/eui": "^57.0.0", "@emotion/react": "^11.9.0", - "@feast-dev/feast-ui": "^0.20.5", + "@feast-dev/feast-ui": "latest", "@testing-library/jest-dom": "^5.16.4", "@testing-library/react": "^13.2.0", "@testing-library/user-event": "^13.5.0", diff --git a/sdk/python/feast/ui/yarn.lock 
b/sdk/python/feast/ui/yarn.lock index f2fd12b4e5..92b3de6f0d 100644 --- a/sdk/python/feast/ui/yarn.lock +++ b/sdk/python/feast/ui/yarn.lock @@ -1345,10 +1345,10 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" -"@feast-dev/feast-ui@^0.20.5": - version "0.20.5" - resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.20.5.tgz#bb0d6fc81cbd92ca69b779982ab151a8d9cabaee" - integrity sha512-BwMPJSv1MkylHxPnU/2fZX77AC/G4H2DIf+HAj80ZklwB0zbmeZzhXFrVh4xSheevGZFh0L839JeL14WfXPZsA== +"@feast-dev/feast-ui@latest": + version "0.23.1" + resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.23.1.tgz#391ba62efb3976b87745fe1a09ead50f3b6bd408" + integrity sha512-jMElqDfdNRrC+CxIXJ3Kkxa1CDwLyr4cCyO67e8KVEhiIb9eIYkIrfU8GpGm9sU2dErpIIqziWFvdR4aU/hNJA== dependencies: "@elastic/datemath" "^5.0.3" "@elastic/eui" "^55.0.1" diff --git a/sdk/python/feast/usage.py b/sdk/python/feast/usage.py index 471a1b9671..4c36ac5cf3 100644 --- a/sdk/python/feast/usage.py +++ b/sdk/python/feast/usage.py @@ -29,6 +29,7 @@ import requests +from feast import flags_helper from feast.constants import DEFAULT_FEAST_USAGE_VALUE, FEAST_USAGE from feast.version import get_version @@ -172,7 +173,8 @@ def _export(event: typing.Dict[str, typing.Any]): def _produce_event(ctx: UsageContext): - is_test = bool({"pytest", "unittest"} & sys.modules.keys()) + # Cannot check for unittest because typeguard pulls in unittest + is_test = flags_helper.is_test() or bool({"pytest"} & sys.modules.keys()) event = { "timestamp": datetime.utcnow().isoformat(), "is_test": is_test, diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index d453eca1c2..e4ba51eb9b 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -261,7 +261,7 @@ great-expectations==0.14.13 # via feast (setup.py) greenlet==1.1.2 # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) 
# google-api-core @@ -269,11 +269,11 @@ grpcio==1.48.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) -grpcio-testing==1.48.0 +grpcio-testing==1.47.0 # via feast (setup.py) -grpcio-tools==1.48.0 +grpcio-tools==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index 30b76b6aa8..0932459153 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.3 +fastavro==1.5.4 # via # feast (setup.py) # pandavro @@ -57,13 +57,11 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn @@ -75,7 +73,7 @@ idna==3.3 # requests jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.8.0 +jsonschema==4.9.0 # via feast (setup.py) locket==1.0.0 # via partd diff --git a/sdk/python/requirements/py3.7-ci-requirements.txt b/sdk/python/requirements/py3.7-ci-requirements.txt index 0de82b11a9..6667777463 100644 --- a/sdk/python/requirements/py3.7-ci-requirements.txt +++ b/sdk/python/requirements/py3.7-ci-requirements.txt @@ -265,7 +265,7 @@ great-expectations==0.14.13 # via feast (setup.py) greenlet==1.1.2 # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # google-api-core @@ -273,11 +273,11 @@ grpcio==1.48.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) -grpcio-testing==1.48.0 +grpcio-testing==1.47.0 # via feast (setup.py) -grpcio-tools==1.48.0 +grpcio-tools==1.47.0 # via feast 
(setup.py) h11==0.13.0 # via uvicorn diff --git a/sdk/python/requirements/py3.7-requirements.txt b/sdk/python/requirements/py3.7-requirements.txt index bebffbe5dc..abab7b4b2b 100644 --- a/sdk/python/requirements/py3.7-requirements.txt +++ b/sdk/python/requirements/py3.7-requirements.txt @@ -59,11 +59,11 @@ googleapis-common-protos==1.56.4 # tensorflow-metadata greenlet==1.1.2 # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index f3a58fcf5e..de6cfe9afb 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -265,7 +265,7 @@ great-expectations==0.14.13 # via feast (setup.py) greenlet==1.1.2 # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # google-api-core @@ -273,11 +273,11 @@ grpcio==1.48.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) -grpcio-testing==1.48.0 +grpcio-testing==1.47.0 # via feast (setup.py) -grpcio-tools==1.48.0 +grpcio-tools==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index aa74d34054..9fee334205 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.3 +fastavro==1.5.4 # via # feast (setup.py) # pandavro @@ -57,13 +57,11 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # grpcio-reflection 
-grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn @@ -77,7 +75,7 @@ importlib-resources==5.9.0 # via jsonschema jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.8.0 +jsonschema==4.9.0 # via feast (setup.py) locket==1.0.0 # via partd @@ -107,6 +105,8 @@ pandavro==1.5.2 # via feast (setup.py) partd==1.2.0 # via dask +pkgutil-resolve-name==1.3.10 + # via jsonschema proto-plus==1.20.6 # via feast (setup.py) protobuf==3.20.1 diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index ef8a5ee352..c945bdbc59 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -261,7 +261,7 @@ great-expectations==0.14.13 # via feast (setup.py) greenlet==1.1.2 # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # google-api-core @@ -269,11 +269,11 @@ grpcio==1.48.0 # grpcio-reflection # grpcio-testing # grpcio-tools -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) -grpcio-testing==1.48.0 +grpcio-testing==1.47.0 # via feast (setup.py) -grpcio-tools==1.48.0 +grpcio-tools==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 4d155bf540..2750706477 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -40,7 +40,7 @@ dill==0.3.5.1 # via feast (setup.py) fastapi==0.79.0 # via feast (setup.py) -fastavro==1.5.3 +fastavro==1.5.4 # via # feast (setup.py) # pandavro @@ -57,13 +57,11 @@ googleapis-common-protos==1.56.4 # feast (setup.py) # google-api-core # tensorflow-metadata -greenlet==1.1.2 - # via sqlalchemy -grpcio==1.48.0 +grpcio==1.47.0 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.48.0 +grpcio-reflection==1.47.0 # via feast (setup.py) h11==0.13.0 # via uvicorn @@ 
-75,7 +73,7 @@ idna==3.3 # requests jinja2==3.1.2 # via feast (setup.py) -jsonschema==4.8.0 +jsonschema==4.9.0 # via feast (setup.py) locket==1.0.0 # via partd diff --git a/sdk/python/tests/README.md b/sdk/python/tests/README.md new file mode 100644 index 0000000000..0f56e0eee2 --- /dev/null +++ b/sdk/python/tests/README.md @@ -0,0 +1,343 @@ +# Testing Suite + +## Overview + +This guide will go over: + +1. how Feast tests are setup +2. how to extend the test suite to test new functionality +3. how to use the existing test suite to test a new custom offline / online store. + +## Test suite overview + +Let's inspect the test setup in `sdk/python/tests/integration`: + +```bash +$ tree +. +β”œβ”€β”€ e2e +β”‚ β”œβ”€β”€ test_go_feature_server.py +β”‚ β”œβ”€β”€ test_python_feature_server.py +β”‚ β”œβ”€β”€ test_universal_e2e.py +β”‚ β”œβ”€β”€ test_usage_e2e.py +β”‚ └── test_validation.py +β”œβ”€β”€ feature_repos +β”‚ β”œβ”€β”€ integration_test_repo_config.py +β”‚ β”œβ”€β”€ repo_configuration.py +β”‚ └── universal +β”‚ β”œβ”€β”€ catalog +β”‚ β”œβ”€β”€ data_source_creator.py +β”‚ β”œβ”€β”€ data_sources +β”‚ β”‚ β”œβ”€β”€ __init__.py +β”‚ β”‚ β”œβ”€β”€ bigquery.py +β”‚ β”‚ β”œβ”€β”€ file.py +β”‚ β”‚ β”œβ”€β”€ redshift.py +β”‚ β”‚ └── snowflake.py +β”‚ β”œβ”€β”€ entities.py +β”‚ β”œβ”€β”€ feature_views.py +β”‚ β”œβ”€β”€ online_store +β”‚ β”‚ β”œβ”€β”€ __init__.py +β”‚ β”‚ β”œβ”€β”€ datastore.py +β”‚ β”‚ β”œβ”€β”€ dynamodb.py +β”‚ β”‚ β”œβ”€β”€ hbase.py +β”‚ β”‚ └── redis.py +β”‚ └── online_store_creator.py +β”œβ”€β”€ materialization +β”‚ └── test_lambda.py +β”œβ”€β”€ offline_store +β”‚ β”œβ”€β”€ test_feature_logging.py +β”‚ β”œβ”€β”€ test_offline_write.py +β”‚ β”œβ”€β”€ test_push_features_to_offline_store.py +β”‚ β”œβ”€β”€ test_s3_custom_endpoint.py +β”‚ └── test_universal_historical_retrieval.py +β”œβ”€β”€ online_store +β”‚ β”œβ”€β”€ test_online_retrieval.py +β”‚ β”œβ”€β”€ test_push_features_to_online_store.py +β”‚ └── test_universal_online.py +└── registration + β”œβ”€β”€ 
test_feature_store.py + β”œβ”€β”€ test_inference.py + β”œβ”€β”€ test_registry.py + β”œβ”€β”€ test_sql_registry.py + β”œβ”€β”€ test_universal_cli.py + β”œβ”€β”€ test_universal_odfv_feature_inference.py + └── test_universal_types.py + +``` + +* `feature_repos` has setup files for most tests in the test suite. +* `conftest.py` and some of the individual test files contain fixtures which can be used to on different offline stores, online stores, etc. and thus abstract away store specific implementations so we don't need to rewrite the same test implementation for different stores. + +## Structure of the test suite + +### What is the universal test suite? + +The universal test suite verifies that crucial Feast functions (e.g `get_historical_features`, `get_online_features` etc.) have the correct behavior for each of the different environments that Feast could be used in. These environments are combinations of an offline store, online store, and provider and the universal test suite serves to run basic functional verification against all of these different permutations. + +We use pytest [fixtures](https://docs.pytest.org/en/6.2.x/fixture.html) to accomplish this without writing excess code. + +Tests in Feast are split into integration and unit tests. + +### Is it an integration or unit test? + +* Integration tests test non local Feast behavior. Integration tests mainly involve testing of Feast components that connect to services outside of Feast(e.g connecting to gcp or aws clients). + * Generally if the test requires the initialization of a feature store in an external environment in order to test (i.e using our universal test fixtures), it is probably an integration test. +* Unit tests, on the other hand, unit tests primarily test local and class level behavior that does not require spinning up an external service. If your test can be run locally without using any other services besides pytest, it is a unit test. 
+ +### Main types of tests + +#### Integration tests + +1. E2E tests + * E2E tests test end-to-end functionality of Feast over the various codepaths (initialize a feature store, apply, and materialize). + * The main codepaths include: + * basic e2e tests for offline stores + * `test_universal_e2e.py` + * go feature server + * `test_go_feature_server.py` + * python http server + * `test_python_feature_server.py` + * usage tracking + * `test_usage_e2e.py` + * data quality monitoring feature validation + * `test_validation.py` +2. Offline and Online Store Tests + * Offline and online store tests mainly test for the offline and online retrieval functionality. + * The various specific functionalities that are tested include: + * push API tests + * `test_push_features_to_offline_store.py` + * `test_push_features_to_online_store.py` + * `test_offline_write.py` + * historical retrieval tests + * `test_universal_historical_retrieval.py` + * online retrieval tests + * `test_universal_online.py` + * data quality monitoring feature logging tests + * `test_feature_logging.py` + * online store tests + * `test_universal_online.py` +3. Registration Tests + * The registration folder contains all of the registry tests and some universal cli tests. This includes: + * CLI Apply and Materialize tests tested against on the universal test suite + * Data type inference tests + * Registry tests +4. Miscellaneous Tests + * AWS Lambda Materialization Tests (Currently do not work) + * `test_lambda.py` + +#### Unit tests + +1. Registry Diff Tests + * These are tests for the infrastructure and registry diff functionality that Feast uses to determine if changes to the registry or infrastructure is needed. +2. Local CLI Tests and Local Feast Tests + * These tests test all of the cli commands against the local file offline store. +3. 
Infrastructure Unit Tests + * DynamoDB tests with dynamo mocked out + * Repository configuration tests + * Schema inference unit tests + * Key serialization tests + * Basic provider unit tests +4. Feature Store Validation Tests + * These test mainly contain class level validation like hashing tests, protobuf and class serialization, and error and warning handling. + * Data source unit tests + * Feature service unit tests + * Feature service, feature view, and feature validation tests + * Protobuf/json tests for Feast ValueTypes + * Serialization tests + * Type mapping + * Feast types + * Serialization tests due to this [issue](https://github.com/feast-dev/feast/issues/2345) + * Feast usage tracking unit tests + +#### Docstring tests + +Docstring tests are primarily smoke tests to make sure imports and setup functions can be executed without errors. + +## Understanding the test suite with an example test + +### Example test + +Let's look at a sample test using the universal repo: + +{% tabs %} +{% tab code="sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py" %} +```python +@pytest.mark.integration +@pytest.mark.universal_offline_stores +@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: f"full:{v}") +def test_historical_features(environment, universal_data_sources, full_feature_names): + store = environment.feature_store + + (entities, datasets, data_sources) = universal_data_sources + + feature_views = construct_universal_feature_views(data_sources) + + entity_df_with_request_data = datasets.entity_df.copy(deep=True) + entity_df_with_request_data["val_to_add"] = [ + i for i in range(len(entity_df_with_request_data)) + ] + entity_df_with_request_data["driver_age"] = [ + i + 100 for i in range(len(entity_df_with_request_data)) + ] + + feature_service = FeatureService( + name="convrate_plus100", + features=[feature_views.driver[["conv_rate"]], feature_views.driver_odfv], + ) + feature_service_entity_mapping = 
FeatureService( + name="entity_mapping", + features=[ + feature_views.location.with_name("origin").with_join_key_map( + {"location_id": "origin_id"} + ), + feature_views.location.with_name("destination").with_join_key_map( + {"location_id": "destination_id"} + ), + ], + ) + + store.apply( + [ + driver(), + customer(), + location(), + feature_service, + feature_service_entity_mapping, + *feature_views.values(), + ] + ) + # ... more test code + + job_from_df = store.get_historical_features( + entity_df=entity_df_with_request_data, + features=[ + "driver_stats:conv_rate", + "driver_stats:avg_daily_trips", + "customer_profile:current_balance", + "customer_profile:avg_passenger_count", + "customer_profile:lifetime_trip_count", + "conv_rate_plus_100:conv_rate_plus_100", + "conv_rate_plus_100:conv_rate_plus_100_rounded", + "conv_rate_plus_100:conv_rate_plus_val_to_add", + "order:order_is_success", + "global_stats:num_rides", + "global_stats:avg_ride_length", + "field_mapping:feature_name", + ], + full_feature_names=full_feature_names, + ) + + if job_from_df.supports_remote_storage_export(): + files = job_from_df.to_remote_storage() + print(files) + assert len(files) > 0 # This test should be way more detailed + + start_time = datetime.utcnow() + actual_df_from_df_entities = job_from_df.to_df() + # ... more test code + + validate_dataframes( + expected_df, + table_from_df_entities, + keys=[event_timestamp, "order_id", "driver_id", "customer_id"], + ) + # ... more test code +``` +{% endtab %} +{% endtabs %} + +* The key fixtures are the `environment` and `universal_data_sources` fixtures, which are defined in the `feature_repos` directories and the `conftest.py` file. This by default pulls in a standard dataset with driver and customer entities (that we have pre-defined), certain feature views, and feature values. + * The `environment` fixture sets up a feature store, parametrized by the provider and the online/offline store. 
It allows the test to query against that feature store without needing to worry about the underlying implementation or any setup that may be involved in creating instances of these datastores. + * Each fixture creates a different integration test with its own `IntegrationTestRepoConfig` which is used by pytest to generate a unique test for each of the different environments that require testing. + +* Feast tests also use a variety of markers: + * The `@pytest.mark.integration` marker is used to designate integration tests which will cause the test to be run when you call `make test-python-integration`. + * The `@pytest.mark.universal_offline_stores` marker will parametrize the test on all of the universal offline stores including file, redshift, bigquery and snowflake. + * The `full_feature_names` parametrization defines whether or not the test should reference features as their full feature name (fully qualified path) or just the feature name itself. + + +## Writing a new test or reusing existing tests + +### To add a new test to an existing test file + +* Use the same function signatures as an existing test (e.g. use `environment` and `universal_data_sources` as an argument) to include the relevant test fixtures. +* If possible, expand an individual test instead of writing a new test, due to the cost of starting up offline / online stores. +* Use the `universal_offline_stores` and `universal_online_store` markers to parametrize the test against different offline store and online store combinations. You can also designate specific online and offline stores to test by using the `only` parameter on the marker. + +```python +@pytest.mark.universal_online_stores(only=["redis"]) +``` +### To test a new offline / online store from a plugin repo + +* Install Feast in editable mode with `pip install -e`. +* The core tests for offline / online store behavior are parametrized by the `FULL_REPO_CONFIGS` variable defined in `feature_repos/repo_configuration.py`. 
To overwrite this variable without modifying the Feast repo, create your own file that contains a `FULL_REPO_CONFIGS` (which will require adding a new `IntegrationTestRepoConfig` or two) and set the environment variable `FULL_REPO_CONFIGS_MODULE` to point to that file. Then the core offline / online store tests can be run with `make test-python-universal`. +* See the [custom offline store demo](https://github.com/feast-dev/feast-custom-offline-store-demo) and the [custom online store demo](https://github.com/feast-dev/feast-custom-online-store-demo) for examples. + +### What are some important things to keep in mind when adding a new offline / online store? + +#### Type mapping/Inference + +Many problems arise when implementing your data store's type conversion to interface with Feast datatypes. +1. You will need to correctly update `inference.py` so that Feast can infer your datasource schemas +2. You also need to update `type_map.py` so that Feast knows how to convert your datastore's types to Feast-recognized types in `feast/types.py`. + +#### Historical and online retrieval + +The most important functionality in Feast is historical and online retrieval. Most of the e2e and universal integration tests test this functionality in some way. Making sure this functionality works also indirectly asserts that reading and writing from your datastore works as intended. + + +### To include a new offline / online store in the main Feast repo + +* Extend `data_source_creator.py` for your offline store. +* In `repo_configuration.py` add a new `IntegrationTestRepoConfig` or two (depending on how many online stores you want to test). + * Generally, you should only need to test against sqlite. However, if you need to test against a production online store, then you can also test against Redis or dynamodb. 
+* Run the full test suite with `make test-python-integration`. + +### Including a new offline / online store in the main Feast repo from external plugins with community maintainers. + +* This folder is for plugins that are officially maintained with community owners. Place the APIs in `feast/infra/offline_stores/contrib/`. +* Extend `data_source_creator.py` for your offline store and implement the required APIs. +* In `contrib_repo_configuration.py` add a new `IntegrationTestRepoConfig` (depending on how many online stores you want to test). +* Run the test suite on the contrib test suite with `make test-python-contrib-universal`. + +### To include a new online store + +* In `repo_configuration.py` add a new config that maps to a serialized version of configuration you need in `feature_store.yaml` to set up the online store. +* In `repo_configuration.py`, add new `IntegrationTestRepoConfig` for online stores you want to test. +* Run the full test suite with `make test-python-integration` + +### To use custom data in a new test + +* Check `test_universal_types.py` for an example of how to do this. + +```python +@pytest.mark.integration +def your_test(environment: Environment): + df = #...# + data_source = environment.data_source_creator.create_data_source( + df, + destination_name=environment.feature_store.project + ) + your_fv = driver_feature_view(data_source) + entity = driver(value_type=ValueType.UNKNOWN) + fs.apply([your_fv, entity]) + + # ... run test +``` + +### Running your own Redis cluster for testing + +* Install Redis on your computer. If you are a mac user, you should be able to `brew install redis`. + * Running `redis-server --help` and `redis-cli --help` should show corresponding help menus. +* Run `./infra/scripts/redis-cluster.sh start` then `./infra/scripts/redis-cluster.sh create` to start the Redis cluster locally. 
You should see output that looks like this: +~~~~ +Starting 6001 +Starting 6002 +Starting 6003 +Starting 6004 +Starting 6005 +Starting 6006 +~~~~ +* You should be able to run the integration tests and have the Redis cluster tests pass. +* If you would like to run your own Redis cluster, you can run the above commands with your own specified ports and connect to the newly configured cluster. +* To stop the cluster, run `./infra/scripts/redis-cluster.sh stop` and then `./infra/scripts/redis-cluster.sh clean`. diff --git a/sdk/python/tests/integration/registration/test_sql_registry.py b/sdk/python/tests/integration/registration/test_sql_registry.py index 56aff8c6d1..912907198f 100644 --- a/sdk/python/tests/integration/registration/test_sql_registry.py +++ b/sdk/python/tests/integration/registration/test_sql_registry.py @@ -143,6 +143,9 @@ def test_apply_entity_success(sql_registry): and entity.tags["team"] == "matchmaking" ) + # After the first apply, the created_timestamp should be the same as the last_update_timestamp. + assert entity.created_timestamp == entity.last_updated_timestamp + sql_registry.delete_entity("driver_car_id", project) assert_project_uuid(project, project_uuid, sql_registry) entities = sql_registry.list_entities(project) @@ -226,6 +229,9 @@ def test_apply_feature_view_success(sql_registry): and feature_view.entities[0] == "fs1_my_entity_1" ) + # After the first apply, the created_timestamp should be the same as the last_update_timestamp. 
+ assert feature_view.created_timestamp == feature_view.last_updated_timestamp + sql_registry.delete_feature_view("my_feature_view_1", project) feature_views = sql_registry.list_feature_views(project) assert len(feature_views) == 0 diff --git a/ui/.npmrc b/ui/.npmrc new file mode 100644 index 0000000000..bd3327ab5a --- /dev/null +++ b/ui/.npmrc @@ -0,0 +1 @@ +//registry.npmjs.org/:_authToken=${NPM_TOKEN} \ No newline at end of file diff --git a/ui/CONTRIBUTING.md b/ui/CONTRIBUTING.md new file mode 100644 index 0000000000..970bd3676c --- /dev/null +++ b/ui/CONTRIBUTING.md @@ -0,0 +1,103 @@ +

Table of contents

+ +- [General contributor notes](#general-contributor-notes) + - [`feast ui` command](#feast-ui-command) + - [NPM package project structure](#npm-package-project-structure) + - [Tests](#tests) + - [Yarn commands](#yarn-commands) + - [`yarn install`](#yarn-install) + - [`yarn start`](#yarn-start) + - [`yarn test`](#yarn-test) +- [Release process](#release-process) + - [(Advanced) Manually publishing the Feast Package to NPM](#advanced-manually-publishing-the-feast-package-to-npm) + - [Requirements](#requirements) + - [Steps for Publishing](#steps-for-publishing) + +# General contributor notes +In this doc, we describe how to contribute both to the Feast Web UI NPM package as well as the embedded Feast UI in the Python SDK (i.e. what's run when you run `feast ui`) + +## `feast ui` command +You can see the logic in [../sdk/python/feast/ui](../sdk/python/feast/ui/). This instance is loaded in [../sdk/python/feast/ui_server.py](../sdk/python/feast/ui_server.py). + +Under the hood, what happens is that the Feast SDK spins up a server which exposes an endpoint to the registry. It then mounts the UI on the server and points it to fetch data from that registry. + +## NPM package project structure +The Web UI is powered by a JSON registry dump from Feast (running `feast registry-dump`). Running `yarn start` launches a UI +powered by test data. +- `public/` contains assets as well as demo data loaded by the Web UI. + - There is a `projects-list.json` which represents all Feast projects the UI shows. + - There is also a `registry.json` which is the registry dump for the feature repo. +- `feature_repo/` contains a sample Feast repo which generates the `registry.json` +- `src/` contains the Web UI source code. + - `src/contexts` has React context objects around project level metadata or registry path metadata to inject into pages. 
The contexts are static contexts provided by [FeastUISansProviders.tsx](src/FeastUISansProviders.tsx) + - `src/parsers` parses the `registry.json` into in memory representations of Feast objects (feature views, data sources, entities, feature services). + - This has ~1:1 mappings to the protobuf objects in [feast/protos/feast/core](https://github.com/feast-dev/feast/tree/master/protos/feast/core). + - There are also "relationships" which create an in-memory lineage graph which can be used to construct links in pages. + - This generates state which pages will load via React queries (to the registry path). + - `src/pages` has all individual web pages and their layouts. For any given Feast object (e.g. entity), there exist: + - an **Index page** (which is the first page you hit when you click on that object). This loads using a React query the in memory representation of all objects (parsed from `src/parsers`) and embeds: + - a **Listing page** (i.e. listing all the objects in the registry in a table). This creates links to the instance pages + - an **Instance page** (which shows details for an individual entity, feature view, etc). This embeds: + - a default Overview tab, which shows all the Feast metadata (e.g. for a given entity) + - custom tabs from `src/custom-tabs`. + - Other subdirectories: + - `src/components` has common React components that are re-used across pages + - `src/custom-tabs` houses custom tabs and a custom tab React context which exist on the core pages. There is a `TabsRegistryContext` which is also supplied by the [FeastUISansProviders.tsx](src/FeastUISansProviders.tsx), and if there are custom tabs, the Feast UI will embed them as a new tab in the corresponding page (e.g. feature view page). + - `src/graphics` houses icons that are used throughout the UI + - `src/hooks` has React hooks. The most complex hooks here define the bulk of the search / filter functionality. + +## Tests +There are very few tests for this UI. 
There is a smoke test that ensures pages can load in [FeastUISansProviders.test.tsx](src/FeastUISansProviders.test.tsx) + + +## Yarn commands + +If you would like to simply try things out and see how the UI works, you can run the code in this repo. + +> **Note**: there is an `.npmrc` which is set up for automatic releases. You'll need to comment out the line in there and continue + +First: + +### `yarn install` + +That will install all the dependencies that the UI needs, as well as development dependencies. Then in the project directory, you can run: + +### `yarn start` + +Runs the app in the development mode.\ +Open [http://localhost:3000](http://localhost:3000) to view it in the browser. + +The page will reload if you make edits.\ +You will also see any lint errors in the console. + +### `yarn test` + +Launches the test runner in the interactive watch mode.\ +See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. + +# Release process +There are a couple of components in Feast that are tied to the Web UI. These are all automatically handled during the release GitHub action: +1. the npm package + - The release process for Feast automatically bumps the package version (see [bump_file_versions.py](../infra/scripts/release/bump_file_versions.py)) and releases the new NPM package (see [publish.yml](../.github/workflows/publish.yml) in the `publish-web-ui-npm` job) +2. the Feast Python SDK, which bundles in a compiled version of the Feast Web UI which is run on a `feast ui` CLI command. 
+ - The bundled Web UI in the Python SDK always compiles in the latest npm version + +## (Advanced) Manually publishing the Feast Package to NPM + +This generally should not be necessary, since new package versions are released with the overall Feast release workflow (see [publish.yml](../.github/workflows/publish.yml) in the `publish-web-ui-npm` job) + +The Feast UI is published as a module to NPM and can be found here: https://www.npmjs.com/package/@feast-dev/feast-ui + +### Requirements + +To publish a new version of the module, you will need: +- to be part of the @feast-dev team in NPM. Ask `#feast-development` on http://slack.feast.dev to add you if necessary. +- to [login to your NPM account on the command line](https://docs.npmjs.com/cli/v8/commands/npm-adduser). + +### Steps for Publishing + +1. Make sure tests are passing. Run tests with `yarn test` in the ui directory. +2. Bump the version number in `package.json` as appropriate. +3. Package the modules for distributions. Run the library build script with `yarn build:lib`. We use [Rollup](https://rollupjs.org/) for building the module, and the configs are in the `rollup.config.js` file. +4. Publish the package to NPM. Run `npm publish` +5. [Check NPM to see that the package was properly published](https://www.npmjs.com/package/@feast-dev/feast-ui). \ No newline at end of file diff --git a/ui/PUBLISHING_TO_NPM.md b/ui/PUBLISHING_TO_NPM.md deleted file mode 100644 index 0ab1af3923..0000000000 --- a/ui/PUBLISHING_TO_NPM.md +++ /dev/null @@ -1,13 +0,0 @@ -# Publishing the Feast Package to NPM - -The Feast UI is published as a module to NPM and can be found here: https://www.npmjs.com/package/@feast-dev/feast-ui - -To publish a new version of the module, you will need to be part of the @feast-dev team in NPM. Ask Tony to add you if necessary. You will also need to [login to your NPM account on the command line](https://docs.npmjs.com/cli/v8/commands/npm-adduser). - -## Steps for Publishing - -1. 
Make sure tests are passing. Run tests with `yarn test` in the ui directory. -2. Bump the version number in `package.json` as appropriate. -3. Package the modules for distributions. Run the library build script with `yarn build:lib`. We use [Rollup](https://rollupjs.org/) for building the module, and the configs are in the `rollup.config.js` file. -4. Publish the package to NPM. Run `npm publish` -5. [Check NPM to see that the package was properly publish](https://www.npmjs.com/package/@feast-dev/feast-ui). diff --git a/ui/README.md b/ui/README.md index 713d1c17c5..e91a8741ec 100644 --- a/ui/README.md +++ b/ui/README.md @@ -4,22 +4,10 @@ This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app). -## Project structure -The Web UI is powered by a JSON registry dump from Feast (running `feast registry-dump`). Running `yarn start` launches a UI -powered by test data. -- `public/` contains assets as well as demo data loaded by the Web UI. - - There is a `projects-list.json` which represents all Feast projects the UI shows. - - There is also a `registry.json` which is the registry dump for the feature repo. -- `feature_repo/` contains a sample Feast repo which generates the `registry.json` -- `src/` contains the Web UI source code. This parses the registry json blob in `src/parsers` to make this data -available for the rest of the UI. -- `src/custom-tabs` includes sample custom tabs. This is a WIP plugin system where users can inject their own tabs and -data to the UI. - ## Usage There are three modes of usage: -- via the 'feast ui' CLI to view the current feature repository +- via the `feast ui` CLI to view the current feature repository - importing the UI as a module - running the entire build as a React app. @@ -124,28 +112,6 @@ const tabsRegistry = { Examples of custom tabs can be found in the `/custom-tabs` folder. 
-### Alternative: Run this Repo - -If you would like to simply try things out and see how the UI works, you can simply run the code in this repo. First: - -### `yarn install` - -That will install the all the dependencies that the UI needs, as well as development dependencies. Then in the project directory, you can run: - -### `yarn start` - -Runs the app in the development mode.\ -Open [http://localhost:3000](http://localhost:3000) to view it in the browser. - -The page will reload if you make edits.\ -You will also see any lint errors in the console. - -### `yarn test` - -Launches the test runner in the interactive watch mode.\ -See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information. - - ## On React and Create React App This project was bootstrapped with Create React App, and uses its scripts to simplify UI development. You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started). 
diff --git a/ui/feature_repo/feature_store.yaml b/ui/feature_repo/feature_store.yaml index 31b27e2385..6ecad3eb51 100644 --- a/ui/feature_repo/feature_store.yaml +++ b/ui/feature_repo/feature_store.yaml @@ -5,6 +5,3 @@ online_store: type: sqlite offline_store: type: file -flags: - alpha_features: true - on_demand_transforms: true diff --git a/ui/package.json b/ui/package.json index 22128cc968..3793fb42c0 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "@feast-dev/feast-ui", - "version": "0.20.5", + "version": "0.22.3", "private": false, "files": [ "dist" diff --git a/ui/src/custom-tabs/data-tab/DataQuery.tsx b/ui/src/custom-tabs/data-tab/DataQuery.tsx new file mode 100644 index 0000000000..c79764ef99 --- /dev/null +++ b/ui/src/custom-tabs/data-tab/DataQuery.tsx @@ -0,0 +1,20 @@ +import { useQuery } from "react-query"; + +const DataQuery = (featureView: string) => { + const queryKey = `data-tab-namespace:${featureView}`; + + return useQuery( + queryKey, + () => { + // Customizing the URL based on your needs + const url = `/demo-custom-tabs/demo.json`; + + return fetch(url).then((res) => res.json()); + }, + { + enabled: !!featureView, // Only start the query when the variable is not undefined + } + ); +}; + +export default DataQuery; diff --git a/ui/src/custom-tabs/data-tab/DataTab.tsx b/ui/src/custom-tabs/data-tab/DataTab.tsx new file mode 100644 index 0000000000..4592d197e2 --- /dev/null +++ b/ui/src/custom-tabs/data-tab/DataTab.tsx @@ -0,0 +1,101 @@ +import React from "react"; +import { z } from "zod"; +import { + EuiCode, + EuiFlexGroup, + EuiHorizontalRule, + EuiLoadingSpinner, + EuiTable, + EuiTitle, + EuiTableHeader, + EuiTableHeaderCell, + EuiPanel, + EuiFlexItem, + EuiTableRow, + EuiTableRowCell, +} from "@elastic/eui"; +import DataQuery from "./DataQuery"; + +const FeatureViewDataRow = z.object({ + name: z.string(), + value: z.string(), +}); + +type FeatureViewDataRowType = z.infer; + +const LineHeightProp: React.CSSProperties = 
{ + lineHeight: 1, +}; + +const EuiFeatureViewDataRow = ({ name, value }: FeatureViewDataRowType) => { + return ( + + {name} + + +
{value}
+
+
+
+ ); +}; + +const FeatureViewDataTable = (data: any) => { + var items: FeatureViewDataRowType[] = []; + + for (let element in data.data) { + const row: FeatureViewDataRowType = { + name: element, + value: JSON.stringify(data.data[element], null, 2), + }; + items.push(row); + console.log(row); + } + + return ( + + + Data Item Name + Data Item Value + + {items.map((item) => { + return ; + })} + + ); +}; + +const DataTab = () => { + const fName = "credit_history"; + const { isLoading, isError, isSuccess, data } = DataQuery(fName); + const isEmpty = data === undefined; + + return ( + + {isLoading && ( + + Loading + + )} + {isEmpty &&

No feature view with name: {fName}

} + {isError &&

Error loading feature view: {fName}

} + {isSuccess && data && ( + + + + + +

Properties

+
+ + +
+
+
+
+ )} +
+ ); +}; + +export default DataTab; diff --git a/ui/src/pages/feature-services/FeatureServiceListingTable.tsx b/ui/src/pages/feature-services/FeatureServiceListingTable.tsx index c6205b020a..b865da6e23 100644 --- a/ui/src/pages/feature-services/FeatureServiceListingTable.tsx +++ b/ui/src/pages/feature-services/FeatureServiceListingTable.tsx @@ -53,10 +53,10 @@ const FeatureServiceListingTable = ({ }, }, { - name: "Created at", - field: "meta.createdTimestamp", + name: "Last updated", + field: "meta.lastUpdatedTimestamp", render: (date: Date) => { - return date.toLocaleDateString("en-CA"); + return date ? date.toLocaleDateString("en-CA") : "n/a"; }, }, ]; diff --git a/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx b/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx index a3fc897325..ea62b3b3a7 100644 --- a/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx +++ b/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx @@ -66,14 +66,20 @@ const FeatureServiceOverviewTab = () => { description="Feature Views" /> - - - + {data.meta.lastUpdatedTimestamp ? ( + + + + ) : ( + + No last updated timestamp specified on this feature service. 
+ + )} diff --git a/ui/src/parsers/feastFeatureServices.ts b/ui/src/parsers/feastFeatureServices.ts index 96c03e38ef..6812b7e02c 100644 --- a/ui/src/parsers/feastFeatureServices.ts +++ b/ui/src/parsers/feastFeatureServices.ts @@ -19,7 +19,8 @@ const FeastFeatureServiceSchema = z.object({ description: z.string().optional(), }), meta: z.object({ - createdTimestamp: z.string().transform((val) => new Date(val)), + createdTimestamp: z.string().transform((val) => new Date(val)).optional(), + lastUpdatedTimestamp: z.string().transform((val) => new Date(val)).optional(), }), }); diff --git a/ui/src/parsers/feastODFVS.ts b/ui/src/parsers/feastODFVS.ts index 8341438d50..4d09cc72df 100644 --- a/ui/src/parsers/feastODFVS.ts +++ b/ui/src/parsers/feastODFVS.ts @@ -1,6 +1,5 @@ import { z } from "zod"; import { FeastFeatureColumnSchema } from "./feastFeatureViews"; -import { FEAST_FEATURE_VALUE_TYPES } from "./types"; const FeatureViewProjectionSchema = z.object({ featureViewProjection: z.object({