diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml index fc2bb52387..9d6625f29d 100644 --- a/.github/workflows/java_master_only.yml +++ b/.github/workflows/java_master_only.yml @@ -66,6 +66,12 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -81,6 +87,8 @@ jobs: integration-test: runs-on: ubuntu-latest + env: + PYTHON: 3.8 steps: - uses: actions/checkout@v2 with: @@ -91,10 +99,46 @@ jobs: java-version: '11' java-package: jdk architecture: x64 - - uses: actions/setup-python@v2 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python with: - python-version: '3.8' - architecture: 'x64' + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt 
install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - uses: actions/cache@v2 with: path: ~/.m2/repository diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml index 39593f02ce..bf32885858 100644 --- a/.github/workflows/java_pr.yml +++ b/.github/workflows/java_pr.yml @@ -38,6 +38,12 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -58,6 +64,8 @@ jobs: (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved'))) runs-on: ubuntu-latest needs: unit-test-java + env: + PYTHON: 3.8 steps: - uses: actions/checkout@v2 with: @@ -98,6 +106,46 @@ jobs: aws-region: us-west-2 - name: Use AWS CLI run: aws sts get-caller-identity + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - 
name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - name: Run integration tests run: make test-java-integration - name: Save report diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a61e944b4..d6e3e8159a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## [0.22.4](https://github.com/feast-dev/feast/compare/v0.22.3...v0.22.4) (2022-08-15) + + +### Bug Fixes + +* Fix field mapping logic during feature inference ([#3067](https://github.com/feast-dev/feast/issues/3067)) ([3668702](https://github.com/feast-dev/feast/commit/3668702c69e83e21f237e40727d745f399f5fcd9)) +* Fix incorrect on demand feature view diffing and improve Java tests ([#3074](https://github.com/feast-dev/feast/issues/3074)) ([dd46d45](https://github.com/feast-dev/feast/commit/dd46d451c0d550a49128b79c00f60f47822dcbf1)) +* Fix on demand feature view output in feast plan + Web UI crash ([#3057](https://github.com/feast-dev/feast/issues/3057)) ([a44fe66](https://github.com/feast-dev/feast/commit/a44fe66fe4d6d4609effea358243ebc5a27faea9)) +* Fix Spark offline store type conversion to arrow ([#3071](https://github.com/feast-dev/feast/issues/3071)) ([8e6a6b1](https://github.com/feast-dev/feast/commit/8e6a6b10df51c6ee9c5d49d7e032678e5a703aaf)) + ## [0.22.3](https://github.com/feast-dev/feast/compare/v0.22.2...v0.22.3) (2022-08-10) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3597ef6e66..f986931784 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,6 +8,8 @@ 
the main Feast repository: - [Feast Java Serving](#feast-java-serving) - [Feast Go Client](#feast-go-client) +Please see [this page](https://docs.feast.dev/reference/codebase-structure) for more details on the structure of the entire codebase. + ## Community See [Contribution process](https://docs.feast.dev/project/contributing) and [Community](https://docs.feast.dev/community) for details on how to get more involved in the community. diff --git a/Makefile b/Makefile index 915ac907f7..a88f933c53 100644 --- a/Makefile +++ b/Makefile @@ -196,7 +196,7 @@ install-go-ci-dependencies: python -m pip install pybindgen==0.22.0 protobuf==3.20.1 install-protoc-dependencies: - pip install grpcio-tools==1.47.0 mypy-protobuf==3.1.0 + pip install --ignore-installed protobuf grpcio-tools==1.47.0 mypy-protobuf==3.1.0 compile-protos-go: install-go-proto-dependencies install-protoc-dependencies python setup.py build_go_protos @@ -209,7 +209,7 @@ install-feast-ci-locally: # Needs feast package to setup the feature store # CGO flag is due to this issue: https://github.com/golang/go/wiki/InvalidFlag -test-go: compile-protos-go compile-go-lib install-feast-ci-locally +test-go: compile-protos-go compile-protos-python compile-go-lib install-feast-ci-locally CGO_LDFLAGS_ALLOW=".*" go test -tags cgo,ccalloc ./... format-go: diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 23049455e3..8f14e984d2 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -59,6 +59,7 @@ ## Reference +* [Codebase Structure](reference/codebase-structure.md) * [Data sources](reference/data-sources/README.md) * [File](reference/data-sources/file.md) * [Snowflake](reference/data-sources/snowflake.md) diff --git a/docs/project/release-process.md b/docs/project/release-process.md index 7fb9c2a560..2ddc697730 100644 --- a/docs/project/release-process.md +++ b/docs/project/release-process.md @@ -4,23 +4,49 @@ For Feast maintainers, these are the concrete steps for making a new release. 
-### Pre-release Verification (Verification that wheels are built correctly) for minor release. +### 1. (for patch releases) Cherry-pick changes into the branch from master +If you were cutting Feast 0.22.3, for example, you might do: +1. `git checkout v0.22-branch` (or `git pull upstream v0.22-branch --rebase` if you've cut a release before) +2. `git cherry-pick [COMMIT FROM MASTER]` +3. `git push upstream v0.22-branch` to commit changes to the release branch + +> Note: if you're handling a maintenance release (i.e. an older version), semantic release may complain at you. See +> [Sample PR](https://github.com/feast-dev/feast/commit/40f2a6e13dd7d2a5ca5bff1af378e8712621d4f2) to enable an older +> branch to cut releases. + +After this step, you will have all the changes you need in the branch. + +### 2. Pre-release verification +A lot of things can go wrong. One of the most common is getting the wheels to build correctly (and not accidentally +building dev wheels from improper tagging or local code changes during the release process). + +We verify the wheels building in **your fork** of Feast, not the main feast-dev/feast repo. + +#### For minor releases (e.g. v0.22.0) 1. Merge upstream master changes into your **fork**. Make sure you are running the workflow off of your fork! 2. Create a tag manually for the release on your fork. For example, if you are doing a release for version 0.22.0, create a tag by doing the following. - Checkout master branch and run `git tag v0.22.0`. - Run `git push --tags` to push the tag to your forks master branch. -3. Access the `Actions` tab on your github UI on your fork and click the `build_wheels` action. This workflow will build the python sdk wheels for Python 3.8-3.10 on MacOS 10.15 and Linux and verify that these wheels are correct. The publish workflow uses this action to publish the python wheels for a new release to pypi. + > This is important. 
If you don't have a tag, then the wheels you build will be **dev wheels**, which we can't + > push. The release process will automatically produce a tag for you via Semantic Release. +3. Access the `Actions` tab on your GitHub UI on your fork and click the `build_wheels` action. This workflow will + build the python sdk wheels for Python 3.8-3.10 on MacOS 10.15 and Linux and verify that these wheels are correct. + The publish workflow uses this action to publish the python wheels for a new release to PyPI. 4. Look for the header `This workflow has a workflow_dispatch event trigger` and click `Run Workflow` on the right. -5. Run the workflow off of the tag you just created(`v0.22.0` in this case) and verify that the workflow worked (i.e ensure that all jobs are green). +5. Run the workflow off of the tag you just created (`v0.22.0` in this case, **not** the master branch) and verify that + the workflow worked (i.e. ensure that all jobs are green). -### Pre-release Verification (Verification that wheels are built correctly) for patch release. -1. Check out the branch of your release (e.g `v0.22-branch` on your local **fork**) and push this to your fork (`git push -u origin `). -2. Cherry pick commits that are relevant to the patch release onto your forked branch. -3. Checkout the release branch and add a patch release tag (e.g `v0.22.1`) by running `git tag `. -4. Push tags to your origin branch with `git push origin `. -5. Kick off `build_wheels` workflow in the same way as is detailed in the last section on of the patch release tag. +#### For patch releases (e.g. v0.22.3) +You should already have checked out the existing minor release branch from step 1 (e.g. `v0.22-branch`). +1. Push the minor release branch to your fork (`git push -u origin `). +2. Add a patch release tag (e.g `v0.22.1`) by running `git tag `. + > This is important. If you don't have a tag, then the wheels you build will be **dev wheels**, which we can't + > push. 
The release process will automatically produce a tag for you via Semantic Release. +3. Push tags to your **origin branch** (not the upstream feast-dev/feast branch) with `git push origin `. +4. Kick off `build_wheels` workflow in your fork in the same way as is detailed in the last section, running the + workflow from this tag you just pushed up. -### Release for Python and Java SDK +### 3. Release for Python and Java SDK 1. Generate a [Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) or retrieve your saved personal access token. * The personal access token should have all of the permissions under the `repo` checkbox. 2. Access the `Actions` tab on the main `feast-dev/feast` repo and find the `release` action. @@ -28,15 +54,31 @@ For Feast maintainers, these are the concrete steps for making a new release. * If you are making a minor or major release, you should run it off of the master branch. * If you are making a patch release, run it off of the corresponding minor release branch. 4. Try the dry run first with your personal access token. If this succeeds, uncheck `Dry Run` and run the release workflow. -5. All of the jobs should succeed besides the UI job which needs to be released separately. Ping a maintainer on Slack to run the UI release manually. -6. Try to install the feast release in your local environment and test out the `feast init` -> `feast apply` workflow to verify as a sanity check that the release worked correctly. +5. Then try running normally (without dry run). + - First, the `release` workflow will kick off. This publishes an NPM package for the Web UI ([NPM package](http://npmjs.com/package/@feast-dev/feast-ui)), + bumps file versions (e.g. helm chart, UI, Java pom.xml files), and generates a changelog using Semantic Release. + All jobs should succeed. + - Second, the `publish` workflow will kick off. 
This builds all the Python wheels ([PyPI link](https://pypi.org/project/feast/)), + publishes helm charts, publishes the Python and Java feature servers to Docker ([DockerHub images](https://hub.docker.com/u/feastdev)), + publishes the Java Serving Client + Datatypes libraries to Maven ([Maven repo](https://mvnrepository.com/artifact/dev.feast)) +6. Try to install the Feast Python release in your local environment and test out the `feast init` -> `feast apply` + workflow to verify as a sanity check that the release worked correctly. +7. Verify the releases all show the new version: + - [NPM package](http://npmjs.com/package/@feast-dev/feast-ui) + - [PyPI link](https://pypi.org/project/feast/) + - [DockerHub images (Java + Python feature servers, feature transformation server)](https://hub.docker.com/u/feastdev) + - [Maven repo (feast-datatypes, feast-serving-client)](https://mvnrepository.com/artifact/dev.feast) + +### 4. (for minor releases) Post-release steps +#### 4a: Creating a new branch +Create a new branch based on master (i.e. v0.22-branch) and push to the main Feast repo. This will be where +cherry-picks go for future patch releases and where documentation will point. -### (for minor releases) Post-release steps -1. Create a new branch based on master (i.e. v0.22-branch) and push to the main Feast repo. This will be where cherry-picks go for future patch releases and where documentation will point. -2. Write a summary of the release in the GitHub release - 1. By default, Semantic Release will pull in messages from commits (features vs fixes, etc). But this is hard to digest still, so it helps to have a high level overview. +#### 4b: Adding a high level summary in the GitHub release notes +By default, Semantic Release will pull in messages from commits (features vs fixes, etc). But this is hard to digest, +so it helps to have a high level overview. See https://github.com/feast-dev/feast/releases for the releases. 
-### Update documentation +#### 4c: Update documentation In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U029405HFEU) in Slack for access): 1. Create a new space within the Feast collection @@ -56,4 +98,5 @@ In the Feast Gitbook (ask [Danny Chiao](https://tectonfeast.slack.com/team/U0294 5. Configure the default space to be your new branch and save ![](new_branch_part_5.png) -6. Verify on docs.feast.dev that this new space is the default (this may take a few minutes to propagate, and your browser cache may be caching the old branch as the default) \ No newline at end of file +6. Verify on [docs.feast.dev](http://docs.feast.dev) that this new space is the default (this may take a few minutes to + propagate, and your browser cache may be caching the old branch as the default) \ No newline at end of file diff --git a/docs/reference/codebase-structure.md b/docs/reference/codebase-structure.md new file mode 100644 index 0000000000..b75227860b --- /dev/null +++ b/docs/reference/codebase-structure.md @@ -0,0 +1,131 @@ +# Codebase structure + +Let's examine the Feast codebase. +This analysis is accurate as of Feast 0.23. + +``` +$ tree -L 1 -d +. +├── docs +├── examples +├── go +├── infra +├── java +├── protos +├── sdk +└── ui +``` + +## Python SDK + +The Python SDK lives in `sdk/python/feast`. +The majority of Feast logic lives in these Python files: +* The core Feast objects ([entities](../getting-started/concepts/entity.md), [feature views](../getting-started/concepts/feature-view.md), [data sources](../getting-started/concepts/dataset.md), etc.) are defined in their respective Python files, such as `entity.py`, `feature_view.py`, and `data_source.py`. +* The `FeatureStore` class is defined in `feature_store.py` and the associated configuration object (the Python representation of the `feature_store.yaml` file) are defined in `repo_config.py`. +* The CLI and other core feature store logic are defined in `cli.py` and `repo_operations.py`. 
+* The type system that is used to manage conversion between Feast types and external typing systems is managed in `type_map.py`. +* The Python feature server (the server that is started through the `feast serve` command) is defined in `feature_server.py`. + +There are also several important submodules: +* `infra/` contains all the infrastructure components, such as the provider, offline store, online store, batch materialization engine, and registry. +* `dqm/` covers data quality monitoring, such as the dataset profiler. +* `diff/` covers the logic for determining how to apply infrastructure changes upon feature repo changes (e.g. the output of `feast plan` and `feast apply`). +* `embedded_go/` covers the Go feature server. +* `ui/` contains the embedded Web UI, to be launched on the `feast ui` command. + +Of these submodules, `infra/` is the most important. +It contains the interfaces for the [provider](getting-started/architecture-and-components/provider.md), [offline store](getting-started/architecture-and-components/offline-store.md), [online store](getting-started/architecture-and-components/online-store.md), [batch materialization engine](getting-started/architecture-and-components/batch-materialization-engine.md), and [registry](getting-started/architecture-and-components/registry.md), as well as all of their individual implementations. + +``` +$ tree --dirsfirst -L 1 infra +infra +├── contrib +├── feature_servers +├── materialization +├── offline_stores +├── online_stores +├── registry +├── transformation_servers +├── utils +├── __init__.py +├── aws.py +├── gcp.py +├── infra_object.py +├── key_encoding_utils.py +├── local.py +├── passthrough_provider.py +└── provider.py +``` + +The tests for the Python SDK are contained in `sdk/python/tests`. +For more details, see this [overview](../how-to-guides/adding-or-reusing-tests.md#test-suite-overview) of the test suite. 
+ +### Example flow: `feast apply` + +Let's walk through how `feast apply` works by tracking its execution across the codebase. + +1. All CLI commands are in `cli.py`. + Most of these commands are backed by methods in `repo_operations.py`. + The `feast apply` command triggers `apply_total_command`, which then calls `apply_total` in `repo_operations.py`. +2. With a `FeatureStore` object (from `feature_store.py`) that is initialized based on the `feature_store.yaml` in the current working directory, `apply_total` first parses the feature repo with `parse_repo` and then calls either `FeatureStore.apply` or `FeatureStore._apply_diffs` to apply those changes to the feature store. +3. Let's examine `FeatureStore.apply`. + It splits the objects based on class (e.g. `Entity`, `FeatureView`, etc.) and then calls the appropriate registry method to apply or delete the object. + For example, it might call `self._registry.apply_entity` to apply an entity. + If the default file-based registry is used, this logic can be found in `infra/registry/registry.py`. +4. Then the feature store must update its cloud infrastructure (e.g. online store tables) to match the new feature repo, so it calls `Provider.update_infra`, which can be found in `infra/provider.py`. +5. Assuming the provider is a built-in provider (e.g. one of the local, GCP, or AWS providers), it will call `PassthroughProvider.update_infra` in `infra/passthrough_provider.py`. +6. This delegates to the online store and batch materialization engine. + For example, if the feature store is configured to use the Redis online store then the `update` method from `infra/online_stores/redis.py` will be called. + And if the local materialization engine is configured then the `update` method from `infra/materialization/local_engine.py` will be called. + +At this point, the `feast apply` command is complete. 
+ +### Example flow: `feast materialize` + +Let's walk through how `feast materialize` works by tracking its execution across the codebase. + +1. The `feast materialize` command triggers `materialize_command` in `cli.py`, which then calls `FeatureStore.materialize` from `feature_store.py`. +2. This then calls `Provider.materialize_single_feature_view`, which can be found in `infra/provider.py`. +3. As with `feast apply`, the provider is most likely backed by the passthrough provider, in which case `PassthroughProvider.materialize_single_feature_view` will be called. +4. This delegates to the underlying batch materialization engine. + Assuming that the local engine has been configured, `LocalMaterializationEngine.materialize` from `infra/materialization/local_engine.py` will be called. +5. Since materialization involves reading features from the offline store and writing them to the online store, the local engine will delegate to both the offline store and online store. + Specifically, it will call `OfflineStore.pull_latest_from_table_or_query` and `OnlineStore.online_write_batch`. + These two calls will be routed to the offline store and online store that have been configured. + +### Example flow: `get_historical_features` + +Let's walk through how `get_historical_features` works by tracking its execution across the codebase. + +1. We start with `FeatureStore.get_historical_features` in `feature_store.py`. + This method does some internal preparation, and then delegates the actual execution to the underlying provider by calling `Provider.get_historical_features`, which can be found in `infra/provider.py`. +2. As with `feast apply`, the provider is most likely backed by the passthrough provider, in which case `PassthroughProvider.get_historical_features` will be called. +3. That call simply delegates to `OfflineStore.get_historical_features`. 
+ So if the feature store is configured to use Snowflake as the offline store, `SnowflakeOfflineStore.get_historical_features` will be executed. + +## Java SDK + +The `java/` directory contains the Java serving component. +See [here](https://github.com/feast-dev/feast/blob/master/java/CONTRIBUTING.md) for more details on how the repo is structured. + +## Go feature server + +The `go/` directory contains the Go feature server. +Most of the files here have logic to help with reading features from the online store. +Within `go/`, the `internal/feast/` directory contains most of the core logic: +* `onlineserving/` covers the core serving logic. +* `model/` contains the implementations of the Feast objects (entity, feature view, etc.). + * For example, `entity.go` is the Go equivalent of `entity.py`. It contains a very simple Go implementation of the entity object. +* `registry/` covers the registry. + * Currently only the file-based registry is supported (the sql-based registry is unsupported). Additionally, the file-based registry only supports a file-based registry store, not the GCS or S3 registry stores. +* `onlinestore/` covers the online stores (currently only Redis and SQLite are supported). + +## Protobufs + +Feast uses [protobuf](https://github.com/protocolbuffers/protobuf) to store serialized versions of the core Feast objects. +The protobuf definitions are stored in `protos/feast`. + +## Web UI + +The `ui/` directory contains the Web UI. +See [here](https://github.com/feast-dev/feast/blob/master/ui/CONTRIBUTING.md) for more details on the structure of the Web UI. 
diff --git a/infra/charts/feast-feature-server/Chart.yaml b/infra/charts/feast-feature-server/Chart.yaml index aabe071357..bac4c7c74e 100644 --- a/infra/charts/feast-feature-server/Chart.yaml +++ b/infra/charts/feast-feature-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-feature-server description: Feast Feature Server in Go or Python type: application -version: 0.22.3 +version: 0.22.4 keywords: - machine learning - big data diff --git a/infra/charts/feast-feature-server/README.md b/infra/charts/feast-feature-server/README.md index 8c215a7068..9b9df7582f 100644 --- a/infra/charts/feast-feature-server/README.md +++ b/infra/charts/feast-feature-server/README.md @@ -1,6 +1,6 @@ # feast-feature-server -![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.22.4](https://img.shields.io/badge/Version-0.22.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Feast Feature Server in Go or Python diff --git a/infra/charts/feast-python-server/Chart.yaml b/infra/charts/feast-python-server/Chart.yaml index 04447304f3..4591394161 100644 --- a/infra/charts/feast-python-server/Chart.yaml +++ b/infra/charts/feast-python-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-python-server description: Feast Feature Server in Python type: application -version: 0.22.3 +version: 0.22.4 keywords: - machine learning - big data diff --git a/infra/charts/feast-python-server/README.md b/infra/charts/feast-python-server/README.md index 232e9a7ff1..8ffb351482 100644 --- a/infra/charts/feast-python-server/README.md +++ b/infra/charts/feast-python-server/README.md @@ -1,6 +1,6 @@ # feast-python-server -![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![Type: 
application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.22.4](https://img.shields.io/badge/Version-0.22.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Feast Feature Server in Python diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index 12145b010a..0869ab443e 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.22.3 +version: 0.22.4 keywords: - machine learning - big data diff --git a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 19c81d3d4d..897bc583fc 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -8,7 +8,7 @@ This repo contains Helm charts for Feast components that are being installed on ## Chart: Feast -Feature store for machine learning Current chart version is `0.22.3` +Feature store for machine learning Current chart version is `0.22.4` ## Installation @@ -55,8 +55,8 @@ For more details, please see: https://docs.feast.dev/how-to-guides/running-feast | Repository | Name | Version | |------------|------|---------| | https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.22.3 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.22.3 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.22.4 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.22.4 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index cb5cfa0455..f25472aeca 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ 
b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.22.3 -appVersion: v0.22.3 +version: 0.22.4 +appVersion: v0.22.4 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index 148cfe5cbf..6aa6068579 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![AppVersion: v0.22.3](https://img.shields.io/badge/AppVersion-v0.22.3-informational?style=flat-square) +![Version: 0.22.4](https://img.shields.io/badge/Version-0.22.4-informational?style=flat-square) ![AppVersion: v0.22.4](https://img.shields.io/badge/AppVersion-v0.22.4-informational?style=flat-square) Feast Feature Server: Online feature serving service for Feast @@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.22.3"` | Image tag | +| image.tag | string | `"0.22.4"` | Image tag | | ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | | ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | | ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml index b12fa588d1..b70f1fdda0 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ 
-5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.22.3 + tag: 0.22.4 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index 7c347e1186..bd4cb9fbec 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.22.3 -appVersion: v0.22.3 +version: 0.22.4 +appVersion: v0.22.4 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index c89fe2a811..63b9d44c48 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.22.3](https://img.shields.io/badge/Version-0.22.3-informational?style=flat-square) ![AppVersion: v0.22.3](https://img.shields.io/badge/AppVersion-v0.22.3-informational?style=flat-square) +![Version: 0.22.4](https://img.shields.io/badge/Version-0.22.4-informational?style=flat-square) ![AppVersion: v0.22.4](https://img.shields.io/badge/AppVersion-v0.22.4-informational?style=flat-square) Transformation service: to compute on-demand features @@ -13,7 +13,7 @@ Transformation service: to compute on-demand features | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.22.3"` | Image tag | +| image.tag | string | 
`"0.22.4"` | Image tag | | nodeSelector | object | `{}` | Node labels for pod assignment | | podLabels | object | `{}` | Labels to be added to Feast Serving pods | | replicaCount | int | `1` | Number of pods that will be created | diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index f8e52a931c..53d221ded7 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # image.tag -- Image tag - tag: 0.22.3 + tag: 0.22.4 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index 6eb54a422f..4b3c4a01a6 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.22.3 + version: 0.22.4 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.22.3 + version: 0.22.4 condition: transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md index f6c789d984..27039534b9 100644 --- a/java/CONTRIBUTING.md +++ b/java/CONTRIBUTING.md @@ -36,6 +36,7 @@ mvn spotless:apply #### Project Makefile The Project Makefile provides useful shorthands for common development tasks: +> Note: These commands rely on a local version of `feast` (Python) to be installed Run all Unit tests: ``` diff --git a/java/infra/docker/feature-server/Dockerfile b/java/infra/docker/feature-server/Dockerfile index dbd8c91472..960bddc56d 100644 --- 
a/java/infra/docker/feature-server/Dockerfile +++ b/java/infra/docker/feature-server/Dockerfile @@ -28,7 +28,7 @@ COPY protos/feast datatypes/src/main/proto/feast ARG VERSION=dev RUN mvn --also-make --projects serving -Drevision=$VERSION \ - -DskipUTs=true --batch-mode clean package + -DskipUTs=true -DskipITs=true --batch-mode clean package # # Download grpc_health_probe to run health check for Feast Serving # https://kubernetes.io/blog/2018/10/01/health-checking-grpc-servers-on-kubernetes/ diff --git a/java/pom.xml b/java/pom.xml index 9f849ed9b5..1cb73c42ca 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -38,7 +38,7 @@ - 0.22.3 + 0.22.4 https://github.com/feast-dev/feast UTF-8 diff --git a/java/serving/README.md b/java/serving/README.md index 5ac7194924..6a11fbee67 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -136,4 +136,6 @@ Unit & Integration Tests can be used to verify functionality: mvn test -pl serving --also-make # run integration tests mvn verify -pl serving --also-make +# run integration tests with debugger +mvn -Dmaven.failsafe.debug verify -pl serving --also-make ``` \ No newline at end of file diff --git a/java/serving/pom.xml b/java/serving/pom.xml index 47a636d7d5..fedcc3f749 100644 --- a/java/serving/pom.xml +++ b/java/serving/pom.xml @@ -82,6 +82,29 @@ + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + python + src/test/resources/docker-compose/feast10/ + + setup_it.py + + ${skipITs} + + feast_test_apply + process-test-resources + + exec + + + + diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py index 806995ec06..0693358a12 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/definitions.py +++ b/java/serving/src/test/resources/docker-compose/feast10/definitions.py @@ -73,8 +73,9 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: entity = Entity(name="entity", 
value_type=ValueType.STRING,) -benchmark_feature_views = [ - FeatureView( +benchmark_feature_views = [] +for i in range(25): + fv = FeatureView( name=f"feature_view_{i}", entities=[entity], ttl=Duration(seconds=86400), @@ -82,8 +83,7 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: online=True, source=generated_data_source, ) - for i in range(25) -] + benchmark_feature_views.append(fv) benchmark_feature_service = FeatureService( name=f"benchmark_feature_service", features=benchmark_feature_views, diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db deleted file mode 100644 index 746934e3d0..0000000000 Binary files a/java/serving/src/test/resources/docker-compose/feast10/registry.db and /dev/null differ diff --git a/java/serving/src/test/resources/docker-compose/feast10/setup_it.py b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py new file mode 100644 index 0000000000..503b66f328 --- /dev/null +++ b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py @@ -0,0 +1,86 @@ +from pathlib import Path +from feast.repo_config import load_repo_config +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd + +from definitions import ( + benchmark_feature_service, + benchmark_feature_views, + driver, + driver_hourly_stats_view, + entity, + transformed_conv_rate, +) + +from feast import FeatureStore + + +def setup_data(): + start = datetime.now() - timedelta(days=10) + + df = pd.DataFrame() + df["driver_id"] = np.arange(1000, 1010) + df["created"] = datetime.now() + df["conv_rate"] = np.arange(0, 1, 0.1) + df["acc_rate"] = np.arange(0.5, 1, 0.05) + df["avg_daily_trips"] = np.arange(0, 1000, 100) + + # some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status + df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map( + lambda days: timedelta(days=days) + ) + + # Store data in parquet 
files. Parquet is convenient for local development mode. For + # production, you can use your favorite DWH, such as BigQuery. See Feast documentation + # for more info. + df.to_parquet("driver_stats.parquet") + + # For Benchmarks + # Please read more in Feast RFC-031 + # (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) + # about this benchmark setup + def generate_data( + num_rows, num_features, destination + ): + features = [f"feature_{i}" for i in range(num_features)] + columns = ["entity", "event_timestamp"] + features + df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns) + df["event_timestamp"] = datetime.utcnow() + for column in features: + df[column] = np.random.randint(1, num_rows, num_rows) + + df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype( + pd.StringDtype() + ) + + df.to_parquet(destination) + + generate_data(10**3, 250, "benchmark_data.parquet") + + +def main(): + print("Running setup_it.py") + + setup_data() + existing_repo_config = load_repo_config(Path(".")) + + # Update to default online store since otherwise, relies on Dockerized Redis service + fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}})) + fs.apply( + [ + driver_hourly_stats_view, + transformed_conv_rate, + driver, + entity, + benchmark_feature_service, + *benchmark_feature_views, + ] + ) + + print("setup_it finished") + + +if __name__ == "__main__": + main() diff --git a/protos/feast/core/OnDemandFeatureView.proto b/protos/feast/core/OnDemandFeatureView.proto index 33c51f5c4d..50bf8b6f55 100644 --- a/protos/feast/core/OnDemandFeatureView.proto +++ b/protos/feast/core/OnDemandFeatureView.proto @@ -83,4 +83,7 @@ message UserDefinedFunction { // The python-syntax function body (serialized by dill) bytes body = 2; + + // The string representation of the udf + string body_text = 3; } diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py index 
7a5b9b7564..288de0ec95 100644 --- a/sdk/python/feast/diff/registry_diff.py +++ b/sdk/python/feast/diff/registry_diff.py @@ -17,6 +17,7 @@ from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( OnDemandFeatureView as OnDemandFeatureViewProto, ) +from feast.protos.feast.core.OnDemandFeatureView_pb2 import OnDemandFeatureViewSpec from feast.protos.feast.core.RequestFeatureView_pb2 import ( RequestFeatureView as RequestFeatureViewProto, ) @@ -137,19 +138,39 @@ def diff_registry_objects( else: current_spec = current_proto.spec new_spec = new_proto.spec - if current_spec != new_spec: + if current != new: for _field in current_spec.DESCRIPTOR.fields: if _field.name in FIELDS_TO_IGNORE: continue - if getattr(current_spec, _field.name) != getattr(new_spec, _field.name): - transition = TransitionType.UPDATE - property_diffs.append( - PropertyDiff( - _field.name, - getattr(current_spec, _field.name), - getattr(new_spec, _field.name), + elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name): + if _field.name == "user_defined_function": + current_spec = cast(OnDemandFeatureViewSpec, current_spec) + new_spec = cast(OnDemandFeatureViewSpec, new_spec) + current_udf = current_spec.user_defined_function + new_udf = new_spec.user_defined_function + for _udf_field in current_udf.DESCRIPTOR.fields: + if _udf_field.name == "body": + continue + if getattr(current_udf, _udf_field.name) != getattr( + new_udf, _udf_field.name + ): + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name + "." 
+ _udf_field.name, + getattr(current_udf, _udf_field.name), + getattr(new_udf, _udf_field.name), + ) + ) + else: + transition = TransitionType.UPDATE + property_diffs.append( + PropertyDiff( + _field.name, + getattr(current_spec, _field.name), + getattr(new_spec, _field.name), + ) ) - ) return FeastObjectDiff( name=new_spec.name, feast_object_type=object_type, diff --git a/sdk/python/feast/feature_logging.py b/sdk/python/feast/feature_logging.py index 275bde72ec..da9a0c9fe5 100644 --- a/sdk/python/feast/feature_logging.py +++ b/sdk/python/feast/feature_logging.py @@ -34,12 +34,12 @@ class LoggingSource: @abc.abstractmethod def get_schema(self, registry: "BaseRegistry") -> pa.Schema: - """ Generate schema for logs destination. """ + """Generate schema for logs destination.""" raise NotImplementedError @abc.abstractmethod def get_log_timestamp_column(self) -> str: - """ Return timestamp column that must exist in generated schema. """ + """Return timestamp column that must exist in generated schema.""" raise NotImplementedError diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 499bb152ed..9121550155 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -2349,10 +2349,10 @@ def get_validation_reference( self, name: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. - Raises: - ValidationReferenceNotFoundException: The validation reference could not be found. + Raises: + ValidationReferenceNotFoundException: The validation reference could not be found. 
""" ref = self._registry.get_validation_reference( name, project=self.project, allow_cache=allow_cache diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index 011a3b99b2..1e3363d4bc 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -192,10 +192,10 @@ def _infer_features_and_entities( fv.batch_source.timestamp_field, fv.batch_source.created_timestamp_column, } - for column in columns_to_exclude: - if column in fv.batch_source.field_mapping: - columns_to_exclude.remove(column) - columns_to_exclude.add(fv.batch_source.field_mapping[column]) + for original_col, mapped_col in fv.batch_source.field_mapping.items(): + if mapped_col in columns_to_exclude: + columns_to_exclude.remove(mapped_col) + columns_to_exclude.add(original_col) table_column_names_and_types = fv.batch_source.get_table_column_names_and_types( config diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py index 2a0925d929..cef5dae7ac 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -1,3 +1,4 @@ +import tempfile import warnings from datetime import datetime from typing import Dict, List, Optional, Tuple, Union @@ -6,6 +7,7 @@ import pandas import pandas as pd import pyarrow +import pyarrow.parquet as pq import pyspark from pydantic import StrictStr from pyspark import SparkConf @@ -264,8 +266,11 @@ def _to_df_internal(self) -> pd.DataFrame: def _to_arrow_internal(self) -> pyarrow.Table: """Return dataset as pyarrow Table synchronously""" - df = self.to_df() - return pyarrow.Table.from_pandas(df) # noqa + + # write to temp parquet and then load it as pyarrow table from disk + with tempfile.TemporaryDirectory() as temp_dir: + self.to_spark_df().write.parquet(temp_dir, mode="overwrite") + return pq.read_table(temp_dir) def 
persist(self, storage: SavedDatasetStorage): """ diff --git a/sdk/python/feast/on_demand_feature_view.py b/sdk/python/feast/on_demand_feature_view.py index bad4edba81..1978f51130 100644 --- a/sdk/python/feast/on_demand_feature_view.py +++ b/sdk/python/feast/on_demand_feature_view.py @@ -61,12 +61,12 @@ class OnDemandFeatureView(BaseFeatureView): maintainer. """ - # TODO(adchia): remove inputs from proto and declaration name: str features: List[Field] source_feature_view_projections: Dict[str, FeatureViewProjection] source_request_sources: Dict[str, RequestSource] udf: FunctionType + udf_string: str description: str tags: Dict[str, str] owner: str @@ -81,6 +81,7 @@ def __init__( # noqa: C901 List[Any] ] = None, # Typed as Any because @typechecked can't deal with the List[Union] udf: Optional[FunctionType] = None, + udf_string: str = "", inputs: Optional[ Dict[str, Union[FeatureView, FeatureViewProjection, RequestSource]] ] = None, @@ -99,8 +100,9 @@ def __init__( # noqa: C901 sources (optional): A map from input source names to the actual input sources, which may be feature views, or request data sources. These sources serve as inputs to the udf, which will refer to them by name. - udf (optional): The user defined transformation function, which must take pandas + udf: The user defined transformation function, which must take pandas dataframes as inputs. + udf_string: The source code version of the udf (for diffing and displaying in Web UI) inputs (optional): (Deprecated) A map from input source names to the actual input sources, which may be feature views, feature view projections, or request data sources. These sources serve as inputs to the udf, which will refer to them by name. 
@@ -233,9 +235,8 @@ def __init__( # noqa: C901 odfv_source.name ] = odfv_source.projection - if _udf is None: - raise ValueError("The `udf` parameter must be specified.") - self.udf = _udf # type: ignore + self.udf = udf # type: ignore + self.udf_string = udf_string @property def proto_class(self) -> Type[OnDemandFeatureViewProto]: @@ -249,6 +250,7 @@ def __copy__(self): sources=list(self.source_feature_view_projections.values()) + list(self.source_request_sources.values()), udf=self.udf, + udf_string=self.udf_string, description=self.description, tags=self.tags, owner=self.owner, @@ -269,6 +271,7 @@ def __eq__(self, other): self.source_feature_view_projections != other.source_feature_view_projections or self.source_request_sources != other.source_request_sources + or self.udf_string != other.udf_string or self.udf.__code__.co_code != other.udf.__code__.co_code ): return False @@ -305,7 +308,9 @@ def to_proto(self) -> OnDemandFeatureViewProto: features=[feature.to_proto() for feature in self.features], sources=sources, user_defined_function=UserDefinedFunctionProto( - name=self.udf.__name__, body=dill.dumps(self.udf, recurse=True), + name=self.udf.__name__, + body=dill.dumps(self.udf, recurse=True), + body_text=self.udf_string, ), description=self.description, tags=self.tags, @@ -354,6 +359,7 @@ def from_proto(cls, on_demand_feature_view_proto: OnDemandFeatureViewProto): udf=dill.loads( on_demand_feature_view_proto.spec.user_defined_function.body ), + udf_string=on_demand_feature_view_proto.spec.user_defined_function.body_text, description=on_demand_feature_view_proto.spec.description, tags=dict(on_demand_feature_view_proto.spec.tags), owner=on_demand_feature_view_proto.spec.owner, @@ -641,6 +647,7 @@ def mainify(obj): obj.__module__ = "__main__" def decorator(user_function): + udf_string = dill.source.getsource(user_function) mainify(user_function) on_demand_feature_view_obj = OnDemandFeatureView( name=user_function.__name__, @@ -650,6 +657,7 @@ def 
decorator(user_function): description=description, tags=tags, owner=owner, + udf_string=udf_string, ) functools.update_wrapper( wrapper=on_demand_feature_view_obj, wrapped=user_function diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py index f72fd717d2..248d156a81 100644 --- a/sdk/python/feast/registry.py +++ b/sdk/python/feast/registry.py @@ -24,7 +24,6 @@ from typing import Any, Dict, List, Optional from urllib.parse import urlparse -import dill from google.protobuf.internal.containers import RepeatedCompositeFieldContainer from google.protobuf.json_format import MessageToJson from proto import Message @@ -581,17 +580,17 @@ def get_validation_reference( self, name: str, project: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. - Args: - name: Name of dataset - project: Feast project that this dataset belongs to - allow_cache: Whether to allow returning this dataset from a cached registry + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry - Returns: - Returns either the specified ValidationReference, or raises an exception if - none is found - """ + Returns: + Returns either the specified ValidationReference, or raises an exception if + none is found + """ # TODO: Needs to be implemented. 
def list_validation_references( @@ -729,9 +728,10 @@ def to_dict(self, project: str) -> Dict[str, List[Any]]: key=lambda on_demand_feature_view: on_demand_feature_view.name, ): odfv_dict = self._message_to_sorted_dict(on_demand_feature_view.to_proto()) - odfv_dict["spec"]["userDefinedFunction"]["body"] = dill.source.getsource( - on_demand_feature_view.udf - ) + + odfv_dict["spec"]["userDefinedFunction"][ + "body" + ] = on_demand_feature_view.udf_string registry_dict["onDemandFeatureViews"].append(odfv_dict) for request_feature_view in sorted( self.list_request_feature_views(project=project), @@ -1692,17 +1692,17 @@ def get_validation_reference( self, name: str, project: str, allow_cache: bool = False ) -> ValidationReference: """ - Retrieves a validation reference. + Retrieves a validation reference. - Args: - name: Name of dataset - project: Feast project that this dataset belongs to - allow_cache: Whether to allow returning this dataset from a cached registry + Args: + name: Name of dataset + project: Feast project that this dataset belongs to + allow_cache: Whether to allow returning this dataset from a cached registry - Returns: - Returns either the specified ValidationReference, or raises an exception if - none is found - """ + Returns: + Returns either the specified ValidationReference, or raises an exception if + none is found + """ registry_proto = self._get_registry_proto( project=project, allow_cache=allow_cache ) diff --git a/sdk/python/tests/integration/feature_repos/universal/feature_views.py b/sdk/python/tests/integration/feature_repos/universal/feature_views.py index 3fee0b7001..6f5acc6881 100644 --- a/sdk/python/tests/integration/feature_repos/universal/feature_views.py +++ b/sdk/python/tests/integration/feature_repos/universal/feature_views.py @@ -88,6 +88,7 @@ def conv_rate_plus_100_feature_view( schema=[] if infer_features else _features, sources=sources, udf=conv_rate_plus_100, + udf_string="raw udf source", ) @@ -125,6 +126,7 @@ def 
similarity_feature_view( sources=sources, schema=[] if infer_features else _fields, udf=similarity, + udf_string="similarity raw udf", ) diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py index ae10c834c8..899142611d 100644 --- a/sdk/python/tests/unit/diff/test_registry_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -1,9 +1,14 @@ +import pandas as pd + +from feast import Field from feast.diff.registry_diff import ( diff_registry_objects, tag_objects_for_keep_delete_update_add, ) from feast.entity import Entity from feast.feature_view import FeatureView +from feast.on_demand_feature_view import on_demand_feature_view +from feast.types import String from tests.utils.data_source_utils import prep_file_source @@ -87,3 +92,49 @@ def test_diff_registry_objects_feature_views(simple_dataset_1): assert feast_object_diffs.feast_object_property_diffs[0].val_declared == { "when": "after" } + + +def test_diff_odfv(simple_dataset_1): + with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: + entity = Entity(name="id", join_keys=["id"]) + fv = FeatureView( + name="fv2", entities=[entity], source=file_source, tags={"when": "before"}, + ) + + @on_demand_feature_view( + sources=[fv], schema=[Field(name="first_char", dtype=String)], + ) + def pre_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + return df + + @on_demand_feature_view( + sources=[fv], schema=[Field(name="first_char", dtype=String)], + ) + def post_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + "hi" + return df + + feast_object_diffs = diff_registry_objects( + pre_changed, pre_changed, "on demand feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 0 + + feast_object_diffs = diff_registry_objects( + 
pre_changed, post_changed, "on demand feature view" + ) + + # Note that user_defined_function.body is excluded because it always changes (dill is non-deterministic), even + # if no code is changed + assert len(feast_object_diffs.feast_object_property_diffs) == 3 + assert feast_object_diffs.feast_object_property_diffs[0].property_name == "name" + assert ( + feast_object_diffs.feast_object_property_diffs[1].property_name + == "user_defined_function.name" + ) + assert ( + feast_object_diffs.feast_object_property_diffs[2].property_name + == "user_defined_function.body_text" + ) diff --git a/sdk/python/tests/unit/test_on_demand_feature_view.py b/sdk/python/tests/unit/test_on_demand_feature_view.py index 33435b8557..f8364a036a 100644 --- a/sdk/python/tests/unit/test_on_demand_feature_view.py +++ b/sdk/python/tests/unit/test_on_demand_feature_view.py @@ -57,6 +57,7 @@ def test_hash(): Field(name="output2", dtype=Float32), ], udf=udf1, + udf_string="udf1 source code", ) on_demand_feature_view_2 = OnDemandFeatureView( name="my-on-demand-feature-view", @@ -66,6 +67,7 @@ def test_hash(): Field(name="output2", dtype=Float32), ], udf=udf1, + udf_string="udf1 source code", ) on_demand_feature_view_3 = OnDemandFeatureView( name="my-on-demand-feature-view", @@ -75,6 +77,7 @@ def test_hash(): Field(name="output2", dtype=Float32), ], udf=udf2, + udf_string="udf2 source code", ) on_demand_feature_view_4 = OnDemandFeatureView( name="my-on-demand-feature-view", @@ -84,6 +87,7 @@ def test_hash(): Field(name="output2", dtype=Float32), ], udf=udf2, + udf_string="udf2 source code", description="test", ) diff --git a/ui/package.json b/ui/package.json index 3793fb42c0..2278f9043c 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "@feast-dev/feast-ui", - "version": "0.22.3", + "version": "0.22.4", "private": false, "files": [ "dist"