diff --git a/.github/fork_workflows/fork_pr_integration_tests_aws.yml b/.github/fork_workflows/fork_pr_integration_tests_aws.yml index ef53fc1c7d7..e4362af7d3e 100644 --- a/.github/fork_workflows/fork_pr_integration_tests_aws.yml +++ b/.github/fork_workflows/fork_pr_integration_tests_aws.yml @@ -139,7 +139,9 @@ jobs: sudo apt install -y -V libarrow-dev - name: Install apache-arrow on macos if: matrix.os == 'macOS-latest' - run: brew install apache-arrow + run: | + brew install apache-arrow + brew install pkg-config - name: Install dependencies run: make install-python-ci-dependencies - name: Setup Redis Cluster diff --git a/.github/fork_workflows/fork_pr_integration_tests_gcp.yml b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml index 4cb22f33fba..d77c1052e7c 100644 --- a/.github/fork_workflows/fork_pr_integration_tests_gcp.yml +++ b/.github/fork_workflows/fork_pr_integration_tests_gcp.yml @@ -83,7 +83,9 @@ jobs: sudo apt install -y -V libarrow-dev - name: Install apache-arrow on macos if: matrix.os == 'macOS-latest' - run: brew install apache-arrow + run: | + brew install apache-arrow + brew install pkg-config - name: Install dependencies run: make install-python-ci-dependencies - name: Setup Redis Cluster diff --git a/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml index 8832c75fcaf..56b4c268b70 100644 --- a/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml +++ b/.github/fork_workflows/fork_pr_integration_tests_snowflake.yml @@ -74,7 +74,9 @@ jobs: sudo apt install -y -V libarrow-dev - name: Install apache-arrow on macos if: matrix.os == 'macOS-latest' - run: brew install apache-arrow + run: | + brew install apache-arrow + brew install pkg-config - name: Install dependencies run: make install-python-ci-dependencies - name: Setup Redis Cluster diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index aef486d601a..20a9f57a2a4 100644 
--- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -53,12 +53,14 @@ jobs: build-python-wheel: name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ ubuntu-latest, macos-10.15 ] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + architecture: x64 - name: Setup Node uses: actions/setup-node@v2 with: @@ -67,50 +69,9 @@ jobs: - name: Build UI run: make build-ui - name: Build wheels - uses: pypa/cibuildwheel@v2.7.0 - env: - CIBW_BUILD: "cp3*_x86_64" - CIBW_SKIP: "cp36-* cp37-* *-musllinux_x86_64 cp310-macosx_x86_64" - CIBW_ARCHS: "native" - CIBW_ENVIRONMENT: > - COMPILE_GO=True PATH=$PATH:/usr/local/go/bin - CIBW_BEFORE_ALL_LINUX: | - curl -o go.tar.gz https://dl.google.com/go/go1.18.2.linux-amd64.tar.gz - tar -C /usr/local -xzf go.tar.gz - go version - yum -y update && - yum install -y epel-release || yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1).noarch.rpm && - yum install -y https://apache.jfrog.io/artifactory/arrow/centos/$(cut -d: -f5 /etc/system-release-cpe | cut -d. -f1)/apache-arrow-release-latest.rpm && - yum install -y --enablerepo=epel arrow-devel # For C++ - CIBW_BEFORE_ALL_MACOS: | - brew install apache-arrow - curl -o python.pkg https://www.python.org/ftp/python/3.9.12/python-3.9.12-macosx10.9.pkg - sudo installer -pkg python.pkg -target / - # There's a `git restore` in here because `make install-go-ci-dependencies` is actually messing up go.mod & go.sum. - CIBW_BEFORE_BUILD: | - make install-protoc-dependencies - make install-go-proto-dependencies - make install-go-ci-dependencies - git status - git restore go.mod go.sum - git restore sdk/python/feast/ui/yarn.lock - CIBW_BEFORE_TEST: "cd {project} && git status" - # py3.10 on MacOS does not work with Go so we have to install separately. 
Issue is tracked here: https://github.com/feast-dev/feast/issues/2881. - - name: Build py310 specific wheels for macos - if: matrix.os == 'macos-10.15' - uses: pypa/cibuildwheel@v2.7.0 - env: - CIBW_BUILD: "cp310-macosx_x86_64" - CIBW_ARCHS: "native" - # Need this environment variable because of this issue: https://github.com/pypa/cibuildwheel/issues/952. - CIBW_ENVIRONMENT: > - _PYTHON_HOST_PLATFORM=macosx-10.15-x86_64 - # There's a `git restore` in here because remnant go.mod, go.sum changes from the build mess up the wheel naming. - CIBW_BEFORE_BUILD: | - git status - git restore go.mod go.sum - git restore sdk/python/feast/ui/yarn.lock - brew install apache-arrow + run: | + python -m pip install build + python -m build --wheel --outdir wheelhouse/ - uses: actions/upload-artifact@v2 with: name: wheels @@ -137,8 +98,6 @@ jobs: run: | pip install -U pip setuptools wheel twine make install-protoc-dependencies - make install-go-proto-dependencies - make install-go-ci-dependencies make build-ui git status git restore go.mod go.sum @@ -201,9 +160,6 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - uses: actions/setup-go@v3 - with: - go-version: '>=1.17.0' - uses: actions/download-artifact@v2 with: name: wheels @@ -215,34 +171,10 @@ jobs: cd dist/ pip install wheel for f in *.whl; do pip install $f || true; done - - name: Install apache-arrow on ubuntu - if: ${{ matrix.from-source && matrix.os == 'ubuntu-latest' }} - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Install apache-arrow on macos - if: ${{ matrix.from-source && matrix.os == 'macos-10.15' && matrix.python-version != 
'3.10' }} - run: brew install apache-arrow - - name: Install dist with go - if: ${{ matrix.from-source && (matrix.python-version != '3.10' || matrix.os == 'ubuntu-latest')}} - env: - COMPILE_GO: "True" - run: | - pip install 'grpcio-tools==1.47.0' 'pybindgen==0.22.0' - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26.0 - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1.0 - pip install dist/*tar.gz - # py3.10 on MacOS does not work with Go so we have to install separately. Issue is tracked here: https://github.com/feast-dev/feast/issues/2881 - - name: Install dist w/o go - if: ${{ matrix.from-source && matrix.python-version == '3.10' && matrix.os == 'macos-10.15'}} + - name: Install sdist + # try to install the sdist + if: ${{ matrix.from-source }} run: pip install dist/*tar.gz - - name: Install OS X dependencies - if: matrix.os == 'macos-10.15' - run: brew install coreutils # Validate that the feast version installed is not development and is the correct version of the tag we ran it off of. - name: Validate Feast Version run: | @@ -265,13 +197,3 @@ jobs: echo "$TEST_SCRIPT" > run-and-wait.sh bash run-and-wait.sh feast serve bash run-and-wait.sh feast ui - # We disable this test for the Python 3.10 binary since it does not include Go. 
- - name: Smoke test with go - if: matrix.python-version != '3.10' || matrix.os == 'ubuntu-latest' - run: | - cd test_repo/feature_repo - feast apply - echo "$TEST_SCRIPT" > run-and-wait.sh - pip install cffi - printf "\ngo_feature_serving: True" >> feature_store.yaml - bash run-and-wait.sh feast serve \ No newline at end of file diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml index 356208332fa..f4c280d682d 100644 --- a/.github/workflows/java_master_only.yml +++ b/.github/workflows/java_master_only.yml @@ -112,14 +112,9 @@ jobs: with: python-version: 3.8 architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -136,14 +131,7 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev + - name: Install Python dependencies run: make install-python-ci-dependencies - uses: actions/cache@v2 diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml index ea0ec0d9a9b..ad8700c0722 100644 --- a/.github/workflows/java_pr.yml +++ b/.github/workflows/java_pr.yml @@ -148,14 +148,9 @@ jobs: with: python-version: 3.8 architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip 
install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -172,14 +167,6 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - name: Install Python dependencies run: make install-python-ci-dependencies - name: Run integration tests diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index ba475e2585b..31657d3dfcb 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -15,14 +15,9 @@ jobs: with: python-version: "3.8" architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -39,45 +34,8 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - name: Install dependencies run: | - make compile-protos-go make install-python-ci-dependencies - name: Lint python run: make lint-python - - 
lint-go: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - - name: Setup Python - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: Upgrade pip version - run: | - pip install --upgrade "pip>=21.3.1,<22.1" - - name: Install apache-arrow on ubuntu - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Lint go - run: make lint-go \ No newline at end of file diff --git a/.github/workflows/master_only.yml b/.github/workflows/master_only.yml index 32c967c6eb3..49d6fa4f856 100644 --- a/.github/workflows/master_only.yml +++ b/.github/workflows/master_only.yml @@ -58,7 +58,7 @@ jobs: docker push $ECR_REGISTRY/$ECR_REPOSITORY:${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} outputs: DOCKER_IMAGE_TAG: ${{ steps.image-tag.outputs.DOCKER_IMAGE_TAG }} - integration-test-python-and-go: + integration-test-python: if: github.repository == 'feast-dev/feast' needs: build-lambda-docker-image runs-on: ${{ matrix.os }} @@ -66,7 +66,6 @@ jobs: fail-fast: false matrix: python-version: [ "3.8", "3.9", "3.10" ] - go-version: [ 1.17.0 ] os: [ ubuntu-latest ] env: OS: ${{ matrix.os }} @@ -89,11 +88,6 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: ${{ matrix.go-version }} - name: Authenticate to Google Cloud uses: 'google-github-actions/auth@v1' with: @@ -114,7 +108,7 @@ jobs: run: aws sts get-caller-identity - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + 
pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -131,18 +125,6 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Install apache-arrow on macos - if: matrix.os == 'macOS-latest' - run: brew install apache-arrow - name: Install dependencies run: make install-python-ci-dependencies - name: Setup Redis Cluster diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index 2b7eac93018..7a22435fda9 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -164,7 +164,7 @@ jobs: run: aws sts get-caller-identity - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index 9aba7e3bfdc..1fd49f08aff 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -115,11 +115,6 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - name: Authenticate to Google Cloud uses: 'google-github-actions/auth@v1' with: @@ -140,7 +135,7 @@ jobs: run: aws sts get-caller-identity - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip install --upgrade pip - name: Get 
pip cache dir id: pip-cache run: | @@ -157,18 +152,6 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Install apache-arrow on macos - if: matrix.os == 'macOS-latest' - run: brew install apache-arrow - name: Install dependencies run: make install-python-ci-dependencies - name: Setup Redis Cluster diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml index 4705771911c..41df3aefff2 100644 --- a/.github/workflows/pr_local_integration_tests.yml +++ b/.github/workflows/pr_local_integration_tests.yml @@ -40,7 +40,7 @@ jobs: architecture: x64 - name: Upgrade pip version run: | - pip install --upgrade "pip>=21.3.1,<22.1" + pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -57,15 +57,6 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - name: Install dependencies run: make 
install-python-ci-dependencies - name: Test local integration tests diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 46d10adb0f2..f18ee10cb1a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,6 +13,11 @@ on: required: true default: "" type: string + publish_ui: + description: 'Publish to NPM?' + required: true + default: true + type: boolean jobs: @@ -110,7 +115,7 @@ jobs: working-directory: ./ui run: yarn build:lib - name: Publish UI package - if: github.event.inputs.dry_run == 'false' + if: github.event.inputs.publish_ui == 'true' working-directory: ./ui run: npm publish env: diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index ea8bef2e2fa..e0572015726 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -25,11 +25,6 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: x64 - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - name: Install mysql on macOS if: startsWith(matrix.os, 'macOS') run: | @@ -37,7 +32,7 @@ jobs: PATH=$PATH:/usr/local/mysql/bin - name: Upgrade pip version run: | - pip install --upgrade "pip>=22.1,<23" + pip install --upgrade pip - name: Get pip cache dir id: pip-cache run: | @@ -54,50 +49,11 @@ jobs: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- - name: Install pip-tools run: pip install pip-tools - - name: Install apache-arrow on ubuntu - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Install apache-arrow on macos - if: matrix.os == 
'macOS-latest' - run: brew install apache-arrow - name: Install dependencies run: make install-python-ci-dependencies - name: Test Python run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests - unit-test-go: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Setup Python - id: setup-python - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - name: Upgrade pip version - run: | - pip install --upgrade "pip>=22.1,<23" - - name: Setup Go - id: setup-go - uses: actions/setup-go@v2 - with: - go-version: 1.18.0 - - name: Install apache-arrow on ubuntu - run: | - sudo apt update - sudo apt install -y -V ca-certificates lsb-release wget - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb - sudo apt update - sudo apt install -y -V libarrow-dev - - name: Test - run: make test-go unit-test-ui: runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 6afa5f251bc..cc3ebe2457b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,34 @@ # Changelog +# [0.30.0](https://github.com/feast-dev/feast/compare/v0.29.0...v0.30.0) (2023-03-24) + + +### Bug Fixes + +* Add description attribute to the Field.from_proto method ([#3469](https://github.com/feast-dev/feast/issues/3469)) ([473f8d9](https://github.com/feast-dev/feast/commit/473f8d93fa8d565e53fc59b3c444a1b8ed061c51)) +* Add filesystem kwargs when read prev_table on FileRetrievalJob (… ([#3491](https://github.com/feast-dev/feast/issues/3491)) ([dca4745](https://github.com/feast-dev/feast/commit/dca47458c81c211fee485a502feebe28426848f0)), closes [#3490](https://github.com/feast-dev/feast/issues/3490) +* Bytewax image pull secret config ([#3547](https://github.com/feast-dev/feast/issues/3547)) 
([d2d13b1](https://github.com/feast-dev/feast/commit/d2d13b1762ba67c386fcd48351f3872b92671450)) +* Clean up Rockset Online Store for use ([#3549](https://github.com/feast-dev/feast/issues/3549)) ([a76c6d0](https://github.com/feast-dev/feast/commit/a76c6d0f9e3aa28d03d430f6f85ce4e91870c844)) +* Feature view `entities` from_proto type ([#3524](https://github.com/feast-dev/feast/issues/3524)) ([57bbb61](https://github.com/feast-dev/feast/commit/57bbb61829fffe08ff3e09aceb1e82ea862b55a2)) +* Fix missing requests requirement after GCP requirement removed. Make BigQuerySource not require gcp extra ([2c85421](https://github.com/feast-dev/feast/commit/2c85421fef02dc85854960b4616f00e613934c01)) +* Fix SQL Registry cache miss ([#3482](https://github.com/feast-dev/feast/issues/3482)) ([3249b97](https://github.com/feast-dev/feast/commit/3249b97b5471322e068f81fc65d9072f2eed1ba3)) +* Fixed path inside quickstart notebook ([#3456](https://github.com/feast-dev/feast/issues/3456)) ([66edc32](https://github.com/feast-dev/feast/commit/66edc32b97f51049dd0fc97765c714c0c6e374e5)) +* Improve BQ point-in-time joining scalability ([#3429](https://github.com/feast-dev/feast/issues/3429)) ([ff66784](https://github.com/feast-dev/feast/commit/ff66784ddb1e63e68b88c47132996eccb13891ae)) +* Pin typeguard to 2.13.3 which is what we are currently using. 
([#3542](https://github.com/feast-dev/feast/issues/3542)) ([61f6fb0](https://github.com/feast-dev/feast/commit/61f6fb03b27cfa92672beb054ee8aba41145645c)) +* Protobuf lower bound to 3.20 to alert that Feast is incompatible with tensorflow ([#3476](https://github.com/feast-dev/feast/issues/3476)) ([9ca59e3](https://github.com/feast-dev/feast/commit/9ca59e32d2f1c01cec3b5afaff6802e6036dcad8)) +* Spark kafka processor sorting ([#3479](https://github.com/feast-dev/feast/issues/3479)) ([f2cbf43](https://github.com/feast-dev/feast/commit/f2cbf43d4be6829ce3affb72b8a5416d8e084ba9)) +* UI working behind base url ([#3514](https://github.com/feast-dev/feast/issues/3514)) ([9a3fd98](https://github.com/feast-dev/feast/commit/9a3fd98468edc6e5fd185d05b8dd1cabac73845c)) +* Update go dependencies ([#3512](https://github.com/feast-dev/feast/issues/3512)) ([bada97c](https://github.com/feast-dev/feast/commit/bada97c9dadf05bb369e6f820290b0411bc7412d)) + + +### Features + +* Add Rockset as an OnlineStore ([#3405](https://github.com/feast-dev/feast/issues/3405)) ([fd91cda](https://github.com/feast-dev/feast/commit/fd91cda1af47bde948ef6a2f4688785358ae38ec)) +* Add Snowflake Registry ([#3363](https://github.com/feast-dev/feast/issues/3363)) ([ec1e61d](https://github.com/feast-dev/feast/commit/ec1e61d11c24247996b0f8508f44ec7f31b9145c)) +* Added SnowflakeConnection caching ([#3531](https://github.com/feast-dev/feast/issues/3531)) ([f9f8df2](https://github.com/feast-dev/feast/commit/f9f8df2802b980a3ee161222b174d397764f755a)) +* Adding query timeout to `to_df` and `to_arrow` retrieval methods ([#3505](https://github.com/feast-dev/feast/issues/3505)) ([bab6644](https://github.com/feast-dev/feast/commit/bab6644308efb1aa8bce52f2f47df9de87492fc9)) +* adds k8s config options to Bytewax materialization engine ([#3518](https://github.com/feast-dev/feast/issues/3518)) ([1883f55](https://github.com/feast-dev/feast/commit/1883f55729ffddaef8d730ba3ffb76eb50cc7201)) + # 
[0.29.0](https://github.com/feast-dev/feast/compare/v0.28.0...v0.29.0) (2023-01-31) diff --git a/Makefile b/Makefile index 8b7eb39c604..e1fd342881d 100644 --- a/Makefile +++ b/Makefile @@ -24,19 +24,19 @@ TRINO_VERSION ?= 376 # General -format: format-python format-java format-go +format: format-python format-java -lint: lint-python lint-java lint-go +lint: lint-python lint-java -test: test-python test-java test-go +test: test-python test-java -protos: compile-protos-go compile-protos-python compile-protos-docs +protos: compile-protos-python compile-protos-docs build: protos build-java build-docker # Python SDK -install-python-ci-dependencies: install-go-proto-dependencies install-go-ci-dependencies +install-python-ci-dependencies: python -m piptools sync sdk/python/requirements/py$(PYTHON)-ci-requirements.txt COMPILE_GO=true python setup.py develop @@ -281,9 +281,6 @@ test-python-universal-cassandra-no-cloud-providers: test-python-universal: FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration sdk/python/tests -test-python-go-server: compile-go-lib - FEAST_USAGE=False IS_TEST=True pytest --integration --goserver sdk/python/tests - format-python: # Sort cd ${ROOT_DIR}/sdk/python; python -m isort feast/ tests/ @@ -334,48 +331,15 @@ test-trino-plugin-locally: kill-trino-locally: cd ${ROOT_DIR}; docker stop trino -# Go SDK & embedded - -install-go-proto-dependencies: - go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26.0 - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1.0 - -install-go-ci-dependencies: - # TODO: currently gopy installation doesn't work w/o explicit go get in the next line - # TODO: there should be a better way to install gopy - go get github.com/go-python/gopy@v0.4.4 - go install golang.org/x/tools/cmd/goimports - # The `go get` command on the previous lines download the lib along with replacing the dep to `feast-dev/gopy` - # but the following command is needed to install it for some reason. 
- go install github.com/go-python/gopy - python -m pip install pybindgen==0.22.0 protobuf==3.20.1 - install-protoc-dependencies: pip install --ignore-installed protobuf grpcio-tools==1.47.0 mypy-protobuf==3.1.0 -compile-protos-go: install-go-proto-dependencies install-protoc-dependencies - python setup.py build_go_protos - -compile-go-lib: install-go-proto-dependencies install-go-ci-dependencies - CGO_LDFLAGS_ALLOW=".*" COMPILE_GO=True python setup.py build_ext --inplace - install-feast-ci-locally: pip install -e ".[ci]" -# Needs feast package to setup the feature store -# CGO flag is due to this issue: https://github.com/golang/go/wiki/InvalidFlag -test-go: compile-protos-go compile-protos-python compile-go-lib install-feast-ci-locally - CGO_LDFLAGS_ALLOW=".*" go test -tags cgo,ccalloc ./... - -format-go: - gofmt -s -w go/ - -lint-go: compile-protos-go compile-go-lib - go vet -tags cgo,ccalloc ./go/internal/feast ./go/embedded - # Docker -build-docker: build-feature-server-python-docker build-feature-server-python-aws-docker build-feature-transformation-server-docker build-feature-server-java-docker +build-docker: build-feature-server-python-aws-docker build-feature-transformation-server-docker build-feature-server-java-docker push-ci-docker: docker push $(REGISTRY)/feast-ci:$(VERSION) diff --git a/OWNERS b/OWNERS index c34fd6baafe..d726837e570 100644 --- a/OWNERS +++ b/OWNERS @@ -6,10 +6,10 @@ approvers: - achals - adchia - felixwang9817 - - mavysavydav - MattDelac - kevjumba - chhabrakadabra + - gbmarc1 - sfc-gh-madkins - zhilingc - whoahbot @@ -23,10 +23,10 @@ reviewers: - tedhtchang - adchia - felixwang9817 - - mavysavydav - MattDelac - kevjumba - chhabrakadabra + - gbmarc1 - sfc-gh-madkins - zhilingc - whoahbot diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 1bab8a61ef2..cdca6f37844 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -96,6 +96,7 @@ * [PostgreSQL (contrib)](reference/online-stores/postgres.md) * [Cassandra + Astra DB 
(contrib)](reference/online-stores/cassandra.md) * [MySQL (contrib)](reference/online-stores/mysql.md) + * [Rockset (contrib)](reference/online-stores/rockset.md) * [Providers](reference/providers/README.md) * [Local](reference/providers/local.md) * [Google Cloud Platform](reference/providers/google-cloud-platform.md) diff --git a/docs/reference/batch-materialization/bytewax.md b/docs/reference/batch-materialization/bytewax.md index bd98a4dc6e3..2e28937f50e 100644 --- a/docs/reference/batch-materialization/bytewax.md +++ b/docs/reference/batch-materialization/bytewax.md @@ -23,6 +23,8 @@ To configure secrets, first create them using `kubectl`: kubectl create secret generic -n bytewax aws-credentials --from-literal=aws-access-key-id='' --from-literal=aws-secret-access-key='' ``` +If your Docker registry requires authentication to store/pull containers, you can use this same approach to store your repository access credential and use when running the materialization engine. + Then configure them in the batch_engine section of `feature_store.yaml`: ``` yaml @@ -40,6 +42,8 @@ batch_engine: secretKeyRef: name: aws-credentials key: aws-secret-access-key + image_pull_secrets: + - docker-repository-access-secret ``` #### Configuration @@ -51,9 +55,28 @@ batch_engine: type: bytewax namespace: bytewax image: bytewax/bytewax-feast:latest + image_pull_secrets: + - my_container_secret + service_account_name: my-k8s-service-account + annotations: + # example annotation you might include if running on AWS EKS + iam.amazonaws.com/role: arn:aws:iam:::role/MyBytewaxPlatformRole + resources: + limits: + cpu: 1000m + memory: 2048Mi + requests: + cpu: 500m + memory: 1024Mi ``` -The `namespace` configuration directive specifies which Kubernetes [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) jobs, services and configuration maps will be created in. 
+**Notes:** + +* The `namespace` configuration directive specifies which Kubernetes [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) jobs, services and configuration maps will be created in. +* The `image_pull_secrets` configuration directive specifies the pre-configured secret to use when pulling the image container from your registry +* The `service_account_name` specifies which Kubernetes service account to run the job under +* `annotations` allows you to include additional Kubernetes annotations to the job. This is particularly useful for IAM roles which grant the running pod access to cloud platform resources (for example). +* The `resources` configuration directive sets the standard Kubernetes [resource requests](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/) for the job containers to utilise when materializing data. #### Building a custom Bytewax Docker image diff --git a/docs/reference/online-stores/README.md b/docs/reference/online-stores/README.md index e46fc28d162..64b707a7e5d 100644 --- a/docs/reference/online-stores/README.md +++ b/docs/reference/online-stores/README.md @@ -42,3 +42,7 @@ Please see [Online Store](../../getting-started/architecture-and-components/onli [mysql.md](mysql.md) {% endcontent-ref %} +{% content-ref url="rockset.md" %} +[rockset.md](rockset.md) +{% endcontent-ref %} + diff --git a/docs/reference/online-stores/rockset.md b/docs/reference/online-stores/rockset.md new file mode 100644 index 00000000000..523bf9a9a12 --- /dev/null +++ b/docs/reference/online-stores/rockset.md @@ -0,0 +1,84 @@ +# Rockset (contrib) + +## Description + +In Alpha Development. + +The [Rockset](https://rockset.com/demo-signup/) online store provides support for materializing feature values within a Rockset collection in order to serve features in real-time. + +* Each document is uniquely identified by its '_id' value. 
Repeated inserts into the same document '_id' will result in an upsert. + +Rockset indexes all columns allowing for quick per feature look up and also allows for a dynamic typed schema that can change based on any new requirements. API Keys can be found in the Rockset console. +You can also find host urls on the same tab by clicking "View Region Endpoint Urls". + +Data Model Used Per Doc + +``` +{ + "_id": (STRING) Unique Identifier for the feature document. + : (STRING) Feature Values Mapped by Feature Name. Feature + values stored as a serialized hex string. + .... + "event_ts": (STRING) ISO Stringified Timestamp. + "created_ts": (STRING) ISO Stringified Timestamp. +} +``` + + +## Example + +```yaml +project: my_feature_app +registry: data/registry.db +provider: local +online_store: + ## Basic Configs ## + + # If apikey or host is left blank the driver will try to pull + # these values from environment variables ROCKSET_APIKEY and + # ROCKSET_APISERVER respectively. + type: rockset + apikey: + host: + + ## Advanced Configs ## + + # Batch size of records that will be returned per page when + # paginating a batched read. + # + # read_pagination_batch_size: 100 + + # The amount of time, in seconds, we will wait for the + # collection to become visible to the API. + # + # collection_created_timeout_secs: 60 + + # The amount of time, in seconds, we will wait for the + # collection to enter READY state. + # + # collection_ready_timeout_secs: 1800 + + # Whether to wait for all writes to be flushed from log + # and queryable before returning write as completed. If + # False, documents that are written may not be seen + # immediately in subsequent reads. + # + # fence_all_writes: True + + # The amount of time we will wait, in seconds, for the + # write fence to be passed + # + # fence_timeout_secs: 600 + + # Initial backoff, in seconds, we will wait between + # requests when polling for a response.
+ # + # initial_request_backoff_secs: 2 + + # Max backoff, in seconds, we will wait between + # requests when polling for a response. + # max_request_backoff_secs: 30 + + # The max amount of times we will retry a failed request. + # max_request_attempts: 10000 +``` diff --git a/docs/tutorials/azure/notebooks/src/score.py b/docs/tutorials/azure/notebooks/src/score.py index 93b248240d3..7def7d2d2ad 100644 --- a/docs/tutorials/azure/notebooks/src/score.py +++ b/docs/tutorials/azure/notebooks/src/score.py @@ -6,9 +6,11 @@ import json import joblib from feast import FeatureStore, RepoConfig -from feast.infra.registry.registry import RegistryConfig +from feast.repo_config import RegistryConfig -from feast.infra.offline_stores.contrib.mssql_offline_store.mssql import MsSqlServerOfflineStoreConfig +from feast.infra.offline_stores.contrib.mssql_offline_store.mssql import ( + MsSqlServerOfflineStoreConfig, +) from feast.infra.online_stores.redis import RedisOnlineStoreConfig, RedisOnlineStore @@ -73,4 +75,4 @@ def run(raw_data): y_hat = model.predict(data) return y_hat.tolist() else: - return 0.0 \ No newline at end of file + return 0.0 diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index cec4df91b11..6e07d3e23bc 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -155,7 +155,7 @@ } ], "source": [ - "%cd feature_repo\n", + "%cd feature_repo/feature_repo\n", "!ls -R" ] }, @@ -1101,4 +1101,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/go.mod b/go.mod index 3c05383ffc2..a051a13f6fe 100644 --- a/go.mod +++ b/go.mod @@ -41,10 +41,10 @@ require ( github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/exp v0.0.0-20220407100705-7b9b53b0aca4 // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect - golang.org/x/net v0.0.0-20220407224826-aac1ed45d8e3 // indirect - golang.org/x/sys v0.0.0-20220412211240-33da011f77ad // indirect -
golang.org/x/text v0.3.7 // indirect - golang.org/x/tools v0.1.11-0.20220413170336-afc6aad76eb1 // indirect + golang.org/x/net v0.7.0 // indirect + golang.org/x/sys v0.5.0 // indirect + golang.org/x/text v0.7.0 // indirect + golang.org/x/tools v0.1.12 // indirect golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect google.golang.org/genproto v0.0.0-20220407144326-9054f6ed7bac // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/go.sum b/go.sum index 11f1ba4d74e..cbcfc75ac44 100644 --- a/go.sum +++ b/go.sum @@ -87,12 +87,6 @@ github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go. github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= -github.com/feast-dev/gopy v0.4.1-0.20220714205859-591500e3215f h1:tTjEpVu4H/ZGh4wo3WETbA9dutNM6bXMXvyZbb9GLCs= -github.com/feast-dev/gopy v0.4.1-0.20220714205859-591500e3215f/go.mod h1:tlA/KcD7rM8B+NQJR4SASwiinfKY0aiMFanHszR8BZA= -github.com/feast-dev/gopy v0.4.1-0.20220714211038-aa312c13fd79 h1:oFj6GDGR8E4S5GeMyLBvaKtvMZxj3hHqsB5Xndjxjz8= -github.com/feast-dev/gopy v0.4.1-0.20220714211038-aa312c13fd79/go.mod h1:tlA/KcD7rM8B+NQJR4SASwiinfKY0aiMFanHszR8BZA= -github.com/feast-dev/gopy v0.4.1-0.20220714211330-67b016d61ed4 h1:UfzPdqqAfrt8f+jDIY61lbzqFZYsX2BhVyNcCbdpE+U= -github.com/feast-dev/gopy v0.4.1-0.20220714211330-67b016d61ed4/go.mod h1:tlA/KcD7rM8B+NQJR4SASwiinfKY0aiMFanHszR8BZA= github.com/feast-dev/gopy v0.4.1-0.20220714211711-252048177d85 h1:BKmfqWiDbxvviB6vemPbbNjF+ywRsBMCdk1QvrcGgkc= github.com/feast-dev/gopy v0.4.1-0.20220714211711-252048177d85/go.mod h1:tlA/KcD7rM8B+NQJR4SASwiinfKY0aiMFanHszR8BZA= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod 
h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= @@ -375,6 +369,7 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.1/go.mod h1:8VHV24/3AZLn3b6Mlp/KuC33LWH687Wq6EnziEB+rsA= @@ -444,8 +439,6 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191209134235-331c550502dd/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= -golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s= @@ -474,8 +467,10 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.0.0-20220407224826-aac1ed45d8e3 h1:EN5+DfgmRMvRUrMGERW2gQl3Vc+Z7ZMnI/xdEpPSf0c= -golang.org/x/net v0.0.0-20220407224826-aac1ed45d8e3/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -487,6 +482,7 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -519,8 +515,12 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys 
v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220412211240-33da011f77ad h1:ntjMns5wyP/fN65tdBD4g8J5w8n015+iIIs9rtjXkY0= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -528,8 +528,11 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.7.0 
h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -555,8 +558,9 @@ golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= -golang.org/x/tools v0.1.11-0.20220413170336-afc6aad76eb1 h1:Z3vE1sGlC7qiyFJkkDcZms8Y3+yV8+W7HmDSmuf71tM= golang.org/x/tools v0.1.11-0.20220413170336-afc6aad76eb1/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/infra/charts/feast-feature-server/Chart.yaml b/infra/charts/feast-feature-server/Chart.yaml index b03e3ba7bfb..bdd0d6b0eb2 100644 --- a/infra/charts/feast-feature-server/Chart.yaml +++ b/infra/charts/feast-feature-server/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: feast-feature-server description: Feast Feature Server in Go or Python type: application -version: 0.29.0 +version: 0.30.0 keywords: - machine learning - big data diff --git 
a/infra/charts/feast-feature-server/README.md b/infra/charts/feast-feature-server/README.md index d6f38b36169..63d103075db 100644 --- a/infra/charts/feast-feature-server/README.md +++ b/infra/charts/feast-feature-server/README.md @@ -1,6 +1,6 @@ # Feast Python / Go Feature Server Helm Charts -Current chart version is `0.29.0` +Current chart version is `0.30.0` ## Installation @@ -30,7 +30,7 @@ See [here](https://github.com/feast-dev/feast/tree/master/examples/python-helm-d | fullnameOverride | string | `""` | | | image.pullPolicy | string | `"IfNotPresent"` | | | image.repository | string | `"feastdev/feature-server"` | Docker image for Feature Server repository | -| image.tag | string | `"0.29.0"` | The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) | +| image.tag | string | `"0.30.0"` | The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) | | imagePullSecrets | list | `[]` | | | livenessProbe.initialDelaySeconds | int | `30` | | | livenessProbe.periodSeconds | int | `30` | | diff --git a/infra/charts/feast-feature-server/values.yaml b/infra/charts/feast-feature-server/values.yaml index f21f0c9ecf7..56d4f724e31 100644 --- a/infra/charts/feast-feature-server/values.yaml +++ b/infra/charts/feast-feature-server/values.yaml @@ -9,7 +9,7 @@ image: repository: feastdev/feature-server pullPolicy: IfNotPresent # image.tag -- The Docker image tag (can be overwritten if custom feature server deps are needed for on demand transforms) - tag: 0.29.0 + tag: 0.30.0 imagePullSecrets: [] nameOverride: "" diff --git a/infra/charts/feast/Chart.yaml b/infra/charts/feast/Chart.yaml index fbcec2b8033..99efef8723d 100644 --- a/infra/charts/feast/Chart.yaml +++ b/infra/charts/feast/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Feature store for machine learning name: feast -version: 0.29.0 +version: 0.30.0 keywords: - machine learning - big data diff --git 
a/infra/charts/feast/README.md b/infra/charts/feast/README.md index 6364377f2c1..36a6561c45a 100644 --- a/infra/charts/feast/README.md +++ b/infra/charts/feast/README.md @@ -8,7 +8,7 @@ This repo contains Helm charts for Feast Java components that are being installe ## Chart: Feast -Feature store for machine learning Current chart version is `0.29.0` +Feature store for machine learning Current chart version is `0.30.0` ## Installation @@ -65,8 +65,8 @@ See [here](https://github.com/feast-dev/feast/tree/master/examples/java-demo) fo | Repository | Name | Version | |------------|------|---------| | https://charts.helm.sh/stable | redis | 10.5.6 | -| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.29.0 | -| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.29.0 | +| https://feast-helm-charts.storage.googleapis.com | feature-server(feature-server) | 0.30.0 | +| https://feast-helm-charts.storage.googleapis.com | transformation-service(transformation-service) | 0.30.0 | ## Values diff --git a/infra/charts/feast/charts/feature-server/Chart.yaml b/infra/charts/feast/charts/feature-server/Chart.yaml index d21357b7923..0d7100b545e 100644 --- a/infra/charts/feast/charts/feature-server/Chart.yaml +++ b/infra/charts/feast/charts/feature-server/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Feast Feature Server: Online feature serving service for Feast" name: feature-server -version: 0.29.0 -appVersion: v0.29.0 +version: 0.30.0 +appVersion: v0.30.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/feature-server/README.md b/infra/charts/feast/charts/feature-server/README.md index f2e0b281829..3b1df0e9396 100644 --- a/infra/charts/feast/charts/feature-server/README.md +++ b/infra/charts/feast/charts/feature-server/README.md @@ -1,6 +1,6 @@ # feature-server -![Version: 0.29.0](https://img.shields.io/badge/Version-0.29.0-informational?style=flat-square) 
![AppVersion: v0.29.0](https://img.shields.io/badge/AppVersion-v0.29.0-informational?style=flat-square) +![Version: 0.30.0](https://img.shields.io/badge/Version-0.30.0-informational?style=flat-square) ![AppVersion: v0.30.0](https://img.shields.io/badge/AppVersion-v0.30.0-informational?style=flat-square) Feast Feature Server: Online feature serving service for Feast @@ -17,7 +17,7 @@ Feast Feature Server: Online feature serving service for Feast | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-server-java"` | Docker image for Feature Server repository | -| image.tag | string | `"0.29.0"` | Image tag | +| image.tag | string | `"0.30.0"` | Image tag | | ingress.grpc.annotations | object | `{}` | Extra annotations for the ingress | | ingress.grpc.auth.enabled | bool | `false` | Flag to enable auth | | ingress.grpc.class | string | `"nginx"` | Which ingress controller to use | diff --git a/infra/charts/feast/charts/feature-server/values.yaml b/infra/charts/feast/charts/feature-server/values.yaml index 842cfc43e8a..45eba9a96d1 100644 --- a/infra/charts/feast/charts/feature-server/values.yaml +++ b/infra/charts/feast/charts/feature-server/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Feature Server repository repository: feastdev/feature-server-java # image.tag -- Image tag - tag: 0.29.0 + tag: 0.30.0 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/charts/transformation-service/Chart.yaml b/infra/charts/feast/charts/transformation-service/Chart.yaml index 253533ef39f..59ef9e5b7cc 100644 --- a/infra/charts/feast/charts/transformation-service/Chart.yaml +++ b/infra/charts/feast/charts/transformation-service/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: "Transformation service: to compute on-demand features" name: transformation-service -version: 0.29.0 
-appVersion: v0.29.0 +version: 0.30.0 +appVersion: v0.30.0 keywords: - machine learning - big data diff --git a/infra/charts/feast/charts/transformation-service/README.md b/infra/charts/feast/charts/transformation-service/README.md index 65ba7916cf7..96d2a1cc284 100644 --- a/infra/charts/feast/charts/transformation-service/README.md +++ b/infra/charts/feast/charts/transformation-service/README.md @@ -1,6 +1,6 @@ # transformation-service -![Version: 0.29.0](https://img.shields.io/badge/Version-0.29.0-informational?style=flat-square) ![AppVersion: v0.29.0](https://img.shields.io/badge/AppVersion-v0.29.0-informational?style=flat-square) +![Version: 0.30.0](https://img.shields.io/badge/Version-0.30.0-informational?style=flat-square) ![AppVersion: v0.30.0](https://img.shields.io/badge/AppVersion-v0.30.0-informational?style=flat-square) Transformation service: to compute on-demand features @@ -13,7 +13,7 @@ Transformation service: to compute on-demand features | envOverrides | object | `{}` | Extra environment variables to set | | image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | | image.repository | string | `"feastdev/feature-transformation-server"` | Docker image for Transformation Server repository | -| image.tag | string | `"0.29.0"` | Image tag | +| image.tag | string | `"0.30.0"` | Image tag | | nodeSelector | object | `{}` | Node labels for pod assignment | | podLabels | object | `{}` | Labels to be added to Feast Serving pods | | replicaCount | int | `1` | Number of pods that will be created | diff --git a/infra/charts/feast/charts/transformation-service/values.yaml b/infra/charts/feast/charts/transformation-service/values.yaml index 2a41668ffd0..2178f0dece2 100644 --- a/infra/charts/feast/charts/transformation-service/values.yaml +++ b/infra/charts/feast/charts/transformation-service/values.yaml @@ -5,7 +5,7 @@ image: # image.repository -- Docker image for Transformation Server repository repository: feastdev/feature-transformation-server # 
image.tag -- Image tag - tag: 0.29.0 + tag: 0.30.0 # image.pullPolicy -- Image pull policy pullPolicy: IfNotPresent diff --git a/infra/charts/feast/requirements.yaml b/infra/charts/feast/requirements.yaml index c5700116187..34f3c885422 100644 --- a/infra/charts/feast/requirements.yaml +++ b/infra/charts/feast/requirements.yaml @@ -1,12 +1,12 @@ dependencies: - name: feature-server alias: feature-server - version: 0.29.0 + version: 0.30.0 condition: feature-server.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: transformation-service alias: transformation-service - version: 0.29.0 + version: 0.30.0 condition: transformation-service.enabled repository: https://feast-helm-charts.storage.googleapis.com - name: redis diff --git a/java/pom.xml b/java/pom.xml index b92edac7ddf..b7e8b99dea5 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -35,7 +35,7 @@ - 0.29.0 + 0.30.0 https://github.com/feast-dev/feast UTF-8 diff --git a/java/serving/README.md b/java/serving/README.md index dc23702d0f1..5cf5ec65354 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -2,9 +2,14 @@ ### Overview This guide is targeted at developers looking to contribute to Feast Serving: -- [Building and running Feast Serving locally](#building-and-running-feast-serving-locally) -- [Unit / Integration Tests](#unit-/-integration-tests) -- [Developing against Feast Helm charts](#developing-against-feast-helm-charts) +- [Getting Started Guide for Feast Serving Developers](#getting-started-guide-for-feast-serving-developers) + - [Overview](#overview) + - [Building and running Feast Serving locally:](#building-and-running-feast-serving-locally) + - [Pre-requisites](#pre-requisites) + - [Steps](#steps) + - [Debugging Feast Serving](#debugging-feast-serving) + - [Unit / Integration Tests](#unit--integration-tests) + - [Developing against Feast Helm charts](#developing-against-feast-helm-charts) ### Building and running Feast Serving locally: @@ -25,12 +30,14 @@ From the 
Feast GitHub root, run: feast: project: feast_demo registry: /Users/[your username]/GitHub/feast-demo/feature_repo/data/registry.db + entityKeySerializationVersion: 2 ``` 2. An example of if you're using Redis with a remote registry: ```yaml feast: project: feast_java_demo registry: gs://[YOUR BUCKET]/demo-repo/registry.db + entityKeySerializationVersion: 2 activeStore: online stores: - name: online diff --git a/sdk/python/docs/source/feast.infra.online_stores.contrib.rockset_online_store.rst b/sdk/python/docs/source/feast.infra.online_stores.contrib.rockset_online_store.rst new file mode 100644 index 00000000000..b3de7479a0e --- /dev/null +++ b/sdk/python/docs/source/feast.infra.online_stores.contrib.rockset_online_store.rst @@ -0,0 +1,21 @@ +feast.infra.online\_stores.contrib.rockset\_online\_store package +================================================================= + +Submodules +---------- + +feast.infra.online\_stores.contrib.rockset\_online\_store.rockset module +------------------------------------------------------------------------ + +.. automodule:: feast.infra.online_stores.contrib.rockset_online_store.rockset + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: feast.infra.online_stores.contrib.rockset_online_store + :members: + :undoc-members: + :show-inheritance: diff --git a/sdk/python/docs/source/feast.infra.online_stores.contrib.rst b/sdk/python/docs/source/feast.infra.online_stores.contrib.rst index 6b175f4584f..f10ff306f32 100644 --- a/sdk/python/docs/source/feast.infra.online_stores.contrib.rst +++ b/sdk/python/docs/source/feast.infra.online_stores.contrib.rst @@ -10,6 +10,7 @@ Subpackages feast.infra.online_stores.contrib.cassandra_online_store feast.infra.online_stores.contrib.hbase_online_store feast.infra.online_stores.contrib.mysql_online_store + feast.infra.online_stores.contrib.rockset_online_store Submodules ---------- @@ -54,6 +55,14 @@ feast.infra.online\_stores.contrib.postgres\_repo\_configuration module :undoc-members: :show-inheritance: +feast.infra.online\_stores.contrib.rockset\_repo\_configuration module +---------------------------------------------------------------------- + +.. automodule:: feast.infra.online_stores.contrib.rockset_repo_configuration + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index b0284654924..8adf1152261 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -631,7 +631,17 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List "--template", "-t", type=click.Choice( - ["local", "gcp", "aws", "snowflake", "spark", "postgres", "hbase", "cassandra"], + [ + "local", + "gcp", + "aws", + "snowflake", + "spark", + "postgres", + "hbase", + "cassandra", + "rockset", + ], case_sensitive=False, ), help="Specify a template for the created project", @@ -674,12 +684,6 @@ def init_command(project_directory, minimal: bool, template: str): show_default=True, help="Specify a server type: 'http' or 'grpc'", ) -@click.option( - "--go", - is_flag=True, - show_default=True, - help="Use Go to serve", -) @click.option( "--no-access-log", 
is_flag=True, @@ -698,7 +702,6 @@ def serve_command( host: str, port: int, type_: str, - go: bool, no_access_log: bool, no_feature_log: bool, ): @@ -720,10 +723,6 @@ def serve_command( cli_check_repo(repo, fs_yaml_file) store = FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file) - if go: - # Turn on Go feature retrieval. - store.config.go_feature_serving = True - store.serve(host, port, type_, no_access_log, no_feature_log) diff --git a/sdk/python/feast/errors.py b/sdk/python/feast/errors.py index 042a3622a98..57d04c27004 100644 --- a/sdk/python/feast/errors.py +++ b/sdk/python/feast/errors.py @@ -132,6 +132,11 @@ def __init__(self): super().__init__("Provider is not set, but is required") +class FeastRegistryNotSetError(Exception): + def __init__(self): + super().__init__("Registry is not set, but is required") + + class FeastFeatureServerTypeSetError(Exception): def __init__(self, feature_server_type: str): super().__init__( @@ -146,6 +151,13 @@ def __init__(self, feature_server_type: str): ) +class FeastRegistryTypeInvalidError(Exception): + def __init__(self, registry_type: str): + super().__init__( + f"Registry type was set to {registry_type}, but this type is invalid" + ) + + class FeastModuleImportError(Exception): def __init__(self, module_name: str, class_name: str): super().__init__( diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 43787701bf6..12d85be0178 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -19,7 +19,6 @@ from datetime import datetime, timedelta from pathlib import Path from typing import ( - TYPE_CHECKING, Any, Callable, Dict, @@ -94,19 +93,13 @@ from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, SavedDatasetStorage, ValidationReference from feast.stream_feature_view import StreamFeatureView -from feast.type_map import ( - feast_value_type_to_python_type, - python_values_to_proto_values, -)
+from feast.type_map import python_values_to_proto_values from feast.usage import log_exceptions, log_exceptions_and_usage, set_usage_attribute from feast.value_type import ValueType from feast.version import get_version warnings.simplefilter("once", DeprecationWarning) -if TYPE_CHECKING: - from feast.embedded_go.online_features_service import EmbeddedOnlineFeatureServer - class FeatureStore: """ @@ -117,14 +110,12 @@ class FeatureStore: repo_path: The path to the feature repo. _registry: The registry for the feature store. _provider: The provider for the feature store. - _go_server: The (optional) Go feature server for the feature store. """ config: RepoConfig repo_path: Path _registry: BaseRegistry _provider: Provider - _go_server: Optional["EmbeddedOnlineFeatureServer"] @log_exceptions def __init__( @@ -164,16 +155,21 @@ def __init__( self.repo_path, utils.get_default_yaml_file_path(self.repo_path) ) - registry_config = self.config.get_registry_config() + registry_config = self.config.registry if registry_config.registry_type == "sql": - self._registry = SqlRegistry(registry_config, None) + self._registry = SqlRegistry(registry_config, self.config.project, None) + elif registry_config.registry_type == "snowflake.registry": + from feast.infra.registry.snowflake import SnowflakeRegistry + + self._registry = SnowflakeRegistry( + registry_config, self.config.project, None + ) else: - r = Registry(registry_config, repo_path=self.repo_path) + r = Registry(self.config.project, registry_config, repo_path=self.repo_path) r._initialize_registry(self.config.project) self._registry = r self._provider = get_provider(self.config) - self._go_server = None @log_exceptions def version(self) -> str: @@ -209,8 +205,10 @@ def refresh_registry(self): greater than 0, then once the cache becomes stale (more time than the TTL has passed), a new cache will be downloaded synchronously, which may increase latencies if the triggering method is get_online_features(). 
""" - registry_config = self.config.get_registry_config() - registry = Registry(registry_config, repo_path=self.repo_path) + registry_config = self.config.registry + registry = Registry( + self.config.project, registry_config, repo_path=self.repo_path + ) registry.refresh(self.config.project) self._registry = registry @@ -1001,11 +999,6 @@ def apply( self._registry.commit() - # go server needs to be reloaded to apply new configuration. - # we're stopping it here - # new server will be instantiated on the next online request - self._teardown_go_server() - @log_exceptions_and_usage def teardown(self): """Tears down all local and cloud resources for the feature store.""" @@ -1018,7 +1011,6 @@ def teardown(self): self._get_provider().teardown_infra(self.project, tables, entities) self._registry.teardown() - self._teardown_go_server() @log_exceptions_and_usage def get_historical_features( @@ -1595,18 +1587,6 @@ def get_online_features( native_entity_values=True, ) - def _lazy_init_go_server(self): - """Lazily initialize self._go_server if it hasn't been initialized before.""" - from feast.embedded_go.online_features_service import ( - EmbeddedOnlineFeatureServer, - ) - - # Lazily start the go server on the first request - if self._go_server is None: - self._go_server = EmbeddedOnlineFeatureServer( - str(self.repo_path.absolute()), self.config, self - ) - def _get_online_features( self, features: Union[List[str], FeatureService], @@ -1622,35 +1602,6 @@ def _get_online_features( for k, v in entity_values.items() } - # If the embedded Go code is enabled, send request to it instead of going through regular Python logic. - if self.config.go_feature_retrieval and self._go_server: - self._lazy_init_go_server() - - entity_native_values: Dict[str, List[Any]] - if not native_entity_values: - # Convert proto types to native types since Go feature server currently - # only handles native types. - # TODO(felixwang9817): Remove this logic once native types are supported. 
- entity_native_values = { - k: [ - feast_value_type_to_python_type(proto_value) - for proto_value in v - ] - for k, v in entity_value_lists.items() - } - else: - entity_native_values = entity_value_lists - - return self._go_server.get_online_features( - features_refs=features if isinstance(features, list) else [], - feature_service=features - if isinstance(features, FeatureService) - else None, - entities=entity_native_values, - request_data={}, # TODO: add request data parameter to public API - full_feature_names=full_feature_names, - ) - _feature_refs = self._get_features(features, allow_cache=True) ( requested_feature_views, @@ -2275,45 +2226,12 @@ def serve( ) -> None: """Start the feature consumption server locally on a given port.""" type_ = type_.lower() - if self.config.go_feature_serving and self._go_server: - # Start go server instead of python if the flag is enabled - self._lazy_init_go_server() - enable_logging = ( - self.config.feature_server - and self.config.feature_server.feature_logging - and self.config.feature_server.feature_logging.enabled - and not no_feature_log - ) - logging_options = ( - self.config.feature_server.feature_logging - if enable_logging and self.config.feature_server - else None + if type_ != "http": + raise ValueError( + f"Python server only supports 'http'. Got '{type_}' instead." ) - if type_ == "http": - self._go_server.start_http_server( - host, - port, - enable_logging=enable_logging, - logging_options=logging_options, - ) - elif type_ == "grpc": - self._go_server.start_grpc_server( - host, - port, - enable_logging=enable_logging, - logging_options=logging_options, - ) - else: - raise ValueError( - f"Unsupported server type '{type_}'. Must be one of 'http' or 'grpc'." - ) - else: - if type_ != "http": - raise ValueError( - f"Python server only supports 'http'. Got '{type_}' instead." 
- ) - # Start the python server if go server isn't enabled - feature_server.start_server(self, host, port, no_access_log) + # Start the python server + feature_server.start_server(self, host, port, no_access_log) @log_exceptions_and_usage def get_feature_server_endpoint(self) -> Optional[str]: @@ -2359,9 +2277,6 @@ def serve_transformations(self, port: int) -> None: transformation_server.start_server(self, port) - def _teardown_go_server(self): - self._go_server = None - @log_exceptions_and_usage def write_logged_features( self, logs: Union[pa.Table, Path], source: FeatureService diff --git a/sdk/python/feast/feature_view.py b/sdk/python/feast/feature_view.py index d91ee9080d5..e26759ba92e 100644 --- a/sdk/python/feast/feature_view.py +++ b/sdk/python/feast/feature_view.py @@ -400,7 +400,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): feature_view.stream_source = stream_source # This avoids the deprecation warning. - feature_view.entities = feature_view_proto.spec.entities + feature_view.entities = list(feature_view_proto.spec.entities) # Instead of passing in a schema, we set the features and entity columns. feature_view.features = [ diff --git a/sdk/python/feast/field.py b/sdk/python/feast/field.py index 245bb24f52b..b07bddfeacb 100644 --- a/sdk/python/feast/field.py +++ b/sdk/python/feast/field.py @@ -109,6 +109,7 @@ def from_proto(cls, field_proto: FieldProto): name=field_proto.name, dtype=from_value_type(value_type=value_type), tags=dict(field_proto.tags), + description=field_proto.description, ) @classmethod diff --git a/sdk/python/feast/infra/contrib/spark_kafka_processor.py b/sdk/python/feast/infra/contrib/spark_kafka_processor.py index 32d91b2010f..ea55d89988a 100644 --- a/sdk/python/feast/infra/contrib/spark_kafka_processor.py +++ b/sdk/python/feast/infra/contrib/spark_kafka_processor.py @@ -131,7 +131,7 @@ def batch_write(row: DataFrame, batch_id: int): # Also add a 'created' column. 
rows = ( rows.sort_values( - by=self.join_keys + [self.sfv.timestamp_field], ascending=True + by=[*self.join_keys, self.sfv.timestamp_field], ascending=False ) .groupby(self.join_keys) .nth(0) diff --git a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py index 9a456376bf4..991eafa641c 100644 --- a/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py +++ b/sdk/python/feast/infra/materialization/contrib/bytewax/bytewax_materialization_engine.py @@ -46,6 +46,18 @@ class BytewaxMaterializationEngineConfig(FeastConfigBaseModel): These environment variables can be used to reference Kubernetes secrets. """ + image_pull_secrets: List[dict] = [] + """ (optional) The secrets to use when pulling the image to run for the materialization job """ + + resources: dict = {} + """ (optional) The resource requests and limits for the materialization containers """ + + service_account_name: StrictStr = "" + """ (optional) The service account name to use when running the job """ + + annotations: dict = {} + """ (optional) Annotations to apply to the job container. 
Useful for linking the service account to IAM roles, operational metadata, etc """ + class BytewaxMaterializationEngine(BatchMaterializationEngine): def __init__( @@ -248,9 +260,14 @@ def _create_job_definition(self, job_id, namespace, pods, env): "parallelism": pods, "completionMode": "Indexed", "template": { + "metadata": { + "annotations": self.batch_engine_config.annotations, + }, "spec": { "restartPolicy": "Never", "subdomain": f"dataflow-{job_id}", + "imagePullSecrets": self.batch_engine_config.image_pull_secrets, + "serviceAccountName": self.batch_engine_config.service_account_name, "initContainers": [ { "env": [ @@ -300,7 +317,7 @@ def _create_job_definition(self, job_id, namespace, pods, env): "protocol": "TCP", } ], - "resources": {}, + "resources": self.batch_engine_config.resources, "securityContext": { "allowPrivilegeEscalation": False, "capabilities": { @@ -334,7 +351,7 @@ def _create_job_definition(self, job_id, namespace, pods, env): "name": f"feast-{job_id}", }, ], - } + }, }, }, } diff --git a/sdk/python/feast/infra/materialization/snowflake_engine.py b/sdk/python/feast/infra/materialization/snowflake_engine.py index d8fc5f5611d..8a63e008911 100644 --- a/sdk/python/feast/infra/materialization/snowflake_engine.py +++ b/sdk/python/feast/infra/materialization/snowflake_engine.py @@ -25,10 +25,10 @@ from feast.infra.online_stores.online_store import OnlineStore from feast.infra.registry.base_registry import BaseRegistry from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, _run_snowflake_field_mapping, assert_snowflake_feature_names, execute_snowflake_statement, - get_snowflake_conn, get_snowflake_online_store_path, package_snowpark_zip, ) @@ -121,7 +121,7 @@ def update( ): stage_context = f'"{self.repo_config.batch_engine.database}"."{self.repo_config.batch_engine.schema_}"' stage_path = f'{stage_context}."feast_{project}"' - with get_snowflake_conn(self.repo_config.batch_engine) as conn: + with 
GetSnowflakeConnection(self.repo_config.batch_engine) as conn: query = f"SHOW STAGES IN {stage_context}" cursor = execute_snowflake_statement(conn, query) stage_list = pd.DataFrame( @@ -132,6 +132,10 @@ def update( # if the stage already exists, # assumes that the materialization functions have been deployed if f"feast_{project}" in stage_list["name"].tolist(): + click.echo( + f"Materialization functions for {Style.BRIGHT + Fore.GREEN}{project}{Style.RESET_ALL} already detected." + ) + click.echo() return None click.echo( @@ -169,7 +173,7 @@ def teardown_infra( ): stage_path = f'"{self.repo_config.batch_engine.database}"."{self.repo_config.batch_engine.schema_}"."feast_{project}"' - with get_snowflake_conn(self.repo_config.batch_engine) as conn: + with GetSnowflakeConnection(self.repo_config.batch_engine) as conn: query = f"DROP STAGE IF EXISTS {stage_path}" execute_snowflake_statement(conn, query) @@ -259,10 +263,11 @@ def _materialize_one( # Lets check and see if we can skip this query, because the table hasnt changed # since before the start date of this query - with get_snowflake_conn(self.repo_config.offline_store) as conn: + with GetSnowflakeConnection(self.repo_config.offline_store) as conn: query = f"""SELECT SYSTEM$LAST_CHANGE_COMMIT_TIME('{feature_view.batch_source.get_table_query_string()}') AS last_commit_change_time""" last_commit_change_time = ( - conn.cursor().execute(query).fetchall()[0][0] / 1_000_000_000 + execute_snowflake_statement(conn, query).fetchall()[0][0] + / 1_000_000_000 ) if last_commit_change_time < start_date.astimezone(tz=utc).timestamp(): return SnowflakeMaterializationJob( @@ -428,7 +433,7 @@ def materialize_to_snowflake_online_store( ) """ - with get_snowflake_conn(repo_config.batch_engine) as conn: + with GetSnowflakeConnection(repo_config.batch_engine) as conn: query_id = execute_snowflake_statement(conn, query).sfqid click.echo( @@ -446,7 +451,7 @@ def materialize_to_external_online_store( feature_names = [feature.name for 
feature in feature_view.features] - with get_snowflake_conn(repo_config.batch_engine) as conn: + with GetSnowflakeConnection(repo_config.batch_engine) as conn: query = materialization_sql cursor = execute_snowflake_statement(conn, query) for i, df in enumerate(cursor.fetch_pandas_batches()): diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index b3a500665ed..7871cea02c8 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -441,9 +441,11 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: with self._query_generator() as query: - df = self._execute_query(query).to_dataframe(create_bqstorage_client=True) + df = self._execute_query(query=query, timeout=timeout).to_dataframe( + create_bqstorage_client=True + ) return df def to_sql(self) -> str: @@ -488,20 +490,34 @@ def to_bigquery( return str(job_config.destination) with self._query_generator() as query: - self._execute_query(query, job_config, timeout) - - print(f"Done writing to '{job_config.destination}'.") - return str(job_config.destination) - - def _to_arrow_internal(self) -> pyarrow.Table: + dest = job_config.destination + # because setting destination for scripts is not valid + # remove destination attribute if provided + job_config.destination = None + bq_job = self._execute_query(query, job_config, timeout) + + if not job_config.dry_run: + config = bq_job.to_api_repr()["configuration"] + # get temp table created by BQ + tmp_dest = config["query"]["destinationTable"] + temp_dest_table = f"{tmp_dest['projectId']}.{tmp_dest['datasetId']}.{tmp_dest['tableId']}" + + # persist temp table + sql = f"CREATE TABLE {dest} AS SELECT * FROM {temp_dest_table}" + 
self._execute_query(sql, timeout=timeout) + + print(f"Done writing to '{dest}'.") + return str(dest) + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: with self._query_generator() as query: - q = self._execute_query(query=query) + q = self._execute_query(query=query, timeout=timeout) assert q return q.to_arrow() @log_exceptions_and_usage def _execute_query( - self, query, job_config=None, timeout: int = 1800 + self, query, job_config=None, timeout: Optional[int] = None ) -> Optional[bigquery.job.query.QueryJob]: bq_job = self.client.query(query, job_config=job_config) @@ -511,7 +527,7 @@ def _execute_query( ) return None - block_until_done(client=self.client, bq_job=bq_job, timeout=timeout) + block_until_done(client=self.client, bq_job=bq_job, timeout=timeout or 1800) return bq_job def persist(self, storage: SavedDatasetStorage, allow_overwrite: bool = False): @@ -777,7 +793,7 @@ def arrow_schema_to_bq_schema(arrow_schema: pyarrow.Schema) -> List[SchemaField] Compute a deterministic hash for the `left_table_query_string` that will be used throughout all the logic as the field to GROUP BY the data */ -WITH entity_dataframe AS ( +CREATE TEMP TABLE entity_dataframe AS ( SELECT *, {{entity_df_event_timestamp_col}} AS entity_timestamp {% for featureview in featureviews %} @@ -793,95 +809,95 @@ def arrow_schema_to_bq_schema(arrow_schema: pyarrow.Schema) -> List[SchemaField] {% endif %} {% endfor %} FROM `{{ left_table_query_string }}` -), +); {% for featureview in featureviews %} - -{{ featureview.name }}__entity_dataframe AS ( - SELECT - {{ featureview.entities | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} - entity_timestamp, - {{featureview.name}}__entity_row_unique_id - FROM entity_dataframe - GROUP BY - {{ featureview.entities | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} - entity_timestamp, - {{featureview.name}}__entity_row_unique_id -), - -/* - This query template performs the point-in-time 
correctness join for a single feature set table - to the provided entity table. - - 1. We first join the current feature_view to the entity dataframe that has been passed. - This JOIN has the following logic: - - For each row of the entity dataframe, only keep the rows where the `timestamp_field` - is less than the one provided in the entity dataframe - - If there a TTL for the current feature_view, also keep the rows where the `timestamp_field` - is higher the the one provided minus the TTL - - For each row, Join on the entity key and retrieve the `entity_row_unique_id` that has been - computed previously - - The output of this CTE will contain all the necessary information and already filtered out most - of the data that is not relevant. -*/ - -{{ featureview.name }}__subquery AS ( - SELECT - {{ featureview.timestamp_field }} as event_timestamp, - {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} - {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} - {% for feature in featureview.features %} - {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} - {% endfor %} - FROM {{ featureview.table_subquery }} - WHERE {{ featureview.timestamp_field }} <= '{{ featureview.max_event_timestamp }}' - {% if featureview.ttl == 0 %}{% else %} - AND {{ featureview.timestamp_field }} >= '{{ featureview.min_event_timestamp }}' - {% endif %} -), - -{{ featureview.name }}__base AS ( - SELECT - subquery.*, - entity_dataframe.entity_timestamp, - entity_dataframe.{{featureview.name}}__entity_row_unique_id - FROM {{ featureview.name }}__subquery AS subquery - INNER JOIN {{ featureview.name }}__entity_dataframe AS entity_dataframe - ON TRUE - AND subquery.event_timestamp <= 
entity_dataframe.entity_timestamp - +CREATE TEMP TABLE {{ featureview.name }}__cleaned AS ( + WITH {{ featureview.name }}__entity_dataframe AS ( + SELECT + {{ featureview.entities | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + entity_timestamp, + {{featureview.name}}__entity_row_unique_id + FROM entity_dataframe + GROUP BY + {{ featureview.entities | join(', ')}}{% if featureview.entities %},{% else %}{% endif %} + entity_timestamp, + {{featureview.name}}__entity_row_unique_id + ), + + /* + This query template performs the point-in-time correctness join for a single feature set table + to the provided entity table. + + 1. We first join the current feature_view to the entity dataframe that has been passed. + This JOIN has the following logic: + - For each row of the entity dataframe, only keep the rows where the `timestamp_field` + is less than the one provided in the entity dataframe + - If there a TTL for the current feature_view, also keep the rows where the `timestamp_field` + is higher the the one provided minus the TTL + - For each row, Join on the entity key and retrieve the `entity_row_unique_id` that has been + computed previously + + The output of this CTE will contain all the necessary information and already filtered out most + of the data that is not relevant. 
+ */ + + {{ featureview.name }}__subquery AS ( + SELECT + {{ featureview.timestamp_field }} as event_timestamp, + {{ featureview.created_timestamp_column ~ ' as created_timestamp,' if featureview.created_timestamp_column else '' }} + {{ featureview.entity_selections | join(', ')}}{% if featureview.entity_selections %},{% else %}{% endif %} + {% for feature in featureview.features %} + {{ feature }} as {% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}{% if loop.last %}{% else %}, {% endif %} + {% endfor %} + FROM {{ featureview.table_subquery }} + WHERE {{ featureview.timestamp_field }} <= '{{ featureview.max_event_timestamp }}' {% if featureview.ttl == 0 %}{% else %} - AND subquery.event_timestamp >= Timestamp_sub(entity_dataframe.entity_timestamp, interval {{ featureview.ttl }} second) + AND {{ featureview.timestamp_field }} >= '{{ featureview.min_event_timestamp }}' {% endif %} + ), + + {{ featureview.name }}__base AS ( + SELECT + subquery.*, + entity_dataframe.entity_timestamp, + entity_dataframe.{{featureview.name}}__entity_row_unique_id + FROM {{ featureview.name }}__subquery AS subquery + INNER JOIN {{ featureview.name }}__entity_dataframe AS entity_dataframe + ON TRUE + AND subquery.event_timestamp <= entity_dataframe.entity_timestamp + + {% if featureview.ttl == 0 %}{% else %} + AND subquery.event_timestamp >= Timestamp_sub(entity_dataframe.entity_timestamp, interval {{ featureview.ttl }} second) + {% endif %} - {% for entity in featureview.entities %} - AND subquery.{{ entity }} = entity_dataframe.{{ entity }} - {% endfor %} -), - -/* - 2. If the `created_timestamp_column` has been set, we need to - deduplicate the data first. This is done by calculating the - `MAX(created_at_timestamp)` for each event_timestamp. 
- We then join the data on the next CTE -*/ -{% if featureview.created_timestamp_column %} -{{ featureview.name }}__dedup AS ( - SELECT - {{featureview.name}}__entity_row_unique_id, - event_timestamp, - MAX(created_timestamp) as created_timestamp - FROM {{ featureview.name }}__base - GROUP BY {{featureview.name}}__entity_row_unique_id, event_timestamp -), -{% endif %} + {% for entity in featureview.entities %} + AND subquery.{{ entity }} = entity_dataframe.{{ entity }} + {% endfor %} + ), + + /* + 2. If the `created_timestamp_column` has been set, we need to + deduplicate the data first. This is done by calculating the + `MAX(created_at_timestamp)` for each event_timestamp. + We then join the data on the next CTE + */ + {% if featureview.created_timestamp_column %} + {{ featureview.name }}__dedup AS ( + SELECT + {{featureview.name}}__entity_row_unique_id, + event_timestamp, + MAX(created_timestamp) as created_timestamp + FROM {{ featureview.name }}__base + GROUP BY {{featureview.name}}__entity_row_unique_id, event_timestamp + ), + {% endif %} -/* - 3. The data has been filtered during the first CTE "*__base" - Thus we only need to compute the latest timestamp of each feature. -*/ -{{ featureview.name }}__latest AS ( + /* + 3. The data has been filtered during the first CTE "*__base" + Thus we only need to compute the latest timestamp of each feature. + */ + {{ featureview.name }}__latest AS ( SELECT event_timestamp, {% if featureview.created_timestamp_column %}created_timestamp,{% endif %} @@ -900,13 +916,13 @@ def arrow_schema_to_bq_schema(arrow_schema: pyarrow.Schema) -> List[SchemaField] {% endif %} ) WHERE row_number = 1 -), +) /* 4. 
Once we know the latest value of each feature for a given timestamp, we can join again the data back to the original "base" dataset */ -{{ featureview.name }}__cleaned AS ( + SELECT base.* FROM {{ featureview.name }}__base as base INNER JOIN {{ featureview.name }}__latest @@ -917,7 +933,7 @@ def arrow_schema_to_bq_schema(arrow_schema: pyarrow.Schema) -> List[SchemaField] ,created_timestamp {% endif %} ) -){% if loop.last %}{% else %}, {% endif %} +); {% endfor %} diff --git a/sdk/python/feast/infra/offline_stores/bigquery_source.py b/sdk/python/feast/infra/offline_stores/bigquery_source.py index 28d6a3ed77e..536dbc5c141 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery_source.py +++ b/sdk/python/feast/infra/offline_stores/bigquery_source.py @@ -18,13 +18,6 @@ from feast.usage import get_user_agent from feast.value_type import ValueType -try: - from google.api_core import client_info as http_client_info -except ImportError as e: - from feast.errors import FeastExtrasDependencyImportError - - raise FeastExtrasDependencyImportError("gcp", str(e)) - @typechecked class BigQuerySource(DataSource): @@ -165,6 +158,13 @@ def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: + try: + from google.api_core import client_info as http_client_info + except ImportError as e: + from feast.errors import FeastExtrasDependencyImportError + + raise FeastExtrasDependencyImportError("gcp", str(e)) + from google.cloud import bigquery project_id = ( diff --git a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py index e3bb4e8ccaa..2e1fc0d983d 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py +++ b/sdk/python/feast/infra/offline_stores/contrib/athena_offline_store/athena.py @@ -375,7 +375,7 @@ def get_temp_table_dml_header( 
return temp_table_dml_header @log_exceptions_and_usage - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: with self._query_generator() as query: temp_table_name = "_" + str(uuid.uuid4()).replace("-", "") temp_external_location = self.get_temp_s3_path() @@ -392,7 +392,7 @@ def _to_df_internal(self) -> pd.DataFrame: ) @log_exceptions_and_usage - def _to_arrow_internal(self) -> pa.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pa.Table: with self._query_generator() as query: temp_table_name = "_" + str(uuid.uuid4()).replace("-", "") temp_external_location = self.get_temp_s3_path() diff --git a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py index 8dc5f6c6545..5849105869a 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mssql_offline_store/mssql.py @@ -327,7 +327,7 @@ def __init__( engine: Engine, config: MsSqlServerOfflineStoreConfig, full_feature_names: bool, - on_demand_feature_views: Optional[List[OnDemandFeatureView]], + on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, metadata: Optional[RetrievalMetadata] = None, drop_columns: Optional[List[str]] = None, ): @@ -347,10 +347,10 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views - def _to_df_internal(self) -> pandas.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pandas.DataFrame: return pandas.read_sql(self.query, con=self.engine).fillna(value=np.nan) - def _to_arrow_internal(self) -> pyarrow.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: result = pandas.read_sql(self.query, con=self.engine).fillna(value=np.nan) return pyarrow.Table.from_pandas(result) diff 
--git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index ada41c023b9..837b9091e72 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -241,7 +241,7 @@ def __init__( query: Union[str, Callable[[], ContextManager[str]]], config: RepoConfig, full_feature_names: bool, - on_demand_feature_views: Optional[List[OnDemandFeatureView]], + on_demand_feature_views: Optional[List[OnDemandFeatureView]] = None, metadata: Optional[RetrievalMetadata] = None, ): if not isinstance(query, str): @@ -267,7 +267,7 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: # We use arrow format because it gives better control of the table schema return self._to_arrow_internal().to_pandas() @@ -275,7 +275,7 @@ def to_sql(self) -> str: with self._query_generator() as query: return query - def _to_arrow_internal(self) -> pa.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pa.Table: with self._query_generator() as query: with _get_conn(self.config.offline_store) as conn, conn.cursor() as cur: conn.set_session(readonly=True) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py index 665a65fec53..f51bd810ea1 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -336,13 +336,13 @@ def to_spark_df(self) -> pyspark.sql.DataFrame: *_, last = map(self.spark_session.sql, statements) return last - def 
_to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: """Return dataset as Pandas DataFrame synchronously""" return self.to_spark_df().toPandas() - def _to_arrow_internal(self) -> pyarrow.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: """Return dataset as pyarrow Table synchronously""" - return pyarrow.Table.from_pandas(self._to_df_internal()) + return pyarrow.Table.from_pandas(self._to_df_internal(timeout=timeout)) def persist(self, storage: SavedDatasetStorage, allow_overwrite: bool = False): """ diff --git a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py index a5a51311eb9..7a7afa1665b 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py +++ b/sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino.py @@ -85,16 +85,16 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: """Return dataset as Pandas DataFrame synchronously including on demand transforms""" results = self._client.execute_query(query_text=self._query) self.pyarrow_schema = results.pyarrow_schema return results.to_dataframe() - def _to_arrow_internal(self) -> pyarrow.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: """Return payrrow dataset as synchronously including on demand transforms""" return pyarrow.Table.from_pandas( - self._to_df_internal(), schema=self.pyarrow_schema + self._to_df_internal(timeout=timeout), schema=self.pyarrow_schema ) def to_sql(self) -> str: diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index 15e614a5a39..d6cce78bd4c 
100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -76,14 +76,14 @@ def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views @log_exceptions_and_usage - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: # Only execute the evaluation function to build the final historical retrieval dataframe at the last moment. df = self.evaluation_function().compute() df = df.reset_index(drop=True) return df @log_exceptions_and_usage - def _to_arrow_internal(self): + def _to_arrow_internal(self, timeout: Optional[int] = None): # Only execute the evaluation function to build the final historical retrieval dataframe at the last moment. df = self.evaluation_function().compute() return pyarrow.Table.from_pandas(df) @@ -453,7 +453,9 @@ def offline_write_batch( filesystem, path = FileSource.create_filesystem_and_path( file_options.uri, file_options.s3_endpoint_override ) - prev_table = pyarrow.parquet.read_table(path, memory_map=True) + prev_table = pyarrow.parquet.read_table( + path, filesystem=filesystem, memory_map=True + ) if table.schema != prev_table.schema: table = table.cast(prev_table.schema) new_table = pyarrow.concat_tables([table, prev_table]) diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 9331b75ec25..27a98a120f4 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -62,7 +62,9 @@ class RetrievalJob(ABC): """A RetrievalJob manages the execution of a query to retrieve data from the offline store.""" def to_df( - self, validation_reference: Optional["ValidationReference"] = None + self, + validation_reference: Optional["ValidationReference"] = None, + timeout: Optional[int] = None, ) -> pd.DataFrame: """ Synchronously executes the underlying 
query and returns the result as a pandas dataframe. @@ -72,8 +74,9 @@ def to_df( Args: validation_reference (optional): The validation to apply against the retrieved dataframe. + timeout (optional): The query timeout if applicable. """ - features_df = self._to_df_internal() + features_df = self._to_df_internal(timeout=timeout) if self.on_demand_feature_views: # TODO(adchia): Fix requirement to specify dependent feature views in feature_refs @@ -101,7 +104,9 @@ def to_df( return features_df def to_arrow( - self, validation_reference: Optional["ValidationReference"] = None + self, + validation_reference: Optional["ValidationReference"] = None, + timeout: Optional[int] = None, ) -> pyarrow.Table: """ Synchronously executes the underlying query and returns the result as an arrow table. @@ -111,11 +116,12 @@ def to_arrow( Args: validation_reference (optional): The validation to apply against the retrieved dataframe. + timeout (optional): The query timeout if applicable. """ if not self.on_demand_feature_views and not validation_reference: - return self._to_arrow_internal() + return self._to_arrow_internal(timeout=timeout) - features_df = self._to_df_internal() + features_df = self._to_df_internal(timeout=timeout) if self.on_demand_feature_views: for odfv in self.on_demand_feature_views: features_df = features_df.join( @@ -147,20 +153,24 @@ def to_sql(self) -> str: pass @abstractmethod - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: """ Synchronously executes the underlying query and returns the result as a pandas dataframe. + timeout: RetreivalJob implementations may implement a timeout. + Does not handle on demand transformations or dataset validation. For either of those, `to_df` should be used. 
""" pass @abstractmethod - def _to_arrow_internal(self) -> pyarrow.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: """ Synchronously executes the underlying query and returns the result as an arrow table. + timeout: RetreivalJob implementations may implement a timeout. + Does not handle on demand transformations or dataset validation. For either of those, `to_arrow` should be used. """ diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 82b5150eaf0..ffa30ba015e 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -400,7 +400,7 @@ def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views @log_exceptions_and_usage - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: with self._query_generator() as query: return aws_utils.unload_redshift_query_to_df( self._redshift_client, @@ -414,7 +414,7 @@ def _to_df_internal(self) -> pd.DataFrame: ) @log_exceptions_and_usage - def _to_arrow_internal(self) -> pa.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pa.Table: with self._query_generator() as query: return aws_utils.unload_redshift_query_to_pa( self._redshift_client, diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index e126b059342..404927146a7 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -48,8 +48,8 @@ ) from feast.infra.registry.base_registry import BaseRegistry from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, execute_snowflake_statement, - get_snowflake_conn, write_pandas, write_parquet, ) @@ -74,13 +74,13 @@ class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): """Offline store 
config for Snowflake""" type: Literal["snowflake.offline"] = "snowflake.offline" - """ Offline store type selector""" + """ Offline store type selector """ config_path: Optional[str] = os.path.expanduser("~/.snowsql/config") """ Snowflake config path -- absolute path required (Cant use ~)""" account: Optional[str] = None - """ Snowflake deployment identifier -- drop .snowflakecomputing.com""" + """ Snowflake deployment identifier -- drop .snowflakecomputing.com """ user: Optional[str] = None """ Snowflake user name """ @@ -89,7 +89,7 @@ class SnowflakeOfflineStoreConfig(FeastConfigBaseModel): """ Snowflake password """ role: Optional[str] = None - """ Snowflake role name""" + """ Snowflake role name """ warehouse: Optional[str] = None """ Snowflake warehouse name """ @@ -155,7 +155,8 @@ def pull_latest_from_table_or_query( if data_source.snowflake_options.warehouse: config.offline_store.warehouse = data_source.snowflake_options.warehouse - snowflake_conn = get_snowflake_conn(config.offline_store) + with GetSnowflakeConnection(config.offline_store) as conn: + snowflake_conn = conn start_date = start_date.astimezone(tz=utc) end_date = end_date.astimezone(tz=utc) @@ -208,7 +209,8 @@ def pull_all_from_table_or_query( if data_source.snowflake_options.warehouse: config.offline_store.warehouse = data_source.snowflake_options.warehouse - snowflake_conn = get_snowflake_conn(config.offline_store) + with GetSnowflakeConnection(config.offline_store) as conn: + snowflake_conn = conn start_date = start_date.astimezone(tz=utc) end_date = end_date.astimezone(tz=utc) @@ -241,7 +243,8 @@ def get_historical_features( for fv in feature_views: assert isinstance(fv.batch_source, SnowflakeSource) - snowflake_conn = get_snowflake_conn(config.offline_store) + with GetSnowflakeConnection(config.offline_store) as conn: + snowflake_conn = conn entity_schema = _get_entity_schema(entity_df, snowflake_conn, config) @@ -319,7 +322,8 @@ def write_logged_features( ): assert 
isinstance(logging_config.destination, SnowflakeLoggingDestination) - snowflake_conn = get_snowflake_conn(config.offline_store) + with GetSnowflakeConnection(config.offline_store) as conn: + snowflake_conn = conn if isinstance(data, Path): write_parquet( @@ -359,7 +363,8 @@ def offline_write_batch( if table.schema != pa_schema: table = table.cast(pa_schema) - snowflake_conn = get_snowflake_conn(config.offline_store) + with GetSnowflakeConnection(config.offline_store) as conn: + snowflake_conn = conn write_pandas( snowflake_conn, @@ -410,7 +415,7 @@ def full_feature_names(self) -> bool: def on_demand_feature_views(self) -> List[OnDemandFeatureView]: return self._on_demand_feature_views - def _to_df_internal(self) -> pd.DataFrame: + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: with self._query_generator() as query: df = execute_snowflake_statement( @@ -419,7 +424,7 @@ def _to_df_internal(self) -> pd.DataFrame: return df - def _to_arrow_internal(self) -> pyarrow.Table: + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: with self._query_generator() as query: pa_table = execute_snowflake_statement( @@ -427,7 +432,6 @@ def _to_arrow_internal(self) -> pyarrow.Table: ).fetch_arrow_all() if pa_table: - return pa_table else: empty_result = execute_snowflake_statement(self.snowflake_conn, query) diff --git a/sdk/python/feast/infra/offline_stores/snowflake_source.py b/sdk/python/feast/infra/offline_stores/snowflake_source.py index cc5208a6761..63533214ea8 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake_source.py +++ b/sdk/python/feast/infra/offline_stores/snowflake_source.py @@ -213,13 +213,13 @@ def get_table_column_names_and_types( """ from feast.infra.offline_stores.snowflake import SnowflakeOfflineStoreConfig from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, execute_snowflake_statement, - get_snowflake_conn, ) assert isinstance(config.offline_store, 
SnowflakeOfflineStoreConfig) - with get_snowflake_conn(config.offline_store) as conn: + with GetSnowflakeConnection(config.offline_store) as conn: query = f"SELECT * FROM {self.get_table_query_string()} LIMIT 5" cursor = execute_snowflake_statement(conn, query) @@ -250,7 +250,7 @@ def get_table_column_names_and_types( else: column = row["column_name"] - with get_snowflake_conn(config.offline_store) as conn: + with GetSnowflakeConnection(config.offline_store) as conn: query = f'SELECT MAX("{column}") AS "{column}" FROM {self.get_table_query_string()}' result = execute_snowflake_statement( conn, query diff --git a/sdk/python/feast/infra/online_stores/contrib/rockset_online_store/__init__.py b/sdk/python/feast/infra/online_stores/contrib/rockset_online_store/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/infra/online_stores/contrib/rockset_online_store/rockset.py b/sdk/python/feast/infra/online_stores/contrib/rockset_online_store/rockset.py new file mode 100644 index 00000000000..37cfbd86afd --- /dev/null +++ b/sdk/python/feast/infra/online_stores/contrib/rockset_online_store/rockset.py @@ -0,0 +1,525 @@ +# Copyright 2022 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import logging +import os +import random +import time +from datetime import datetime +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, cast + +import requests +from rockset.exceptions import BadRequestException, RocksetException +from rockset.models import QueryRequestSql +from rockset.query_paginator import QueryPaginator +from rockset.rockset_client import RocksetClient + +from feast.entity import Entity +from feast.feature_view import FeatureView +from feast.infra.online_stores.helpers import compute_entity_id +from feast.infra.online_stores.online_store import OnlineStore +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.usage import log_exceptions_and_usage + +logger = logging.getLogger(__name__) + + +class RocksetOnlineStoreConfig(FeastConfigBaseModel): + """Online store config for Rockset store""" + + type: Literal["rockset"] = "rockset" + """Online store type selector""" + + api_key: Optional[str] = None + """Api Key to be used for Rockset Account. If not set the env var ROCKSET_APIKEY will be used.""" + + host: Optional[str] = None + """The Host Url for Rockset requests. If not set the env var ROCKSET_APISERVER will be used.""" + + read_pagination_batch_size: int = 100 + """Batch size of records that will be turned per page when paginating a batched read""" + + collection_created_timeout_secs: int = 60 + """The amount of time, in seconds, we will wait for the collection to become visible to the API""" + + collection_ready_timeout_secs: int = 30 * 60 + """The amount of time, in seconds, we will wait for the collection to enter READY state""" + + fence_all_writes: bool = True + """Whether to wait for all writes to be flushed from log and queryable. 
If False, documents that are written may not be seen immediately in subsequent reads""" + + fence_timeout_secs: int = 10 * 60 + """The amount of time we will wait, in seconds, for the write fence to be passed""" + + initial_request_backoff_secs: int = 2 + """Initial backoff, in seconds, we will wait between requests when polling for a response""" + + max_request_backoff_secs: int = 30 + """Max backoff, in seconds, we will wait between requests when polling for a response""" + + max_request_attempts: int = 10 * 1000 + """The max amount of times we will retry a failed request""" + + +class RocksetOnlineStore(OnlineStore): + """ + Rockset implementation of the online store interface. + + Attributes: + _rockset_client: Rockset openapi client. + """ + + _rockset_client = None + + @log_exceptions_and_usage(online_store="rockset") + def online_write_batch( + self, + config: RepoConfig, + table: FeatureView, + data: List[ + Tuple[EntityKeyProto, Dict[str, ValueProto], datetime, Optional[datetime]] + ], + progress: Optional[Callable[[int], Any]], + ) -> None: + """ + Write a batch of feature rows to online Rockset store. + + Args: + config: The RepoConfig for the current FeatureStore. + table: Feast FeatureView. + data: a list of quadruplets containing Feature data. Each quadruplet contains an Entity Key, + a dict containing feature values, an event timestamp for the row, and + the created timestamp for the row if it exists. + progress: Optional function to be called once every mini-batch of rows is written to + the online store. Can be used to display progress. + """ + + online_config = config.online_store + assert isinstance(online_config, RocksetOnlineStoreConfig) + + rs = self.get_rockset_client(online_config) + collection_name = self.get_collection_name(config, table) + + # We need to deduplicate on entity_id and we will save the latest timestamp version. 
+ dedup_dict = {} + for feature_vals in data: + entity_key, features, timestamp, created_ts = feature_vals + serialized_key = compute_entity_id( + entity_key=entity_key, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + + if serialized_key not in dedup_dict: + dedup_dict[serialized_key] = feature_vals + continue + + # If the entity already exists in the dictionary, ignore the entry unless it has a newer timestamp. + if timestamp <= dedup_dict[serialized_key][2]: + continue + + dedup_dict[serialized_key] = feature_vals + + request_batch = [] + for serialized_key, feature_vals in dedup_dict.items(): + document = {} + entity_key, features, timestamp, created_ts = feature_vals + document["_id"] = serialized_key + + # Rockset python client currently does not handle datetime correctly and will convert + # to string instead of native Rockset DATETIME. This will be fixed, but until then we + # use isoformat. + document["event_ts"] = timestamp.isoformat() + document["created_ts"] = ( + "" if created_ts is None else created_ts.isoformat() + ) + for k, v in features.items(): + # Rockset client currently does not support bytes type. + document[k] = v.SerializeToString().hex() + + # TODO: Implement async batching with retries. 
+ request_batch.append(document) + + if progress: + progress(1) + + resp = rs.Documents.add_documents( + collection=collection_name, data=request_batch + ) + if online_config.fence_all_writes: + self.wait_for_fence(rs, collection_name, resp["last_offset"], online_config) + + return None + + @log_exceptions_and_usage(online_store="rockset") + def online_read( + self, + config: RepoConfig, + table: FeatureView, + entity_keys: List[EntityKeyProto], + requested_features: Optional[List[str]] = None, + ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: + """ + Retrieve feature values from the online Rockset store. + + Args: + config: The RepoConfig for the current FeatureStore. + table: Feast FeatureView. 
+ entity_keys: a list of entity keys that should be read from the FeatureStore. + """ + online_config = config.online_store + assert isinstance(online_config, RocksetOnlineStoreConfig) + + rs = self.get_rockset_client(online_config) + collection_name = self.get_collection_name(config, table) + + feature_list = "" + if requested_features is not None: + feature_list = ",".join(requested_features) + + entity_serialized_key_list = [ + compute_entity_id( + k, + entity_key_serialization_version=config.entity_key_serialization_version, + ) + for k in entity_keys + ] + + entity_query_str = ",".join( + "'{id}'".format(id=s) for s in entity_serialized_key_list + ) + + query_str = f""" + SELECT + "_id", + "event_ts", + {feature_list} + FROM + {collection_name} + WHERE + "_id" IN ({entity_query_str}) + """ + + feature_set = set() + if requested_features: + feature_set.update(requested_features) + + result_map = {} + for page in QueryPaginator( + rs, + rs.Queries.query( + sql=QueryRequestSql( + query=query_str, + paginate=True, + initial_paginate_response_doc_count=online_config.read_pagination_batch_size, + ) + ), + ): + for doc in page: + result = {} + for k, v in doc.items(): + if k not in feature_set: + # We want to skip deserializing values that are not feature values, like bookkeeping values. + continue + + val = ValueProto() + + # TODO: Remove bytes <-> string parsing once client supports bytes. + val.ParseFromString(bytes.fromhex(v)) + result[k] = val + result_map[doc["_id"]] = ( + datetime.fromisoformat(doc["event_ts"]), + result, + ) + + results_list: List[ + Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]] + ] = [] + for key in entity_serialized_key_list: + if key not in result_map: + # If not found, we add a gap to let the client know. 
+ results_list.append((None, None)) + continue + + results_list.append(result_map[key]) + + return results_list + + @log_exceptions_and_usage(online_store="rockset") + def update( + self, + config: RepoConfig, + tables_to_delete: Sequence[FeatureView], + tables_to_keep: Sequence[FeatureView], + entities_to_delete: Sequence[Entity], + entities_to_keep: Sequence[Entity], + partial: bool, + ): + """ + Update tables from the Rockset Online Store. + + Args: + config: The RepoConfig for the current FeatureStore. + tables_to_delete: Tables to delete from the Rockset Online Store. + tables_to_keep: Tables to keep in the Rockset Online Store. + """ + online_config = config.online_store + assert isinstance(online_config, RocksetOnlineStoreConfig) + rs = self.get_rockset_client(online_config) + + created_collections = [] + for table_instance in tables_to_keep: + try: + collection_name = self.get_collection_name(config, table_instance) + rs.Collections.create_file_upload_collection(name=collection_name) + created_collections.append(collection_name) + except BadRequestException as e: + if self.parse_request_error_type(e) == "AlreadyExists": + # Table already exists nothing to do. We should still make sure it is ready though. + created_collections.append(collection_name) + continue + raise + + for table_to_delete in tables_to_delete: + self.delete_collection( + rs, collection_name=self.get_collection_name(config, table_to_delete) + ) + + # Now wait for all collections to be READY. + self.wait_for_ready_collections( + rs, created_collections, online_config=online_config + ) + + @log_exceptions_and_usage(online_store="rockset") + def teardown( + self, + config: RepoConfig, + tables: Sequence[FeatureView], + entities: Sequence[Entity], + ): + """ + Delete all collections from the Rockset Online Store. + + Args: + config: The RepoConfig for the current FeatureStore. + tables: Tables to delete from the feature repo. 
+ """ + online_config = config.online_store + assert isinstance(online_config, RocksetOnlineStoreConfig) + rs = self.get_rockset_client(online_config) + for table in tables: + self.delete_collection( + rs, collection_name=self.get_collection_name(config, table) + ) + + def get_rockset_client( + self, onlineConfig: RocksetOnlineStoreConfig + ) -> RocksetClient: + """ + Fetches the RocksetClient to be used for all requests for this online store based on the api + configuration in the provided config. If no configuration provided local ENV vars will be used. + + Args: + onlineConfig: The RocksetOnlineStoreConfig associated with this online store. + """ + if self._rockset_client is not None: + return self._rockset_client + + _api_key = ( + os.getenv("ROCKSET_APIKEY") + if isinstance(onlineConfig.api_key, type(None)) + else onlineConfig.api_key + ) + _host = ( + os.getenv("ROCKSET_APISERVER") + if isinstance(onlineConfig.host, type(None)) + else onlineConfig.host + ) + self._rockset_client = RocksetClient(host=_host, api_key=_api_key) + return self._rockset_client + + @staticmethod + def delete_collection(rs: RocksetClient, collection_name: str): + """ + Deletes the collection whose name was provided + + Args: + rs: The RocksetClient to be used for the deletion. + collection_name: The name of the collection to be deleted. + """ + + try: + rs.Collections.delete(collection=collection_name) + except RocksetException as e: + if RocksetOnlineStore.parse_request_error_type(e) == "NotFound": + logger.warning( + f"Trying to delete collection that does not exist {collection_name}" + ) + return + raise + + @staticmethod + def get_collection_name(config: RepoConfig, feature_view: FeatureView) -> str: + """ + Returns the collection name based on the provided config and FeatureView. + + Args: + config: RepoConfig for the online store. + feature_view: FeatureView that is backed by the returned collection name. + + Returns: + The collection name as a string. 
+ """ + project_val = config.project if config.project else "feast" + table_name = feature_view.name if feature_view.name else "feature_store" + return f"{project_val}_{table_name}" + + @staticmethod + def parse_request_error_type(e: RocksetException) -> str: + """ + Parse a throw RocksetException. Will return a string representing the type of error that was thrown. + + Args: + e: The RockException that is being parsed. + + Returns: + Error type parsed as a string. + """ + + body_dict = json.loads(e.body) + return body_dict["type"] + + @staticmethod + def wait_for_fence( + rs: RocksetClient, + collection_name: str, + last_offset: str, + online_config: RocksetOnlineStoreConfig, + ): + """ + Waits until 'last_offset' is flushed and values are ready to be read. If wait lasts longer than the timeout specified in config + a timeout exception will be throw. + + Args: + rs: Rockset client that will be used to make all requests. + collection_name: Collection associated with the offsets we are waiting for. + last_offset: The actual offsets we are waiting to be flushed. + online_config: The config that will be used to determine timeouts and backout configurations. 
+ """ + + resource_path = ( + f"/v1/orgs/self/ws/commons/collections/{collection_name}/offsets/commit" + ) + request = {"name": [last_offset]} + + headers = {} + headers["Content-Type"] = "application/json" + headers["Authorization"] = f"ApiKey {rs.api_client.configuration.api_key}" + + t_start = time.time() + for num_attempts in range(online_config.max_request_attempts): + delay = time.time() - t_start + resp = requests.post( + url=f"{rs.api_client.configuration.host}{resource_path}", + json=request, + headers=headers, + ) + + if resp.status_code == 200 and resp.json()["data"]["passed"] is True: + break + + if delay > online_config.fence_timeout_secs: + raise TimeoutError( + f"Write to collection {collection_name} at offset {last_offset} was not available for read after {delay} secs" + ) + + if resp.status_code == 429: + RocksetOnlineStore.backoff_sleep(num_attempts, online_config) + continue + elif resp.status_code != 200: + raise Exception(f"[{resp.status_code}]: {resp.reason}") + + RocksetOnlineStore.backoff_sleep(num_attempts, online_config) + + @staticmethod + def wait_for_ready_collections( + rs: RocksetClient, + collection_names: List[str], + online_config: RocksetOnlineStoreConfig, + ): + """ + Waits until all collections provided have entered READY state and can accept new documents. If wait + lasts longer than timeout a TimeoutError exception will be thrown. + + Args: + rs: Rockset client that will be used to make all requests. + collection_names: All collections that we will wait for. + timeout: The max amount of time we will wait for the collections to become READY. + """ + + t_start = time.time() + for cname in collection_names: + # We will wait until the provided timeout for all collections to become READY. 
+ for num_attempts in range(online_config.max_request_attempts): + resp = None + delay = time.time() - t_start + try: + resp = rs.Collections.get(collection=cname) + except RocksetException as e: + error_type = RocksetOnlineStore.parse_request_error_type(e) + if error_type == "NotFound": + if delay > online_config.collection_created_timeout_secs: + raise TimeoutError( + f"Collection {cname} failed to become visible after {delay} seconds" + ) + elif error_type == "RateLimitExceeded": + RocksetOnlineStore.backoff_sleep(num_attempts, online_config) + continue + else: + raise + + if ( + resp is not None + and cast(Dict[str, dict], resp)["data"]["status"] == "READY" + ): + break + + if delay > online_config.collection_ready_timeout_secs: + raise TimeoutError( + f"Collection {cname} failed to become ready after {delay} seconds" + ) + + RocksetOnlineStore.backoff_sleep(num_attempts, online_config) + + @staticmethod + def backoff_sleep(attempts: int, online_config: RocksetOnlineStoreConfig): + """ + Sleep for the needed amount of time based on the number of request attempts. + + Args: + attempts: The number of request attempts made so far. + online_config: The config that supplies the initial and max backoff, in seconds, + used to bound the randomized sleep. + """ + + default_backoff = online_config.initial_request_backoff_secs + + # Full jitter, exponential backoff. 
+ backoff = random.uniform( + default_backoff, + min(default_backoff << attempts, online_config.max_request_backoff_secs), + ) + time.sleep(backoff) diff --git a/sdk/python/feast/infra/online_stores/snowflake.py b/sdk/python/feast/infra/online_stores/snowflake.py index c4474dff38d..c1a03a2862c 100644 --- a/sdk/python/feast/infra/online_stores/snowflake.py +++ b/sdk/python/feast/infra/online_stores/snowflake.py @@ -13,8 +13,8 @@ from feast.infra.key_encoding_utils import serialize_entity_key from feast.infra.online_stores.online_store import OnlineStore from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, execute_snowflake_statement, - get_snowflake_conn, get_snowflake_online_store_path, write_pandas_binary, ) @@ -29,13 +29,13 @@ class SnowflakeOnlineStoreConfig(FeastConfigBaseModel): """Online store config for Snowflake""" type: Literal["snowflake.online"] = "snowflake.online" - """ Online store type selector""" + """ Online store type selector """ config_path: Optional[str] = os.path.expanduser("~/.snowsql/config") """ Snowflake config path -- absolute path required (Can't use ~)""" account: Optional[str] = None - """ Snowflake deployment identifier -- drop .snowflakecomputing.com""" + """ Snowflake deployment identifier -- drop .snowflakecomputing.com """ user: Optional[str] = None """ Snowflake user name """ @@ -44,7 +44,7 @@ class SnowflakeOnlineStoreConfig(FeastConfigBaseModel): """ Snowflake password """ role: Optional[str] = None - """ Snowflake role name""" + """ Snowflake role name """ warehouse: Optional[str] = None """ Snowflake warehouse name """ @@ -114,7 +114,7 @@ def online_write_batch( # This combines both the data upload plus the overwrite in the same transaction online_path = get_snowflake_online_store_path(config, table) - with get_snowflake_conn(config.online_store, autocommit=False) as conn: + with GetSnowflakeConnection(config.online_store, autocommit=False) as conn: write_pandas_binary( conn, agg_df, @@ -178,7 
+178,7 @@ def online_read( ) online_path = get_snowflake_online_store_path(config, table) - with get_snowflake_conn(config.online_store) as conn: + with GetSnowflakeConnection(config.online_store) as conn: query = f""" SELECT "entity_key", "feature_name", "value", "event_ts" @@ -220,7 +220,7 @@ def update( ): assert isinstance(config.online_store, SnowflakeOnlineStoreConfig) - with get_snowflake_conn(config.online_store) as conn: + with GetSnowflakeConnection(config.online_store) as conn: for table in tables_to_keep: online_path = get_snowflake_online_store_path(config, table) query = f""" @@ -248,7 +248,7 @@ def teardown( ): assert isinstance(config.online_store, SnowflakeOnlineStoreConfig) - with get_snowflake_conn(config.online_store) as conn: + with GetSnowflakeConnection(config.online_store) as conn: for table in tables: online_path = get_snowflake_online_store_path(config, table) query = f'DROP TABLE IF EXISTS {online_path}."[online-transient] {config.project}_{table.name}"' diff --git a/sdk/python/feast/infra/registry/proto_registry_utils.py b/sdk/python/feast/infra/registry/proto_registry_utils.py index 4dbc95d2a5a..2a275703dbb 100644 --- a/sdk/python/feast/infra/registry/proto_registry_utils.py +++ b/sdk/python/feast/infra/registry/proto_registry_utils.py @@ -1,5 +1,7 @@ -from typing import List +import uuid +from typing import List, Optional +from feast import usage from feast.data_source import DataSource from feast.entity import Entity from feast.errors import ( @@ -7,7 +9,6 @@ EntityNotFoundException, FeatureServiceNotFoundException, FeatureViewNotFoundException, - OnDemandFeatureViewNotFoundException, SavedDatasetNotFound, ValidationReferenceNotFound, ) @@ -15,12 +16,32 @@ from feast.feature_view import FeatureView from feast.on_demand_feature_view import OnDemandFeatureView from feast.project_metadata import ProjectMetadata +from feast.protos.feast.core.Registry_pb2 import ProjectMetadata as ProjectMetadataProto from 
feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.request_feature_view import RequestFeatureView from feast.saved_dataset import SavedDataset, ValidationReference from feast.stream_feature_view import StreamFeatureView +def init_project_metadata(cached_registry_proto: RegistryProto, project: str): + new_project_uuid = f"{uuid.uuid4()}" + usage.set_current_project_uuid(new_project_uuid) + cached_registry_proto.project_metadata.append( + ProjectMetadata(project_name=project, project_uuid=new_project_uuid).to_proto() + ) + + +def get_project_metadata( + registry_proto: Optional[RegistryProto], project: str +) -> Optional[ProjectMetadataProto]: + if not registry_proto: + return None + for pm in registry_proto.project_metadata: + if pm.project == project: + return pm + return None + + def get_feature_service( registry_proto: RegistryProto, name: str, project: str ) -> FeatureService: @@ -76,7 +97,7 @@ def get_on_demand_feature_view( and on_demand_feature_view.spec.name == name ): return OnDemandFeatureView.from_proto(on_demand_feature_view) - raise OnDemandFeatureViewNotFoundException(name, project=project) + raise FeatureViewNotFoundException(name, project=project) def get_data_source( @@ -116,10 +137,6 @@ def get_validation_reference( raise ValidationReferenceNotFound(name, project=project) -def list_validation_references(registry_proto: RegistryProto): - return registry_proto.validation_references - - def list_feature_services( registry_proto: RegistryProto, project: str, allow_cache: bool = False ) -> List[FeatureService]: @@ -193,13 +210,25 @@ def list_data_sources(registry_proto: RegistryProto, project: str) -> List[DataS def list_saved_datasets( - registry_proto: RegistryProto, project: str, allow_cache: bool = False + registry_proto: RegistryProto, project: str ) -> List[SavedDataset]: - return [ - SavedDataset.from_proto(saved_dataset) - for saved_dataset in registry_proto.saved_datasets - if saved_dataset.spec.project == project 
- ] + saved_datasets = [] + for saved_dataset in registry_proto.saved_datasets: + if saved_dataset.project == project: + saved_datasets.append(SavedDataset.from_proto(saved_dataset)) + return saved_datasets + + +def list_validation_references( + registry_proto: RegistryProto, project: str +) -> List[ValidationReference]: + validation_references = [] + for validation_reference in registry_proto.validation_references: + if validation_reference.project == project: + validation_references.append( + ValidationReference.from_proto(validation_reference) + ) + return validation_references def list_project_metadata( diff --git a/sdk/python/feast/infra/registry/registry.py b/sdk/python/feast/infra/registry/registry.py index 3aee7e12f6d..d2cf6a54ec0 100644 --- a/sdk/python/feast/infra/registry/registry.py +++ b/sdk/python/feast/infra/registry/registry.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -import uuid from datetime import datetime, timedelta from enum import Enum from pathlib import Path @@ -44,7 +43,6 @@ from feast.infra.registry.registry_store import NoopRegistryStore from feast.on_demand_feature_view import OnDemandFeatureView from feast.project_metadata import ProjectMetadata -from feast.protos.feast.core.Registry_pb2 import ProjectMetadata as ProjectMetadataProto from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.repo_config import RegistryConfig from feast.repo_contents import RepoContents @@ -143,25 +141,6 @@ def get_registry_store_class_from_scheme(registry_path: str): return get_registry_store_class_from_type(registry_store_type) -def _get_project_metadata( - registry_proto: Optional[RegistryProto], project: str -) -> Optional[ProjectMetadataProto]: - if not registry_proto: - return None - for pm in registry_proto.project_metadata: - if pm.project == project: - return pm - return None - - -def 
_init_project_metadata(cached_registry_proto: RegistryProto, project: str): - new_project_uuid = f"{uuid.uuid4()}" - usage.set_current_project_uuid(new_project_uuid) - cached_registry_proto.project_metadata.append( - ProjectMetadata(project_name=project, project_uuid=new_project_uuid).to_proto() - ) - - class Registry(BaseRegistry): def apply_user_metadata( self, @@ -184,19 +163,29 @@ def get_user_metadata( cached_registry_proto_ttl: timedelta def __new__( - cls, registry_config: Optional[RegistryConfig], repo_path: Optional[Path] + cls, + project: str, + registry_config: Optional[RegistryConfig], + repo_path: Optional[Path], ): # We override __new__ so that we can inspect registry_config and create a SqlRegistry without callers # needing to make any changes. if registry_config and registry_config.registry_type == "sql": from feast.infra.registry.sql import SqlRegistry - return SqlRegistry(registry_config, repo_path) + return SqlRegistry(registry_config, project, repo_path) + elif registry_config and registry_config.registry_type == "snowflake.registry": + from feast.infra.registry.snowflake import SnowflakeRegistry + + return SnowflakeRegistry(registry_config, project, repo_path) else: return super(Registry, cls).__new__(cls) def __init__( - self, registry_config: Optional[RegistryConfig], repo_path: Optional[Path] + self, + project: str, + registry_config: Optional[RegistryConfig], + repo_path: Optional[Path], ): """ Create the Registry object. 
@@ -225,7 +214,7 @@ def __init__( ) def clone(self) -> "Registry": - new_registry = Registry(None, None) + new_registry = Registry("project", None, None) new_registry.cached_registry_proto_ttl = timedelta(seconds=0) new_registry.cached_registry_proto = ( self.cached_registry_proto.__deepcopy__() @@ -243,7 +232,7 @@ def _initialize_registry(self, project: str): except FileNotFoundError: registry_proto = RegistryProto() registry_proto.registry_schema_version = REGISTRY_SCHEMA_VERSION - _init_project_metadata(registry_proto, project) + proto_registry_utils.init_project_metadata(registry_proto, project) self._registry_store.update_registry_proto(registry_proto) def update_infra(self, infra: Infra, project: str, commit: bool = True): @@ -746,7 +735,7 @@ def list_validation_references( registry_proto = self._get_registry_proto( project=project, allow_cache=allow_cache ) - return proto_registry_utils.list_validation_references(registry_proto) + return proto_registry_utils.list_validation_references(registry_proto, project) def delete_validation_reference(self, name: str, project: str, commit: bool = True): registry_proto = self._prepare_registry_for_changes(project) @@ -791,7 +780,12 @@ def _prepare_registry_for_changes(self, project: str): """Prepares the Registry for changes by refreshing the cache if necessary.""" try: self._get_registry_proto(project=project, allow_cache=True) - if _get_project_metadata(self.cached_registry_proto, project) is None: + if ( + proto_registry_utils.get_project_metadata( + self.cached_registry_proto, project + ) + is None + ): # Project metadata not initialized yet. 
Try pulling without cache self._get_registry_proto(project=project, allow_cache=False) except FileNotFoundError: @@ -802,8 +796,15 @@ def _prepare_registry_for_changes(self, project: str): # Initialize project metadata if needed assert self.cached_registry_proto - if _get_project_metadata(self.cached_registry_proto, project) is None: - _init_project_metadata(self.cached_registry_proto, project) + if ( + proto_registry_utils.get_project_metadata( + self.cached_registry_proto, project + ) + is None + ): + proto_registry_utils.init_project_metadata( + self.cached_registry_proto, project + ) self.commit() return self.cached_registry_proto @@ -836,7 +837,7 @@ def _get_registry_proto( ) if project: - old_project_metadata = _get_project_metadata( + old_project_metadata = proto_registry_utils.get_project_metadata( registry_proto=self.cached_registry_proto, project=project ) @@ -854,13 +855,13 @@ def _get_registry_proto( if not project: return registry_proto - project_metadata = _get_project_metadata( + project_metadata = proto_registry_utils.get_project_metadata( registry_proto=registry_proto, project=project ) if project_metadata: usage.set_current_project_uuid(project_metadata.project_uuid) else: - _init_project_metadata(registry_proto, project) + proto_registry_utils.init_project_metadata(registry_proto, project) self.commit() return registry_proto diff --git a/sdk/python/feast/infra/registry/snowflake.py b/sdk/python/feast/infra/registry/snowflake.py new file mode 100644 index 00000000000..12682bdca2f --- /dev/null +++ b/sdk/python/feast/infra/registry/snowflake.py @@ -0,0 +1,1096 @@ +import os +import uuid +from binascii import hexlify +from datetime import datetime, timedelta +from enum import Enum +from threading import Lock +from typing import Any, Callable, List, Optional, Set, Union + +from pydantic import Field, StrictStr +from pydantic.schema import Literal + +import feast +from feast import usage +from feast.base_feature_view import BaseFeatureView +from 
feast.data_source import DataSource +from feast.entity import Entity +from feast.errors import ( + DataSourceObjectNotFoundException, + EntityNotFoundException, + FeatureServiceNotFoundException, + FeatureViewNotFoundException, + SavedDatasetNotFound, + ValidationReferenceNotFound, +) +from feast.feature_service import FeatureService +from feast.feature_view import FeatureView +from feast.infra.infra_object import Infra +from feast.infra.registry import proto_registry_utils +from feast.infra.registry.base_registry import BaseRegistry +from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, + execute_snowflake_statement, +) +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.project_metadata import ProjectMetadata +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto +from feast.protos.feast.core.FeatureService_pb2 import ( + FeatureService as FeatureServiceProto, +) +from feast.protos.feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto +from feast.protos.feast.core.InfraObject_pb2 import Infra as InfraProto +from feast.protos.feast.core.OnDemandFeatureView_pb2 import ( + OnDemandFeatureView as OnDemandFeatureViewProto, +) +from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto +from feast.protos.feast.core.RequestFeatureView_pb2 import ( + RequestFeatureView as RequestFeatureViewProto, +) +from feast.protos.feast.core.SavedDataset_pb2 import SavedDataset as SavedDatasetProto +from feast.protos.feast.core.StreamFeatureView_pb2 import ( + StreamFeatureView as StreamFeatureViewProto, +) +from feast.protos.feast.core.ValidationProfile_pb2 import ( + ValidationReference as ValidationReferenceProto, +) +from feast.repo_config import RegistryConfig +from feast.request_feature_view import RequestFeatureView +from feast.saved_dataset import SavedDataset, ValidationReference +from 
feast.stream_feature_view import StreamFeatureView + + +class FeastMetadataKeys(Enum): + LAST_UPDATED_TIMESTAMP = "last_updated_timestamp" + PROJECT_UUID = "project_uuid" + + +class SnowflakeRegistryConfig(RegistryConfig): + """Registry config for Snowflake""" + + registry_type: Literal["snowflake.registry"] = "snowflake.registry" + """ Registry type selector """ + + type: Literal["snowflake.registry"] = "snowflake.registry" + """ Registry type selector """ + + config_path: Optional[str] = os.path.expanduser("~/.snowsql/config") + """ Snowflake config path -- absolute path required (Cant use ~) """ + + account: Optional[str] = None + """ Snowflake deployment identifier -- drop .snowflakecomputing.com """ + + user: Optional[str] = None + """ Snowflake user name """ + + password: Optional[str] = None + """ Snowflake password """ + + role: Optional[str] = None + """ Snowflake role name """ + + warehouse: Optional[str] = None + """ Snowflake warehouse name """ + + authenticator: Optional[str] = None + """ Snowflake authenticator name """ + + database: StrictStr + """ Snowflake database name """ + + schema_: Optional[str] = Field("PUBLIC", alias="schema") + """ Snowflake schema name """ + + class Config: + allow_population_by_field_name = True + + +class SnowflakeRegistry(BaseRegistry): + def __init__( + self, + registry_config, + project: str, + repo_path, + ): + assert registry_config is not None and isinstance( + registry_config, SnowflakeRegistryConfig + ), "SnowflakeRegistry needs a valid registry_config, a path does not work" + + self.registry_config = registry_config + self.registry_path = ( + f'"{self.registry_config.database}"."{self.registry_config.schema_}"' + ) + + with GetSnowflakeConnection(self.registry_config) as conn: + sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_creation.sql" + with open(sql_function_file, "r") as file: + sqlFile = file.read() + + sqlCommands = sqlFile.split(";") + for command 
in sqlCommands: + query = command.replace("REGISTRY_PATH", f"{self.registry_path}") + execute_snowflake_statement(conn, query) + + self.cached_registry_proto = self.proto() + proto_registry_utils.init_project_metadata(self.cached_registry_proto, project) + self.cached_registry_proto_created = datetime.utcnow() + self._refresh_lock = Lock() + self.cached_registry_proto_ttl = timedelta( + seconds=registry_config.cache_ttl_seconds + if registry_config.cache_ttl_seconds is not None + else 0 + ) + self.project = project + + def refresh(self, project: Optional[str] = None): + if project: + project_metadata = proto_registry_utils.get_project_metadata( + registry_proto=self.cached_registry_proto, project=project + ) + if project_metadata: + usage.set_current_project_uuid(project_metadata.project_uuid) + else: + proto_registry_utils.init_project_metadata( + self.cached_registry_proto, project + ) + self.cached_registry_proto = self.proto() + self.cached_registry_proto_created = datetime.utcnow() + + def _refresh_cached_registry_if_necessary(self): + with self._refresh_lock: + expired = ( + self.cached_registry_proto is None + or self.cached_registry_proto_created is None + ) or ( + self.cached_registry_proto_ttl.total_seconds() + > 0 # 0 ttl means infinity + and ( + datetime.utcnow() + > ( + self.cached_registry_proto_created + + self.cached_registry_proto_ttl + ) + ) + ) + + if expired: + self.refresh() + + def teardown(self): + with GetSnowflakeConnection(self.registry_config) as conn: + sql_function_file = f"{os.path.dirname(feast.__file__)}/infra/utils/snowflake/registry/snowflake_table_deletion.sql" + with open(sql_function_file, "r") as file: + sqlFile = file.read() + + sqlCommands = sqlFile.split(";") + for command in sqlCommands: + query = command.replace("REGISTRY_PATH", f"{self.registry_path}") + execute_snowflake_statement(conn, query) + + # apply operations + def apply_data_source( + self, data_source: DataSource, project: str, commit: bool = True + ): + return 
self._apply_object( + "DATA_SOURCES", + project, + "DATA_SOURCE_NAME", + data_source, + "DATA_SOURCE_PROTO", + ) + + def apply_entity(self, entity: Entity, project: str, commit: bool = True): + return self._apply_object( + "ENTITIES", project, "ENTITY_NAME", entity, "ENTITY_PROTO" + ) + + def apply_feature_service( + self, feature_service: FeatureService, project: str, commit: bool = True + ): + return self._apply_object( + "FEATURE_SERVICES", + project, + "FEATURE_SERVICE_NAME", + feature_service, + "FEATURE_SERVICE_PROTO", + ) + + def apply_feature_view( + self, feature_view: BaseFeatureView, project: str, commit: bool = True + ): + fv_table_str = self._infer_fv_table(feature_view) + fv_column_name = fv_table_str[:-1] + return self._apply_object( + fv_table_str, + project, + f"{fv_column_name}_NAME", + feature_view, + f"{fv_column_name}_PROTO", + ) + + def apply_saved_dataset( + self, + saved_dataset: SavedDataset, + project: str, + commit: bool = True, + ): + return self._apply_object( + "SAVED_DATASETS", + project, + "SAVED_DATASET_NAME", + saved_dataset, + "SAVED_DATASET_PROTO", + ) + + def apply_validation_reference( + self, + validation_reference: ValidationReference, + project: str, + commit: bool = True, + ): + return self._apply_object( + "VALIDATION_REFERENCES", + project, + "VALIDATION_REFERENCE_NAME", + validation_reference, + "VALIDATION_REFERENCE_PROTO", + ) + + def update_infra(self, infra: Infra, project: str, commit: bool = True): + self._apply_object( + "MANAGED_INFRA", + project, + "INFRA_NAME", + infra, + "INFRA_PROTO", + name="infra_obj", + ) + + def _apply_object( + self, + table: str, + project: str, + id_field_name: str, + obj: Any, + proto_field_name: str, + name: Optional[str] = None, + ): + self._maybe_init_project_metadata(project) + + name = name or (obj.name if hasattr(obj, "name") else None) + assert name, f"name needs to be provided for {obj}" + + update_datetime = datetime.utcnow() + if hasattr(obj, "last_updated_timestamp"): + 
obj.last_updated_timestamp = update_datetime + + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + project_id + FROM + {self.registry_path}."{table}" + WHERE + project_id = '{project}' + AND {id_field_name.lower()} = '{name}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + proto = hexlify(obj.to_proto().SerializeToString()).__str__()[1:] + query = f""" + UPDATE {self.registry_path}."{table}" + SET + {proto_field_name} = TO_BINARY({proto}), + last_updated_timestamp = CURRENT_TIMESTAMP() + WHERE + {id_field_name.lower()} = '{name}' + """ + execute_snowflake_statement(conn, query) + + else: + obj_proto = obj.to_proto() + + if hasattr(obj_proto, "meta") and hasattr( + obj_proto.meta, "created_timestamp" + ): + obj_proto.meta.created_timestamp.FromDatetime(update_datetime) + + proto = hexlify(obj_proto.SerializeToString()).__str__()[1:] + if table == "FEATURE_VIEWS": + query = f""" + INSERT INTO {self.registry_path}."{table}" + VALUES + ('{name}', '{project}', CURRENT_TIMESTAMP(), TO_BINARY({proto}), '', '') + """ + elif "_FEATURE_VIEWS" in table: + query = f""" + INSERT INTO {self.registry_path}."{table}" + VALUES + ('{name}', '{project}', CURRENT_TIMESTAMP(), TO_BINARY({proto}), '') + """ + else: + query = f""" + INSERT INTO {self.registry_path}."{table}" + VALUES + ('{name}', '{project}', CURRENT_TIMESTAMP(), TO_BINARY({proto})) + """ + execute_snowflake_statement(conn, query) + + self._set_last_updated_metadata(update_datetime, project) + + # delete operations + def delete_data_source(self, name: str, project: str, commit: bool = True): + return self._delete_object( + "DATA_SOURCES", + name, + project, + "DATA_SOURCE_NAME", + DataSourceObjectNotFoundException, + ) + + def delete_entity(self, name: str, project: str, commit: bool = True): + return self._delete_object( + "ENTITIES", name, project, "ENTITY_NAME", EntityNotFoundException + ) + + def delete_feature_service(self, 
name: str, project: str, commit: bool = True): + return self._delete_object( + "FEATURE_SERVICES", + name, + project, + "FEATURE_SERVICE_NAME", + FeatureServiceNotFoundException, + ) + + # can you have featureviews with the same name + def delete_feature_view(self, name: str, project: str, commit: bool = True): + deleted_count = 0 + for table in { + "FEATURE_VIEWS", + "REQUEST_FEATURE_VIEWS", + "ON_DEMAND_FEATURE_VIEWS", + "STREAM_FEATURE_VIEWS", + }: + deleted_count += self._delete_object( + table, name, project, "FEATURE_VIEW_NAME", None + ) + if deleted_count == 0: + raise FeatureViewNotFoundException(name, project) + + def delete_saved_dataset(self, name: str, project: str, allow_cache: bool = False): + self._delete_object( + "SAVED_DATASETS", + name, + project, + "SAVED_DATASET_NAME", + SavedDatasetNotFound, + ) + + def delete_validation_reference(self, name: str, project: str, commit: bool = True): + self._delete_object( + "VALIDATION_REFERENCES", + name, + project, + "VALIDATION_REFERENCE_NAME", + ValidationReferenceNotFound, + ) + + def _delete_object( + self, + table: str, + name: str, + project: str, + id_field_name: str, + not_found_exception: Optional[Callable], + ): + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + DELETE FROM {self.registry_path}."{table}" + WHERE + project_id = '{project}' + AND {id_field_name.lower()} = '{name}' + """ + cursor = execute_snowflake_statement(conn, query) + + if cursor.rowcount < 1 and not_found_exception: + raise not_found_exception(name, project) + self._set_last_updated_metadata(datetime.utcnow(), project) + + return cursor.rowcount + + # get operations + def get_data_source( + self, name: str, project: str, allow_cache: bool = False + ) -> DataSource: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_data_source( + self.cached_registry_proto, name, project + ) + return self._get_object( + "DATA_SOURCES", + name, + project, + 
DataSourceProto, + DataSource, + "DATA_SOURCE_NAME", + "DATA_SOURCE_PROTO", + DataSourceObjectNotFoundException, + ) + + def get_entity(self, name: str, project: str, allow_cache: bool = False) -> Entity: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_entity( + self.cached_registry_proto, name, project + ) + return self._get_object( + "ENTITIES", + name, + project, + EntityProto, + Entity, + "ENTITY_NAME", + "ENTITY_PROTO", + EntityNotFoundException, + ) + + def get_feature_service( + self, name: str, project: str, allow_cache: bool = False + ) -> FeatureService: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_feature_service( + self.cached_registry_proto, name, project + ) + return self._get_object( + "FEATURE_SERVICES", + name, + project, + FeatureServiceProto, + FeatureService, + "FEATURE_SERVICE_NAME", + "FEATURE_SERVICE_PROTO", + FeatureServiceNotFoundException, + ) + + def get_feature_view( + self, name: str, project: str, allow_cache: bool = False + ) -> FeatureView: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_feature_view( + self.cached_registry_proto, name, project + ) + return self._get_object( + "FEATURE_VIEWS", + name, + project, + FeatureViewProto, + FeatureView, + "FEATURE_VIEW_NAME", + "FEATURE_VIEW_PROTO", + FeatureViewNotFoundException, + ) + + def get_infra(self, project: str, allow_cache: bool = False) -> Infra: + infra_object = self._get_object( + "MANAGED_INFRA", + "infra_obj", + project, + InfraProto, + Infra, + "INFRA_NAME", + "INFRA_PROTO", + None, + ) + infra_object = infra_object or InfraProto() + return Infra.from_proto(infra_object) + + def get_on_demand_feature_view( + self, name: str, project: str, allow_cache: bool = False + ) -> OnDemandFeatureView: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_on_demand_feature_view( + 
self.cached_registry_proto, name, project + ) + return self._get_object( + "ON_DEMAND_FEATURE_VIEWS", + name, + project, + OnDemandFeatureViewProto, + OnDemandFeatureView, + "ON_DEMAND_FEATURE_VIEW_NAME", + "ON_DEMAND_FEATURE_VIEW_PROTO", + FeatureViewNotFoundException, + ) + + def get_request_feature_view( + self, name: str, project: str, allow_cache: bool = False + ) -> RequestFeatureView: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_request_feature_view( + self.cached_registry_proto, name, project + ) + return self._get_object( + "REQUEST_FEATURE_VIEWS", + name, + project, + RequestFeatureViewProto, + RequestFeatureView, + "REQUEST_FEATURE_VIEW_NAME", + "REQUEST_FEATURE_VIEW_PROTO", + FeatureViewNotFoundException, + ) + + def get_saved_dataset( + self, name: str, project: str, allow_cache: bool = False + ) -> SavedDataset: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_saved_dataset( + self.cached_registry_proto, name, project + ) + return self._get_object( + "SAVED_DATASETS", + name, + project, + SavedDatasetProto, + SavedDataset, + "SAVED_DATASET_NAME", + "SAVED_DATASET_PROTO", + SavedDatasetNotFound, + ) + + def get_stream_feature_view( + self, name: str, project: str, allow_cache: bool = False + ): + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_stream_feature_view( + self.cached_registry_proto, name, project + ) + return self._get_object( + "STREAM_FEATURE_VIEWS", + name, + project, + StreamFeatureViewProto, + StreamFeatureView, + "STREAM_FEATURE_VIEW_NAME", + "STREAM_FEATURE_VIEW_PROTO", + FeatureViewNotFoundException, + ) + + def get_validation_reference( + self, name: str, project: str, allow_cache: bool = False + ) -> ValidationReference: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.get_validation_reference( + self.cached_registry_proto, name, 
project + ) + return self._get_object( + "VALIDATION_REFERENCES", + name, + project, + ValidationReferenceProto, + ValidationReference, + "VALIDATION_REFERENCE_NAME", + "VALIDATION_REFERENCE_PROTO", + ValidationReferenceNotFound, + ) + + def _get_object( + self, + table: str, + name: str, + project: str, + proto_class: Any, + python_class: Any, + id_field_name: str, + proto_field_name: str, + not_found_exception: Optional[Callable], + ): + self._maybe_init_project_metadata(project) + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + {proto_field_name} + FROM + {self.registry_path}."{table}" + WHERE + project_id = '{project}' + AND {id_field_name.lower()} = '{name}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + _proto = proto_class.FromString(df.squeeze()) + return python_class.from_proto(_proto) + elif not_found_exception: + raise not_found_exception(name, project) + else: + return None + + # list operations + def list_data_sources( + self, project: str, allow_cache: bool = False + ) -> List[DataSource]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_data_sources( + self.cached_registry_proto, project + ) + return self._list_objects( + "DATA_SOURCES", project, DataSourceProto, DataSource, "DATA_SOURCE_PROTO" + ) + + def list_entities(self, project: str, allow_cache: bool = False) -> List[Entity]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_entities( + self.cached_registry_proto, project + ) + return self._list_objects( + "ENTITIES", project, EntityProto, Entity, "ENTITY_PROTO" + ) + + def list_feature_services( + self, project: str, allow_cache: bool = False + ) -> List[FeatureService]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_feature_services( + self.cached_registry_proto, project + ) + return 
self._list_objects( + "FEATURE_SERVICES", + project, + FeatureServiceProto, + FeatureService, + "FEATURE_SERVICE_PROTO", + ) + + def list_feature_views( + self, project: str, allow_cache: bool = False + ) -> List[FeatureView]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_feature_views( + self.cached_registry_proto, project + ) + return self._list_objects( + "FEATURE_VIEWS", + project, + FeatureViewProto, + FeatureView, + "FEATURE_VIEW_PROTO", + ) + + def list_on_demand_feature_views( + self, project: str, allow_cache: bool = False + ) -> List[OnDemandFeatureView]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_on_demand_feature_views( + self.cached_registry_proto, project + ) + return self._list_objects( + "ON_DEMAND_FEATURE_VIEWS", + project, + OnDemandFeatureViewProto, + OnDemandFeatureView, + "ON_DEMAND_FEATURE_VIEW_PROTO", + ) + + def list_request_feature_views( + self, project: str, allow_cache: bool = False + ) -> List[RequestFeatureView]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_request_feature_views( + self.cached_registry_proto, project + ) + return self._list_objects( + "REQUEST_FEATURE_VIEWS", + project, + RequestFeatureViewProto, + RequestFeatureView, + "REQUEST_FEATURE_VIEW_PROTO", + ) + + def list_saved_datasets( + self, project: str, allow_cache: bool = False + ) -> List[SavedDataset]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_saved_datasets( + self.cached_registry_proto, project + ) + return self._list_objects( + "SAVED_DATASETS", + project, + SavedDatasetProto, + SavedDataset, + "SAVED_DATASET_PROTO", + ) + + def list_stream_feature_views( + self, project: str, allow_cache: bool = False + ) -> List[StreamFeatureView]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return 
proto_registry_utils.list_stream_feature_views( + self.cached_registry_proto, project + ) + return self._list_objects( + "STREAM_FEATURE_VIEWS", + project, + StreamFeatureViewProto, + StreamFeatureView, + "STREAM_FEATURE_VIEW_PROTO", + ) + + def list_validation_references( + self, project: str, allow_cache: bool = False + ) -> List[ValidationReference]: + return self._list_objects( + "VALIDATION_REFERENCES", + project, + ValidationReferenceProto, + ValidationReference, + "VALIDATION_REFERENCE_PROTO", + ) + + def _list_objects( + self, + table: str, + project: str, + proto_class: Any, + python_class: Any, + proto_field_name: str, + ): + self._maybe_init_project_metadata(project) + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + {proto_field_name} + FROM + {self.registry_path}."{table}" + WHERE + project_id = '{project}' + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + return [ + python_class.from_proto( + proto_class.FromString(row[1][proto_field_name]) + ) + for row in df.iterrows() + ] + return [] + + def apply_materialization( + self, + feature_view: FeatureView, + project: str, + start_date: datetime, + end_date: datetime, + commit: bool = True, + ): + fv_table_str = self._infer_fv_table(feature_view) + fv_column_name = fv_table_str[:-1] + python_class, proto_class = self._infer_fv_classes(feature_view) + + if python_class in {RequestFeatureView, OnDemandFeatureView}: + raise ValueError( + f"Cannot apply materialization for feature {feature_view.name} of type {python_class}" + ) + fv: Union[FeatureView, StreamFeatureView] = self._get_object( + fv_table_str, + feature_view.name, + project, + proto_class, + python_class, + f"{fv_column_name}_NAME", + f"{fv_column_name}_PROTO", + FeatureViewNotFoundException, + ) + fv.materialization_intervals.append((start_date, end_date)) + self._apply_object( + fv_table_str, + project, + f"{fv_column_name}_NAME", + fv, + 
f"{fv_column_name}_PROTO", + ) + + def list_project_metadata( + self, project: str, allow_cache: bool = False + ) -> List[ProjectMetadata]: + if allow_cache: + self._refresh_cached_registry_if_necessary() + return proto_registry_utils.list_project_metadata( + self.cached_registry_proto, project + ) + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + metadata_key, + metadata_value + FROM + {self.registry_path}."FEAST_METADATA" + WHERE + project_id = '{project}' + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + project_metadata = ProjectMetadata(project_name=project) + for row in df.iterrows(): + if row[1]["METADATA_KEY"] == FeastMetadataKeys.PROJECT_UUID.value: + project_metadata.project_uuid = row[1]["METADATA_VALUE"] + break + # TODO(adchia): Add other project metadata in a structured way + return [project_metadata] + return [] + + def apply_user_metadata( + self, + project: str, + feature_view: BaseFeatureView, + metadata_bytes: Optional[bytes], + ): + fv_table_str = self._infer_fv_table(feature_view) + fv_column_name = fv_table_str[:-1].lower() + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + project_id + FROM + {self.registry_path}."{fv_table_str}" + WHERE + project_id = '{project}' + AND {fv_column_name}_name = '{feature_view.name}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + if metadata_bytes: + metadata_hex = hexlify(metadata_bytes).__str__()[1:] + else: + metadata_hex = "''" + query = f""" + UPDATE {self.registry_path}."{fv_table_str}" + SET + user_metadata = TO_BINARY({metadata_hex}), + last_updated_timestamp = CURRENT_TIMESTAMP() + WHERE + project_id = '{project}' + AND {fv_column_name}_name = '{feature_view.name}' + """ + execute_snowflake_statement(conn, query) + else: + raise FeatureViewNotFoundException(feature_view.name, project=project) + + def get_user_metadata( + self, 
project: str, feature_view: BaseFeatureView + ) -> Optional[bytes]: + fv_table_str = self._infer_fv_table(feature_view) + fv_column_name = fv_table_str[:-1].lower() + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + user_metadata + FROM + {self.registry_path}."{fv_table_str}" + WHERE + {fv_column_name}_name = '{feature_view.name}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + return df.squeeze() + else: + raise FeatureViewNotFoundException(feature_view.name, project=project) + + def proto(self) -> RegistryProto: + r = RegistryProto() + last_updated_timestamps = [] + projects = self._get_all_projects() + for project in projects: + for lister, registry_proto_field in [ + (self.list_entities, r.entities), + (self.list_feature_views, r.feature_views), + (self.list_data_sources, r.data_sources), + (self.list_on_demand_feature_views, r.on_demand_feature_views), + (self.list_request_feature_views, r.request_feature_views), + (self.list_stream_feature_views, r.stream_feature_views), + (self.list_feature_services, r.feature_services), + (self.list_saved_datasets, r.saved_datasets), + (self.list_validation_references, r.validation_references), + (self.list_project_metadata, r.project_metadata), + ]: + objs: List[Any] = lister(project) # type: ignore + if objs: + obj_protos = [obj.to_proto() for obj in objs] + for obj_proto in obj_protos: + if "spec" in obj_proto.DESCRIPTOR.fields_by_name: + obj_proto.spec.project = project + else: + obj_proto.project = project + registry_proto_field.extend(obj_protos) + + # This is suuuper jank. Because of https://github.com/feast-dev/feast/issues/2783, + # the registry proto only has a single infra field, which we're currently setting as the "last" project. 
+ r.infra.CopyFrom(self.get_infra(project).to_proto()) + last_updated_timestamps.append(self._get_last_updated_metadata(project)) + + if last_updated_timestamps: + r.last_updated.FromDatetime(max(last_updated_timestamps)) + + return r + + def _get_all_projects(self) -> Set[str]: + projects = set() + + base_tables = [ + "DATA_SOURCES", + "ENTITIES", + "FEATURE_VIEWS", + "ON_DEMAND_FEATURE_VIEWS", + "REQUEST_FEATURE_VIEWS", + "STREAM_FEATURE_VIEWS", + ] + + with GetSnowflakeConnection(self.registry_config) as conn: + for table in base_tables: + query = ( + f'SELECT DISTINCT project_id FROM {self.registry_path}."{table}"' + ) + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + for row in df.iterrows(): + projects.add(row[1]["PROJECT_ID"]) + + return projects + + def _get_last_updated_metadata(self, project: str): + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + metadata_value + FROM + {self.registry_path}."FEAST_METADATA" + WHERE + project_id = '{project}' + AND metadata_key = '{FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if df.empty: + return None + + return datetime.utcfromtimestamp(int(df.squeeze())) + + def _infer_fv_classes(self, feature_view): + if isinstance(feature_view, StreamFeatureView): + python_class, proto_class = StreamFeatureView, StreamFeatureViewProto + elif isinstance(feature_view, FeatureView): + python_class, proto_class = FeatureView, FeatureViewProto + elif isinstance(feature_view, OnDemandFeatureView): + python_class, proto_class = OnDemandFeatureView, OnDemandFeatureViewProto + elif isinstance(feature_view, RequestFeatureView): + python_class, proto_class = RequestFeatureView, RequestFeatureViewProto + else: + raise ValueError(f"Unexpected feature view type: {type(feature_view)}") + return python_class, proto_class + + def _infer_fv_table(self, feature_view) -> str: + if isinstance(feature_view, 
StreamFeatureView): + table = "STREAM_FEATURE_VIEWS" + elif isinstance(feature_view, FeatureView): + table = "FEATURE_VIEWS" + elif isinstance(feature_view, OnDemandFeatureView): + table = "ON_DEMAND_FEATURE_VIEWS" + elif isinstance(feature_view, RequestFeatureView): + table = "REQUEST_FEATURE_VIEWS" + else: + raise ValueError(f"Unexpected feature view type: {type(feature_view)}") + return table + + def _maybe_init_project_metadata(self, project): + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + metadata_value + FROM + {self.registry_path}."FEAST_METADATA" + WHERE + project_id = '{project}' + AND metadata_key = '{FeastMetadataKeys.PROJECT_UUID.value}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + if not df.empty: + usage.set_current_project_uuid(df.squeeze()) + else: + new_project_uuid = f"{uuid.uuid4()}" + query = f""" + INSERT INTO {self.registry_path}."FEAST_METADATA" + VALUES + ('{project}', '{FeastMetadataKeys.PROJECT_UUID.value}', '{new_project_uuid}', CURRENT_TIMESTAMP()) + """ + execute_snowflake_statement(conn, query) + + usage.set_current_project_uuid(new_project_uuid) + + def _set_last_updated_metadata(self, last_updated: datetime, project: str): + with GetSnowflakeConnection(self.registry_config) as conn: + query = f""" + SELECT + project_id + FROM + {self.registry_path}."FEAST_METADATA" + WHERE + project_id = '{project}' + AND metadata_key = '{FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value}' + LIMIT 1 + """ + df = execute_snowflake_statement(conn, query).fetch_pandas_all() + + update_time = int(last_updated.timestamp()) + if not df.empty: + query = f""" + UPDATE {self.registry_path}."FEAST_METADATA" + SET + project_id = '{project}', + metadata_key = '{FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value}', + metadata_value = '{update_time}', + last_updated_timestamp = CURRENT_TIMESTAMP() + WHERE + project_id = '{project}' + AND metadata_key = 
'{FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value}' + """ + execute_snowflake_statement(conn, query) + + else: + query = f""" + INSERT INTO {self.registry_path}."FEAST_METADATA" + VALUES + ('{project}', '{FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value}', '{update_time}', CURRENT_TIMESTAMP()) + """ + execute_snowflake_statement(conn, query) + + def commit(self): + pass diff --git a/sdk/python/feast/infra/registry/sql.py b/sdk/python/feast/infra/registry/sql.py index 2326651b1c0..628b6d1e656 100644 --- a/sdk/python/feast/infra/registry/sql.py +++ b/sdk/python/feast/infra/registry/sql.py @@ -180,12 +180,16 @@ class FeastMetadataKeys(Enum): class SqlRegistry(BaseRegistry): def __init__( - self, registry_config: Optional[RegistryConfig], repo_path: Optional[Path] + self, + registry_config: Optional[RegistryConfig], + project: str, + repo_path: Optional[Path], ): assert registry_config is not None, "SqlRegistry needs a valid registry_config" self.engine: Engine = create_engine(registry_config.path, echo=False) metadata.create_all(self.engine) self.cached_registry_proto = self.proto() + proto_registry_utils.init_project_metadata(self.cached_registry_proto, project) self.cached_registry_proto_created = datetime.utcnow() self._refresh_lock = Lock() self.cached_registry_proto_ttl = timedelta( @@ -193,6 +197,7 @@ def __init__( if registry_config.cache_ttl_seconds is not None else 0 ) + self.project = project def teardown(self): for t in { @@ -210,6 +215,16 @@ def teardown(self): conn.execute(stmt) def refresh(self, project: Optional[str] = None): + if project: + project_metadata = proto_registry_utils.get_project_metadata( + registry_proto=self.cached_registry_proto, project=project + ) + if project_metadata: + usage.set_current_project_uuid(project_metadata.project_uuid) + else: + proto_registry_utils.init_project_metadata( + self.cached_registry_proto, project + ) self.cached_registry_proto = self.proto() self.cached_registry_proto_created = datetime.utcnow() @@ -414,7 
+429,7 @@ def list_validation_references( if allow_cache: self._refresh_cached_registry_if_necessary() return proto_registry_utils.list_validation_references( - self.cached_registry_proto + self.cached_registry_proto, project ) return self._list_objects( table=validation_references, @@ -816,7 +831,13 @@ def proto(self) -> RegistryProto: ]: objs: List[Any] = lister(project) # type: ignore if objs: - registry_proto_field.extend([obj.to_proto() for obj in objs]) + obj_protos = [obj.to_proto() for obj in objs] + for obj_proto in obj_protos: + if "spec" in obj_proto.DESCRIPTOR.fields_by_name: + obj_proto.spec.project = project + else: + obj_proto.project = project + registry_proto_field.extend(obj_protos) # This is suuuper jank. Because of https://github.com/feast-dev/feast/issues/2783, # the registry proto only has a single infra field, which we're currently setting as the "last" project. diff --git a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql new file mode 100644 index 00000000000..4b53d6bb3f6 --- /dev/null +++ b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_creation.sql @@ -0,0 +1,92 @@ +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."DATA_SOURCES" ( + data_source_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + data_source_proto BINARY NOT NULL, + PRIMARY KEY (data_source_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."ENTITIES" ( + entity_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + entity_proto BINARY NOT NULL, + PRIMARY KEY (entity_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."FEAST_METADATA" ( + project_id VARCHAR, + metadata_key VARCHAR, + metadata_value VARCHAR NOT NULL, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + PRIMARY KEY (project_id, metadata_key) +); + +CREATE TABLE IF NOT EXISTS 
REGISTRY_PATH."FEATURE_SERVICES" ( + feature_service_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + feature_service_proto BINARY NOT NULL, + PRIMARY KEY (feature_service_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."FEATURE_VIEWS" ( + feature_view_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + feature_view_proto BINARY NOT NULL, + materialized_intervals BINARY, + user_metadata BINARY, + PRIMARY KEY (feature_view_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."MANAGED_INFRA" ( + infra_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + infra_proto BINARY NOT NULL, + PRIMARY KEY (infra_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."ON_DEMAND_FEATURE_VIEWS" ( + on_demand_feature_view_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + on_demand_feature_view_proto BINARY NOT NULL, + user_metadata BINARY, + PRIMARY KEY (on_demand_feature_view_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."REQUEST_FEATURE_VIEWS" ( + request_feature_view_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + request_feature_view_proto BINARY NOT NULL, + user_metadata BINARY, + PRIMARY KEY (request_feature_view_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."SAVED_DATASETS" ( + saved_dataset_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + saved_dataset_proto BINARY NOT NULL, + PRIMARY KEY (saved_dataset_name, project_id) +); + +CREATE TABLE IF NOT EXISTS REGISTRY_PATH."STREAM_FEATURE_VIEWS" ( + stream_feature_view_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + stream_feature_view_proto BINARY NOT NULL, + user_metadata BINARY, + PRIMARY KEY (stream_feature_view_name, project_id) +); + +CREATE TABLE IF NOT EXISTS 
REGISTRY_PATH."VALIDATION_REFERENCES" ( + validation_reference_name VARCHAR, + project_id VARCHAR, + last_updated_timestamp TIMESTAMP_LTZ NOT NULL, + validation_reference_proto BINARY NOT NULL, + PRIMARY KEY (validation_reference_name, project_id) +) diff --git a/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql new file mode 100644 index 00000000000..7f5c1991eac --- /dev/null +++ b/sdk/python/feast/infra/utils/snowflake/registry/snowflake_table_deletion.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS REGISTRY_PATH."DATA_SOURCES"; + +DROP TABLE IF EXISTS REGISTRY_PATH."ENTITIES"; + +DROP TABLE IF EXISTS REGISTRY_PATH."FEAST_METADATA"; + +DROP TABLE IF EXISTS REGISTRY_PATH."FEATURE_SERVICES"; + +DROP TABLE IF EXISTS REGISTRY_PATH."FEATURE_VIEWS"; + +DROP TABLE IF EXISTS REGISTRY_PATH."MANAGED_INFRA"; + +DROP TABLE IF EXISTS REGISTRY_PATH."ON_DEMAND_FEATURE_VIEWS"; + +DROP TABLE IF EXISTS REGISTRY_PATH."REQUEST_FEATURE_VIEWS"; + +DROP TABLE IF EXISTS REGISTRY_PATH."SAVED_DATASETS"; + +DROP TABLE IF EXISTS REGISTRY_PATH."STREAM_FEATURE_VIEWS"; + +DROP TABLE IF EXISTS REGISTRY_PATH."VALIDATION_REFERENCES" diff --git a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py index a5d2b05d45d..a4cda89a6f6 100644 --- a/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py +++ b/sdk/python/feast/infra/utils/snowflake/snowflake_utils.py @@ -39,6 +39,77 @@ getLogger("snowflake.connector.network").disabled = True logger = getLogger(__name__) +_cache = {} + + +class GetSnowflakeConnection: + def __init__(self, config: str, autocommit=True): + self.config = config + self.autocommit = autocommit + + def __enter__(self): + + assert self.config.type in [ + "snowflake.registry", + "snowflake.offline", + "snowflake.engine", + "snowflake.online", + ] + + if self.config.type not in _cache: + if self.config.type == 
"snowflake.registry": + config_header = "connections.feast_registry" + elif self.config.type == "snowflake.offline": + config_header = "connections.feast_offline_store" + if self.config.type == "snowflake.engine": + config_header = "connections.feast_batch_engine" + elif self.config.type == "snowflake.online": + config_header = "connections.feast_online_store" + + config_dict = dict(self.config) + + # read config file + config_reader = configparser.ConfigParser() + config_reader.read([config_dict["config_path"]]) + kwargs: Dict[str, Any] = {} + if config_reader.has_section(config_header): + kwargs = dict(config_reader[config_header]) + + kwargs.update((k, v) for k, v in config_dict.items() if v is not None) + + for k, v in kwargs.items(): + if k in ["role", "warehouse", "database", "schema_"]: + kwargs[k] = f'"{v}"' + + kwargs["schema"] = kwargs.pop("schema_") + + # https://docs.snowflake.com/en/user-guide/python-connector-example.html#using-key-pair-authentication-key-pair-rotation + # https://docs.snowflake.com/en/user-guide/key-pair-auth.html#configuring-key-pair-authentication + if "private_key" in kwargs: + kwargs["private_key"] = parse_private_key_path( + kwargs["private_key"], kwargs["private_key_passphrase"] + ) + + try: + _cache[self.config.type] = snowflake.connector.connect( + application="feast", + client_session_keep_alive=True, + autocommit=self.autocommit, + **kwargs, + ) + _cache[self.config.type].cursor().execute( + "ALTER SESSION SET TIMEZONE = 'UTC'", _is_internal=True + ) + + except KeyError as e: + raise SnowflakeIncompleteConfig(e) + + self.client = _cache[self.config.type] + return self.client + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + def assert_snowflake_feature_names(feature_view: FeatureView) -> None: for feature in feature_view.features: @@ -57,54 +128,6 @@ def execute_snowflake_statement(conn: SnowflakeConnection, query) -> SnowflakeCu return cursor -def get_snowflake_conn(config, autocommit=True) -> 
SnowflakeConnection: - assert config.type in ["snowflake.offline", "snowflake.engine", "snowflake.online"] - - if config.type == "snowflake.offline": - config_header = "connections.feast_offline_store" - if config.type == "snowflake.engine": - config_header = "connections.feast_batch_engine" - elif config.type == "snowflake.online": - config_header = "connections.feast_online_store" - - config_dict = dict(config) - - # read config file - config_reader = configparser.ConfigParser() - config_reader.read([config_dict["config_path"]]) - kwargs: Dict[str, Any] = {} - if config_reader.has_section(config_header): - kwargs = dict(config_reader[config_header]) - - kwargs.update((k, v) for k, v in config_dict.items() if v is not None) - - for k, v in kwargs.items(): - if k in ["role", "warehouse", "database", "schema_"]: - kwargs[k] = f'"{v}"' - - kwargs["schema"] = kwargs.pop("schema_") - - # https://docs.snowflake.com/en/user-guide/python-connector-example.html#using-key-pair-authentication-key-pair-rotation - # https://docs.snowflake.com/en/user-guide/key-pair-auth.html#configuring-key-pair-authentication - if "private_key" in kwargs: - kwargs["private_key"] = parse_private_key_path( - kwargs["private_key"], kwargs["private_key_passphrase"] - ) - - try: - conn = snowflake.connector.connect( - application="feast", - autocommit=autocommit, - **kwargs, - ) - - conn.cursor().execute("ALTER SESSION SET TIMEZONE = 'UTC'", _is_internal=True) - - return conn - except KeyError as e: - raise SnowflakeIncompleteConfig(e) - - def get_snowflake_online_store_path( config: RepoConfig, feature_view: FeatureView, diff --git a/sdk/python/feast/repo_config.py b/sdk/python/feast/repo_config.py index 673d039ff0b..200f9d284ee 100644 --- a/sdk/python/feast/repo_config.py +++ b/sdk/python/feast/repo_config.py @@ -15,7 +15,7 @@ validator, ) from pydantic.error_wrappers import ErrorWrapper -from pydantic.typing import Dict, Optional, Union +from pydantic.typing import Dict, Optional from 
feast.errors import ( FeastFeatureServerTypeInvalidError, @@ -23,6 +23,8 @@ FeastOfflineStoreInvalidName, FeastOnlineStoreInvalidName, FeastProviderNotSetError, + FeastRegistryNotSetError, + FeastRegistryTypeInvalidError, ) from feast.importer import import_class from feast.usage import log_exceptions @@ -34,6 +36,12 @@ # These dict exists so that: # - existing values for the online store type in featurestore.yaml files continue to work in a backwards compatible way # - first party and third party implementations can use the same class loading code path. +REGISTRY_CLASS_FOR_TYPE = { + "file": "feast.infra.registry.registry.Registry", + "sql": "feast.infra.registry.sql.SqlRegistry", + "snowflake.registry": "feast.infra.registry.snowflake.SnowflakeRegistry", +} + BATCH_ENGINE_CLASS_FOR_TYPE = { "local": "feast.infra.materialization.local_engine.LocalMaterializationEngine", "snowflake.engine": "feast.infra.materialization.snowflake_engine.SnowflakeMaterializationEngine", @@ -53,6 +61,7 @@ "hbase": "feast.infra.online_stores.contrib.hbase_online_store.hbase.HbaseOnlineStore", "cassandra": "feast.infra.online_stores.contrib.cassandra_online_store.cassandra_online_store.CassandraOnlineStore", "mysql": "feast.infra.online_stores.contrib.mysql_online_store.mysql.MySQLOnlineStore", + "rockset": "feast.infra.online_stores.contrib.rockset_online_store.rockset.RocksetOnlineStore", } OFFLINE_STORE_CLASS_FOR_TYPE = { @@ -100,14 +109,15 @@ class RegistryConfig(FeastBaseModel): """Metadata Store Configuration. Configuration that relates to reading from and writing to the Feast registry.""" registry_type: StrictStr = "file" - """ str: Provider name or a class name that implements RegistryStore. - If specified, registry_store_type should be redundant.""" + """ str: Provider name or a class name that implements Registry.""" registry_store_type: Optional[StrictStr] """ str: Provider name or a class name that implements RegistryStore.
""" - path: StrictStr - """ str: Path to metadata store. Can be a local path, or remote object storage path, e.g. a GCS URI """ + path: StrictStr = "" + """ str: Path to metadata store. + If registry_type is 'file', then this can be a local path, or remote object storage path, e.g. a GCS URI + If registry_type is 'sql', then this is a database URL as expected by SQLAlchemy """ cache_ttl_seconds: StrictInt = 600 """int: The cache TTL is the amount of time registry state will be cached in memory. If this TTL is exceeded then @@ -122,9 +132,6 @@ class RegistryConfig(FeastBaseModel): class RepoConfig(FeastBaseModel): """Repo config. Typically loaded from `feature_store.yaml`""" - registry: Union[StrictStr, RegistryConfig] = "data/registry.db" - """ str: Path to metadata store. Can be a local path, or remote object storage path, e.g. a GCS URI """ - project: StrictStr """ str: Feast project id. This can be any alphanumeric string up to 16 characters. You can have multiple independent feature repositories deployed to the same cloud @@ -134,6 +141,14 @@ class RepoConfig(FeastBaseModel): provider: StrictStr """ str: local or gcp or aws """ + _registry_config: Any = Field(alias="registry", default="data/registry.db") + """ Configures the registry. + Can be: + 1. str: a path to a file based registry (a local path, or remote object storage path, e.g. a GCS URI) + 2. RegistryConfig: A fully specified file based registry or SQL based registry + 3. SnowflakeRegistryConfig: Using a Snowflake table to store the registry + """ + _online_config: Any = Field(alias="online_store") """ OnlineStoreConfig: Online store configuration (optional depending on provider) """ @@ -151,12 +166,6 @@ class RepoConfig(FeastBaseModel): repo_path: Optional[Path] = None - go_feature_serving: Optional[bool] = False - """ If True, use the Go feature server instead of the Python feature server.
""" - - go_feature_retrieval: Optional[bool] = False - """ If True, use the embedded Go code to retrieve features instead of the Python SDK. """ - entity_key_serialization_version: StrictInt = 1 """ Entity key serialization version: This version is used to control what serialization scheme is used when writing data to the online store. @@ -174,6 +183,11 @@ class RepoConfig(FeastBaseModel): def __init__(self, **data: Any): super().__init__(**data) + self._registry = None + if "registry" not in data: + raise FeastRegistryNotSetError() + self._registry_config = data["registry"] + self._offline_store = None if "offline_store" in data: self._offline_config = data["offline_store"] @@ -197,6 +211,8 @@ def __init__(self, **data: Any): self._online_config = "datastore" elif data["provider"] == "aws": self._online_config = "dynamodb" + elif data["provider"] == "rockset": + self._online_config = "rockset" self._batch_engine = None if "batch_engine" in data: @@ -222,11 +238,25 @@ def __init__(self, **data: Any): RuntimeWarning, ) - def get_registry_config(self): - if isinstance(self.registry, str): - return RegistryConfig(path=self.registry) - else: - return self.registry + @property + def registry(self): + if not self._registry: + if isinstance(self._registry_config, Dict): + if "registry_type" in self._registry_config: + self._registry = get_registry_config_from_type( + self._registry_config["registry_type"] + )(**self._registry_config) + else: + # This may be a custom registry store, which does not need a 'registry_type' + self._registry = RegistryConfig(**self._registry_config) + elif isinstance(self._registry_config, str): + # User passed in just a path to file registry + self._registry = get_registry_config_from_type("file")( + path=self._registry_config + ) + elif self._registry_config: + self._registry = self._registry_config + return self._registry @property def offline_store(self): @@ -456,6 +486,16 @@ def get_data_source_class_from_type(data_source_type: str): 
return import_class(module_name, config_class_name, "DataSource") +def get_registry_config_from_type(registry_type: str): + # We do not support custom registries right now + if registry_type not in REGISTRY_CLASS_FOR_TYPE: + raise FeastRegistryTypeInvalidError(registry_type) + registry_type = REGISTRY_CLASS_FOR_TYPE[registry_type] + module_name, registry_class_type = registry_type.rsplit(".", 1) + config_class_name = f"{registry_class_type}Config" + return import_class(module_name, config_class_name, config_class_name) + + def get_batch_engine_config_from_type(batch_engine_type: str): if batch_engine_type in BATCH_ENGINE_CLASS_FOR_TYPE: batch_engine_type = BATCH_ENGINE_CLASS_FOR_TYPE[batch_engine_type] diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index e019ac71782..a66edc86cda 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -347,9 +347,9 @@ def teardown(repo_config: RepoConfig, repo_path: Path): @log_exceptions_and_usage def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str: """For debugging only: output contents of the metadata registry""" - registry_config = repo_config.get_registry_config() + registry_config = repo_config.registry project = repo_config.project - registry = Registry(registry_config=registry_config, repo_path=repo_path) + registry = Registry(project, registry_config=registry_config, repo_path=repo_path) registry_dict = registry.to_dict(project=project) return json.dumps(registry_dict, indent=2, sort_keys=True) diff --git a/sdk/python/feast/templates/rockset/README.md b/sdk/python/feast/templates/rockset/README.md new file mode 100644 index 00000000000..d4f1ef6faf4 --- /dev/null +++ b/sdk/python/feast/templates/rockset/README.md @@ -0,0 +1,21 @@ +# Feast Quickstart +A quick view of what's in this repository: + +* `data/` contains raw demo parquet data +* `feature_repo/driver_repo.py` contains demo feature definitions +* 
`feature_repo/feature_store.yaml` contains a demo setup configuring where data sources are +* `test_workflow.py` showcases how to run all key Feast commands, including defining, retrieving, and pushing features. + +You can run the overall workflow with `python test_workflow.py`. + +## To move from this into a more production ready workflow: +> See more details in [Running Feast in production](https://docs.feast.dev/how-to-guides/running-feast-in-production) + +1. `feature_store.yaml` points to a local file as a registry. You'll want to set up a remote file (e.g. in S3/GCS) or a + SQL registry. See [registry docs](https://docs.feast.dev/getting-started/concepts/registry) for more details. +2. Set up CI/CD + dev vs staging vs prod environments to automatically update the registry as you change Feast feature definitions. See [docs](https://docs.feast.dev/how-to-guides/running-feast-in-production#1.-automatically-deploying-changes-to-your-feature-definitions). +3. (optional) Regularly scheduled materialization to power low latency feature retrieval (e.g. via Airflow). See [Batch data ingestion](https://docs.feast.dev/getting-started/concepts/data-ingestion#batch-data-ingestion) + for more details. +4. (optional) Deploy feature server instances with `feast serve` to expose endpoints to retrieve online features. + - See [Python feature server](https://docs.feast.dev/reference/feature-servers/python-feature-server) for details. 
+ - Use cases can also directly call the Feast client to fetch features as per [Feature retrieval](https://docs.feast.dev/getting-started/concepts/feature-retrieval) diff --git a/sdk/python/feast/templates/rockset/__init__.py b/sdk/python/feast/templates/rockset/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/templates/rockset/bootstrap.py b/sdk/python/feast/templates/rockset/bootstrap.py new file mode 100644 index 00000000000..a3dc17f18ea --- /dev/null +++ b/sdk/python/feast/templates/rockset/bootstrap.py @@ -0,0 +1,30 @@ +import click + +from feast.file_utils import replace_str_in_file + + +def bootstrap(): + # Bootstrap() will automatically be called from the init_repo() during `feast init` + import pathlib + + repo_path = pathlib.Path(__file__).parent.absolute() / "feature_repo" + config_file = repo_path / "feature_store.yaml" + data_path = repo_path / "data" + data_path.mkdir(exist_ok=True) + + rockset_apikey = click.prompt( + "Rockset Api Key (If blank will be read from ROCKSET_APIKEY in ENV):", + default="", + ) + + rockset_host = click.prompt( + "Rockset Host (If blank will be read from ROCKSET_APISERVER in ENV):", + default="", + ) + + replace_str_in_file(config_file, "ROCKSET_APIKEY", rockset_apikey) + replace_str_in_file(config_file, "ROCKSET_APISERVER", rockset_host) + + +if __name__ == "__main__": + bootstrap() diff --git a/sdk/python/feast/templates/rockset/feature_repo/feature_store.yaml b/sdk/python/feast/templates/rockset/feature_repo/feature_store.yaml new file mode 100644 index 00000000000..57cf8e73bb6 --- /dev/null +++ b/sdk/python/feast/templates/rockset/feature_repo/feature_store.yaml @@ -0,0 +1,8 @@ +project: my_project +registry: registry.db +provider: local +online_store: + type: rockset + api_key: ROCKSET_APIKEY + host: ROCKSET_APISERVER # (api.usw2a1.rockset.com, api.euc1a1.rockset.com, api.use1a1.rockset.com) +entity_key_serialization_version: 2 diff --git a/sdk/python/feast/type_map.py 
b/sdk/python/feast/type_map.py index 78b625aa89c..0188c580215 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -300,7 +300,7 @@ def _type_err(item, dtype): ValueType.DOUBLE: ("double_val", lambda x: x, {float, np.float64}), ValueType.STRING: ("string_val", lambda x: str(x), None), ValueType.BYTES: ("bytes_val", lambda x: x, {bytes}), - ValueType.BOOL: ("bool_val", lambda x: x, {bool, np.bool_}), + ValueType.BOOL: ("bool_val", lambda x: x, {bool, np.bool_, int, np.int_}), } @@ -405,9 +405,14 @@ def _python_value_to_proto_value( if (sample == 0 or sample == 0.0) and feast_value_type != ValueType.BOOL: # Numpy convert 0 to int. However, in the feature view definition, the type of column may be a float. # So, if value is 0, type validation must pass if scalar_types are either int or float. - assert type(sample) in [np.int64, int, np.float64, float] + allowed_types = {np.int64, int, np.float64, float} + assert ( + type(sample) in allowed_types + ), f"Type `{type(sample)}` not in {allowed_types}" else: - assert type(sample) in valid_scalar_types + assert ( + type(sample) in valid_scalar_types + ), f"Type `{type(sample)}` not in {valid_scalar_types}" if feast_value_type == ValueType.BOOL: # ProtoValue does not support conversion of np.bool_ so we need to convert it to support np.bool_. 
return [ diff --git a/sdk/python/feast/ui/package.json b/sdk/python/feast/ui/package.json index 30ef44e4156..75eebe275b8 100644 --- a/sdk/python/feast/ui/package.json +++ b/sdk/python/feast/ui/package.json @@ -6,7 +6,7 @@ "@elastic/datemath": "^5.0.3", "@elastic/eui": "^55.0.1", "@emotion/react": "^11.9.0", - "@feast-dev/feast-ui": "0.29.0", + "@feast-dev/feast-ui": "0.30.0", "@testing-library/jest-dom": "^5.16.4", "@testing-library/react": "^13.2.0", "@testing-library/user-event": "^13.5.0", diff --git a/sdk/python/feast/ui/yarn.lock b/sdk/python/feast/ui/yarn.lock index 61c75e653c8..b43ef7d0871 100644 --- a/sdk/python/feast/ui/yarn.lock +++ b/sdk/python/feast/ui/yarn.lock @@ -1300,10 +1300,10 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" -"@feast-dev/feast-ui@0.29.0": - version "0.29.0" - resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.29.0.tgz#b78070b51c3f83b2b823946b64fea4f223820429" - integrity sha512-XF/C3CcLmQTAUV9vHbW37BEACoNXXbUaMUoWPIJMrZvW6IStoVUlBuA4bx995XSE4gUcZ7j/5SmrOUAAlanL9Q== +"@feast-dev/feast-ui@0.30.0": + version "0.30.0" + resolved "https://registry.yarnpkg.com/@feast-dev/feast-ui/-/feast-ui-0.30.0.tgz#6c68b243d65f8a3a1df029a39f4c382d17a4b272" + integrity sha512-o6YOAhSAHS8nCTZOB8ZTflM8HzJSWMngx4Ruy2EpO7vpMfjoHZu6OnV+ezX7GGqkpsxpTKxykqDNm0M8rtTKPw== dependencies: "@elastic/datemath" "^5.0.3" "@elastic/eui" "^55.0.1" @@ -2881,7 +2881,7 @@ acorn@^7.0.0, acorn@^7.1.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa" integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A== -acorn@^8.2.4, acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.1: +acorn@^8.2.4, acorn@^8.5.0, acorn@^8.7.1: version "8.7.1" resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.7.1.tgz#0197122c843d1bf6d0a5e83220a788f278f63c30" integrity sha512-Xx54uLJQZ19lKygFXOWsscKUbsBZW0CPykPhVQdhIeIwrbPmJzqeASDInc8nKBnp/JT6igTs82qPXz069H8I/A== @@ -4475,9 +4475,9 @@ 
dns-equal@^1.0.0: integrity sha1-s55/HabrCnW6nBcySzR1PEfgZU0= dns-packet@^5.2.2: - version "5.3.1" - resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-5.3.1.tgz#eb94413789daec0f0ebe2fcc230bdc9d7c91b43d" - integrity sha512-spBwIj0TK0Ey3666GwIdWVfUpLyubpU53BTCu8iPn4r4oXd9O14Hjg3EHw3ts2oed77/SeckunUYCyRlSngqHw== + version "5.4.0" + resolved "https://registry.yarnpkg.com/dns-packet/-/dns-packet-5.4.0.tgz#1f88477cf9f27e78a213fb6d118ae38e759a879b" + integrity sha512-EgqGeaBB8hLiHLZtp/IbaDQTL8pZ0+IvwzSHA6d7VyMDM+B9hgddEMa9xjK5oYnw0ci0JQ6g2XCD7/f6cafU6g== dependencies: "@leichtgewicht/ip-codec" "^2.0.1" @@ -4640,10 +4640,10 @@ encodeurl@~1.0.2: resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k= -enhanced-resolve@^5.9.3: - version "5.9.3" - resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.9.3.tgz#44a342c012cbc473254af5cc6ae20ebd0aae5d88" - integrity sha512-Bq9VSor+kjvW3f9/MiiR4eE3XYgOl7/rS8lnSxbRbF3kS0B2r+Y9w5krBWxZgDxASVZbdYrn5wT4j/Wb0J9qow== +enhanced-resolve@^5.10.0: + version "5.12.0" + resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.12.0.tgz#300e1c90228f5b570c4d35babf263f6da7155634" + integrity sha512-QHTXI/sZQmko1cbDoNAa3mJ5qhWUUNAq3vR0/YiD379fWQrcfuoX1+HW2S0MTt7XmoPLapdaDKUtelUSPic7hQ== dependencies: graceful-fs "^4.2.4" tapable "^2.2.0" @@ -10197,10 +10197,10 @@ walker@^1.0.7: dependencies: makeerror "1.0.12" -watchpack@^2.3.1: - version "2.3.1" - resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.3.1.tgz#4200d9447b401156eeca7767ee610f8809bc9d25" - integrity sha512-x0t0JuydIo8qCNctdDrn1OzH/qDzk2+rdCOC3YzumZ42fiMqmQ7T3xQurykYMhYfHaPHTp4ZxAx2NfUo1K6QaA== +watchpack@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.4.0.tgz#fa33032374962c78113f93c7f2fb4c54c9862a5d" + integrity 
sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg== dependencies: glob-to-regexp "^0.4.1" graceful-fs "^4.1.2" @@ -10312,20 +10312,20 @@ webpack-sources@^3.2.3: integrity sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w== webpack@^5.64.4: - version "5.72.1" - resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.72.1.tgz#3500fc834b4e9ba573b9f430b2c0a61e1bb57d13" - integrity sha512-dXG5zXCLspQR4krZVR6QgajnZOjW2K/djHvdcRaDQvsjV9z9vaW6+ja5dZOYbqBBjF6kGXka/2ZyxNdc+8Jung== + version "5.76.1" + resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.76.1.tgz#7773de017e988bccb0f13c7d75ec245f377d295c" + integrity sha512-4+YIK4Abzv8172/SGqObnUjaIHjLEuUasz9EwQj/9xmPPkYJy2Mh03Q/lJfSD3YLzbxy5FeTq5Uw0323Oh6SJQ== dependencies: "@types/eslint-scope" "^3.7.3" "@types/estree" "^0.0.51" "@webassemblyjs/ast" "1.11.1" "@webassemblyjs/wasm-edit" "1.11.1" "@webassemblyjs/wasm-parser" "1.11.1" - acorn "^8.4.1" + acorn "^8.7.1" acorn-import-assertions "^1.7.6" browserslist "^4.14.5" chrome-trace-event "^1.0.2" - enhanced-resolve "^5.9.3" + enhanced-resolve "^5.10.0" es-module-lexer "^0.9.0" eslint-scope "5.1.1" events "^3.2.0" @@ -10338,7 +10338,7 @@ webpack@^5.64.4: schema-utils "^3.1.0" tapable "^2.1.1" terser-webpack-plugin "^5.1.3" - watchpack "^2.3.1" + watchpack "^2.4.0" webpack-sources "^3.2.3" websocket-driver@>=0.5.1, websocket-driver@^0.7.4: diff --git a/sdk/python/feast/ui_server.py b/sdk/python/feast/ui_server.py index 94860bdf739..e750f280ad7 100644 --- a/sdk/python/feast/ui_server.py +++ b/sdk/python/feast/ui_server.py @@ -13,11 +13,9 @@ def get_app( store: "feast.FeatureStore", - get_registry_dump: Callable, project_id: str, registry_ttl_secs: int, - host: str, - port: int, + root_path: str = "", ): app = FastAPI() @@ -62,7 +60,7 @@ def shutdown_event(): "name": "Project", "description": "Test project", "id": project_id, - "registryPath": "/registry", + "registryPath": 
f"{root_path}/registry", } ] } @@ -105,10 +103,8 @@ def start_server( ): app = get_app( store, - get_registry_dump, project_id, registry_ttl_sec, - host, - port, + root_path, ) - uvicorn.run(app, host=host, port=port, root_path=root_path) + uvicorn.run(app, host=host, port=port) diff --git a/sdk/python/requirements/py3.10-ci-requirements.txt b/sdk/python/requirements/py3.10-ci-requirements.txt index c001cbae61b..ae99962f67b 100644 --- a/sdk/python/requirements/py3.10-ci-requirements.txt +++ b/sdk/python/requirements/py3.10-ci-requirements.txt @@ -12,7 +12,7 @@ adlfs==0.5.9 # via feast (setup.py) aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.3 +aiohttp==3.8.4 # via # adlfs # aiobotocore @@ -24,17 +24,25 @@ aiosignal==1.3.1 # via aiohttp alabaster==0.7.13 # via sphinx -altair==4.2.2 +altair==4.2.0 # via great-expectations anyio==3.6.2 # via # httpcore + # jupyter-server # starlette # watchfiles appdirs==1.4.4 # via fissix -appnope==0.1.3 - # via ipython +argon2-cffi==21.3.0 + # via + # jupyter-server + # nbclassic + # notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.2.3 + # via isoduration asn1crypto==1.5.1 # via # oscrypto @@ -55,7 +63,7 @@ attrs==22.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.26.2 +azure-core==1.26.3 # via # adlfs # azure-identity @@ -67,16 +75,20 @@ azure-identity==1.12.0 # via # adlfs # feast (setup.py) -azure-storage-blob==12.14.1 +azure-storage-blob==12.15.0 # via # adlfs # feast (setup.py) -babel==2.11.0 +babel==2.12.1 # via sphinx backcall==0.2.0 # via ipython +beautifulsoup4==4.11.2 + # via nbconvert black==22.12.0 # via feast (setup.py) +bleach==6.0.0 + # via nbconvert boto3==1.20.23 # via # feast (setup.py) @@ -112,6 +124,7 @@ certifi==2022.12.7 # snowflake-connector-python cffi==1.15.1 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -139,7 +152,9 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -coverage[toml]==7.1.0 +comm==0.1.2 + # 
via ipykernel +coverage[toml]==7.2.1 # via pytest-cov cryptography==35.0.0 # via @@ -155,16 +170,18 @@ cryptography==35.0.0 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) -dataclasses==0.6 - # via great-expectations db-dtypes==1.0.5 # via google-cloud-bigquery +debugpy==1.6.6 + # via ipykernel decorator==5.1.1 # via # gcsfs # ipython +defusedxml==0.7.1 + # via nbconvert deprecated==1.2.13 # via redis deprecation==2.1.0 @@ -190,13 +207,13 @@ execnet==1.9.0 # via pytest-xdist executing==1.2.0 # via stack-data -fastapi==0.89.1 +fastapi==0.93.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.2 +fastjsonschema==2.16.3 # via nbformat filelock==3.9.0 # via @@ -208,6 +225,8 @@ fissix==21.11.13 # via bowler flake8==6.0.0 # via feast (setup.py) +fqdn==1.5.1 + # via jsonschema frozenlist==1.3.3 # via # aiohttp @@ -220,6 +239,8 @@ fsspec==2022.1.0 # s3fs gcsfs==2022.1.0 # via feast (setup.py) +geojson==2.5.0 + # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.11.0 @@ -234,9 +255,9 @@ google-api-core[grpc]==2.11.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.74.0 +google-api-python-client==2.80.0 # via firebase-admin -google-auth==2.16.0 +google-auth==2.16.2 # via # gcsfs # google-api-core @@ -248,13 +269,13 @@ google-auth==2.16.0 # kubernetes google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 # via gcsfs -google-cloud-bigquery[pandas]==3.4.2 +google-cloud-bigquery[pandas]==3.6.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 # via feast (setup.py) -google-cloud-bigtable==2.15.0 +google-cloud-bigtable==2.17.0 # via feast (setup.py) google-cloud-core==2.3.2 # via @@ -263,9 +284,9 @@ google-cloud-core==2.3.2 # google-cloud-datastore # 
google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.13.2 +google-cloud-datastore==2.15.0 # via feast (setup.py) -google-cloud-firestore==2.9.1 +google-cloud-firestore==2.10.0 # via firebase-admin google-cloud-storage==2.7.0 # via @@ -284,13 +305,13 @@ googleapis-common-protos[grpc]==1.58.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.14.13 +great-expectations==0.15.50 # via feast (setup.py) greenlet==2.0.2 # via sqlalchemy grpc-google-iam-v1==0.12.6 # via google-cloud-bigtable -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # google-api-core @@ -301,13 +322,13 @@ grpcio==1.51.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) -grpcio-status==1.51.1 +grpcio-status==1.51.3 # via google-api-core -grpcio-testing==1.51.1 +grpcio-testing==1.51.3 # via feast (setup.py) -grpcio-tools==1.51.1 +grpcio-tools==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -315,7 +336,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hiredis==2.1.1 +hiredis==2.2.2 # via feast (setup.py) httpcore==0.16.3 # via httpx @@ -327,11 +348,12 @@ httptools==0.5.0 # via uvicorn httpx==0.23.3 # via feast (setup.py) -identify==2.5.17 +identify==2.5.19 # via pre-commit idna==3.4 # via # anyio + # jsonschema # requests # rfc3986 # snowflake-connector-python @@ -342,20 +364,42 @@ importlib-metadata==6.0.0 # via great-expectations iniconfig==2.0.0 # via pytest -ipython==8.9.0 +ipykernel==6.21.3 + # via + # ipywidgets + # nbclassic + # notebook +ipython==8.11.0 + # via + # great-expectations + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # nbclassic + # notebook +ipywidgets==8.0.4 # via great-expectations isodate==0.6.1 - # via msrest + # via + # azure-storage-blob + # msrest +isoduration==20.11.0 + # via jsonschema isort==5.12.0 # via feast (setup.py) jedi==0.18.2 # via ipython -jinja2==3.0.3 +jinja2==3.1.2 # via # altair # feast 
(setup.py) # great-expectations + # jupyter-server # moto + # nbclassic + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via @@ -364,51 +408,88 @@ jmespath==0.10.0 jsonpatch==1.32 # via great-expectations jsonpointer==2.3 - # via jsonpatch -jsonschema==4.17.3 + # via + # jsonpatch + # jsonschema +jsonschema[format-nongpl]==4.17.3 # via # altair # feast (setup.py) # great-expectations + # jupyter-events # nbformat +jupyter-client==8.0.3 + # via + # ipykernel + # jupyter-server + # nbclassic + # nbclient + # notebook jupyter-core==5.2.0 - # via nbformat + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +jupyter-events==0.6.3 + # via jupyter-server +jupyter-server==2.4.0 + # via + # nbclassic + # notebook-shim +jupyter-server-terminals==0.4.4 + # via jupyter-server +jupyterlab-pygments==0.2.2 + # via nbconvert +jupyterlab-widgets==3.0.5 + # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd +makefun==1.15.1 + # via great-expectations markupsafe==2.1.2 # via # jinja2 - # moto + # nbconvert + # werkzeug +marshmallow==3.19.0 + # via great-expectations matplotlib-inline==0.1.6 - # via ipython + # via + # ipykernel + # ipython mccabe==0.7.0 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==2.0.4 - # via great-expectations +mistune==2.0.5 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==3.1.18 +moto==4.1.4 # via feast (setup.py) -msal==1.20.0 +msal==1.21.0 # via # azure-identity # msal-extensions msal-extensions==1.0.0 # via azure-identity -msgpack==1.0.4 +msgpack==1.0.5 # via cachecontrol msrest==0.7.1 - # via - # azure-storage-blob - # msrestazure + # via msrestazure msrestazure==0.6.4 # via adlfs multidict==6.0.4 @@ -421,7 +502,7 @@ mypy==0.982 # via # feast (setup.py) # sqlalchemy -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via # black # 
mypy @@ -429,11 +510,35 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) +nbclassic==0.5.3 + # via notebook +nbclient==0.7.2 + # via nbconvert +nbconvert==7.2.9 + # via + # jupyter-server + # nbclassic + # notebook nbformat==5.7.3 - # via great-expectations + # via + # great-expectations + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.6 + # via + # ipykernel + # nbclassic + # notebook nodeenv==1.7.0 # via pre-commit -numpy==1.24.1 +notebook==6.5.3 + # via great-expectations +notebook-shim==0.2.2 + # via nbclassic +numpy==1.24.2 # via # altair # db-dtypes @@ -456,6 +561,10 @@ packaging==23.0 # docker # google-cloud-bigquery # great-expectations + # ipykernel + # jupyter-server + # marshmallow + # nbconvert # pytest # redis # sphinx @@ -470,6 +579,8 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert parso==0.8.3 # via jedi partd==1.3.0 @@ -482,9 +593,9 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.12.1 +pip-tools==6.12.3 # via feast (setup.py) -platformdirs==2.6.2 +platformdirs==3.1.1 # via # black # jupyter-core @@ -495,9 +606,14 @@ ply==3.11 # via thriftpy2 portalocker==2.7.0 # via msal-extensions -pre-commit==3.0.2 +pre-commit==3.1.1 # via feast (setup.py) -prompt-toolkit==3.0.36 +prometheus-client==0.16.0 + # via + # jupyter-server + # nbclassic + # notebook +prompt-toolkit==3.0.38 # via ipython proto-plus==1.22.2 # via @@ -507,7 +623,7 @@ proto-plus==1.22.2 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) # google-api-core @@ -525,11 +641,15 @@ protobuf==4.21.12 # mypy-protobuf # proto-plus psutil==5.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.5 # via feast (setup.py) ptyprocess==0.7.0 - # via pexpect + # via + # pexpect + # terminado 
pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -558,16 +678,18 @@ pycparser==2.21 # via cffi pycryptodomex==3.17 # via snowflake-connector-python -pydantic==1.10.4 +pydantic==1.10.6 # via # fastapi # feast (setup.py) + # great-expectations pyflakes==3.0.1 # via flake8 pygments==2.14.0 # via # feast (setup.py) # ipython + # nbconvert # sphinx pyjwt[crypto]==2.6.0 # via @@ -584,7 +706,7 @@ pyopenssl==22.0.0 # via # feast (setup.py) # snowflake-connector-python -pyparsing==2.4.7 +pyparsing==3.0.9 # via # great-expectations # httplib2 @@ -592,9 +714,9 @@ pyproject-hooks==1.0.0 # via build pyrsistent==0.19.3 # via jsonschema -pyspark==3.3.1 +pyspark==3.3.2 # via feast (setup.py) -pytest==7.2.1 +pytest==7.2.2 # via # feast (setup.py) # pytest-benchmark @@ -616,24 +738,27 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.1.0 +pytest-xdist==3.2.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal + # arrow # botocore # google-cloud-bigquery # great-expectations + # jupyter-client # kubernetes # moto # pandas -python-dotenv==0.21.1 + # rockset +python-dotenv==1.0.0 # via uvicorn +python-json-logger==2.0.7 + # via jupyter-events pytz==2022.7.1 # via - # babel # great-expectations - # moto # pandas # snowflake-connector-python # trino @@ -643,11 +768,22 @@ pyyaml==6.0 # via # dask # feast (setup.py) + # jupyter-events # kubernetes # pre-commit + # responses # uvicorn +pyzmq==25.0.0 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook redis==4.2.2 # via feast (setup.py) +regex==2022.10.31 + # via feast (setup.py) requests==2.28.2 # via # adal @@ -656,6 +792,7 @@ requests==2.28.2 # azure-datalake-store # cachecontrol # docker + # feast (setup.py) # gcsfs # google-api-core # google-cloud-bigquery @@ -675,10 +812,20 @@ requests-oauthlib==1.3.1 # google-auth-oauthlib # kubernetes # msrest -responses==0.22.0 +responses==0.23.0 # via moto +rfc3339-validator==0.1.4 + # via + # 
jsonschema + # jupyter-events rfc3986[idna2008]==1.5.0 # via httpx +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rockset==1.0.5 + # via feast (setup.py) rsa==4.9 # via google-auth ruamel-yaml==0.17.17 @@ -687,12 +834,19 @@ s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.10.0 +scipy==1.10.1 # via great-expectations +send2trash==1.8.0 + # via + # jupyter-server + # nbclassic + # notebook six==1.16.0 # via + # asttokens # azure-core # azure-identity + # bleach # cassandra-driver # geomet # google-auth @@ -704,6 +858,7 @@ six==1.16.0 # msrestazure # pandavro # python-dateutil + # rfc3339-validator # thriftpy2 sniffio==1.3.0 # via @@ -714,13 +869,15 @@ snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==2.9.0 # via feast (setup.py) +soupsieve==2.4 + # via beautifulsoup4 sphinx==6.1.3 # via feast (setup.py) sphinxcontrib-applehelp==1.0.4 # via sphinx sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx @@ -734,22 +891,26 @@ sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy stack-data==0.6.2 # via ipython -starlette==0.22.0 +starlette==0.25.0 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) -termcolor==2.2.0 - # via great-expectations +terminado==0.17.1 + # via + # jupyter-server + # jupyter-server-terminals + # nbclassic + # notebook testcontainers==3.7.1 # via feast (setup.py) thriftpy2==0.4.16 # via happybase +tinycss2==1.2.1 + # via nbconvert toml==0.10.2 - # via - # feast (setup.py) - # responses + # via feast (setup.py) tomli==2.0.1 # via # black @@ -763,51 +924,68 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.1 +tornado==6.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook + # terminado +tqdm==4.65.0 # via # feast (setup.py) # great-expectations traitlets==5.9.0 # via + # comm + # ipykernel 
# ipython + # ipywidgets + # jupyter-client # jupyter-core + # jupyter-events + # jupyter-server # matplotlib-inline + # nbclassic + # nbclient + # nbconvert # nbformat -trino==0.321.0 + # notebook +trino==0.322.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-docutils==0.19.1.2 - # via types-setuptools types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.0.19.2 +types-pymysql==1.0.19.5 # via feast (setup.py) -types-pyopenssl==23.0.0.2 +types-pyopenssl==23.0.0.4 # via types-redis -types-python-dateutil==2.8.19.6 +types-python-dateutil==2.8.19.10 # via feast (setup.py) -types-pytz==2022.7.1.0 +types-pytz==2022.7.1.2 # via feast (setup.py) -types-pyyaml==6.0.12.3 - # via feast (setup.py) -types-redis==4.4.0.4 +types-pyyaml==6.0.12.8 + # via + # feast (setup.py) + # responses +types-redis==4.5.1.4 # via feast (setup.py) -types-requests==2.28.11.8 +types-requests==2.28.11.15 # via feast (setup.py) -types-setuptools==65.7.0.3 +types-setuptools==67.6.0.0 # via feast (setup.py) -types-tabulate==0.9.0.0 +types-tabulate==0.9.0.1 # via feast (setup.py) -types-toml==0.10.8.1 - # via responses -types-urllib3==1.26.25.4 +types-urllib3==1.26.25.8 # via types-requests -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # azure-core + # azure-storage-blob # great-expectations # mypy # pydantic @@ -819,6 +997,8 @@ tzlocal==4.2 # via # great-expectations # trino +uri-template==1.2.0 + # via jsonschema uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.14 @@ -831,12 +1011,13 @@ urllib3==1.26.14 # minio # requests # responses + # rockset # snowflake-connector-python -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn -virtualenv==20.17.1 +virtualenv==20.20.0 # via pre-commit volatile==2.1.0 # via bowler @@ -844,17 +1025,26 @@ watchfiles==0.18.1 # via uvicorn wcwidth==0.2.6 # via prompt-toolkit -websocket-client==1.5.0 +webcolors==1.12 + # via jsonschema 
+webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.5.1 # via # docker + # jupyter-server # kubernetes websockets==10.4 # via uvicorn -werkzeug==2.1.2 +werkzeug==2.2.3 # via moto wheel==0.38.4 # via pip-tools -wrapt==1.14.1 +widgetsnbextension==4.0.5 + # via ipywidgets +wrapt==1.15.0 # via # aiobotocore # deprecated @@ -863,7 +1053,7 @@ xmltodict==0.13.0 # via moto yarl==1.8.2 # via aiohttp -zipp==3.12.0 +zipp==3.15.0 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.10-requirements.txt b/sdk/python/requirements/py3.10-requirements.txt index df32d0ff479..63d21e7fa27 100644 --- a/sdk/python/requirements/py3.10-requirements.txt +++ b/sdk/python/requirements/py3.10-requirements.txt @@ -17,14 +17,12 @@ attrs==22.2.0 # jsonschema bowler==0.9.0 # via feast (setup.py) -cachetools==5.3.0 - # via google-auth certifi==2022.12.7 # via # httpcore # httpx # requests -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 # via requests click==8.1.3 # via @@ -37,35 +35,27 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.89.1 +fastapi==0.93.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.1.0 +fsspec==2023.3.0 # via dask -google-api-core==2.11.0 - # via feast (setup.py) -google-auth==2.16.0 - # via google-api-core -googleapis-common-protos==1.58.0 - # via - # feast (setup.py) - # google-api-core greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -94,11 +84,11 @@ mmh3==3.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.991 +mypy==1.1.1 # via sqlalchemy -mypy-extensions==0.4.3 
+mypy-extensions==1.0.0 # via mypy -numpy==1.24.1 +numpy==1.24.2 # via # feast (setup.py) # pandas @@ -116,22 +106,14 @@ partd==1.3.0 # via dask proto-plus==1.22.2 # via feast (setup.py) -protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) - # google-api-core - # googleapis-common-protos # grpcio-reflection # proto-plus -pyarrow==8.0.0 +pyarrow==11.0.0 # via feast (setup.py) -pyasn1==0.4.8 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.2.8 - # via google-auth -pydantic==1.10.4 +pydantic==1.10.6 # via # fastapi # feast (setup.py) @@ -141,7 +123,7 @@ pyrsistent==0.19.3 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.21.1 +python-dotenv==1.0.0 # via uvicorn pytz==2022.7.1 # via pandas @@ -151,14 +133,11 @@ pyyaml==6.0 # feast (setup.py) # uvicorn requests==2.28.2 - # via google-api-core + # via feast (setup.py) rfc3986[idna2008]==1.5.0 # via httpx -rsa==4.9 - # via google-auth six==1.16.0 # via - # google-auth # pandavro # python-dateutil sniffio==1.3.0 @@ -170,11 +149,11 @@ sqlalchemy[mypy]==1.4.46 # via feast (setup.py) sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy -starlette==0.22.0 +starlette==0.25.0 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -184,18 +163,18 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.1 +tqdm==4.65.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # mypy # pydantic # sqlalchemy2-stubs urllib3==1.26.14 # via requests -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn diff --git a/sdk/python/requirements/py3.8-ci-requirements.txt b/sdk/python/requirements/py3.8-ci-requirements.txt index ba05d8fe4c2..b1476660b68 100644 --- a/sdk/python/requirements/py3.8-ci-requirements.txt +++ b/sdk/python/requirements/py3.8-ci-requirements.txt @@ -12,7 +12,7 @@ adlfs==0.5.9 # via feast 
(setup.py) aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.3 +aiohttp==3.8.4 # via # adlfs # aiobotocore @@ -24,17 +24,25 @@ aiosignal==1.3.1 # via aiohttp alabaster==0.7.13 # via sphinx -altair==4.2.2 +altair==4.2.0 # via great-expectations anyio==3.6.2 # via # httpcore + # jupyter-server # starlette # watchfiles appdirs==1.4.4 # via fissix -appnope==0.1.3 - # via ipython +argon2-cffi==21.3.0 + # via + # jupyter-server + # nbclassic + # notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.2.3 + # via isoduration asn1crypto==1.5.1 # via # oscrypto @@ -55,7 +63,7 @@ attrs==22.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.26.2 +azure-core==1.26.3 # via # adlfs # azure-identity @@ -67,11 +75,11 @@ azure-identity==1.12.0 # via # adlfs # feast (setup.py) -azure-storage-blob==12.14.1 +azure-storage-blob==12.15.0 # via # adlfs # feast (setup.py) -babel==2.11.0 +babel==2.12.1 # via sphinx backcall==0.2.0 # via ipython @@ -79,8 +87,12 @@ backports-zoneinfo==0.2.1 # via # pytz-deprecation-shim # tzlocal +beautifulsoup4==4.11.2 + # via nbconvert black==22.12.0 # via feast (setup.py) +bleach==6.0.0 + # via nbconvert boto3==1.20.23 # via # feast (setup.py) @@ -116,6 +128,7 @@ certifi==2022.12.7 # snowflake-connector-python cffi==1.15.1 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -143,7 +156,9 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -coverage[toml]==7.1.0 +comm==0.1.2 + # via ipykernel +coverage[toml]==7.2.1 # via pytest-cov cryptography==35.0.0 # via @@ -159,16 +174,18 @@ cryptography==35.0.0 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) -dataclasses==0.6 - # via great-expectations db-dtypes==1.0.5 # via google-cloud-bigquery +debugpy==1.6.6 + # via ipykernel decorator==5.1.1 # via # gcsfs # ipython +defusedxml==0.7.1 + # via nbconvert deprecated==1.2.13 # via redis deprecation==2.1.0 @@ -194,13 
+211,13 @@ execnet==1.9.0 # via pytest-xdist executing==1.2.0 # via stack-data -fastapi==0.89.1 +fastapi==0.94.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.2 +fastjsonschema==2.16.3 # via nbformat filelock==3.9.0 # via @@ -212,6 +229,8 @@ fissix==21.11.13 # via bowler flake8==6.0.0 # via feast (setup.py) +fqdn==1.5.1 + # via jsonschema frozenlist==1.3.3 # via # aiohttp @@ -224,6 +243,8 @@ fsspec==2022.1.0 # s3fs gcsfs==2022.1.0 # via feast (setup.py) +geojson==2.5.0 + # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.11.0 @@ -238,9 +259,9 @@ google-api-core[grpc]==2.11.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.74.0 +google-api-python-client==2.80.0 # via firebase-admin -google-auth==2.16.0 +google-auth==2.16.2 # via # gcsfs # google-api-core @@ -252,13 +273,13 @@ google-auth==2.16.0 # kubernetes google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 # via gcsfs -google-cloud-bigquery[pandas]==3.4.2 +google-cloud-bigquery[pandas]==3.6.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 # via feast (setup.py) -google-cloud-bigtable==2.15.0 +google-cloud-bigtable==2.17.0 # via feast (setup.py) google-cloud-core==2.3.2 # via @@ -267,9 +288,9 @@ google-cloud-core==2.3.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.13.2 +google-cloud-datastore==2.15.0 # via feast (setup.py) -google-cloud-firestore==2.9.1 +google-cloud-firestore==2.10.0 # via firebase-admin google-cloud-storage==2.7.0 # via @@ -288,13 +309,13 @@ googleapis-common-protos[grpc]==1.58.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.14.13 +great-expectations==0.15.50 # via feast (setup.py) greenlet==2.0.2 # via sqlalchemy grpc-google-iam-v1==0.12.6 # 
via google-cloud-bigtable -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # google-api-core @@ -305,13 +326,13 @@ grpcio==1.51.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) -grpcio-status==1.51.1 +grpcio-status==1.51.3 # via google-api-core -grpcio-testing==1.51.1 +grpcio-testing==1.51.3 # via feast (setup.py) -grpcio-tools==1.51.1 +grpcio-tools==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -319,7 +340,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hiredis==2.1.1 +hiredis==2.2.2 # via feast (setup.py) httpcore==0.16.3 # via httpx @@ -331,11 +352,12 @@ httptools==0.5.0 # via uvicorn httpx==0.23.3 # via feast (setup.py) -identify==2.5.17 +identify==2.5.19 # via pre-commit idna==3.4 # via # anyio + # jsonschema # requests # rfc3986 # snowflake-connector-python @@ -345,25 +367,49 @@ imagesize==1.4.1 importlib-metadata==6.0.0 # via # great-expectations + # jupyter-client + # nbconvert # sphinx -importlib-resources==5.10.2 +importlib-resources==5.12.0 # via jsonschema iniconfig==2.0.0 # via pytest -ipython==8.9.0 +ipykernel==6.21.3 + # via + # ipywidgets + # nbclassic + # notebook +ipython==8.11.0 + # via + # great-expectations + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # nbclassic + # notebook +ipywidgets==8.0.4 # via great-expectations isodate==0.6.1 - # via msrest + # via + # azure-storage-blob + # msrest +isoduration==20.11.0 + # via jsonschema isort==5.12.0 # via feast (setup.py) jedi==0.18.2 # via ipython -jinja2==3.0.3 +jinja2==3.1.2 # via # altair # feast (setup.py) # great-expectations + # jupyter-server # moto + # nbclassic + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via @@ -372,51 +418,88 @@ jmespath==0.10.0 jsonpatch==1.32 # via great-expectations jsonpointer==2.3 - # via jsonpatch -jsonschema==4.17.3 + # via + # jsonpatch + # jsonschema +jsonschema[format-nongpl]==4.17.3 # via # altair # feast (setup.py) # 
great-expectations + # jupyter-events # nbformat +jupyter-client==8.0.3 + # via + # ipykernel + # jupyter-server + # nbclassic + # nbclient + # notebook jupyter-core==5.2.0 - # via nbformat + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +jupyter-events==0.6.3 + # via jupyter-server +jupyter-server==2.4.0 + # via + # nbclassic + # notebook-shim +jupyter-server-terminals==0.4.4 + # via jupyter-server +jupyterlab-pygments==0.2.2 + # via nbconvert +jupyterlab-widgets==3.0.5 + # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd +makefun==1.15.1 + # via great-expectations markupsafe==2.1.2 # via # jinja2 - # moto + # nbconvert + # werkzeug +marshmallow==3.19.0 + # via great-expectations matplotlib-inline==0.1.6 - # via ipython + # via + # ipykernel + # ipython mccabe==0.7.0 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==2.0.4 - # via great-expectations +mistune==2.0.5 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==3.1.18 +moto==4.1.4 # via feast (setup.py) -msal==1.20.0 +msal==1.21.0 # via # azure-identity # msal-extensions msal-extensions==1.0.0 # via azure-identity -msgpack==1.0.4 +msgpack==1.0.5 # via cachecontrol msrest==0.7.1 - # via - # azure-storage-blob - # msrestazure + # via msrestazure msrestazure==0.6.4 # via adlfs multidict==6.0.4 @@ -429,7 +512,7 @@ mypy==0.982 # via # feast (setup.py) # sqlalchemy -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via # black # mypy @@ -437,11 +520,35 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) +nbclassic==0.5.3 + # via notebook +nbclient==0.7.2 + # via nbconvert +nbconvert==7.2.9 + # via + # jupyter-server + # nbclassic + # notebook nbformat==5.7.3 - # via great-expectations + # via + # great-expectations + # jupyter-server + # nbclassic + # nbclient + 
# nbconvert + # notebook +nest-asyncio==1.5.6 + # via + # ipykernel + # nbclassic + # notebook nodeenv==1.7.0 # via pre-commit -numpy==1.24.1 +notebook==6.5.3 + # via great-expectations +notebook-shim==0.2.2 + # via nbclassic +numpy==1.24.2 # via # altair # db-dtypes @@ -464,6 +571,10 @@ packaging==23.0 # docker # google-cloud-bigquery # great-expectations + # ipykernel + # jupyter-server + # marshmallow + # nbconvert # pytest # redis # sphinx @@ -478,6 +589,8 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert parso==0.8.3 # via jedi partd==1.3.0 @@ -490,11 +603,11 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.12.1 +pip-tools==6.12.3 # via feast (setup.py) pkgutil-resolve-name==1.3.10 # via jsonschema -platformdirs==2.6.2 +platformdirs==3.1.1 # via # black # jupyter-core @@ -505,9 +618,14 @@ ply==3.11 # via thriftpy2 portalocker==2.7.0 # via msal-extensions -pre-commit==3.0.2 +pre-commit==3.1.1 # via feast (setup.py) -prompt-toolkit==3.0.36 +prometheus-client==0.16.0 + # via + # jupyter-server + # nbclassic + # notebook +prompt-toolkit==3.0.38 # via ipython proto-plus==1.22.2 # via @@ -517,7 +635,7 @@ proto-plus==1.22.2 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) # google-api-core @@ -535,11 +653,15 @@ protobuf==4.21.12 # mypy-protobuf # proto-plus psutil==5.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.5 # via feast (setup.py) ptyprocess==0.7.0 - # via pexpect + # via + # pexpect + # terminado pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -568,16 +690,18 @@ pycparser==2.21 # via cffi pycryptodomex==3.17 # via snowflake-connector-python -pydantic==1.10.4 +pydantic==1.10.6 # via # fastapi # feast (setup.py) + # great-expectations pyflakes==3.0.1 # via flake8 pygments==2.14.0 # via # feast (setup.py) # ipython + # nbconvert 
# sphinx pyjwt[crypto]==2.6.0 # via @@ -594,7 +718,7 @@ pyopenssl==22.0.0 # via # feast (setup.py) # snowflake-connector-python -pyparsing==2.4.7 +pyparsing==3.0.9 # via # great-expectations # httplib2 @@ -602,9 +726,9 @@ pyproject-hooks==1.0.0 # via build pyrsistent==0.19.3 # via jsonschema -pyspark==3.3.1 +pyspark==3.3.2 # via feast (setup.py) -pytest==7.2.1 +pytest==7.2.2 # via # feast (setup.py) # pytest-benchmark @@ -626,24 +750,28 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.1.0 +pytest-xdist==3.2.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal + # arrow # botocore # google-cloud-bigquery # great-expectations + # jupyter-client # kubernetes # moto # pandas -python-dotenv==0.21.1 + # rockset +python-dotenv==1.0.0 # via uvicorn +python-json-logger==2.0.7 + # via jupyter-events pytz==2022.7.1 # via # babel # great-expectations - # moto # pandas # snowflake-connector-python # trino @@ -653,11 +781,22 @@ pyyaml==6.0 # via # dask # feast (setup.py) + # jupyter-events # kubernetes # pre-commit + # responses # uvicorn +pyzmq==25.0.0 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook redis==4.2.2 # via feast (setup.py) +regex==2022.10.31 + # via feast (setup.py) requests==2.28.2 # via # adal @@ -666,6 +805,7 @@ requests==2.28.2 # azure-datalake-store # cachecontrol # docker + # feast (setup.py) # gcsfs # google-api-core # google-cloud-bigquery @@ -685,10 +825,20 @@ requests-oauthlib==1.3.1 # google-auth-oauthlib # kubernetes # msrest -responses==0.22.0 +responses==0.23.0 # via moto +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events rfc3986[idna2008]==1.5.0 # via httpx +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rockset==1.0.5 + # via feast (setup.py) rsa==4.9 # via google-auth ruamel-yaml==0.17.17 @@ -699,12 +849,19 @@ s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.10.0 +scipy==1.10.1 
# via great-expectations +send2trash==1.8.0 + # via + # jupyter-server + # nbclassic + # notebook six==1.16.0 # via + # asttokens # azure-core # azure-identity + # bleach # cassandra-driver # geomet # google-auth @@ -716,6 +873,7 @@ six==1.16.0 # msrestazure # pandavro # python-dateutil + # rfc3339-validator # thriftpy2 sniffio==1.3.0 # via @@ -726,13 +884,15 @@ snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==2.9.0 # via feast (setup.py) +soupsieve==2.4 + # via beautifulsoup4 sphinx==6.1.3 # via feast (setup.py) sphinxcontrib-applehelp==1.0.4 # via sphinx sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx @@ -746,22 +906,26 @@ sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy stack-data==0.6.2 # via ipython -starlette==0.22.0 +starlette==0.26.0.post1 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) -termcolor==2.2.0 - # via great-expectations +terminado==0.17.1 + # via + # jupyter-server + # jupyter-server-terminals + # nbclassic + # notebook testcontainers==3.7.1 # via feast (setup.py) thriftpy2==0.4.16 # via happybase +tinycss2==1.2.1 + # via nbconvert toml==0.10.2 - # via - # feast (setup.py) - # responses + # via feast (setup.py) tomli==2.0.1 # via # black @@ -775,52 +939,69 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.1 +tornado==6.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook + # terminado +tqdm==4.65.0 # via # feast (setup.py) # great-expectations traitlets==5.9.0 # via + # comm + # ipykernel # ipython + # ipywidgets + # jupyter-client # jupyter-core + # jupyter-events + # jupyter-server # matplotlib-inline + # nbclassic + # nbclient + # nbconvert # nbformat -trino==0.321.0 + # notebook +trino==0.322.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-docutils==0.19.1.2 - # via types-setuptools 
types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.0.19.2 +types-pymysql==1.0.19.5 # via feast (setup.py) -types-pyopenssl==23.0.0.2 +types-pyopenssl==23.0.0.4 # via types-redis -types-python-dateutil==2.8.19.6 +types-python-dateutil==2.8.19.10 # via feast (setup.py) -types-pytz==2022.7.1.0 +types-pytz==2022.7.1.2 # via feast (setup.py) -types-pyyaml==6.0.12.3 - # via feast (setup.py) -types-redis==4.4.0.4 +types-pyyaml==6.0.12.8 + # via + # feast (setup.py) + # responses +types-redis==4.5.1.4 # via feast (setup.py) -types-requests==2.28.11.8 +types-requests==2.28.11.15 # via feast (setup.py) -types-setuptools==65.7.0.3 +types-setuptools==67.6.0.0 # via feast (setup.py) -types-tabulate==0.9.0.0 +types-tabulate==0.9.0.1 # via feast (setup.py) -types-toml==0.10.8.1 - # via responses -types-urllib3==1.26.25.4 +types-urllib3==1.26.25.8 # via types-requests -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # aioitertools # azure-core + # azure-storage-blob # black # great-expectations # mypy @@ -834,6 +1015,8 @@ tzlocal==4.2 # via # great-expectations # trino +uri-template==1.2.0 + # via jsonschema uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.14 @@ -846,12 +1029,13 @@ urllib3==1.26.14 # minio # requests # responses + # rockset # snowflake-connector-python -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn -virtualenv==20.17.1 +virtualenv==20.20.0 # via pre-commit volatile==2.1.0 # via bowler @@ -859,17 +1043,26 @@ watchfiles==0.18.1 # via uvicorn wcwidth==0.2.6 # via prompt-toolkit -websocket-client==1.5.0 +webcolors==1.12 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.5.1 # via # docker + # jupyter-server # kubernetes websockets==10.4 # via uvicorn -werkzeug==2.1.2 +werkzeug==2.2.3 # via moto wheel==0.38.4 # via pip-tools -wrapt==1.14.1 +widgetsnbextension==4.0.5 + # via ipywidgets +wrapt==1.15.0 # via # 
aiobotocore # deprecated @@ -878,7 +1071,7 @@ xmltodict==0.13.0 # via moto yarl==1.8.2 # via aiohttp -zipp==3.12.0 +zipp==3.15.0 # via # importlib-metadata # importlib-resources diff --git a/sdk/python/requirements/py3.8-requirements.txt b/sdk/python/requirements/py3.8-requirements.txt index 0f040f9a6fc..ca09b953c83 100644 --- a/sdk/python/requirements/py3.8-requirements.txt +++ b/sdk/python/requirements/py3.8-requirements.txt @@ -17,14 +17,12 @@ attrs==22.2.0 # jsonschema bowler==0.9.0 # via feast (setup.py) -cachetools==5.3.0 - # via google-auth certifi==2022.12.7 # via # httpcore # httpx # requests -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 # via requests click==8.1.3 # via @@ -37,35 +35,27 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.89.1 +fastapi==0.94.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.1.0 +fsspec==2023.3.0 # via dask -google-api-core==2.11.0 - # via feast (setup.py) -google-auth==2.16.0 - # via google-api-core -googleapis-common-protos==1.58.0 - # via - # feast (setup.py) - # google-api-core greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -82,7 +72,7 @@ idna==3.4 # anyio # requests # rfc3986 -importlib-resources==5.10.2 +importlib-resources==5.12.0 # via jsonschema jinja2==3.1.2 # via feast (setup.py) @@ -96,11 +86,11 @@ mmh3==3.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.991 +mypy==1.1.1 # via sqlalchemy -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via mypy -numpy==1.24.1 +numpy==1.24.2 # via # feast (setup.py) # pandas @@ -120,22 +110,14 @@ pkgutil-resolve-name==1.3.10 # via jsonschema proto-plus==1.22.2 # via feast (setup.py) 
-protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) - # google-api-core - # googleapis-common-protos # grpcio-reflection # proto-plus -pyarrow==8.0.0 +pyarrow==11.0.0 # via feast (setup.py) -pyasn1==0.4.8 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.2.8 - # via google-auth -pydantic==1.10.4 +pydantic==1.10.6 # via # fastapi # feast (setup.py) @@ -145,7 +127,7 @@ pyrsistent==0.19.3 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.21.1 +python-dotenv==1.0.0 # via uvicorn pytz==2022.7.1 # via pandas @@ -155,14 +137,11 @@ pyyaml==6.0 # feast (setup.py) # uvicorn requests==2.28.2 - # via google-api-core + # via feast (setup.py) rfc3986[idna2008]==1.5.0 # via httpx -rsa==4.9 - # via google-auth six==1.16.0 # via - # google-auth # pandavro # python-dateutil sniffio==1.3.0 @@ -174,11 +153,11 @@ sqlalchemy[mypy]==1.4.46 # via feast (setup.py) sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy -starlette==0.22.0 +starlette==0.26.0.post1 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -188,11 +167,11 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.1 +tqdm==4.65.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # mypy # pydantic @@ -200,7 +179,7 @@ typing-extensions==4.4.0 # starlette urllib3==1.26.14 # via requests -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn @@ -210,5 +189,5 @@ watchfiles==0.18.1 # via uvicorn websockets==10.4 # via uvicorn -zipp==3.12.0 +zipp==3.15.0 # via importlib-resources diff --git a/sdk/python/requirements/py3.9-ci-requirements.txt b/sdk/python/requirements/py3.9-ci-requirements.txt index 1ec1c03fd13..9b8dbf9353d 100644 --- a/sdk/python/requirements/py3.9-ci-requirements.txt +++ b/sdk/python/requirements/py3.9-ci-requirements.txt @@ -12,7 +12,7 @@ adlfs==0.5.9 # via feast (setup.py) 
aiobotocore==2.1.2 # via s3fs -aiohttp==3.8.3 +aiohttp==3.8.4 # via # adlfs # aiobotocore @@ -24,17 +24,25 @@ aiosignal==1.3.1 # via aiohttp alabaster==0.7.13 # via sphinx -altair==4.2.2 +altair==4.2.0 # via great-expectations anyio==3.6.2 # via # httpcore + # jupyter-server # starlette # watchfiles appdirs==1.4.4 # via fissix -appnope==0.1.3 - # via ipython +argon2-cffi==21.3.0 + # via + # jupyter-server + # nbclassic + # notebook +argon2-cffi-bindings==21.2.0 + # via argon2-cffi +arrow==1.2.3 + # via isoduration asn1crypto==1.5.1 # via # oscrypto @@ -55,7 +63,7 @@ attrs==22.2.0 # pytest avro==1.10.0 # via feast (setup.py) -azure-core==1.26.2 +azure-core==1.26.3 # via # adlfs # azure-identity @@ -67,16 +75,20 @@ azure-identity==1.12.0 # via # adlfs # feast (setup.py) -azure-storage-blob==12.14.1 +azure-storage-blob==12.15.0 # via # adlfs # feast (setup.py) -babel==2.11.0 +babel==2.12.1 # via sphinx backcall==0.2.0 # via ipython +beautifulsoup4==4.11.2 + # via nbconvert black==22.12.0 # via feast (setup.py) +bleach==6.0.0 + # via nbconvert boto3==1.20.23 # via # feast (setup.py) @@ -112,6 +124,7 @@ certifi==2022.12.7 # snowflake-connector-python cffi==1.15.1 # via + # argon2-cffi-bindings # azure-datalake-store # cryptography # snowflake-connector-python @@ -139,7 +152,9 @@ colorama==0.4.6 # via # feast (setup.py) # great-expectations -coverage[toml]==7.1.0 +comm==0.1.2 + # via ipykernel +coverage[toml]==7.2.1 # via pytest-cov cryptography==35.0.0 # via @@ -155,16 +170,18 @@ cryptography==35.0.0 # snowflake-connector-python # types-pyopenssl # types-redis -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) -dataclasses==0.6 - # via great-expectations db-dtypes==1.0.5 # via google-cloud-bigquery +debugpy==1.6.6 + # via ipykernel decorator==5.1.1 # via # gcsfs # ipython +defusedxml==0.7.1 + # via nbconvert deprecated==1.2.13 # via redis deprecation==2.1.0 @@ -190,13 +207,13 @@ execnet==1.9.0 # via pytest-xdist executing==1.2.0 # via stack-data -fastapi==0.89.1 
+fastapi==0.93.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro -fastjsonschema==2.16.2 +fastjsonschema==2.16.3 # via nbformat filelock==3.9.0 # via @@ -208,6 +225,8 @@ fissix==21.11.13 # via bowler flake8==6.0.0 # via feast (setup.py) +fqdn==1.5.1 + # via jsonschema frozenlist==1.3.3 # via # aiohttp @@ -220,6 +239,8 @@ fsspec==2022.1.0 # s3fs gcsfs==2022.1.0 # via feast (setup.py) +geojson==2.5.0 + # via rockset geomet==0.2.1.post1 # via cassandra-driver google-api-core[grpc]==2.11.0 @@ -234,9 +255,9 @@ google-api-core[grpc]==2.11.0 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-api-python-client==2.74.0 +google-api-python-client==2.80.0 # via firebase-admin -google-auth==2.16.0 +google-auth==2.16.2 # via # gcsfs # google-api-core @@ -248,13 +269,13 @@ google-auth==2.16.0 # kubernetes google-auth-httplib2==0.1.0 # via google-api-python-client -google-auth-oauthlib==0.8.0 +google-auth-oauthlib==1.0.0 # via gcsfs -google-cloud-bigquery[pandas]==3.4.2 +google-cloud-bigquery[pandas]==3.6.0 # via feast (setup.py) -google-cloud-bigquery-storage==2.18.1 +google-cloud-bigquery-storage==2.19.0 # via feast (setup.py) -google-cloud-bigtable==2.15.0 +google-cloud-bigtable==2.17.0 # via feast (setup.py) google-cloud-core==2.3.2 # via @@ -263,9 +284,9 @@ google-cloud-core==2.3.2 # google-cloud-datastore # google-cloud-firestore # google-cloud-storage -google-cloud-datastore==2.13.2 +google-cloud-datastore==2.15.0 # via feast (setup.py) -google-cloud-firestore==2.9.1 +google-cloud-firestore==2.10.0 # via firebase-admin google-cloud-storage==2.7.0 # via @@ -284,13 +305,13 @@ googleapis-common-protos[grpc]==1.58.0 # google-api-core # grpc-google-iam-v1 # grpcio-status -great-expectations==0.14.13 +great-expectations==0.15.50 # via feast (setup.py) greenlet==2.0.2 # via sqlalchemy grpc-google-iam-v1==0.12.6 # via google-cloud-bigtable -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # 
google-api-core @@ -301,13 +322,13 @@ grpcio==1.51.1 # grpcio-status # grpcio-testing # grpcio-tools -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) -grpcio-status==1.51.1 +grpcio-status==1.51.3 # via google-api-core -grpcio-testing==1.51.1 +grpcio-testing==1.51.3 # via feast (setup.py) -grpcio-tools==1.51.1 +grpcio-tools==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -315,7 +336,7 @@ h11==0.14.0 # uvicorn happybase==1.2.0 # via feast (setup.py) -hiredis==2.1.1 +hiredis==2.2.2 # via feast (setup.py) httpcore==0.16.3 # via httpx @@ -327,11 +348,12 @@ httptools==0.5.0 # via uvicorn httpx==0.23.3 # via feast (setup.py) -identify==2.5.17 +identify==2.5.19 # via pre-commit idna==3.4 # via # anyio + # jsonschema # requests # rfc3986 # snowflake-connector-python @@ -341,23 +363,47 @@ imagesize==1.4.1 importlib-metadata==6.0.0 # via # great-expectations + # jupyter-client + # nbconvert # sphinx iniconfig==2.0.0 # via pytest -ipython==8.9.0 +ipykernel==6.21.3 + # via + # ipywidgets + # nbclassic + # notebook +ipython==8.11.0 + # via + # great-expectations + # ipykernel + # ipywidgets +ipython-genutils==0.2.0 + # via + # nbclassic + # notebook +ipywidgets==8.0.4 # via great-expectations isodate==0.6.1 - # via msrest + # via + # azure-storage-blob + # msrest +isoduration==20.11.0 + # via jsonschema isort==5.12.0 # via feast (setup.py) jedi==0.18.2 # via ipython -jinja2==3.0.3 +jinja2==3.1.2 # via # altair # feast (setup.py) # great-expectations + # jupyter-server # moto + # nbclassic + # nbconvert + # notebook # sphinx jmespath==0.10.0 # via @@ -366,51 +412,88 @@ jmespath==0.10.0 jsonpatch==1.32 # via great-expectations jsonpointer==2.3 - # via jsonpatch -jsonschema==4.17.3 + # via + # jsonpatch + # jsonschema +jsonschema[format-nongpl]==4.17.3 # via # altair # feast (setup.py) # great-expectations + # jupyter-events # nbformat +jupyter-client==8.0.3 + # via + # ipykernel + # jupyter-server + # nbclassic + # nbclient + # notebook 
jupyter-core==5.2.0 - # via nbformat + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # nbformat + # notebook +jupyter-events==0.6.3 + # via jupyter-server +jupyter-server==2.4.0 + # via + # nbclassic + # notebook-shim +jupyter-server-terminals==0.4.4 + # via jupyter-server +jupyterlab-pygments==0.2.2 + # via nbconvert +jupyterlab-widgets==3.0.5 + # via ipywidgets kubernetes==20.13.0 # via feast (setup.py) locket==1.0.0 # via partd +makefun==1.15.1 + # via great-expectations markupsafe==2.1.2 # via # jinja2 - # moto + # nbconvert + # werkzeug +marshmallow==3.19.0 + # via great-expectations matplotlib-inline==0.1.6 - # via ipython + # via + # ipykernel + # ipython mccabe==0.7.0 # via flake8 minio==7.1.0 # via feast (setup.py) -mistune==2.0.4 - # via great-expectations +mistune==2.0.5 + # via + # great-expectations + # nbconvert mmh3==3.0.0 # via feast (setup.py) mock==2.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -moto==3.1.18 +moto==4.1.4 # via feast (setup.py) -msal==1.20.0 +msal==1.21.0 # via # azure-identity # msal-extensions msal-extensions==1.0.0 # via azure-identity -msgpack==1.0.4 +msgpack==1.0.5 # via cachecontrol msrest==0.7.1 - # via - # azure-storage-blob - # msrestazure + # via msrestazure msrestazure==0.6.4 # via adlfs multidict==6.0.4 @@ -423,7 +506,7 @@ mypy==0.982 # via # feast (setup.py) # sqlalchemy -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via # black # mypy @@ -431,11 +514,35 @@ mypy-protobuf==3.1 # via feast (setup.py) mysqlclient==2.1.1 # via feast (setup.py) +nbclassic==0.5.3 + # via notebook +nbclient==0.7.2 + # via nbconvert +nbconvert==7.2.9 + # via + # jupyter-server + # nbclassic + # notebook nbformat==5.7.3 - # via great-expectations + # via + # great-expectations + # jupyter-server + # nbclassic + # nbclient + # nbconvert + # notebook +nest-asyncio==1.5.6 + # via + # ipykernel + # nbclassic + # notebook nodeenv==1.7.0 # via pre-commit -numpy==1.24.1 
+notebook==6.5.3 + # via great-expectations +notebook-shim==0.2.2 + # via nbclassic +numpy==1.24.2 # via # altair # db-dtypes @@ -458,6 +565,10 @@ packaging==23.0 # docker # google-cloud-bigquery # great-expectations + # ipykernel + # jupyter-server + # marshmallow + # nbconvert # pytest # redis # sphinx @@ -472,6 +583,8 @@ pandas==1.5.3 # snowflake-connector-python pandavro==1.5.2 # via feast (setup.py) +pandocfilters==1.5.0 + # via nbconvert parso==0.8.3 # via jedi partd==1.3.0 @@ -484,9 +597,9 @@ pexpect==4.8.0 # via ipython pickleshare==0.7.5 # via ipython -pip-tools==6.12.1 +pip-tools==6.12.3 # via feast (setup.py) -platformdirs==2.6.2 +platformdirs==3.1.1 # via # black # jupyter-core @@ -497,9 +610,14 @@ ply==3.11 # via thriftpy2 portalocker==2.7.0 # via msal-extensions -pre-commit==3.0.2 +pre-commit==3.1.1 # via feast (setup.py) -prompt-toolkit==3.0.36 +prometheus-client==0.16.0 + # via + # jupyter-server + # nbclassic + # notebook +prompt-toolkit==3.0.38 # via ipython proto-plus==1.22.2 # via @@ -509,7 +627,7 @@ proto-plus==1.22.2 # google-cloud-bigtable # google-cloud-datastore # google-cloud-firestore -protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) # google-api-core @@ -527,11 +645,15 @@ protobuf==4.21.12 # mypy-protobuf # proto-plus psutil==5.9.0 - # via feast (setup.py) + # via + # feast (setup.py) + # ipykernel psycopg2-binary==2.9.5 # via feast (setup.py) ptyprocess==0.7.0 - # via pexpect + # via + # pexpect + # terminado pure-eval==0.2.2 # via stack-data py==1.11.0 @@ -560,16 +682,18 @@ pycparser==2.21 # via cffi pycryptodomex==3.17 # via snowflake-connector-python -pydantic==1.10.4 +pydantic==1.10.6 # via # fastapi # feast (setup.py) + # great-expectations pyflakes==3.0.1 # via flake8 pygments==2.14.0 # via # feast (setup.py) # ipython + # nbconvert # sphinx pyjwt[crypto]==2.6.0 # via @@ -586,7 +710,7 @@ pyopenssl==22.0.0 # via # feast (setup.py) # snowflake-connector-python -pyparsing==2.4.7 +pyparsing==3.0.9 # via # 
great-expectations # httplib2 @@ -594,9 +718,9 @@ pyproject-hooks==1.0.0 # via build pyrsistent==0.19.3 # via jsonschema -pyspark==3.3.1 +pyspark==3.3.2 # via feast (setup.py) -pytest==7.2.1 +pytest==7.2.2 # via # feast (setup.py) # pytest-benchmark @@ -618,24 +742,27 @@ pytest-ordering==0.6 # via feast (setup.py) pytest-timeout==1.4.2 # via feast (setup.py) -pytest-xdist==3.1.0 +pytest-xdist==3.2.0 # via feast (setup.py) python-dateutil==2.8.2 # via # adal + # arrow # botocore # google-cloud-bigquery # great-expectations + # jupyter-client # kubernetes # moto # pandas -python-dotenv==0.21.1 + # rockset +python-dotenv==1.0.0 # via uvicorn +python-json-logger==2.0.7 + # via jupyter-events pytz==2022.7.1 # via - # babel # great-expectations - # moto # pandas # snowflake-connector-python # trino @@ -645,11 +772,22 @@ pyyaml==6.0 # via # dask # feast (setup.py) + # jupyter-events # kubernetes # pre-commit + # responses # uvicorn +pyzmq==25.0.0 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook redis==4.2.2 # via feast (setup.py) +regex==2022.10.31 + # via feast (setup.py) requests==2.28.2 # via # adal @@ -658,6 +796,7 @@ requests==2.28.2 # azure-datalake-store # cachecontrol # docker + # feast (setup.py) # gcsfs # google-api-core # google-cloud-bigquery @@ -677,10 +816,20 @@ requests-oauthlib==1.3.1 # google-auth-oauthlib # kubernetes # msrest -responses==0.22.0 +responses==0.23.0 # via moto +rfc3339-validator==0.1.4 + # via + # jsonschema + # jupyter-events rfc3986[idna2008]==1.5.0 # via httpx +rfc3986-validator==0.1.1 + # via + # jsonschema + # jupyter-events +rockset==1.0.5 + # via feast (setup.py) rsa==4.9 # via google-auth ruamel-yaml==0.17.17 @@ -691,12 +840,19 @@ s3fs==2022.1.0 # via feast (setup.py) s3transfer==0.5.2 # via boto3 -scipy==1.10.0 +scipy==1.10.1 # via great-expectations +send2trash==1.8.0 + # via + # jupyter-server + # nbclassic + # notebook six==1.16.0 # via + # asttokens # azure-core # azure-identity + # bleach 
# cassandra-driver # geomet # google-auth @@ -708,6 +864,7 @@ six==1.16.0 # msrestazure # pandavro # python-dateutil + # rfc3339-validator # thriftpy2 sniffio==1.3.0 # via @@ -718,13 +875,15 @@ snowballstemmer==2.2.0 # via sphinx snowflake-connector-python[pandas]==2.9.0 # via feast (setup.py) +soupsieve==2.4 + # via beautifulsoup4 sphinx==6.1.3 # via feast (setup.py) sphinxcontrib-applehelp==1.0.4 # via sphinx sphinxcontrib-devhelp==1.0.2 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.0.1 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx @@ -738,22 +897,26 @@ sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy stack-data==0.6.2 # via ipython -starlette==0.22.0 +starlette==0.25.0 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) -termcolor==2.2.0 - # via great-expectations +terminado==0.17.1 + # via + # jupyter-server + # jupyter-server-terminals + # nbclassic + # notebook testcontainers==3.7.1 # via feast (setup.py) thriftpy2==0.4.16 # via happybase +tinycss2==1.2.1 + # via nbconvert toml==0.10.2 - # via - # feast (setup.py) - # responses + # via feast (setup.py) tomli==2.0.1 # via # black @@ -767,52 +930,69 @@ toolz==0.12.0 # altair # dask # partd -tqdm==4.64.1 +tornado==6.2 + # via + # ipykernel + # jupyter-client + # jupyter-server + # nbclassic + # notebook + # terminado +tqdm==4.65.0 # via # feast (setup.py) # great-expectations traitlets==5.9.0 # via + # comm + # ipykernel # ipython + # ipywidgets + # jupyter-client # jupyter-core + # jupyter-events + # jupyter-server # matplotlib-inline + # nbclassic + # nbclient + # nbconvert # nbformat -trino==0.321.0 + # notebook +trino==0.322.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -types-docutils==0.19.1.2 - # via types-setuptools types-protobuf==3.19.22 # via # feast (setup.py) # mypy-protobuf -types-pymysql==1.0.19.2 +types-pymysql==1.0.19.5 # via feast (setup.py) -types-pyopenssl==23.0.0.2 
+types-pyopenssl==23.0.0.4 # via types-redis -types-python-dateutil==2.8.19.6 +types-python-dateutil==2.8.19.10 # via feast (setup.py) -types-pytz==2022.7.1.0 +types-pytz==2022.7.1.2 # via feast (setup.py) -types-pyyaml==6.0.12.3 - # via feast (setup.py) -types-redis==4.4.0.4 +types-pyyaml==6.0.12.8 + # via + # feast (setup.py) + # responses +types-redis==4.5.1.4 # via feast (setup.py) -types-requests==2.28.11.8 +types-requests==2.28.11.15 # via feast (setup.py) -types-setuptools==65.7.0.3 +types-setuptools==67.6.0.0 # via feast (setup.py) -types-tabulate==0.9.0.0 +types-tabulate==0.9.0.1 # via feast (setup.py) -types-toml==0.10.8.1 - # via responses -types-urllib3==1.26.25.4 +types-urllib3==1.26.25.8 # via types-requests -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # aioitertools # azure-core + # azure-storage-blob # black # great-expectations # mypy @@ -826,6 +1006,8 @@ tzlocal==4.2 # via # great-expectations # trino +uri-template==1.2.0 + # via jsonschema uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.14 @@ -838,12 +1020,13 @@ urllib3==1.26.14 # minio # requests # responses + # rockset # snowflake-connector-python -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn -virtualenv==20.17.1 +virtualenv==20.20.0 # via pre-commit volatile==2.1.0 # via bowler @@ -851,17 +1034,26 @@ watchfiles==0.18.1 # via uvicorn wcwidth==0.2.6 # via prompt-toolkit -websocket-client==1.5.0 +webcolors==1.12 + # via jsonschema +webencodings==0.5.1 + # via + # bleach + # tinycss2 +websocket-client==1.5.1 # via # docker + # jupyter-server # kubernetes websockets==10.4 # via uvicorn -werkzeug==2.1.2 +werkzeug==2.2.3 # via moto wheel==0.38.4 # via pip-tools -wrapt==1.14.1 +widgetsnbextension==4.0.5 + # via ipywidgets +wrapt==1.15.0 # via # aiobotocore # deprecated @@ -870,7 +1062,7 @@ xmltodict==0.13.0 # via moto yarl==1.8.2 # via aiohttp -zipp==3.12.0 +zipp==3.15.0 # via importlib-metadata # The following 
packages are considered to be unsafe in a requirements file: diff --git a/sdk/python/requirements/py3.9-requirements.txt b/sdk/python/requirements/py3.9-requirements.txt index 76d297a3618..9235d8fdf2b 100644 --- a/sdk/python/requirements/py3.9-requirements.txt +++ b/sdk/python/requirements/py3.9-requirements.txt @@ -17,14 +17,12 @@ attrs==22.2.0 # jsonschema bowler==0.9.0 # via feast (setup.py) -cachetools==5.3.0 - # via google-auth certifi==2022.12.7 # via # httpcore # httpx # requests -charset-normalizer==3.0.1 +charset-normalizer==3.1.0 # via requests click==8.1.3 # via @@ -37,35 +35,27 @@ cloudpickle==2.2.1 # via dask colorama==0.4.6 # via feast (setup.py) -dask==2023.1.1 +dask==2023.3.0 # via feast (setup.py) dill==0.3.6 # via feast (setup.py) -fastapi==0.89.1 +fastapi==0.93.0 # via feast (setup.py) -fastavro==1.7.1 +fastavro==1.7.3 # via # feast (setup.py) # pandavro fissix==21.11.13 # via bowler -fsspec==2023.1.0 +fsspec==2023.3.0 # via dask -google-api-core==2.11.0 - # via feast (setup.py) -google-auth==2.16.0 - # via google-api-core -googleapis-common-protos==1.58.0 - # via - # feast (setup.py) - # google-api-core greenlet==2.0.2 # via sqlalchemy -grpcio==1.51.1 +grpcio==1.51.3 # via # feast (setup.py) # grpcio-reflection -grpcio-reflection==1.51.1 +grpcio-reflection==1.51.3 # via feast (setup.py) h11==0.14.0 # via @@ -94,11 +84,11 @@ mmh3==3.0.0 # via feast (setup.py) moreorless==0.4.0 # via bowler -mypy==0.991 +mypy==1.1.1 # via sqlalchemy -mypy-extensions==0.4.3 +mypy-extensions==1.0.0 # via mypy -numpy==1.24.1 +numpy==1.24.2 # via # feast (setup.py) # pandas @@ -116,22 +106,14 @@ partd==1.3.0 # via dask proto-plus==1.22.2 # via feast (setup.py) -protobuf==4.21.12 +protobuf==4.22.1 # via # feast (setup.py) - # google-api-core - # googleapis-common-protos # grpcio-reflection # proto-plus -pyarrow==8.0.0 +pyarrow==11.0.0 # via feast (setup.py) -pyasn1==0.4.8 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.2.8 - # via google-auth -pydantic==1.10.4 
+pydantic==1.10.6 # via # fastapi # feast (setup.py) @@ -141,7 +123,7 @@ pyrsistent==0.19.3 # via jsonschema python-dateutil==2.8.2 # via pandas -python-dotenv==0.21.1 +python-dotenv==1.0.0 # via uvicorn pytz==2022.7.1 # via pandas @@ -151,14 +133,11 @@ pyyaml==6.0 # feast (setup.py) # uvicorn requests==2.28.2 - # via google-api-core + # via feast (setup.py) rfc3986[idna2008]==1.5.0 # via httpx -rsa==4.9 - # via google-auth six==1.16.0 # via - # google-auth # pandavro # python-dateutil sniffio==1.3.0 @@ -170,11 +149,11 @@ sqlalchemy[mypy]==1.4.46 # via feast (setup.py) sqlalchemy2-stubs==0.0.2a32 # via sqlalchemy -starlette==0.22.0 +starlette==0.25.0 # via fastapi tabulate==0.9.0 # via feast (setup.py) -tenacity==8.1.0 +tenacity==8.2.2 # via feast (setup.py) toml==0.10.2 # via feast (setup.py) @@ -184,11 +163,11 @@ toolz==0.12.0 # via # dask # partd -tqdm==4.64.1 +tqdm==4.65.0 # via feast (setup.py) typeguard==2.13.3 # via feast (setup.py) -typing-extensions==4.4.0 +typing-extensions==4.5.0 # via # mypy # pydantic @@ -196,7 +175,7 @@ typing-extensions==4.4.0 # starlette urllib3==1.26.14 # via requests -uvicorn[standard]==0.20.0 +uvicorn[standard]==0.21.0 # via feast (setup.py) uvloop==0.17.0 # via uvicorn diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py index 69ff7f681c2..e1ae5f7a42a 100644 --- a/sdk/python/tests/conftest.py +++ b/sdk/python/tests/conftest.py @@ -80,9 +80,6 @@ def pytest_configure(config): "markers", "integration: mark test that has external dependencies" ) config.addinivalue_line("markers", "benchmark: mark benchmarking tests") - config.addinivalue_line( - "markers", "goserver: mark tests that use the go feature server" - ) config.addinivalue_line( "markers", "universal_online_stores: mark tests that can be run against different online stores", @@ -106,18 +103,11 @@ def pytest_addoption(parser): default=False, help="Run benchmark tests", ) - parser.addoption( - "--goserver", - action="store_true", - default=False, - 
help="Run tests that use the go feature server", - ) def pytest_collection_modifyitems(config, items: List[Item]): should_run_integration = config.getoption("--integration") is True should_run_benchmark = config.getoption("--benchmark") is True - should_run_goserver = config.getoption("--goserver") is True integration_tests = [t for t in items if "integration" in t.keywords] if not should_run_integration: @@ -137,15 +127,6 @@ def pytest_collection_modifyitems(config, items: List[Item]): for t in benchmark_tests: items.append(t) - goserver_tests = [t for t in items if "goserver" in t.keywords] - if not should_run_goserver: - for t in goserver_tests: - items.remove(t) - else: - items.clear() - for t in goserver_tests: - items.append(t) - @pytest.fixture def simple_dataset_1() -> pd.DataFrame: @@ -276,9 +257,6 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): ] ) - if "goserver" in markers: - extra_dimensions.append({"go_feature_serving": True}) - configs = [] if offline_stores: for provider, offline_store_creator in offline_stores: @@ -291,12 +269,6 @@ def pytest_generate_tests(metafunc: pytest.Metafunc): "online_store_creator": online_store_creator, **dim, } - # temporary Go works only with redis - if config.get("go_feature_serving") and ( - not isinstance(online_store, dict) - or online_store["type"] != "redis" - ): - continue # aws lambda works only with dynamo if ( diff --git a/sdk/python/tests/integration/e2e/test_go_feature_server.py b/sdk/python/tests/integration/e2e/test_go_feature_server.py deleted file mode 100644 index 0f972e45df5..00000000000 --- a/sdk/python/tests/integration/e2e/test_go_feature_server.py +++ /dev/null @@ -1,263 +0,0 @@ -import threading -import time -from datetime import datetime -from typing import List - -import grpc -import pandas as pd -import pytest -import pytz -import requests - -from feast.embedded_go.online_features_service import EmbeddedOnlineFeatureServer -from feast.feast_object import FeastObject -from 
feast.feature_logging import LoggingConfig -from feast.feature_service import FeatureService -from feast.infra.feature_servers.base_config import FeatureLoggingConfig -from feast.protos.feast.serving.ServingService_pb2 import ( - FieldStatus, - GetOnlineFeaturesRequest, - GetOnlineFeaturesResponse, -) -from feast.protos.feast.serving.ServingService_pb2_grpc import ServingServiceStub -from feast.protos.feast.types.Value_pb2 import RepeatedValue -from feast.type_map import python_values_to_proto_values -from feast.value_type import ValueType -from feast.wait import wait_retry_backoff -from tests.integration.feature_repos.repo_configuration import ( - construct_universal_feature_views, -) -from tests.integration.feature_repos.universal.entities import ( - customer, - driver, - location, -) -from tests.utils.http_server import check_port_open, free_port -from tests.utils.test_log_creator import generate_expected_logs, get_latest_rows - - -@pytest.mark.integration -@pytest.mark.goserver -def test_go_grpc_server(grpc_client): - resp: GetOnlineFeaturesResponse = grpc_client.GetOnlineFeatures( - GetOnlineFeaturesRequest( - feature_service="driver_features", - entities={ - "driver_id": RepeatedValue( - val=python_values_to_proto_values( - [5001, 5002], feature_type=ValueType.INT64 - ) - ) - }, - full_feature_names=True, - ) - ) - assert list(resp.metadata.feature_names.val) == [ - "driver_id", - "driver_stats__conv_rate", - "driver_stats__acc_rate", - "driver_stats__avg_daily_trips", - ] - for vector in resp.results: - assert all([s == FieldStatus.PRESENT for s in vector.statuses]) - - -@pytest.mark.integration -@pytest.mark.goserver -def test_go_http_server(http_server_port): - response = requests.post( - f"http://localhost:{http_server_port}/get-online-features", - json={ - "feature_service": "driver_features", - "entities": {"driver_id": [5001, 5002]}, - "full_feature_names": True, - }, - ) - assert response.status_code == 200, response.text - response = response.json() 
- assert set(response.keys()) == {"metadata", "results"} - metadata = response["metadata"] - results = response["results"] - assert response["metadata"] == { - "feature_names": [ - "driver_id", - "driver_stats__conv_rate", - "driver_stats__acc_rate", - "driver_stats__avg_daily_trips", - ] - }, metadata - assert len(results) == 4, results - assert all( - set(result.keys()) == {"event_timestamps", "statuses", "values"} - for result in results - ), results - assert all( - result["statuses"] == ["PRESENT", "PRESENT"] for result in results - ), results - assert results[0]["values"] == [5001, 5002], results - for result in results[1:]: - assert len(result["values"]) == 2, result - assert all(value is not None for value in result["values"]), result - - -@pytest.mark.integration -@pytest.mark.goserver -@pytest.mark.universal_offline_stores -@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) -def test_feature_logging( - grpc_client, environment, universal_data_sources, full_feature_names -): - fs = environment.feature_store - feature_service = fs.get_feature_service("driver_features") - log_start_date = datetime.now().astimezone(pytz.UTC) - driver_ids = list(range(5001, 5011)) - - for driver_id in driver_ids: - # send each driver id in separate request - grpc_client.GetOnlineFeatures( - GetOnlineFeaturesRequest( - feature_service="driver_features", - entities={ - "driver_id": RepeatedValue( - val=python_values_to_proto_values( - [driver_id], feature_type=ValueType.INT64 - ) - ) - }, - full_feature_names=full_feature_names, - ) - ) - # with some pause - time.sleep(0.1) - - _, datasets, _ = universal_data_sources - latest_rows = get_latest_rows(datasets.driver_df, "driver_id", driver_ids) - feature_view = fs.get_feature_view("driver_stats") - features = [ - feature.name - for proj in feature_service.feature_view_projections - for feature in proj.features - ] - expected_logs = generate_expected_logs( - latest_rows, feature_view, features, 
["driver_id"], "event_timestamp" - ) - - def retrieve(): - retrieval_job = fs._get_provider().retrieve_feature_service_logs( - feature_service=feature_service, - start_date=log_start_date, - end_date=datetime.now().astimezone(pytz.UTC), - config=fs.config, - registry=fs._registry, - ) - try: - df = retrieval_job.to_df() - except Exception: - # Table or directory was not created yet - return None, False - - return df, df.shape[0] == len(driver_ids) - - persisted_logs = wait_retry_backoff( - retrieve, timeout_secs=60, timeout_msg="Logs retrieval failed" - ) - - persisted_logs = persisted_logs.sort_values(by="driver_id").reset_index(drop=True) - persisted_logs = persisted_logs[expected_logs.columns] - pd.testing.assert_frame_equal(expected_logs, persisted_logs, check_dtype=False) - - -""" -Start go feature server either on http or grpc based on the repo configuration for testing. -""" - - -def _server_port(environment, server_type: str): - if not environment.test_repo_config.go_feature_serving: - pytest.skip("Only for Go path") - - fs = environment.feature_store - - embedded = EmbeddedOnlineFeatureServer( - repo_path=str(fs.repo_path.absolute()), - repo_config=fs.config, - feature_store=fs, - ) - port = free_port() - if server_type == "grpc": - target = embedded.start_grpc_server - elif server_type == "http": - target = embedded.start_http_server - else: - raise ValueError("Server Type must be either 'http' or 'grpc'") - - t = threading.Thread( - target=target, - args=("127.0.0.1", port), - kwargs=dict( - enable_logging=True, - logging_options=FeatureLoggingConfig( - enabled=True, - queue_capacity=100, - write_to_disk_interval_secs=1, - flush_interval_secs=1, - emit_timeout_micro_secs=10000, - ), - ), - ) - t.start() - - wait_retry_backoff( - lambda: (None, check_port_open("127.0.0.1", port)), timeout_secs=15 - ) - - yield port - if server_type == "grpc": - embedded.stop_grpc_server() - else: - embedded.stop_http_server() - - # wait for graceful stop - time.sleep(5) - 
- -# Go test fixtures - - -@pytest.fixture -def initialized_registry(environment, universal_data_sources): - fs = environment.feature_store - - _, _, data_sources = universal_data_sources - feature_views = construct_universal_feature_views(data_sources) - - feature_service = FeatureService( - name="driver_features", - features=[feature_views.driver], - logging_config=LoggingConfig( - destination=environment.data_source_creator.create_logged_features_destination(), - sample_rate=1.0, - ), - ) - feast_objects: List[FeastObject] = [feature_service] - feast_objects.extend(feature_views.values()) - feast_objects.extend([driver(), customer(), location()]) - - fs.apply(feast_objects) - fs.materialize(environment.start_date, environment.end_date) - - -@pytest.fixture -def grpc_server_port(environment, initialized_registry): - yield from _server_port(environment, "grpc") - - -@pytest.fixture -def http_server_port(environment, initialized_registry): - yield from _server_port(environment, "http") - - -@pytest.fixture -def grpc_client(grpc_server_port): - ch = grpc.insecure_channel(f"localhost:{grpc_server_port}") - yield ServingServiceStub(ch) diff --git a/sdk/python/tests/integration/feature_repos/integration_test_repo_config.py b/sdk/python/tests/integration/feature_repos/integration_test_repo_config.py index 4662734383f..309f92005a3 100644 --- a/sdk/python/tests/integration/feature_repos/integration_test_repo_config.py +++ b/sdk/python/tests/integration/feature_repos/integration_test_repo_config.py @@ -37,7 +37,6 @@ class IntegrationTestRepoConfig: full_feature_names: bool = True infer_features: bool = False python_feature_server: bool = False - go_feature_serving: bool = False def __repr__(self) -> str: if not self.online_store_creator: @@ -61,7 +60,6 @@ def __repr__(self) -> str: f"{self.offline_store_creator.__name__.split('.')[-1].replace('DataSourceCreator', '')}", online_store_type, f"python_fs:{self.python_feature_server}", - f"go_fs:{self.go_feature_serving}", ] ) 
@@ -77,6 +75,5 @@ def __eq__(self, other): and self.online_store == other.online_store and self.offline_store_creator == other.offline_store_creator and self.online_store_creator == other.online_store_creator - and self.go_feature_serving == other.go_feature_serving and self.python_feature_server == other.python_feature_server ) diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index 174b0b91ad1..fda5b3c11de 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -93,6 +93,12 @@ "instance": os.getenv("BIGTABLE_INSTANCE_ID", "feast-integration-tests"), } +ROCKSET_CONFIG = { + "type": "rockset", + "api_key": os.getenv("ROCKSET_APIKEY", ""), + "host": os.getenv("ROCKSET_APISERVER", "api.rs2.usw2.rockset.com"), +} + OFFLINE_STORE_TO_PROVIDER_CONFIG: Dict[str, DataSourceCreator] = { "file": ("local", FileDataSourceCreator), "bigquery": ("gcp", BigQueryDataSourceCreator), @@ -126,6 +132,11 @@ AVAILABLE_ONLINE_STORES["snowflake"] = (SNOWFLAKE_CONFIG, None) AVAILABLE_ONLINE_STORES["bigtable"] = (BIGTABLE_CONFIG, None) + # Uncomment to test using private Rockset account. Currently not enabled as + # there is no dedicated Rockset instance for CI testing and there is no + # containerized version of Rockset. 
+ # AVAILABLE_ONLINE_STORES["rockset"] = (ROCKSET_CONFIG, None) + full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME) if full_repo_configs_module is not None: @@ -449,7 +460,6 @@ def construct_test_environment( batch_engine=test_repo_config.batch_engine, repo_path=repo_dir_name, feature_server=feature_server, - go_feature_serving=test_repo_config.go_feature_serving, entity_key_serialization_version=entity_key_serialization_version, ) diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py index f0a09b4d5ba..257e46df197 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/snowflake.py @@ -13,8 +13,8 @@ SnowflakeLoggingDestination, ) from feast.infra.utils.snowflake.snowflake_utils import ( + GetSnowflakeConnection, execute_snowflake_statement, - get_snowflake_conn, write_pandas, ) from feast.repo_config import FeastConfigBaseModel @@ -54,11 +54,10 @@ def create_data_source( field_mapping: Dict[str, str] = None, ) -> DataSource: - snowflake_conn = get_snowflake_conn(self.offline_store_config) - destination_name = self.get_prefixed_table_name(destination_name) - write_pandas(snowflake_conn, df, destination_name, auto_create_table=True) + with GetSnowflakeConnection(self.offline_store_config) as conn: + write_pandas(conn, df, destination_name, auto_create_table=True) self.tables.append(destination_name) @@ -93,7 +92,7 @@ def get_prefixed_table_name(self, suffix: str) -> str: return f"{self.project_name}_{suffix}" def teardown(self): - with get_snowflake_conn(self.offline_store_config) as conn: + with GetSnowflakeConnection(self.offline_store_config) as conn: for table in self.tables: query = f'DROP TABLE IF EXISTS "{table}"' execute_snowflake_statement(conn, query) diff --git 
a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py index 78529916520..51f39a56670 100644 --- a/sdk/python/tests/integration/online_store/test_universal_online.py +++ b/sdk/python/tests/integration/online_store/test_universal_online.py @@ -1,5 +1,4 @@ import datetime -import itertools import os import time import unittest @@ -14,10 +13,7 @@ from botocore.exceptions import BotoCoreError from feast.entity import Entity -from feast.errors import ( - FeatureNameCollisionError, - RequestDataNotFoundInEntityRowsException, -) +from feast.errors import FeatureNameCollisionError from feast.feature_service import FeatureService from feast.feature_view import FeatureView from feast.field import Field @@ -28,11 +24,7 @@ Environment, construct_universal_feature_views, ) -from tests.integration.feature_repos.universal.entities import ( - customer, - driver, - location, -) +from tests.integration.feature_repos.universal.entities import driver from tests.integration.feature_repos.universal.feature_views import ( create_driver_hourly_stats_feature_view, driver_feature_view, @@ -445,246 +437,6 @@ def test_online_retrieval_with_event_timestamps( ) -@pytest.mark.integration -@pytest.mark.universal_online_stores -@pytest.mark.goserver -@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v)) -def test_online_retrieval( - environment, universal_data_sources, feature_server_endpoint, full_feature_names -): - fs = environment.feature_store - entities, datasets, data_sources = universal_data_sources - feature_views = construct_universal_feature_views(data_sources) - - feature_service = FeatureService( - "convrate_plus100", - features=[ - feature_views.driver[["conv_rate"]], - feature_views.driver_odfv, - feature_views.customer[["current_balance"]], - feature_views.pushed_locations, - ], - ) - feature_service_entity_mapping = FeatureService( - name="entity_mapping", - features=[ 
- feature_views.location.with_name("origin").with_join_key_map( - {"location_id": "origin_id"} - ), - feature_views.location.with_name("destination").with_join_key_map( - {"location_id": "destination_id"} - ), - ], - ) - - feast_objects = [] - feast_objects.extend(feature_views.values()) - feast_objects.extend( - [ - driver(), - customer(), - location(), - feature_service, - feature_service_entity_mapping, - ] - ) - fs.apply(feast_objects) - fs.materialize( - environment.start_date - timedelta(days=1), - environment.end_date + timedelta(days=1), - ) - - entity_sample = datasets.orders_df.sample(10)[ - ["customer_id", "driver_id", "order_id", "origin_id", "event_timestamp"] - ] - orders_df = datasets.orders_df[ - ( - datasets.orders_df["customer_id"].isin(entity_sample["customer_id"]) - & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"]) - ) - ] - - sample_drivers = entity_sample["driver_id"] - drivers_df = datasets.driver_df[ - datasets.driver_df["driver_id"].isin(sample_drivers) - ] - - sample_customers = entity_sample["customer_id"] - customers_df = datasets.customer_df[ - datasets.customer_df["customer_id"].isin(sample_customers) - ] - - sample_origins = entity_sample["origin_id"] - - location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2))) - sample_location_pairs = location_pairs[ - np.random.choice(len(location_pairs), 10) - ].T.tolist() - origins_df = datasets.location_df[ - datasets.location_df["location_id"].isin(sample_location_pairs[0]) - ] - destinations_df = datasets.location_df[ - datasets.location_df["location_id"].isin(sample_location_pairs[1]) - ] - - global_df = datasets.global_df - location_df = datasets.location_df - - entity_rows = [ - {"driver_id": d, "customer_id": c, "location_id": o, "val_to_add": 50} - for (d, c, o) in zip(sample_drivers, sample_customers, sample_origins) - ] - - feature_refs = [ - "driver_stats:conv_rate", - "driver_stats:avg_daily_trips", - "customer_profile:current_balance", - 
"customer_profile:avg_passenger_count", - "customer_profile:lifetime_trip_count", - "conv_rate_plus_100:conv_rate_plus_100", - "conv_rate_plus_100:conv_rate_plus_val_to_add", - "order:order_is_success", - "global_stats:num_rides", - "global_stats:avg_ride_length", - "pushable_location_stats:temperature", - ] - unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f] - # Remove the on demand feature view output features, since they're not present in the source dataframe - unprefixed_feature_refs.remove("conv_rate_plus_100") - unprefixed_feature_refs.remove("conv_rate_plus_val_to_add") - - online_features_dict = get_online_features_dict( - environment=environment, - endpoint=feature_server_endpoint, - features=feature_refs, - entity_rows=entity_rows, - full_feature_names=full_feature_names, - ) - - # Test that the on demand feature views compute properly even if the dependent conv_rate - # feature isn't requested. - online_features_no_conv_rate = get_online_features_dict( - environment=environment, - endpoint=feature_server_endpoint, - features=[ref for ref in feature_refs if ref != "driver_stats:conv_rate"], - entity_rows=entity_rows, - full_feature_names=full_feature_names, - ) - - assert online_features_no_conv_rate is not None - - keys = set(online_features_dict.keys()) - expected_keys = set( - f.replace(":", "__") if full_feature_names else f.split(":")[-1] - for f in feature_refs - ) | {"customer_id", "driver_id", "location_id"} - assert ( - keys == expected_keys - ), f"Response keys are different from expected: {keys - expected_keys} (extra) and {expected_keys - keys} (missing)" - - tc = unittest.TestCase() - for i, entity_row in enumerate(entity_rows): - df_features = get_latest_feature_values_from_dataframes( - driver_df=drivers_df, - customer_df=customers_df, - orders_df=orders_df, - global_df=global_df, - entity_row=entity_row, - location_df=location_df, - ) - - assert df_features["customer_id"] == 
online_features_dict["customer_id"][i] - assert df_features["driver_id"] == online_features_dict["driver_id"][i] - tc.assertAlmostEqual( - online_features_dict[ - response_feature_name( - "conv_rate_plus_100", feature_refs, full_feature_names - ) - ][i], - df_features["conv_rate"] + 100, - delta=0.0001, - ) - tc.assertAlmostEqual( - online_features_dict[ - response_feature_name( - "conv_rate_plus_val_to_add", feature_refs, full_feature_names - ) - ][i], - df_features["conv_rate"] + df_features["val_to_add"], - delta=0.0001, - ) - for unprefixed_feature_ref in unprefixed_feature_refs: - tc.assertAlmostEqual( - df_features[unprefixed_feature_ref], - online_features_dict[ - response_feature_name( - unprefixed_feature_ref, feature_refs, full_feature_names - ) - ][i], - delta=0.0001, - ) - - # Check what happens for missing values - missing_responses_dict = get_online_features_dict( - environment=environment, - endpoint=feature_server_endpoint, - features=feature_refs, - entity_rows=[ - {"driver_id": 0, "customer_id": 0, "location_id": 0, "val_to_add": 100} - ], - full_feature_names=full_feature_names, - ) - assert missing_responses_dict is not None - for unprefixed_feature_ref in unprefixed_feature_refs: - if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}: - tc.assertIsNone( - missing_responses_dict[ - response_feature_name( - unprefixed_feature_ref, feature_refs, full_feature_names - ) - ][0] - ) - - # Check what happens for missing request data - with pytest.raises(RequestDataNotFoundInEntityRowsException): - get_online_features_dict( - environment=environment, - endpoint=feature_server_endpoint, - features=feature_refs, - entity_rows=[{"driver_id": 0, "customer_id": 0, "location_id": 0}], - full_feature_names=full_feature_names, - ) - - assert_feature_service_correctness( - environment, - feature_server_endpoint, - feature_service, - entity_rows, - full_feature_names, - drivers_df, - customers_df, - orders_df, - global_df, - location_df, - ) - - 
entity_rows = [ - {"origin_id": origin, "destination_id": destination} - for (_driver, _customer, origin, destination) in zip( - sample_drivers, sample_customers, *sample_location_pairs - ) - ] - assert_feature_service_entity_mapping_correctness( - environment, - feature_server_endpoint, - feature_service_entity_mapping, - entity_rows, - full_feature_names, - origins_df, - destinations_df, - ) - - @pytest.mark.integration @pytest.mark.universal_online_stores(only=["redis"]) def test_online_store_cleanup(environment, universal_data_sources): diff --git a/sdk/python/tests/integration/registration/test_inference.py b/sdk/python/tests/integration/registration/test_inference.py index 17bb09933e2..9f490d7f4e2 100644 --- a/sdk/python/tests/integration/registration/test_inference.py +++ b/sdk/python/tests/integration/registration/test_inference.py @@ -20,7 +20,10 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase update_data_sources_with_inferred_event_timestamp_col( data_sources, RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + registry="test.pb", + entity_key_serialization_version=2, ), ) actual_event_timestamp_cols = [ @@ -35,7 +38,10 @@ def test_update_file_data_source_with_inferred_event_timestamp_col(simple_datase update_data_sources_with_inferred_event_timestamp_col( [file_source], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + registry="test.pb", + entity_key_serialization_version=2, ), ) @@ -53,7 +59,10 @@ def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_so update_data_sources_with_inferred_event_timestamp_col( data_sources_copy.values(), RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + registry="test.pb", + entity_key_serialization_version=2, ), ) actual_event_timestamp_cols = [ 
diff --git a/sdk/python/tests/integration/registration/test_registry.py b/sdk/python/tests/integration/registration/test_registry.py index 739fb9ec5c8..57e625e66b8 100644 --- a/sdk/python/tests/integration/registration/test_registry.py +++ b/sdk/python/tests/integration/registration/test_registry.py @@ -45,7 +45,7 @@ def gcs_registry() -> Registry: registry_config = RegistryConfig( path=f"gs://{bucket_name}/registry.db", cache_ttl_seconds=600 ) - return Registry(registry_config, None) + return Registry("project", registry_config, None) @pytest.fixture @@ -57,7 +57,7 @@ def s3_registry() -> Registry: path=f"{aws_registry_path}/{int(time.time() * 1000)}/registry.db", cache_ttl_seconds=600, ) - return Registry(registry_config, None) + return Registry("project", registry_config, None) @pytest.mark.integration diff --git a/sdk/python/tests/unit/cli/test_cli.py b/sdk/python/tests/unit/cli/test_cli.py index 25a1dfed349..d15e1d16164 100644 --- a/sdk/python/tests/unit/cli/test_cli.py +++ b/sdk/python/tests/unit/cli/test_cli.py @@ -122,6 +122,7 @@ def setup_third_party_provider_repo(provider_name: str): type: sqlite offline_store: type: file + entity_key_serialization_version: 2 """ ) ) @@ -159,6 +160,7 @@ def setup_third_party_registry_store_repo( type: sqlite offline_store: type: file + entity_key_serialization_version: 2 """ ) ) diff --git a/sdk/python/tests/unit/infra/offline_stores/test_bigquery.py b/sdk/python/tests/unit/infra/offline_stores/test_bigquery.py new file mode 100644 index 00000000000..662be20b316 --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/test_bigquery.py @@ -0,0 +1,84 @@ +from unittest.mock import Mock, patch + +import pandas as pd +import pyarrow +import pytest + +from feast.infra.offline_stores.bigquery import ( + BigQueryOfflineStoreConfig, + BigQueryRetrievalJob, +) +from feast.infra.online_stores.sqlite import SqliteOnlineStoreConfig +from feast.repo_config import RepoConfig + + +@pytest.fixture +def pandas_dataframe(): + return 
pd.DataFrame( + data={ + "key": [1, 2, 3], + "value": ["a", None, "c"], + } + ) + + +@pytest.fixture +def big_query_result(pandas_dataframe): + class BigQueryResult: + def to_dataframe(self, **kwargs): + return pandas_dataframe + + def to_arrow(self, **kwargs): + return pyarrow.Table.from_pandas(pandas_dataframe) + + def exception(self, timeout=None): + return None + + return BigQueryResult() + + +class TestBigQueryRetrievalJob: + query = "SELECT * FROM bigquery" + client = Mock() + retrieval_job = BigQueryRetrievalJob( + query=query, + client=client, + config=RepoConfig( + registry="gs://ml-test/repo/registry.db", + project="test", + provider="gcp", + online_store=SqliteOnlineStoreConfig(type="sqlite"), + offline_store=BigQueryOfflineStoreConfig(type="bigquery", dataset="feast"), + ), + full_feature_names=True, + on_demand_feature_views=[], + ) + + def test_to_sql(self): + assert self.retrieval_job.to_sql() == self.query + + def test_to_df(self, big_query_result, pandas_dataframe): + self.client.query.return_value = big_query_result + actual = self.retrieval_job.to_df() + pd.testing.assert_frame_equal(actual, pandas_dataframe) + + def test_to_df_timeout(self, big_query_result): + self.client.query.return_value = big_query_result + with patch.object(self.retrieval_job, "_execute_query"): + self.retrieval_job.to_df(timeout=30) + self.retrieval_job._execute_query.assert_called_once_with( + query=self.query, timeout=30 + ) + + def test_to_arrow(self, big_query_result, pandas_dataframe): + self.client.query.return_value = big_query_result + actual = self.retrieval_job.to_arrow() + pd.testing.assert_frame_equal(actual.to_pandas(), pandas_dataframe) + + def test_to_arrow_timeout(self, big_query_result): + self.client.query.return_value = big_query_result + with patch.object(self.retrieval_job, "_execute_query"): + self.retrieval_job.to_arrow(timeout=30) + self.retrieval_job._execute_query.assert_called_once_with( + query=self.query, timeout=30 + ) diff --git 
a/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py new file mode 100644 index 00000000000..53e9d061ade --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/test_offline_store.py @@ -0,0 +1,220 @@ +from typing import List, Optional +from unittest.mock import MagicMock, patch + +import pandas as pd +import pyarrow +import pytest + +from feast.infra.offline_stores.contrib.athena_offline_store.athena import ( + AthenaOfflineStoreConfig, + AthenaRetrievalJob, +) +from feast.infra.offline_stores.contrib.mssql_offline_store.mssql import ( + MsSqlServerOfflineStoreConfig, + MsSqlServerRetrievalJob, +) +from feast.infra.offline_stores.contrib.postgres_offline_store.postgres import ( + PostgreSQLOfflineStoreConfig, + PostgreSQLRetrievalJob, +) +from feast.infra.offline_stores.contrib.spark_offline_store.spark import ( + SparkOfflineStoreConfig, + SparkRetrievalJob, +) +from feast.infra.offline_stores.contrib.trino_offline_store.trino import ( + TrinoRetrievalJob, +) +from feast.infra.offline_stores.file import FileRetrievalJob +from feast.infra.offline_stores.offline_store import RetrievalJob, RetrievalMetadata +from feast.infra.offline_stores.redshift import ( + RedshiftOfflineStoreConfig, + RedshiftRetrievalJob, +) +from feast.infra.offline_stores.snowflake import ( + SnowflakeOfflineStoreConfig, + SnowflakeRetrievalJob, +) +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.saved_dataset import SavedDatasetStorage + + +class MockRetrievalJob(RetrievalJob): + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: + """ + Synchronously executes the underlying query and returns the result as a pandas dataframe. + + Does not handle on demand transformations or dataset validation. For either of those, + `to_df` should be used. 
+ """ + pass + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: + """ + Synchronously executes the underlying query and returns the result as an arrow table. + + Does not handle on demand transformations or dataset validation. For either of those, + `to_arrow` should be used. + """ + pass + + @property + def full_feature_names(self) -> bool: + """Returns True if full feature names should be applied to the results of the query.""" + pass + + @property + def on_demand_feature_views(self) -> List[OnDemandFeatureView]: + """Returns a list containing all the on demand feature views to be handled.""" + pass + + def persist(self, storage: SavedDatasetStorage, allow_overwrite: bool = False): + """ + Synchronously executes the underlying query and persists the result in the same offline store + at the specified destination. + + Args: + storage: The saved dataset storage object specifying where the result should be persisted. + allow_overwrite: If True, a pre-existing location (e.g. table or file) can be overwritten. + Currently not all individual offline store implementations make use of this parameter. + """ + pass + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + """Returns metadata about the retrieval job.""" + pass + + +# Since RetreivalJob are not really tested for subclasses we add some tests here. 
+@pytest.fixture( + params=[ + MockRetrievalJob, + FileRetrievalJob, + RedshiftRetrievalJob, + SnowflakeRetrievalJob, + AthenaRetrievalJob, + MsSqlServerRetrievalJob, + PostgreSQLRetrievalJob, + SparkRetrievalJob, + TrinoRetrievalJob, + ] +) +def retrieval_job(request, environment): + if request.param is FileRetrievalJob: + return FileRetrievalJob(lambda: 1, full_feature_names=False) + elif request.param is RedshiftRetrievalJob: + offline_store_config = RedshiftOfflineStoreConfig( + cluster_id="feast-integration-tests", + region="us-west-2", + user="admin", + database="feast", + s3_staging_location="s3://feast-integration-tests/redshift/tests/ingestion", + iam_role="arn:aws:iam::402087665549:role/redshift_s3_access_role", + ) + environment.test_repo_config.offline_store = offline_store_config + return RedshiftRetrievalJob( + query="query", + redshift_client="", + s3_resource="", + config=environment.test_repo_config, + full_feature_names=False, + ) + elif request.param is SnowflakeRetrievalJob: + offline_store_config = SnowflakeOfflineStoreConfig( + type="snowflake.offline", + account="snow", + user="snow", + password="snow", + role="snow", + warehouse="snow", + database="FEAST", + schema="OFFLINE", + storage_integration_name="FEAST_S3", + blob_export_location="s3://feast-snowflake-offload/export", + ) + environment.test_repo_config.offline_store = offline_store_config + environment.test_repo_config.project = "project" + return SnowflakeRetrievalJob( + query="query", + snowflake_conn=MagicMock(), + config=environment.test_repo_config, + full_feature_names=False, + ) + elif request.param is AthenaRetrievalJob: + offline_store_config = AthenaOfflineStoreConfig( + data_source="athena", + region="athena", + database="athena", + workgroup="athena", + s3_staging_location="athena", + ) + + environment.test_repo_config.offline_store = offline_store_config + return AthenaRetrievalJob( + query="query", + athena_client="client", + s3_resource="", + 
config=environment.test_repo_config.offline_store, + full_feature_names=False, + ) + elif request.param is MsSqlServerRetrievalJob: + + return MsSqlServerRetrievalJob( + query="query", + engine=MagicMock(), + config=MsSqlServerOfflineStoreConfig( + connection_string="str" + ), # TODO: this does not match the RetrievalJob pattern. Suppose to be RepoConfig + full_feature_names=False, + ) + elif request.param is PostgreSQLRetrievalJob: + offline_store_config = PostgreSQLOfflineStoreConfig( + host="str", + database="str", + user="str", + password="str", + ) + environment.test_repo_config.offline_store = offline_store_config + return PostgreSQLRetrievalJob( + query="query", + config=environment.test_repo_config.offline_store, + full_feature_names=False, + ) + elif request.param is SparkRetrievalJob: + offline_store_config = SparkOfflineStoreConfig() + environment.test_repo_config.offline_store = offline_store_config + return SparkRetrievalJob( + spark_session=MagicMock(), + query="str", + full_feature_names=False, + config=environment.test_repo_config, + ) + elif request.param is TrinoRetrievalJob: + offline_store_config = SparkOfflineStoreConfig() + environment.test_repo_config.offline_store = offline_store_config + return TrinoRetrievalJob( + query="str", + client=MagicMock(), + config=environment.test_repo_config, + full_feature_names=False, + ) + else: + return request.param() + + +def test_to_sql(): + assert MockRetrievalJob().to_sql() is None + + +@pytest.mark.parametrize("timeout", (None, 30)) +def test_to_df_timeout(retrieval_job, timeout: Optional[int]): + with patch.object(retrieval_job, "_to_df_internal") as mock_to_df_internal: + retrieval_job.to_df(timeout=timeout) + mock_to_df_internal.assert_called_once_with(timeout=timeout) + + +@pytest.mark.parametrize("timeout", (None, 30)) +def test_to_arrow_timeout(retrieval_job, timeout: Optional[int]): + with patch.object(retrieval_job, "_to_arrow_internal") as mock_to_arrow_internal: + 
retrieval_job.to_arrow(timeout=timeout) + mock_to_arrow_internal.assert_called_once_with(timeout=timeout) diff --git a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py index 9dca44dc09f..6045dbc6ce0 100644 --- a/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py +++ b/sdk/python/tests/unit/infra/online_store/test_dynamodb_online_store.py @@ -3,7 +3,7 @@ import boto3 import pytest -from moto import mock_dynamodb2 +from moto import mock_dynamodb from feast.infra.offline_stores.file import FileOfflineStoreConfig from feast.infra.online_stores.dynamodb import ( @@ -159,7 +159,7 @@ def test_dynamodb_table_dynamodb_resource(): assert dynamodb_resource.meta.client.meta.endpoint_url == endpoint_url -@mock_dynamodb2 +@mock_dynamodb @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_read( repo_config, dynamodb_online_store, n_samples @@ -180,7 +180,7 @@ def test_dynamodb_online_store_online_read( assert [item[1] for item in returned_items] == list(features) -@mock_dynamodb2 +@mock_dynamodb @pytest.mark.parametrize("n_samples", [5, 50, 100]) def test_dynamodb_online_store_online_write_batch( repo_config, dynamodb_online_store, n_samples @@ -207,7 +207,7 @@ def test_dynamodb_online_store_online_write_batch( assert [item[1] for item in stored_items] == list(features) -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore update method.""" # create dummy table to keep @@ -236,7 +236,7 @@ def test_dynamodb_online_store_update(repo_config, dynamodb_online_store): assert existing_tables[0] == f"test_aws.{db_table_keep_name}" -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): """Test DynamoDBOnlineStore teardown method.""" db_table_delete_name_one = f"{TABLE_NAME}_delete_teardown_1" @@ -262,7 
+262,7 @@ def test_dynamodb_online_store_teardown(repo_config, dynamodb_online_store): assert len(existing_tables) == 0 -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_online_read_unknown_entity( repo_config, dynamodb_online_store ): @@ -301,7 +301,7 @@ def test_dynamodb_online_store_online_read_unknown_entity( assert returned_items[pos] == (None, None) -@mock_dynamodb2 +@mock_dynamodb def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): """Test DynamoDBOnline Store deduplicate write batch request items.""" dynamodb_tbl = f"{TABLE_NAME}_batch_non_duplicates" @@ -321,7 +321,7 @@ def test_write_batch_non_duplicates(repo_config, dynamodb_online_store): assert len(returned_items) == len(data) -@mock_dynamodb2 +@mock_dynamodb def test_dynamodb_online_store_online_read_unknown_entity_end_of_batch( repo_config, dynamodb_online_store ): diff --git a/sdk/python/tests/unit/infra/test_inference_unit_tests.py b/sdk/python/tests/unit/infra/test_inference_unit_tests.py index 46a131e1b57..a108d397bd9 100644 --- a/sdk/python/tests/unit/infra/test_inference_unit_tests.py +++ b/sdk/python/tests/unit/infra/test_inference_unit_tests.py @@ -194,7 +194,10 @@ def test_feature_view_inference_respects_basic_inference(): [feature_view_1], [entity1], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) assert len(feature_view_1.schema) == 2 @@ -209,7 +212,10 @@ def test_feature_view_inference_respects_basic_inference(): [feature_view_2], [entity1, entity2], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) assert len(feature_view_2.schema) == 3 @@ -240,7 +246,10 @@ def test_feature_view_inference_on_entity_value_types(): [feature_view_1], [entity1], RepoConfig( - 
provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) @@ -310,7 +319,10 @@ def test_feature_view_inference_on_entity_columns(simple_dataset_1): [feature_view_1], [entity1], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) @@ -345,7 +357,10 @@ def test_feature_view_inference_on_feature_columns(simple_dataset_1): [feature_view_1], [entity1], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) @@ -397,7 +412,10 @@ def test_update_feature_services_with_inferred_features(simple_dataset_1): [feature_view_1, feature_view_2], [entity1], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) feature_service.infer_features( @@ -454,7 +472,10 @@ def test_update_feature_services_with_specified_features(simple_dataset_1): [feature_view_1, feature_view_2], [entity1], RepoConfig( - provider="local", project="test", entity_key_serialization_version=2 + provider="local", + project="test", + entity_key_serialization_version=2, + registry="dummy_registry.pb", ), ) assert len(feature_view_1.features) == 1 diff --git a/sdk/python/tests/unit/infra/test_local_registry.py b/sdk/python/tests/unit/infra/test_local_registry.py index 1e3b2aec886..b5e7d23a979 100644 --- a/sdk/python/tests/unit/infra/test_local_registry.py +++ b/sdk/python/tests/unit/infra/test_local_registry.py @@ -39,7 +39,7 @@ def local_registry() -> Registry: fd, registry_path = mkstemp() registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) - return 
Registry(registry_config, None) + return Registry("project", registry_config, None) @pytest.mark.parametrize( @@ -443,7 +443,7 @@ def test_apply_data_source(test_registry: Registry): def test_commit(): fd, registry_path = mkstemp() registry_config = RegistryConfig(path=registry_path, cache_ttl_seconds=600) - test_registry = Registry(registry_config, None) + test_registry = Registry("project", registry_config, None) entity = Entity( name="driver_car_id", @@ -484,7 +484,7 @@ def test_commit(): validate_project_uuid(project_uuid, test_registry) # Create new registry that points to the same store - registry_with_same_store = Registry(registry_config, None) + registry_with_same_store = Registry("project", registry_config, None) # Retrieving the entity should fail since the store is empty entities = registry_with_same_store.list_entities(project) @@ -495,7 +495,7 @@ def test_commit(): test_registry.commit() # Reconstruct the new registry in order to read the newly written store - registry_with_same_store = Registry(registry_config, None) + registry_with_same_store = Registry("project", registry_config, None) # Retrieving the entity should now succeed entities = registry_with_same_store.list_entities(project) diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 6f96e7b5d91..926c7226fc8 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -137,7 +137,7 @@ def test_online() -> None: fs_fast_ttl = FeatureStore( config=RepoConfig( registry=RegistryConfig( - path=store.config.registry, cache_ttl_seconds=cache_ttl + path=store.config.registry.path, cache_ttl_seconds=cache_ttl ), online_store=store.config.online_store, project=store.project, @@ -161,7 +161,7 @@ def test_online() -> None: assert result["trips"] == [7] # Rename the registry.db so that it cant be used for refreshes - 
os.rename(store.config.registry, store.config.registry + "_fake") + os.rename(store.config.registry.path, store.config.registry.path + "_fake") # Wait for registry to expire time.sleep(cache_ttl) @@ -180,7 +180,7 @@ def test_online() -> None: ).to_dict() # Restore registry.db so that we can see if it actually reloads registry - os.rename(store.config.registry + "_fake", store.config.registry) + os.rename(store.config.registry.path + "_fake", store.config.registry.path) # Test if registry is actually reloaded and whether results return result = fs_fast_ttl.get_online_features( @@ -200,7 +200,7 @@ def test_online() -> None: fs_infinite_ttl = FeatureStore( config=RepoConfig( registry=RegistryConfig( - path=store.config.registry, cache_ttl_seconds=0 + path=store.config.registry.path, cache_ttl_seconds=0 ), online_store=store.config.online_store, project=store.project, @@ -227,7 +227,7 @@ def test_online() -> None: time.sleep(2) # Rename the registry.db so that it cant be used for refreshes - os.rename(store.config.registry, store.config.registry + "_fake") + os.rename(store.config.registry.path, store.config.registry.path + "_fake") # TTL is infinite so this method should use registry cache result = fs_infinite_ttl.get_online_features( @@ -248,7 +248,7 @@ def test_online() -> None: fs_infinite_ttl.refresh_registry() # Restore registry.db so that teardown works - os.rename(store.config.registry + "_fake", store.config.registry) + os.rename(store.config.registry.path + "_fake", store.config.registry.path) def test_online_to_df(): diff --git a/sdk/python/tests/unit/test_feature.py b/sdk/python/tests/unit/test_feature.py index a8cfeef3dab..ca0dce44457 100644 --- a/sdk/python/tests/unit/test_feature.py +++ b/sdk/python/tests/unit/test_feature.py @@ -27,3 +27,6 @@ def test_field_serialization_with_description(): assert serialized_field.description == expected_description assert field_from_feature.description == expected_description + + field = 
Field.from_proto(serialized_field) + assert field.description == expected_description diff --git a/sdk/python/tests/unit/test_sql_registry.py b/sdk/python/tests/unit/test_sql_registry.py index 51cb430c9eb..1c2b5a36ddf 100644 --- a/sdk/python/tests/unit/test_sql_registry.py +++ b/sdk/python/tests/unit/test_sql_registry.py @@ -72,7 +72,7 @@ def pg_registry(): path=f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@127.0.0.1:{container_port}/{POSTGRES_DB}", ) - yield SqlRegistry(registry_config, None) + yield SqlRegistry(registry_config, "project", None) container.stop() @@ -106,7 +106,7 @@ def mysql_registry(): path=f"mysql+mysqldb://{POSTGRES_USER}:{POSTGRES_PASSWORD}@127.0.0.1:{container_port}/{POSTGRES_DB}", ) - yield SqlRegistry(registry_config, None) + yield SqlRegistry(registry_config, "project", None) container.stop() @@ -118,7 +118,7 @@ def sqlite_registry(): path="sqlite://", ) - yield SqlRegistry(registry_config, None) + yield SqlRegistry(registry_config, "project", None) @pytest.mark.skipif( @@ -565,6 +565,76 @@ def test_apply_data_source(sql_registry): sql_registry.teardown() +@pytest.mark.skipif( + sys.platform == "darwin" and "GITHUB_REF" in os.environ, + reason="does not run on mac github actions", +) +@pytest.mark.parametrize( + "sql_registry", + [ + lazy_fixture("mysql_registry"), + lazy_fixture("pg_registry"), + lazy_fixture("sqlite_registry"), + ], +) +def test_registry_cache(sql_registry): + # Create Feature Views + batch_source = FileSource( + name="test_source", + file_format=ParquetFormat(), + path="file://feast/*", + timestamp_field="ts_col", + created_timestamp_column="timestamp", + ) + + entity = Entity(name="fs1_my_entity_1", join_keys=["test"]) + + fv1 = FeatureView( + name="my_feature_view_1", + schema=[ + Field(name="fs1_my_feature_1", dtype=Int64), + Field(name="fs1_my_feature_2", dtype=String), + Field(name="fs1_my_feature_3", dtype=Array(String)), + Field(name="fs1_my_feature_4", dtype=Array(Bytes)), + ], + entities=[entity], + 
tags={"team": "matchmaking"}, + source=batch_source, + ttl=timedelta(minutes=5), + ) + + project = "project" + + # Register data source and feature view + sql_registry.apply_data_source(batch_source, project) + sql_registry.apply_feature_view(fv1, project) + registry_feature_views_cached = sql_registry.list_feature_views( + project, allow_cache=True + ) + registry_data_sources_cached = sql_registry.list_data_sources( + project, allow_cache=True + ) + # Not refreshed cache, so cache miss + assert len(registry_feature_views_cached) == 0 + assert len(registry_data_sources_cached) == 0 + sql_registry.refresh(project) + # Now objects exist + registry_feature_views_cached = sql_registry.list_feature_views( + project, allow_cache=True + ) + registry_data_sources_cached = sql_registry.list_data_sources( + project, allow_cache=True + ) + assert len(registry_feature_views_cached) == 1 + assert len(registry_data_sources_cached) == 1 + registry_feature_view = registry_feature_views_cached[0] + assert registry_feature_view.batch_source == batch_source + registry_data_source = registry_data_sources_cached[0] + assert registry_data_source == batch_source + + sql_registry.teardown() + + @pytest.mark.skipif( sys.platform == "darwin" and "GITHUB_REF" in os.environ, reason="does not run on mac github actions", diff --git a/sdk/python/tests/unit/test_type_map.py b/sdk/python/tests/unit/test_type_map.py index 0ba259ab73f..78ff15fe931 100644 --- a/sdk/python/tests/unit/test_type_map.py +++ b/sdk/python/tests/unit/test_type_map.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from feast.type_map import ( feast_value_type_to_python_type, @@ -26,3 +27,24 @@ def test_null_unix_timestamp_list(): converted = feast_value_type_to_python_type(protos[0]) assert converted[0] is None + + +@pytest.mark.parametrize( + "values", + ( + np.array([True]), + np.array([False]), + np.array([0]), + np.array([1]), + [True], + [False], + [0], + [1], + ), +) +def 
test_python_values_to_proto_values_bool(values): + + protos = python_values_to_proto_values(values, ValueType.BOOL) + converted = feast_value_type_to_python_type(protos[0]) + + assert converted is bool(values[0]) diff --git a/setup.cfg b/setup.cfg index e2d707e2720..2781169a713 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,3 +20,6 @@ exclude = .git,__pycache__,docs/conf.py,dist,feast/protos,feast/embedded_go/lib files=feast,tests ignore_missing_imports=true exclude=feast/embedded_go/lib + +[bdist_wheel] +universal = 1 diff --git a/setup.py b/setup.py index 7f7be301244..09a02479cc9 100644 --- a/setup.py +++ b/setup.py @@ -11,9 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import copy import glob -import json import os import pathlib import re @@ -21,11 +19,9 @@ import subprocess import sys from distutils.cmd import Command -from distutils.dir_util import copy_tree from pathlib import Path -from subprocess import CalledProcessError -from setuptools import Extension, find_packages +from setuptools import find_packages try: from setuptools import setup @@ -50,8 +46,6 @@ "colorama>=0.3.9,<1", "dill~=0.3.0", "fastavro>=1.1.0,<2", - "google-api-core>=1.23.0,<3", - "googleapis-common-protos>=1.52.0,<2", "grpcio>=1.47.0,<2", "grpcio-reflection>=1.47.0,<2", "Jinja2>=2,<4", @@ -60,18 +54,19 @@ "numpy>=1.22,<3", "pandas>=1.4.3,<2", "pandavro~=1.5.0", # For some reason pandavro higher than 1.5.* only support pandas less than 1.3. 
- "protobuf<5,>3", + "protobuf<5,>3.20", "proto-plus>=1.20.0,<2", - "pyarrow>=4,<9", + "pyarrow>=4,<12", "pydantic>=1,<2", "pygments>=2.12.0,<3", "PyYAML>=5.4.0,<7", + "requests", "SQLAlchemy[mypy]>1,<2", "tabulate>=0.8.0,<1", "tenacity>=7,<9", "toml>=0.10.0,<1", "tqdm>=4,<5", - "typeguard", + "typeguard==2.13.3", "fastapi>=0.68.0,<1", "uvicorn[standard]>=0.14.0,<1", "dask>=2021.1.0", @@ -80,6 +75,8 @@ ] GCP_REQUIRED = [ + "google-api-core>=1.23.0,<3", + "googleapis-common-protos>=1.52.0,<2", "google-cloud-bigquery[pandas]>=2,<4", "google-cloud-bigquery-storage >= 2.0.0,<3", "google-cloud-datastore>=2.1.0,<3", @@ -94,7 +91,7 @@ AWS_REQUIRED = ["boto3>=1.17.0,<=1.20.23", "docker>=5.0.2", "s3fs>=0.4.0,<=2022.01.0"] -BYTEWAX_REQUIRED = ["bytewax==0.13.1", "docker>=5.0.2", "kubernetes<=20.13.0"] +BYTEWAX_REQUIRED = ["bytewax==0.15.1", "docker>=5.0.2", "kubernetes<=20.13.0"] SNOWFLAKE_REQUIRED = [ "snowflake-connector-python[pandas]>=2.7.3,<3", @@ -109,7 +106,7 @@ ] TRINO_REQUIRED = [ - "trino>=0.305.0,<0.400.0", + "trino>=0.305.0,<0.400.0", "regex" ] POSTGRES_REQUIRED = [ @@ -126,11 +123,7 @@ "cassandra-driver>=3.24.0,<4", ] -GE_REQUIRED = ["great_expectations>=0.14.0,<0.15.0"] - -GO_REQUIRED = [ - "cffi~=1.15.0", -] +GE_REQUIRED = ["great_expectations>=0.15.41,<0.16.0"] AZURE_REQUIRED = [ "azure-storage-blob>=0.37.0", @@ -140,6 +133,10 @@ "pymssql", ] +ROCKSET_REQUIRED = [ + "rockset>=1.0.3", +] + CI_REQUIRED = ( [ "build", @@ -151,7 +148,7 @@ "grpcio-testing>=1.47.0", "minio==7.1.0", "mock==2.0.0", - "moto<4", + "moto", "mypy>=0.981,<0.990", "mypy-protobuf==3.1", "avro==1.10.0", @@ -197,6 +194,7 @@ + HBASE_REQUIRED + CASSANDRA_REQUIRED + AZURE_REQUIRED + + ROCKSET_REQUIRED ) @@ -310,93 +308,12 @@ def run(self): file.write(filedata) -def _generate_path_with_gopath(): - go_path = subprocess.check_output(["go", "env", "GOPATH"]).decode("utf-8") - go_path = go_path.strip() - path_val = os.getenv("PATH") - path_val = f"{path_val}:{go_path}/bin" - - return path_val - - 
-def _ensure_go_and_proto_toolchain(): - try: - version = subprocess.check_output(["go", "version"]) - except Exception as e: - raise RuntimeError("Unable to find go toolchain") from e - - semver_string = re.search(r"go[\S]+", str(version)).group().lstrip("go") - parts = semver_string.split(".") - if not (int(parts[0]) >= 1 and int(parts[1]) >= 16): - raise RuntimeError(f"Go compiler too old; expected 1.16+ found {semver_string}") - - path_val = _generate_path_with_gopath() - - try: - subprocess.check_call(["protoc-gen-go", "--version"], env={"PATH": path_val}) - subprocess.check_call( - ["protoc-gen-go-grpc", "--version"], env={"PATH": path_val} - ) - except Exception as e: - raise RuntimeError("Unable to find go/grpc extensions for protoc") from e - - -class BuildGoProtosCommand(Command): - description = "Builds the proto files into Go files." - user_options = [] - - def initialize_options(self): - self.go_protoc = [ - sys.executable, - "-m", - "grpc_tools.protoc", - ] # find_executable("protoc") - self.proto_folder = os.path.join(repo_root, "protos") - self.go_folder = os.path.join(repo_root, "go/protos") - self.sub_folders = PROTO_SUBDIRS - self.path_val = _generate_path_with_gopath() - - def finalize_options(self): - pass - - def _generate_go_protos(self, path: str): - proto_files = glob.glob(os.path.join(self.proto_folder, path)) - - try: - subprocess.check_call( - self.go_protoc - + [ - "-I", - self.proto_folder, - "--go_out", - self.go_folder, - "--go_opt=module=github.com/feast-dev/feast/go/protos", - "--go-grpc_out", - self.go_folder, - "--go-grpc_opt=module=github.com/feast-dev/feast/go/protos", - ] - + proto_files, - env={"PATH": self.path_val}, - ) - except CalledProcessError as e: - print(f"Stderr: {e.stderr}") - print(f"Stdout: {e.stdout}") - - def run(self): - go_dir = Path(repo_root) / "go" / "protos" - go_dir.mkdir(exist_ok=True) - for sub_folder in self.sub_folders: - self._generate_go_protos(f"feast/{sub_folder}/*.proto") - class 
BuildCommand(build_py): """Custom build command.""" def run(self): self.run_command("build_python_protos") - if os.getenv("COMPILE_GO", "false").lower() == "true": - _ensure_go_and_proto_toolchain() - self.run_command("build_go_protos") self.run_command("build_ext") build_py.run(self) @@ -408,99 +325,10 @@ class DevelopCommand(develop): def run(self): self.reinitialize_command("build_python_protos", inplace=1) self.run_command("build_python_protos") - if os.getenv("COMPILE_GO", "false").lower() == "true": - _ensure_go_and_proto_toolchain() - self.run_command("build_go_protos") develop.run(self) -class build_ext(_build_ext): - def finalize_options(self) -> None: - super().finalize_options() - if os.getenv("COMPILE_GO", "false").lower() == "false": - self.extensions = [e for e in self.extensions if not self._is_go_ext(e)] - - def _is_go_ext(self, ext: Extension): - return any( - source.endswith(".go") or source.startswith("github") - for source in ext.sources - ) - - def build_extension(self, ext: Extension): - print(f"Building extension {ext}") - if not self._is_go_ext(ext): - # the base class may mutate `self.compiler` - compiler = copy.deepcopy(self.compiler) - self.compiler, compiler = compiler, self.compiler - try: - return _build_ext.build_extension(self, ext) - finally: - self.compiler, compiler = compiler, self.compiler - - bin_path = _generate_path_with_gopath() - go_env = json.loads( - subprocess.check_output(["go", "env", "-json"]).decode("utf-8").strip() - ) - - print(f"Go env: {go_env}") - print(f"CWD: {os.getcwd()}") - - destination = os.path.dirname(os.path.abspath(self.get_ext_fullpath(ext.name))) - subprocess.check_call( - ["go", "install", "golang.org/x/tools/cmd/goimports"], - env={"PATH": bin_path, **go_env}, - ) - subprocess.check_call( - ["go", "get", "github.com/go-python/gopy@v0.4.4"], - env={"PATH": bin_path, **go_env}, - ) - subprocess.check_call( - ["go", "install", "github.com/go-python/gopy"], - env={"PATH": bin_path, **go_env}, - ) - 
subprocess.check_call( - [ - "gopy", - "build", - "-output", - destination, - "-vm", - sys.executable, - "--build-tags", - "cgo,ccalloc", - "--dynamic-link=True", - "-no-make", - *ext.sources, - ], - env={ - "PATH": bin_path, - "CGO_LDFLAGS_ALLOW": ".*", - **go_env, - }, - ) - - def copy_extensions_to_source(self): - build_py = self.get_finalized_command("build_py") - for ext in self.extensions: - fullname = self.get_ext_fullname(ext.name) - modpath = fullname.split(".") - package = ".".join(modpath[:-1]) - package_dir = build_py.get_package_dir(package) - - src_dir = dest_dir = package_dir - - if src_dir.startswith(PYTHON_CODE_PREFIX): - src_dir = package_dir[len(PYTHON_CODE_PREFIX) :] - src_dir = src_dir.lstrip("/") - - src_dir = os.path.join(self.build_lib, src_dir) - - # copy whole directory - print(f"Copying from {src_dir} to {dest_dir}") - copy_tree(src_dir, dest_dir) - - setup( name=NAME, author=AUTHOR, @@ -531,7 +359,6 @@ def copy_extensions_to_source(self): "mysql": MYSQL_REQUIRED, "ge": GE_REQUIRED, "hbase": HBASE_REQUIRED, - "go": GO_REQUIRED, "docs": DOCS_REQUIRED, "cassandra": CASSANDRA_REQUIRED, }, @@ -556,15 +383,7 @@ def copy_extensions_to_source(self): ], cmdclass={ "build_python_protos": BuildPythonProtosCommand, - "build_go_protos": BuildGoProtosCommand, "build_py": BuildCommand, "develop": DevelopCommand, - "build_ext": build_ext, }, - ext_modules=[ - Extension( - "feast.embedded_go.lib._embedded", - ["github.com/feast-dev/feast/go/embedded"], - ) - ], ) diff --git a/ui/package.json b/ui/package.json index 6c456f7b007..35cb893959e 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "@feast-dev/feast-ui", - "version": "0.29.0", + "version": "0.30.0", "private": false, "files": [ "dist" diff --git a/ui/src/FeastUISansProviders.tsx b/ui/src/FeastUISansProviders.tsx index 8a0e0b94db0..8a12abdc39f 100644 --- a/ui/src/FeastUISansProviders.tsx +++ b/ui/src/FeastUISansProviders.tsx @@ -62,6 +62,8 @@ const FeastUISansProviders = 
({ isCustom: true, } : { projectsListPromise: defaultProjectListPromise(), isCustom: false }; + + const BASE_URL = process.env.PUBLIC_URL || "" return ( @@ -74,9 +76,9 @@ const FeastUISansProviders = ({ > - }> + }> } /> - }> + }> } /> } /> { render: (name: string) => { return ( {name} diff --git a/ui/src/components/FeaturesListDisplay.tsx b/ui/src/components/FeaturesListDisplay.tsx index a40730c6873..2a0628b0f56 100644 --- a/ui/src/components/FeaturesListDisplay.tsx +++ b/ui/src/components/FeaturesListDisplay.tsx @@ -21,8 +21,8 @@ const FeaturesList = ({ field: "name", render: (item: string) => ( {item} diff --git a/ui/src/components/ObjectsCountStats.tsx b/ui/src/components/ObjectsCountStats.tsx index bf1dd2dc9dd..eff3f8a2ca7 100644 --- a/ui/src/components/ObjectsCountStats.tsx +++ b/ui/src/components/ObjectsCountStats.tsx @@ -55,7 +55,7 @@ const ObjectsCountStats = () => { navigate(`/p/${projectName}/feature-service`)} + onClick={() => navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-service`)} description="Feature Services→" title={data.featureServices} reverse @@ -65,7 +65,7 @@ const ObjectsCountStats = () => { navigate(`/p/${projectName}/feature-view`)} + onClick={() => navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-view`)} title={data.featureViews} reverse /> @@ -74,7 +74,7 @@ const ObjectsCountStats = () => { navigate(`/p/${projectName}/entity`)} + onClick={() => navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/entity`)} title={data.entities} reverse /> @@ -83,7 +83,7 @@ const ObjectsCountStats = () => { navigate(`/p/${projectName}/data-source`)} + onClick={() => navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/data-source`)} title={data.dataSources} reverse /> diff --git a/ui/src/components/ProjectSelector.tsx b/ui/src/components/ProjectSelector.tsx index 1bb7ebf85a7..edbcf9d98fe 100644 --- a/ui/src/components/ProjectSelector.tsx +++ b/ui/src/components/ProjectSelector.tsx @@ -22,7 +22,7 @@ const 
ProjectSelector = () => { const basicSelectId = useGeneratedHtmlId({ prefix: "basicSelect" }); const onChange = (e: React.ChangeEvent) => { - navigate(`/p/${e.target.value}`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${e.target.value}`); }; return ( diff --git a/ui/src/index.tsx b/ui/src/index.tsx index e38570929d4..82c709d5d39 100644 --- a/ui/src/index.tsx +++ b/ui/src/index.tsx @@ -97,6 +97,13 @@ ReactDOM.render( reactQueryClient={queryClient} feastUIConfigs={{ tabsRegistry: tabsRegistry, + /* `+` binds tighter than `||`: parenthesize the PUBLIC_URL fallback so the file path is always appended. */ projectListPromise: fetch((process.env.PUBLIC_URL || "") + "/projects-list.json", { + headers: { + "Content-Type": "application/json", + }, + }).then((res) => { + return res.json(); + }) }} /> , diff --git a/ui/src/pages/RootProjectSelectionPage.tsx b/ui/src/pages/RootProjectSelectionPage.tsx index 424e93c85d9..d287342055f 100644 --- a/ui/src/pages/RootProjectSelectionPage.tsx +++ b/ui/src/pages/RootProjectSelectionPage.tsx @@ -22,12 +22,12 @@ const RootProjectSelectionPage = () => { useEffect(() => { if (data && data.default) { // If a default is set, redirect there. - navigate(`/p/${data.default}`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${data.default}`); } if (data && data.projects.length === 1) { // If there is only one project, redirect there. 
- navigate(`/p/${data.projects[0].id}`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${data.projects[0].id}`); } }, [data, navigate]); @@ -39,7 +39,7 @@ const RootProjectSelectionPage = () => { title={`${item.name}`} description={item?.description || ""} onClick={() => { - navigate(`/p/${item.id}`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${item.id}`); }} /> diff --git a/ui/src/pages/Sidebar.tsx b/ui/src/pages/Sidebar.tsx index 9fc1a532f21..2b652fc08dd 100644 --- a/ui/src/pages/Sidebar.tsx +++ b/ui/src/pages/Sidebar.tsx @@ -60,7 +60,7 @@ const SideNav = () => { name: "Home", id: htmlIdGenerator("basicExample")(), onClick: () => { - navigate(`/p/${projectName}/`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/`); }, items: [ { @@ -68,7 +68,7 @@ const SideNav = () => { id: htmlIdGenerator("dataSources")(), icon: , onClick: () => { - navigate(`/p/${projectName}/data-source`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/data-source`); }, isSelected: useMatchSubpath("data-source"), }, @@ -77,7 +77,7 @@ const SideNav = () => { id: htmlIdGenerator("entities")(), icon: , onClick: () => { - navigate(`/p/${projectName}/entity`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/entity`); }, isSelected: useMatchSubpath("entity"), }, @@ -86,7 +86,7 @@ const SideNav = () => { id: htmlIdGenerator("featureView")(), icon: , onClick: () => { - navigate(`/p/${projectName}/feature-view`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-view`); }, isSelected: useMatchSubpath("feature-view"), }, @@ -95,7 +95,7 @@ const SideNav = () => { id: htmlIdGenerator("featureService")(), icon: , onClick: () => { - navigate(`/p/${projectName}/feature-service`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-service`); }, isSelected: useMatchSubpath("feature-service"), }, @@ -104,7 +104,7 @@ const SideNav = () => { id: htmlIdGenerator("savedDatasets")(), icon: , onClick: () => { - 
navigate(`/p/${projectName}/data-set`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/data-set`); }, isSelected: useMatchSubpath("data-set"), }, diff --git a/ui/src/pages/data-sources/DataSourcesListingTable.tsx b/ui/src/pages/data-sources/DataSourcesListingTable.tsx index ad549f991e0..e4f06d6bd0a 100644 --- a/ui/src/pages/data-sources/DataSourcesListingTable.tsx +++ b/ui/src/pages/data-sources/DataSourcesListingTable.tsx @@ -21,8 +21,8 @@ const DatasourcesListingTable = ({ render: (name: string) => { return ( {name} diff --git a/ui/src/pages/entities/EntitiesListingTable.tsx b/ui/src/pages/entities/EntitiesListingTable.tsx index 2a017b18aac..baf4ddb8e47 100644 --- a/ui/src/pages/entities/EntitiesListingTable.tsx +++ b/ui/src/pages/entities/EntitiesListingTable.tsx @@ -21,8 +21,8 @@ const EntitiesListingTable = ({ entities }: EntitiesListingTableProps) => { render: (name: string) => { return ( {name} diff --git a/ui/src/pages/entities/FeatureViewEdgesList.tsx b/ui/src/pages/entities/FeatureViewEdgesList.tsx index 95bc51c56d7..ab1fbfb6df6 100644 --- a/ui/src/pages/entities/FeatureViewEdgesList.tsx +++ b/ui/src/pages/entities/FeatureViewEdgesList.tsx @@ -54,8 +54,8 @@ const FeatureViewEdgesList = ({ fvNames }: FeatureViewEdgesListInterace) => { render: (name: string) => { return ( {name} diff --git a/ui/src/pages/feature-services/FeatureServiceListingTable.tsx b/ui/src/pages/feature-services/FeatureServiceListingTable.tsx index c81edeaeb58..13ffa764092 100644 --- a/ui/src/pages/feature-services/FeatureServiceListingTable.tsx +++ b/ui/src/pages/feature-services/FeatureServiceListingTable.tsx @@ -31,8 +31,8 @@ const FeatureServiceListingTable = ({ render: (name: string) => { return ( {name} diff --git a/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx b/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx index 387320778ff..f43a0cb68fa 100644 --- a/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx +++ 
b/ui/src/pages/feature-services/FeatureServiceOverviewTab.tsx @@ -109,7 +109,7 @@ const FeatureServiceOverviewTab = () => { tags={data.spec.tags} createLink={(key, value) => { return ( - `/p/${projectName}/feature-service?` + + `${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-service?` + encodeSearchQueryString(`${key}:${value}`) ); }} @@ -133,7 +133,7 @@ const FeatureServiceOverviewTab = () => { color="primary" onClick={() => { navigate( - `/p/${projectName}/entity/${entity.name}` + `${process.env.PUBLIC_URL || ""}/p/${projectName}/entity/${entity.name}` ); }} onClickAriaLabel={entity.name} diff --git a/ui/src/pages/feature-views/ConsumingFeatureServicesList.tsx b/ui/src/pages/feature-views/ConsumingFeatureServicesList.tsx index fc98fe8e5ea..44df7b5111a 100644 --- a/ui/src/pages/feature-views/ConsumingFeatureServicesList.tsx +++ b/ui/src/pages/feature-views/ConsumingFeatureServicesList.tsx @@ -19,8 +19,8 @@ const ConsumingFeatureServicesList = ({ render: (name: string) => { return ( {name} diff --git a/ui/src/pages/feature-views/FeatureViewListingTable.tsx b/ui/src/pages/feature-views/FeatureViewListingTable.tsx index e4eccecc975..ff1a31c4162 100644 --- a/ui/src/pages/feature-views/FeatureViewListingTable.tsx +++ b/ui/src/pages/feature-views/FeatureViewListingTable.tsx @@ -32,8 +32,8 @@ const FeatureViewListingTable = ({ render: (name: string, item: genericFVType) => { return ( {name} {(item.type === "ondemand" && ondemand) || (item.type === "stream" && stream)} diff --git a/ui/src/pages/feature-views/RegularFeatureViewOverviewTab.tsx b/ui/src/pages/feature-views/RegularFeatureViewOverviewTab.tsx index 3bbb906e05b..cde4f46d4ed 100644 --- a/ui/src/pages/feature-views/RegularFeatureViewOverviewTab.tsx +++ b/ui/src/pages/feature-views/RegularFeatureViewOverviewTab.tsx @@ -96,7 +96,7 @@ const RegularFeatureViewOverviewTab = ({ { - navigate(`/p/${projectName}/entity/${entity}`); + navigate(`${process.env.PUBLIC_URL || ""}/p/${projectName}/entity/${entity}`); 
}} onClickAriaLabel={entity} data-test-sub="testExample1" @@ -134,7 +134,7 @@ const RegularFeatureViewOverviewTab = ({ tags={data.spec.tags} createLink={(key, value) => { return ( - `/p/${projectName}/feature-view?` + + `${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-view?` + encodeSearchQueryString(`${key}:${value}`) ); }} diff --git a/ui/src/pages/feature-views/StreamFeatureViewOverviewTab.tsx b/ui/src/pages/feature-views/StreamFeatureViewOverviewTab.tsx index 3584cccdd82..99f82d3e74e 100644 --- a/ui/src/pages/feature-views/StreamFeatureViewOverviewTab.tsx +++ b/ui/src/pages/feature-views/StreamFeatureViewOverviewTab.tsx @@ -96,8 +96,8 @@ const StreamFeatureViewOverviewTab = ({ {inputGroup?.name} diff --git a/ui/src/pages/feature-views/components/FeatureViewProjectionDisplayPanel.tsx b/ui/src/pages/feature-views/components/FeatureViewProjectionDisplayPanel.tsx index 156f6db1ec6..f6856471e0a 100644 --- a/ui/src/pages/feature-views/components/FeatureViewProjectionDisplayPanel.tsx +++ b/ui/src/pages/feature-views/components/FeatureViewProjectionDisplayPanel.tsx @@ -31,8 +31,8 @@ const FeatureViewProjectionDisplayPanel = (featureViewProjection: RequestDataDis {featureViewProjection?.featureViewName} diff --git a/ui/src/pages/feature-views/components/RequestDataDisplayPanel.tsx b/ui/src/pages/feature-views/components/RequestDataDisplayPanel.tsx index e8e6854389a..f3adaa28f02 100644 --- a/ui/src/pages/feature-views/components/RequestDataDisplayPanel.tsx +++ b/ui/src/pages/feature-views/components/RequestDataDisplayPanel.tsx @@ -38,8 +38,8 @@ const RequestDataDisplayPanel = ({ {requestDataSource?.name} diff --git a/ui/src/pages/features/FeatureOverviewTab.tsx b/ui/src/pages/features/FeatureOverviewTab.tsx index e339c30fc97..cc7879b0383 100644 --- a/ui/src/pages/features/FeatureOverviewTab.tsx +++ b/ui/src/pages/features/FeatureOverviewTab.tsx @@ -63,8 +63,8 @@ const FeatureOverviewTab = () => { FeatureView + href={`${process.env.PUBLIC_URL || 
""}/p/${projectName}/feature-view/${FeatureViewName}`} + to={`${process.env.PUBLIC_URL || ""}/p/${projectName}/feature-view/${FeatureViewName}`}> {FeatureViewName} diff --git a/ui/src/pages/saved-data-sets/DatasetsListingTable.tsx b/ui/src/pages/saved-data-sets/DatasetsListingTable.tsx index a1a97084171..af794a35f98 100644 --- a/ui/src/pages/saved-data-sets/DatasetsListingTable.tsx +++ b/ui/src/pages/saved-data-sets/DatasetsListingTable.tsx @@ -20,8 +20,8 @@ const DatasetsListingTable = ({ datasets }: DatasetsListingTableProps) => { render: (name: string) => { return ( {name} diff --git a/ui/yarn.lock b/ui/yarn.lock index 1fd7c9c888d..72c8835ce2c 100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -2217,10 +2217,10 @@ "@types/d3-transition" "*" "@types/d3-zoom" "*" -"@types/eslint-scope@^3.7.0": - version "3.7.3" - resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.3.tgz#125b88504b61e3c8bc6f870882003253005c3224" - integrity sha512-PB3ldyrcnAicT35TWPs5IcwKD8S333HMaa2VVv4+wdvebJkjWuW/xESoB8IwRcog8HYVYamb1g/R31Qv5Bx03g== +"@types/eslint-scope@^3.7.3": + version "3.7.4" + resolved "https://registry.yarnpkg.com/@types/eslint-scope/-/eslint-scope-3.7.4.tgz#37fc1223f0786c39627068a12e94d6e6fc61de16" + integrity sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA== dependencies: "@types/eslint" "*" "@types/estree" "*" @@ -2241,7 +2241,7 @@ "@types/estree" "*" "@types/json-schema" "*" -"@types/estree@*", "@types/estree@^0.0.50": +"@types/estree@*": version "0.0.50" resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.50.tgz#1e0caa9364d3fccd2931c3ed96fdbeaa5d4cca83" integrity sha512-C6N5s2ZFtuZRj54k2/zyRhNDjJwwcViAM3Nbm8zjBpbqAdZ00mr0CFxvSKeO8Y/e03WVFLpQMdHYVfUd6SB+Hw== @@ -2251,6 +2251,11 @@ resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.39.tgz#e177e699ee1b8c22d23174caaa7422644389509f" integrity 
sha512-EYNwp3bU+98cpU4lAWYYL7Zz+2gryWH1qbdDTidVd6hkiR6weksdbMadyXKXNPEkQFhXM+hVO9ZygomHXp+AIw== +"@types/estree@^0.0.51": + version "0.0.51" + resolved "https://registry.yarnpkg.com/@types/estree/-/estree-0.0.51.tgz#cfd70924a25a3fd32b218e5e420e6897e1ac4f40" + integrity sha512-CuPgU6f3eT/XgKKPqKd/gLZV1Xmvf1a2R5POBOGQa6uv82xpls89HU5zKeVoyR8XzHd1RGNOlQlvUe3CFkjWNQ== + "@types/express-serve-static-core@*", "@types/express-serve-static-core@^4.17.18": version "4.17.28" resolved "https://registry.yarnpkg.com/@types/express-serve-static-core/-/express-serve-static-core-4.17.28.tgz#c47def9f34ec81dc6328d0b1b5303d1ec98d86b8" @@ -2914,11 +2919,16 @@ acorn@^7.0.0, acorn@^7.1.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-7.4.1.tgz#feaed255973d2e77555b83dbc08851a6c63520fa" integrity sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A== -acorn@^8.2.4, acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.0: +acorn@^8.2.4, acorn@^8.5.0, acorn@^8.7.0: version "8.7.1" resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.7.1.tgz#0197122c843d1bf6d0a5e83220a788f278f63c30" integrity sha512-Xx54uLJQZ19lKygFXOWsscKUbsBZW0CPykPhVQdhIeIwrbPmJzqeASDInc8nKBnp/JT6igTs82qPXz069H8I/A== +acorn@^8.7.1: + version "8.8.2" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.8.2.tgz#1b2f25db02af965399b9776b0c2c391276d37c4a" + integrity sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw== + acorn@^8.8.0: version "8.8.0" resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.8.0.tgz#88c0187620435c7f6015803f5539dae05a9dbea8" @@ -4823,10 +4833,10 @@ encodeurl@~1.0.2: resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w== -enhanced-resolve@^5.8.3: - version "5.8.3" - resolved 
"https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.8.3.tgz#6d552d465cce0423f5b3d718511ea53826a7b2f0" - integrity sha512-EGAbGvH7j7Xt2nc0E7D99La1OiEs8LnyimkRgwExpUMScN6O+3x9tIWs7PLQZVNx4YD+00skHXPXi1yQHpAmZA== +enhanced-resolve@^5.10.0: + version "5.12.0" + resolved "https://registry.yarnpkg.com/enhanced-resolve/-/enhanced-resolve-5.12.0.tgz#300e1c90228f5b570c4d35babf263f6da7155634" + integrity sha512-QHTXI/sZQmko1cbDoNAa3mJ5qhWUUNAq3vR0/YiD379fWQrcfuoX1+HW2S0MTt7XmoPLapdaDKUtelUSPic7hQ== dependencies: graceful-fs "^4.2.4" tapable "^2.2.0" @@ -7095,12 +7105,7 @@ jsesc@~0.5.0: resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-0.5.0.tgz#e7dee66e35d6fc16f710fe91d5cf69f70f08911d" integrity sha1-597mbjXW/Bb3EP6R1c9p9w8IkR0= -json-parse-better-errors@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/json-parse-better-errors/-/json-parse-better-errors-1.0.2.tgz#bb867cfb3450e69107c131d1c514bab3dc8bcaa9" - integrity sha512-mrqyZKfX5EhL7hvqcV6WG1yYjnjeuYDzDhhcAAUrq8Po85NBQBJP+ZDUT75qZQ98IkUoBqdkExkukOU7Ts2wrw== - -json-parse-even-better-errors@^2.3.0: +json-parse-even-better-errors@^2.3.0, json-parse-even-better-errors@^2.3.1: version "2.3.1" resolved "https://registry.yarnpkg.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz#7c47805a94319928e05777405dc12e1f7a4ee02d" integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== @@ -10863,10 +10868,10 @@ walker@^1.0.7: dependencies: makeerror "1.0.12" -watchpack@^2.3.1: - version "2.3.1" - resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.3.1.tgz#4200d9447b401156eeca7767ee610f8809bc9d25" - integrity sha512-x0t0JuydIo8qCNctdDrn1OzH/qDzk2+rdCOC3YzumZ42fiMqmQ7T3xQurykYMhYfHaPHTp4ZxAx2NfUo1K6QaA== +watchpack@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.4.0.tgz#fa33032374962c78113f93c7f2fb4c54c9862a5d" + integrity 
sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg== dependencies: glob-to-regexp "^0.4.1" graceful-fs "^4.1.2" @@ -10986,33 +10991,33 @@ webpack-sources@^3.2.3: integrity sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w== webpack@^5.64.4: - version "5.67.0" - resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.67.0.tgz#cb43ca2aad5f7cc81c4cd36b626e6b819805dbfd" - integrity sha512-LjFbfMh89xBDpUMgA1W9Ur6Rn/gnr2Cq1jjHFPo4v6a79/ypznSYbAyPgGhwsxBtMIaEmDD1oJoA7BEYw/Fbrw== + version "5.76.1" + resolved "https://registry.yarnpkg.com/webpack/-/webpack-5.76.1.tgz#7773de017e988bccb0f13c7d75ec245f377d295c" + integrity sha512-4+YIK4Abzv8172/SGqObnUjaIHjLEuUasz9EwQj/9xmPPkYJy2Mh03Q/lJfSD3YLzbxy5FeTq5Uw0323Oh6SJQ== dependencies: - "@types/eslint-scope" "^3.7.0" - "@types/estree" "^0.0.50" + "@types/eslint-scope" "^3.7.3" + "@types/estree" "^0.0.51" "@webassemblyjs/ast" "1.11.1" "@webassemblyjs/wasm-edit" "1.11.1" "@webassemblyjs/wasm-parser" "1.11.1" - acorn "^8.4.1" + acorn "^8.7.1" acorn-import-assertions "^1.7.6" browserslist "^4.14.5" chrome-trace-event "^1.0.2" - enhanced-resolve "^5.8.3" + enhanced-resolve "^5.10.0" es-module-lexer "^0.9.0" eslint-scope "5.1.1" events "^3.2.0" glob-to-regexp "^0.4.1" graceful-fs "^4.2.9" - json-parse-better-errors "^1.0.2" + json-parse-even-better-errors "^2.3.1" loader-runner "^4.2.0" mime-types "^2.1.27" neo-async "^2.6.2" schema-utils "^3.1.0" tapable "^2.1.1" terser-webpack-plugin "^5.1.3" - watchpack "^2.3.1" + watchpack "^2.4.0" webpack-sources "^3.2.3" websocket-driver@>=0.5.1, websocket-driver@^0.7.4: