fix: Fix incorrect on demand feature view diffing and improve Java tests (#3074)

adchia · adchia · commit dd46d451c0d5 · 2022-08-15T10:57:58.000-04:00
* fix: Fix ODFV bug

Signed-off-by: Danny Chiao <danny@tecton.ai>
diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml
@@ -66,6 +66,52 @@ jobs:
           java-version: '11'
           java-package: jdk
           architecture: x64
+      - name: Setup Python (to call feast apply)
+        uses: actions/setup-python@v2
+        id: setup-python
+        with:
+          python-version: 3.8
+          architecture: x64
+      - name: Setup Go
+        id: setup-go
+        uses: actions/setup-go@v2
+        with:
+          go-version: 1.18.0
+      - name: Upgrade pip version
+        run: |
+          pip install --upgrade "pip>=21.3.1,<22.1"
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "::set-output name=dir::$(pip cache dir)"
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ${{ steps.pip-cache.outputs.dir }}
+            /opt/hostedtoolcache/Python
+            /Users/runner/hostedtoolcache/Python
+          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+      - name: Install pip-tools
+        run: pip install pip-tools
+      - name: Install apache-arrow on ubuntu
+        run: |
+          sudo apt update
+          sudo apt install -y -V ca-certificates lsb-release wget
+          wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt update
+          sudo apt install -y -V libarrow-dev
+      - name: Install Python dependencies
+        run: make install-python-ci-dependencies
+      - uses: actions/cache@v2
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-it-maven-
       - uses: actions/cache@v2
         with:
           path: ~/.m2/repository
@@ -91,10 +137,46 @@ jobs:
           java-version: '11'
           java-package: jdk
           architecture: x64
-      - uses: actions/setup-python@v2
+      - name: Setup Python (to call feast apply)
+        uses: actions/setup-python@v2
+        id: setup-python
+        with:
+          python-version: 3.8
+          architecture: x64
+      - name: Setup Go
+        id: setup-go
+        uses: actions/setup-go@v2
         with:
-          python-version: '3.8'
-          architecture: 'x64'
+          go-version: 1.18.0
+      - name: Upgrade pip version
+        run: |
+          pip install --upgrade "pip>=21.3.1,<22.1"
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "::set-output name=dir::$(pip cache dir)"
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ${{ steps.pip-cache.outputs.dir }}
+            /opt/hostedtoolcache/Python
+            /Users/runner/hostedtoolcache/Python
+          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+      - name: Install pip-tools
+        run: pip install pip-tools
+      - name: Install apache-arrow on ubuntu
+        run: |
+          sudo apt update
+          sudo apt install -y -V ca-certificates lsb-release wget
+          wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt update
+          sudo apt install -y -V libarrow-dev
+      - name: Install Python dependencies
+        run: make install-python-ci-dependencies
       - uses: actions/cache@v2
         with:
           path: ~/.m2/repository
diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml
@@ -38,6 +38,52 @@ jobs:
           java-version: '11'
           java-package: jdk
           architecture: x64
+      - name: Setup Python (to call feast apply)
+        uses: actions/setup-python@v2
+        id: setup-python
+        with:
+          python-version: 3.8
+          architecture: x64
+      - name: Setup Go
+        id: setup-go
+        uses: actions/setup-go@v2
+        with:
+          go-version: 1.18.0
+      - name: Upgrade pip version
+        run: |
+          pip install --upgrade "pip>=21.3.1,<22.1"
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "::set-output name=dir::$(pip cache dir)"
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ${{ steps.pip-cache.outputs.dir }}
+            /opt/hostedtoolcache/Python
+            /Users/runner/hostedtoolcache/Python
+          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+      - name: Install pip-tools
+        run: pip install pip-tools
+      - name: Install apache-arrow on ubuntu
+        run: |
+          sudo apt update
+          sudo apt install -y -V ca-certificates lsb-release wget
+          wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt update
+          sudo apt install -y -V libarrow-dev
+      - name: Install Python dependencies
+        run: make install-python-ci-dependencies
+      - uses: actions/cache@v2
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-it-maven-
       - uses: actions/cache@v2
         with:
           path: ~/.m2/repository
@@ -98,6 +144,46 @@ jobs:
           aws-region: us-west-2
       - name: Use AWS CLI
         run: aws sts get-caller-identity
+      - name: Setup Python (to call feast apply)
+        uses: actions/setup-python@v2
+        id: setup-python
+        with:
+          python-version: 3.8
+          architecture: x64
+      - name: Setup Go
+        id: setup-go
+        uses: actions/setup-go@v2
+        with:
+          go-version: 1.18.0
+      - name: Upgrade pip version
+        run: |
+          pip install --upgrade "pip>=21.3.1,<22.1"
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "::set-output name=dir::$(pip cache dir)"
+      - name: pip cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ${{ steps.pip-cache.outputs.dir }}
+            /opt/hostedtoolcache/Python
+            /Users/runner/hostedtoolcache/Python
+          key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }}
+          restore-keys: |
+            ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-
+      - name: Install pip-tools
+        run: pip install pip-tools
+      - name: Install apache-arrow on ubuntu
+        run: |
+          sudo apt update
+          sudo apt install -y -V ca-certificates lsb-release wget
+          wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt update
+          sudo apt install -y -V libarrow-dev
+      - name: Install Python dependencies
+        run: make install-python-ci-dependencies
       - name: Run integration tests
         run:  make test-java-integration
       - name: Save report
diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md
@@ -36,6 +36,7 @@ mvn spotless:apply
 #### Project Makefile
 The Project Makefile provides useful shorthands for common development tasks:
 
+> Note: These commands require a local installation of `feast` (Python).
 
 Run all Unit tests:
 ```
diff --git a/java/serving/README.md b/java/serving/README.md
@@ -136,4 +136,6 @@ Unit & Integration Tests can be used to verify functionality:
 mvn test -pl serving --also-make
 # run integration tests
 mvn verify -pl serving --also-make
+# run integration tests with debugger
+mvn -Dmaven.failsafe.debug verify -pl serving --also-make
 ```
diff --git a/java/serving/pom.xml b/java/serving/pom.xml
@@ -82,6 +82,28 @@
         </configuration>
       </plugin>
 
+      <!-- Call feast apply before running integration tests -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>exec-maven-plugin</artifactId>
+        <version>1.6.0</version>
+        <executions>
+          <execution>
+            <configuration>
+              <executable>python</executable>
+              <workingDirectory>src/test/resources/docker-compose/feast10/</workingDirectory>
+              <arguments>
+                <argument>setup_it.py</argument>
+              </arguments>
+            </configuration>
+            <id>feast_test_apply</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>exec</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 
diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py
@@ -73,17 +73,17 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
 
 entity = Entity(name="entity", value_type=ValueType.STRING,)
 
-benchmark_feature_views = [
-    FeatureView(
+benchmark_feature_views = []
+for i in range(25):
+    fv = FeatureView(
         name=f"feature_view_{i}",
         entities=[entity],
         ttl=Duration(seconds=86400),
         schema=[Field(name=f"feature_{10 * i + j}", dtype=Int64) for j in range(10)],
         online=True,
         source=generated_data_source,
     )
-    for i in range(25)
-]
+    benchmark_feature_views.append(fv)
 
 benchmark_feature_service = FeatureService(
     name=f"benchmark_feature_service", features=benchmark_feature_views,
diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db
diff --git a/java/serving/src/test/resources/docker-compose/feast10/setup_it.py b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py
@@ -0,0 +1,86 @@
+from pathlib import Path
+from feast.repo_config import load_repo_config
+from datetime import datetime, timedelta
+
+import numpy as np
+import pandas as pd
+
+from definitions import (
+    benchmark_feature_service,
+    benchmark_feature_views,
+    driver,
+    driver_hourly_stats_view,
+    entity,
+    transformed_conv_rate,
+)
+
+from feast import FeatureStore
+
+
+def setup_data():
+    start = datetime.now() - timedelta(days=10)
+
+    df = pd.DataFrame()
+    df["driver_id"] = np.arange(1000, 1010)
+    df["created"] = datetime.now()
+    df["conv_rate"] = np.arange(0, 1, 0.1)
+    df["acc_rate"] = np.arange(0.5, 1, 0.05)
+    df["avg_daily_trips"] = np.arange(0, 1000, 100)
+
+    # some of the rows are more than 7 days old, to test the OUTSIDE_MAX_AGE status
+    df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(
+        lambda days: timedelta(days=days)
+    )
+
+    # Store data in parquet files. Parquet is convenient for local development mode. For
+    # production, you can use your favorite DWH, such as BigQuery. See Feast documentation
+    # for more info.
+    df.to_parquet("driver_stats.parquet")
+
+    # For Benchmarks
+    # Please read more in Feast RFC-031
+    # (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit)
+    # about this benchmark setup
+    def generate_data(
+        num_rows: int, num_features: int, destination: str
+    ) -> pd.DataFrame:
+        features = [f"feature_{i}" for i in range(num_features)]
+        columns = ["entity", "event_timestamp"] + features
+        df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns)
+        df["event_timestamp"] = datetime.utcnow()
+        for column in features:
+            df[column] = np.random.randint(1, num_rows, num_rows)
+
+        df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
+            pd.StringDtype()
+        )
+
+        df.to_parquet(destination)
+
+    generate_data(10**3, 250, "benchmark_data.parquet")
+
+
+def main():
+    print("Running setup_it.py")
+
+    setup_data()
+    existing_repo_config = load_repo_config(Path("."))
+
+    # Update to default online store since otherwise, relies on Dockerized Redis service
+    fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}}))
+    fs.apply(
+        [
+            driver_hourly_stats_view,
+            transformed_conv_rate,
+            driver,
+            entity,
+            benchmark_feature_service,
+            *benchmark_feature_views,
+        ]
+    )
+
+    print("setup_it finished")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py
@@ -144,8 +144,8 @@ def diff_registry_objects(
                 continue
             elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name):
                 if _field.name == "user_defined_function":
-                    current_spec = cast(OnDemandFeatureViewSpec, current_proto)
-                    new_spec = cast(OnDemandFeatureViewSpec, new_proto)
+                    current_spec = cast(OnDemandFeatureViewSpec, current_spec)
+                    new_spec = cast(OnDemandFeatureViewSpec, new_spec)
                     current_udf = current_spec.user_defined_function
                     new_udf = new_spec.user_defined_function
                     for _udf_field in current_udf.DESCRIPTOR.fields:
diff --git a/sdk/python/feast/feature_logging.py b/sdk/python/feast/feature_logging.py
@@ -34,12 +34,12 @@ class LoggingSource:
 
     @abc.abstractmethod
     def get_schema(self, registry: "BaseRegistry") -> pa.Schema:
-        """ Generate schema for logs destination. """
+        """Generate schema for logs destination."""
         raise NotImplementedError
 
     @abc.abstractmethod
     def get_log_timestamp_column(self) -> str:
-        """ Return timestamp column that must exist in generated schema. """
+        """Return timestamp column that must exist in generated schema."""
         raise NotImplementedError
 
 
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
@@ -2349,10 +2349,10 @@ def get_validation_reference(
         self, name: str, allow_cache: bool = False
     ) -> ValidationReference:
         """
-            Retrieves a validation reference.
+        Retrieves a validation reference.
 
-            Raises:
-                ValidationReferenceNotFoundException: The validation reference could not be found.
+        Raises:
+            ValidationReferenceNotFoundException: The validation reference could not be found.
         """
         ref = self._registry.get_validation_reference(
             name, project=self.project, allow_cache=allow_cache
diff --git a/sdk/python/feast/registry.py b/sdk/python/feast/registry.py
diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py