chore: Remove unnecessary test and clean up Go feature server test construction (feast-dev#2548)

felixwang9817 · web-flow · commit e240e89dd6c6 · 2022-04-14T16:18:21.000-07:00
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
diff --git a/Makefile b/Makefile
@@ -81,7 +81,7 @@ test-python-universal:
 	FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration --universal sdk/python/tests
 
 test-python-go-server: compile-go-lib
-	FEAST_USAGE=False IS_TEST=True python -m pytest -n 8 --integration --goserver sdk/python/tests
+	FEAST_USAGE=False IS_TEST=True FEAST_GO_FEATURE_RETRIEVAL=True pytest --integration --goserver sdk/python/tests
 
 format-python:
 	# Sort
diff --git a/go.mod b/go.mod
@@ -25,13 +25,16 @@ require (
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/goccy/go-json v0.9.6 // indirect
 	github.com/golang/snappy v0.0.4 // indirect
+	github.com/gonuts/commander v0.1.0 // indirect
+	github.com/gonuts/flag v0.1.0 // indirect
 	github.com/google/flatbuffers v2.0.6+incompatible // indirect
 	github.com/klauspost/asmfmt v1.3.2 // indirect
 	github.com/klauspost/compress v1.15.1 // indirect
 	github.com/klauspost/cpuid/v2 v2.0.12 // indirect
 	github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
 	github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
 	github.com/pierrec/lz4/v4 v4.1.14 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/zeebo/xxh3 v1.0.2 // indirect
 	golang.org/x/exp v0.0.0-20220407100705-7b9b53b0aca4 // indirect
diff --git a/go.sum b/go.sum
@@ -146,7 +146,9 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu
 github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
 github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gonuts/commander v0.1.0 h1:EcDTiVw9oAVORFjQOEOuHQqcl6OXMyTgELocTq6zJ0I=
 github.com/gonuts/commander v0.1.0/go.mod h1:qkb5mSlcWodYgo7vs8ulLnXhfinhZsZcm6+H/z1JjgY=
+github.com/gonuts/flag v0.1.0 h1:fqMv/MZ+oNGu0i9gp0/IQ/ZaPIDoAZBOBaJoV7viCWM=
 github.com/gonuts/flag v0.1.0/go.mod h1:ZTmTGtrSPejTo/SRNhCqwLTmiAgyBdCkLYhHrAoBdz4=
 github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
 github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
@@ -304,6 +306,7 @@ github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE
 github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/sdk/python/tests/conftest.py b/sdk/python/tests/conftest.py
@@ -33,7 +33,6 @@
 )
 from tests.integration.feature_repos.repo_configuration import (
     FULL_REPO_CONFIGS,
-    GO_REPO_CONFIGS,
     REDIS_CLUSTER_CONFIG,
     REDIS_CONFIG,
     Environment,
@@ -60,9 +59,6 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "goserver: mark tests that use the go feature server"
     )
-    config.addinivalue_line(
-        "markers", "goserverlifecycle: mark tests that use the go feature server"
-    )
 
 
 def pytest_addoption(parser):
@@ -233,19 +229,6 @@ def cleanup():
     return e
 
 
-@pytest.fixture(
-    params=GO_REPO_CONFIGS, scope="session", ids=[str(c) for c in GO_REPO_CONFIGS]
-)
-def go_environment(request, worker_id: str):
-    e = construct_test_environment(request.param, worker_id=worker_id)
-
-    def cleanup():
-        e.feature_store.teardown()
-
-    request.addfinalizer(cleanup)
-    return e
-
-
 @pytest.fixture(
     params=[REDIS_CONFIG, REDIS_CLUSTER_CONFIG],
     scope="session",
@@ -283,18 +266,6 @@ def cleanup():
     return construct_universal_test_data(local_redis_environment)
 
 
-@pytest.fixture(scope="session")
-def go_data_sources(request, go_environment):
-    def cleanup():
-        # logger.info("Running cleanup in %s, Request: %s", worker_id, request.param)
-        go_environment.data_source_creator.teardown()
-        if go_environment.online_store_creator:
-            go_environment.online_store_creator.teardown()
-
-    request.addfinalizer(cleanup)
-    return construct_universal_test_data(go_environment)
-
-
 @pytest.fixture(scope="session")
 def e2e_data_sources(environment: Environment, request):
     df = create_dataset()
diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py
@@ -126,6 +126,12 @@
             # ),
         ]
     )
+if os.getenv("FEAST_GO_FEATURE_RETRIEVAL", "False") == "True":
+    DEFAULT_FULL_REPO_CONFIGS = [
+        IntegrationTestRepoConfig(
+            online_store=REDIS_CONFIG, go_feature_retrieval=True,
+        ),
+    ]
 full_repo_configs_module = os.environ.get(FULL_REPO_CONFIGS_MODULE_ENV_NAME)
 if full_repo_configs_module is not None:
     try:
@@ -153,11 +159,6 @@
             c.online_store_creator = replacements[c.online_store]
 
 
-GO_REPO_CONFIGS = [
-    IntegrationTestRepoConfig(online_store=REDIS_CONFIG, go_feature_retrieval=True,),
-]
-
-
 @dataclass
 class UniversalEntities:
     customer_vals: List[Any]
diff --git a/sdk/python/tests/integration/online_store/test_universal_online.py b/sdk/python/tests/integration/online_store/test_universal_online.py
@@ -390,6 +390,7 @@ def test_online_retrieval_with_event_timestamps(
 
 @pytest.mark.integration
 @pytest.mark.universal
+@pytest.mark.goserver
 @pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
 def test_online_retrieval(environment, universal_data_sources, full_feature_names):
     fs = environment.feature_store
@@ -695,206 +696,6 @@ def eventually_apply() -> Tuple[None, bool]:
     assert all(v is None for v in online_features["value"])
 
 
-@pytest.mark.integration
-@pytest.mark.goserver
-@pytest.mark.parametrize("full_feature_names", [True, False], ids=lambda v: str(v))
-def test_online_retrieval_with_go_server(
-    go_environment, go_data_sources, full_feature_names
-):
-    fs = go_environment.feature_store
-    entities, datasets, data_sources = go_data_sources
-    feature_views = construct_universal_feature_views(data_sources, with_odfv=False)
-
-    feature_service_entity_mapping = FeatureService(
-        name="entity_mapping",
-        features=[
-            feature_views.location.with_name("origin").with_join_key_map(
-                {"location_id": "origin_id"}
-            ),
-            feature_views.location.with_name("destination").with_join_key_map(
-                {"location_id": "destination_id"}
-            ),
-        ],
-    )
-
-    feast_objects = []
-    feast_objects.extend(
-        [feature_view for feature_view in feature_views.values() if feature_view]
-    )
-    feast_objects.extend(
-        [driver(), customer(), location(), feature_service_entity_mapping]
-    )
-    fs.apply(feast_objects)
-    fs.materialize(
-        go_environment.start_date - timedelta(days=1),
-        go_environment.end_date + timedelta(days=1),
-    )
-
-    entity_sample = datasets.orders_df.sample(10)[
-        ["customer_id", "driver_id", "order_id", "event_timestamp"]
-    ]
-    orders_df = datasets.orders_df[
-        (
-            datasets.orders_df["customer_id"].isin(entity_sample["customer_id"])
-            & datasets.orders_df["driver_id"].isin(entity_sample["driver_id"])
-        )
-    ]
-
-    sample_drivers = entity_sample["driver_id"]
-    drivers_df = datasets.driver_df[
-        datasets.driver_df["driver_id"].isin(sample_drivers)
-    ]
-
-    sample_customers = entity_sample["customer_id"]
-    customers_df = datasets.customer_df[
-        datasets.customer_df["customer_id"].isin(sample_customers)
-    ]
-
-    location_pairs = np.array(list(itertools.permutations(entities.location_vals, 2)))
-    sample_location_pairs = location_pairs[
-        np.random.choice(len(location_pairs), 10)
-    ].T.tolist()
-    origins_df = datasets.location_df[
-        datasets.location_df["location_id"].isin(sample_location_pairs[0])
-    ]
-    destinations_df = datasets.location_df[
-        datasets.location_df["location_id"].isin(sample_location_pairs[1])
-    ]
-
-    global_df = datasets.global_df
-
-    entity_rows = [
-        {"driver_id": d, "customer_id": c}
-        for (d, c) in zip(sample_drivers, sample_customers)
-    ]
-
-    # All returned features are numbers
-    feature_refs = [
-        "driver_stats:conv_rate",
-        "driver_stats:avg_daily_trips",
-        "customer_profile:current_balance",
-        "customer_profile:avg_passenger_count",
-        "customer_profile:lifetime_trip_count",
-        "order:order_is_success",
-        "global_stats:num_rides",
-        "global_stats:avg_ride_length",
-    ]
-    unprefixed_feature_refs = [f.rsplit(":", 1)[-1] for f in feature_refs if ":" in f]
-    # Remove the on demand feature view output features, since they're not present in the source dataframe
-
-    online_features_dict = get_online_features_dict(
-        environment=go_environment,
-        features=feature_refs,
-        entity_rows=entity_rows,
-        full_feature_names=full_feature_names,
-    )
-
-    keys = online_features_dict.keys()
-    assert (
-        len(keys) == len(feature_refs) + 2
-    )  # Add two for the driver id and the customer id entity keys
-    for feature in feature_refs:
-
-        if full_feature_names:
-            assert feature.replace(":", "__") in keys
-        else:
-            assert feature.rsplit(":", 1)[-1] in keys
-            assert (
-                "driver_stats" not in keys
-                and "customer_profile" not in keys
-                and "order" not in keys
-                and "global_stats" not in keys
-            )
-
-    tc = unittest.TestCase()
-    for i, entity_row in enumerate(entity_rows):
-        df_features = get_latest_feature_values_from_dataframes(
-            driver_df=drivers_df,
-            customer_df=customers_df,
-            orders_df=orders_df,
-            global_df=global_df,
-            entity_row=entity_row,
-        )
-
-        assert df_features["customer_id"] == online_features_dict["customer_id"][i]
-        assert df_features["driver_id"] == online_features_dict["driver_id"][i]
-
-        # All returned features are numbers
-        for unprefixed_feature_ref in unprefixed_feature_refs:
-            tc.assertAlmostEqual(
-                df_features[unprefixed_feature_ref],
-                online_features_dict[
-                    response_feature_name(
-                        unprefixed_feature_ref, feature_refs, full_feature_names
-                    )
-                ][i],
-                delta=0.0001,
-            )
-
-    # Check what happens for missing values
-    missing_responses_dict = get_online_features_dict(
-        environment=go_environment,
-        features=feature_refs,
-        entity_rows=[{"driver_id": 0, "customer_id": 0}],
-        full_feature_names=full_feature_names,
-    )
-    assert missing_responses_dict is not None
-    for unprefixed_feature_ref in unprefixed_feature_refs:
-        if unprefixed_feature_ref not in {"num_rides", "avg_ride_length"}:
-            tc.assertIsNone(
-                missing_responses_dict[
-                    response_feature_name(
-                        unprefixed_feature_ref, feature_refs, full_feature_names
-                    )
-                ][0]
-            )
-
-    entity_rows = [
-        {"origin_id": origin, "destination_id": destination}
-        for (_driver, _customer, origin, destination) in zip(
-            sample_drivers, sample_customers, *sample_location_pairs
-        )
-    ]
-    assert_feature_service_entity_mapping_correctness(
-        go_environment,
-        feature_service_entity_mapping,
-        entity_rows,
-        full_feature_names,
-        origins_df,
-        destinations_df,
-    )
-
-
-def setup_feature_store(environment, go_data_sources):
-    fs = environment.feature_store
-    fs.kill_go_server()
-    entities, datasets, data_sources = go_data_sources
-    driver_stats_fv = construct_universal_feature_views(
-        data_sources, with_odfv=False
-    ).driver
-    driver_entities = entities.driver_vals
-    df = pd.DataFrame(
-        {
-            "ts_1": [environment.end_date] * len(driver_entities),
-            "created_ts": [environment.end_date] * len(driver_entities),
-            "driver_id": driver_entities,
-            "value": np.random.random(size=len(driver_entities)),
-        }
-    )
-    ds = environment.data_source_creator.create_data_source(
-        df, destination_name="simple_driver_dataset"
-    )
-    simple_driver_fv = driver_feature_view(
-        data_source=ds, name="test_universal_online_simple_driver"
-    )
-    fs.apply([driver(), simple_driver_fv, driver_stats_fv])
-    fs.materialize(
-        environment.start_date - timedelta(days=1),
-        environment.end_date + timedelta(days=1),
-    )
-    return driver_entities, fs, simple_driver_fv, driver_stats_fv, df
-
-
 def response_feature_name(
     feature: str, feature_refs: List[str], full_feature_names: bool
 ) -> str: