Skip to content

Commit 97fbd3e

Browse files
authored
Respect full_feature_names for ODFVs (#2144)
* Respect `full_feature_names` for ODFVs Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Add test for ODFV `full_feature_names` Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Correct feature names in tests Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>
1 parent 5ce0fdb commit 97fbd3e

File tree

5 files changed

+52
-26
lines changed

5 files changed

+52
-26
lines changed

sdk/python/feast/feature_store.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1347,7 +1347,11 @@ def _augment_response_with_on_demand_transforms(
13471347
for feature_ref in feature_refs:
13481348
view_name, feature_name = feature_ref.split(":")
13491349
if view_name in requested_odfv_feature_names:
1350-
odfv_feature_refs[view_name].append(feature_name)
1350+
odfv_feature_refs[view_name].append(
1351+
f"{requested_odfv_map[view_name].projection.name_to_use()}__{feature_name}"
1352+
if full_feature_names
1353+
else feature_name
1354+
)
13511355

13521356
initial_response = OnlineResponse(
13531357
GetOnlineFeaturesResponse(field_values=result_rows)
@@ -1359,7 +1363,7 @@ def _augment_response_with_on_demand_transforms(
13591363
for odfv_name, _feature_refs in odfv_feature_refs.items():
13601364
odfv = requested_odfv_map[odfv_name]
13611365
transformed_features_df = odfv.get_transformed_features_df(
1362-
initial_response_df
1366+
initial_response_df, full_feature_names,
13631367
)
13641368
for row_idx in range(len(result_rows)):
13651369
result_row = result_rows[row_idx]
@@ -1369,18 +1373,13 @@ def _augment_response_with_on_demand_transforms(
13691373
]
13701374

13711375
for transformed_feature in selected_subset:
1372-
transformed_feature_name = (
1373-
f"{odfv.projection.name_to_use()}__{transformed_feature}"
1374-
if full_feature_names
1375-
else transformed_feature
1376-
)
1377-
odfv_result_names.add(transformed_feature_name)
1376+
odfv_result_names.add(transformed_feature)
13781377
proto_value = python_value_to_proto_value(
13791378
transformed_features_df[transformed_feature].values[row_idx]
13801379
)
1381-
result_row.fields[transformed_feature_name].CopyFrom(proto_value)
1380+
result_row.fields[transformed_feature].CopyFrom(proto_value)
13821381
result_row.statuses[
1383-
transformed_feature_name
1382+
transformed_feature
13841383
] = GetOnlineFeaturesResponse.FieldStatus.PRESENT
13851384

13861385
# Drop values that aren't needed

sdk/python/feast/infra/offline_stores/offline_store.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def to_df(self) -> pd.DataFrame:
4747
# TODO(adchia): Fix requirement to specify dependent feature views in feature_refs
4848
for odfv in self.on_demand_feature_views:
4949
features_df = features_df.join(
50-
odfv.get_transformed_features_df(features_df)
50+
odfv.get_transformed_features_df(features_df, self.full_feature_names,)
5151
)
5252
return features_df
5353

@@ -69,7 +69,7 @@ def to_arrow(self) -> pyarrow.Table:
6969
features_df = self._to_df_internal()
7070
for odfv in self.on_demand_feature_views:
7171
features_df = features_df.join(
72-
odfv.get_transformed_features_df(features_df)
72+
odfv.get_transformed_features_df(features_df, self.full_feature_names,)
7373
)
7474
return pyarrow.Table.from_pandas(features_df)
7575

sdk/python/feast/on_demand_feature_view.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def get_request_data_schema(self) -> Dict[str, ValueType]:
164164
return schema
165165

166166
def get_transformed_features_df(
167-
self, df_with_features: pd.DataFrame
167+
self, df_with_features: pd.DataFrame, full_feature_names: bool = False,
168168
) -> pd.DataFrame:
169169
# Apply on demand transformations
170170
columns_to_cleanup = []
@@ -183,9 +183,23 @@ def get_transformed_features_df(
183183
# Compute transformed values and apply to each result row
184184
df_with_transformed_features = self.udf.__call__(df_with_features)
185185

186+
# Work out whether the correct columns names are used.
187+
rename_columns: Dict[str, str] = {}
188+
for feature in self.features:
189+
short_name = feature.name
190+
long_name = f"{self.projection.name_to_use()}__{feature.name}"
191+
if (
192+
short_name in df_with_transformed_features.columns
193+
and full_feature_names
194+
):
195+
rename_columns[short_name] = long_name
196+
elif not full_feature_names:
197+
# Long name must be in dataframe.
198+
rename_columns[long_name] = short_name
199+
186200
# Cleanup extra columns used for transformation
187201
df_with_features.drop(columns=columns_to_cleanup, inplace=True)
188-
return df_with_transformed_features
202+
return df_with_transformed_features.rename(columns=rename_columns)
189203

190204
def infer_features(self):
191205
"""

sdk/python/feast/transformation_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def TransformFeatures(self, request, context):
4747

4848
df = pa.ipc.open_file(request.transformation_input.arrow_value).read_pandas()
4949

50-
result_df = odfv.get_transformed_features_df(df)
50+
result_df = odfv.get_transformed_features_df(df, True)
5151
result_arrow = pa.Table.from_pandas(result_df)
5252
sink = pa.BufferOutputStream()
5353
writer = pa.ipc.new_file(sink, result_arrow.schema)

sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -227,16 +227,21 @@ def get_expected_training_df(
227227
expected_df[col] = expected_df[col].astype(typ)
228228

229229
conv_feature_name = "driver_stats__conv_rate" if full_feature_names else "conv_rate"
230-
expected_df["conv_rate_plus_100"] = expected_df[conv_feature_name] + 100
231-
expected_df["conv_rate_plus_100_rounded"] = (
232-
expected_df["conv_rate_plus_100"]
230+
conv_plus_feature_name = response_feature_name(
231+
"conv_rate_plus_100", full_feature_names
232+
)
233+
expected_df[conv_plus_feature_name] = expected_df[conv_feature_name] + 100
234+
expected_df[
235+
response_feature_name("conv_rate_plus_100_rounded", full_feature_names)
236+
] = (
237+
expected_df[conv_plus_feature_name]
233238
.astype("float")
234239
.round()
235240
.astype(pd.Int32Dtype())
236241
)
237-
expected_df["conv_rate_plus_val_to_add"] = (
238-
expected_df[conv_feature_name] + expected_df["val_to_add"]
239-
)
242+
expected_df[
243+
response_feature_name("conv_rate_plus_val_to_add", full_feature_names)
244+
] = (expected_df[conv_feature_name] + expected_df["val_to_add"])
240245

241246
return expected_df
242247

@@ -380,10 +385,10 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
380385
# Not requesting the on demand transform with an entity_df query (can't add request data in them)
381386
expected_df_query = expected_df.drop(
382387
columns=[
383-
"conv_rate_plus_100",
384-
"conv_rate_plus_100_rounded",
388+
response_feature_name("conv_rate_plus_100", full_feature_names),
389+
response_feature_name("conv_rate_plus_100_rounded", full_feature_names),
390+
response_feature_name("conv_rate_plus_val_to_add", full_feature_names),
385391
"val_to_add",
386-
"conv_rate_plus_val_to_add",
387392
"driver_age",
388393
]
389394
)
@@ -638,7 +643,15 @@ def response_feature_name(feature: str, full_feature_names: bool) -> str:
638643
if feature in {"conv_rate", "avg_daily_trips"} and full_feature_names:
639644
return f"driver_stats__{feature}"
640645

641-
if feature in {"conv_rate_plus_100"} and full_feature_names:
646+
if (
647+
feature
648+
in {
649+
"conv_rate_plus_100",
650+
"conv_rate_plus_100_rounded",
651+
"conv_rate_plus_val_to_add",
652+
}
653+
and full_feature_names
654+
):
642655
return f"conv_rate_plus_100__{feature}"
643656

644657
return feature
@@ -670,7 +683,7 @@ def assert_feature_service_correctness(
670683
"driver_id",
671684
"customer_id",
672685
response_feature_name("conv_rate", full_feature_names),
673-
"conv_rate_plus_100",
686+
response_feature_name("conv_rate_plus_100", full_feature_names),
674687
"driver_age",
675688
]
676689
]

0 commit comments

Comments
 (0)