Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
2a7935b
historical_field_mappings fix CONTRIBUTING docs for creating venv
michelle-rascati-sp Jan 26, 2022
8b95281
historical_field_mappings fix mypy-protobuf conflict
michelle-rascati-sp Jan 26, 2022
6a8d126
historical_field_mappings adding tests for get_historical_features to…
michelle-rascati-sp Jan 27, 2022
99bf1ed
historical_field_mappings bigquery tests passing
michelle-rascati-sp Jan 27, 2022
a895594
historical_field_mappings redshift tests pass
michelle-rascati-sp Jan 27, 2022
12753f3
historical_field_mappings formatting
michelle-rascati-sp Jan 27, 2022
d15a210
historical_field_mappings make required so no .get() from None
michelle-rascati-sp Jan 27, 2022
ed011a3
historical_field_mappings type the registry so linter is happy
michelle-rascati-sp Jan 27, 2022
af8fc85
historical_field_mappings making pylint happy
michelle-rascati-sp Jan 27, 2022
e804fe9
historical_field_mappings formatting
michelle-rascati-sp Jan 27, 2022
b28aa79
historical_field_mappings Merge branch 'master' into historical_field…
michelle-rascati-sp Jan 27, 2022
0c1e79f
historical_field_mappings Revert "historical_field_mappings making py…
michelle-rascati-sp Jan 27, 2022
60e3692
historical_field_mappings redo FieldStatusValue
michelle-rascati-sp Jan 27, 2022
33dd5b9
historical_field_mappings already fixed upstream
michelle-rascati-sp Jan 27, 2022
c3c6747
historical_field_mappings formatting
michelle-rascati-sp Jan 27, 2022
2d43b74
historical_field_mappings remove unused import
michelle-rascati-sp Jan 27, 2022
cbcbed7
historical_field_mappings Revert "historical_field_mappings redo Fiel…
michelle-rascati-sp Jan 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
historical_field_mappings Merge branch 'master' into historical_field_mappings
  • Loading branch information
michelle-rascati-sp committed Jan 27, 2022
commit b28aa790c79a3cbb5b55f3915590668db24a5fcf
Original file line number Diff line number Diff line change
Expand Up @@ -281,51 +281,14 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
(entities, datasets, data_sources) = universal_data_sources
feature_views = construct_universal_feature_views(data_sources)

(
customer_df,
driver_df,
location_df,
orders_df,
global_df,
entity_df,
field_mapping_df,
) = (
datasets["customer"],
datasets["driver"],
datasets["location"],
datasets["orders"],
datasets["global"],
datasets["entity"],
datasets["field_mapping"],
)
entity_df_with_request_data = entity_df.copy(deep=True)
entity_df_with_request_data = datasets["entity"].copy(deep=True)
entity_df_with_request_data["val_to_add"] = [
i for i in range(len(entity_df_with_request_data))
]
entity_df_with_request_data["driver_age"] = [
i + 100 for i in range(len(entity_df_with_request_data))
]

(
customer_fv,
driver_fv,
driver_odfv,
location_fv,
order_fv,
global_fv,
driver_age_request_fv,
field_mapping_fv,
) = (
feature_views["customer"],
feature_views["driver"],
feature_views["driver_odfv"],
feature_views["location"],
feature_views["order"],
feature_views["global"],
feature_views["driver_age_request_fv"],
feature_views["field_mapping"],
)

feature_service = FeatureService(
name="convrate_plus100",
features=[
Expand Down Expand Up @@ -353,7 +316,7 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
location(),
feature_service,
feature_service_entity_mapping,
field_mapping_fv,
*feature_views.values(),
]
)

Expand All @@ -363,18 +326,18 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
else "e_ts"
)
full_expected_df = get_expected_training_df(
customer_df,
customer_fv,
driver_df,
driver_fv,
orders_df,
order_fv,
location_df,
location_fv,
global_df,
global_fv,
field_mapping_df,
field_mapping_fv,
datasets["customer"],
feature_views["customer"],
datasets["driver"],
feature_views["driver"],
datasets["orders"],
feature_views["order"],
datasets["location"],
feature_views["location"],
datasets["global"],
feature_views["global"],
datasets["field_mapping"],
feature_views["field_mapping"],
entity_df_with_request_data,
event_timestamp,
full_feature_names,
Expand All @@ -385,77 +348,6 @@ def test_historical_features(environment, universal_data_sources, full_feature_n
columns=["origin__temperature", "destination__temperature"],
)

if entity_df_query:
job_from_sql = store.get_historical_features(
entity_df=entity_df_query,
features=[
"driver_stats:conv_rate",
"driver_stats:avg_daily_trips",
"customer_profile:current_balance",
"customer_profile:avg_passenger_count",
"customer_profile:lifetime_trip_count",
"order:order_is_success",
"global_stats:num_rides",
"global_stats:avg_ride_length",
"field_mapping:feature_name",
],
full_feature_names=full_feature_names,
)

start_time = datetime.utcnow()
actual_df_from_sql_entities = job_from_sql.to_df()
end_time = datetime.utcnow()
print(
str(f"\nTime to execute job_from_sql.to_df() = '{(end_time - start_time)}'")
)

# Not requesting the on demand transform with an entity_df query (can't add request data in them)
expected_df_query = expected_df.drop(
columns=[
response_feature_name("conv_rate_plus_100", full_feature_names),
response_feature_name("conv_rate_plus_100_rounded", full_feature_names),
response_feature_name("conv_rate_plus_val_to_add", full_feature_names),
"val_to_add",
"driver_age",
]
)
assert sorted(expected_df_query.columns) == sorted(
actual_df_from_sql_entities.columns
)

actual_df_from_sql_entities = (
actual_df_from_sql_entities[expected_df_query.columns]
.sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
.drop_duplicates()
.reset_index(drop=True)
)
expected_df_query = (
expected_df_query.sort_values(
by=[event_timestamp, "order_id", "driver_id", "customer_id"]
)
.drop_duplicates()
.reset_index(drop=True)
)

assert_frame_equal(
actual_df_from_sql_entities, expected_df_query, check_dtype=False,
)

table_from_sql_entities = job_from_sql.to_arrow()
df_from_sql_entities = (
table_from_sql_entities.to_pandas()[expected_df_query.columns]
.sort_values(by=[event_timestamp, "order_id", "driver_id", "customer_id"])
.drop_duplicates()
.reset_index(drop=True)
)

for col in df_from_sql_entities.columns:
expected_df_query[col] = expected_df_query[col].astype(
df_from_sql_entities[col].dtype
)

assert_frame_equal(expected_df_query, df_from_sql_entities)

job_from_df = store.get_historical_features(
entity_df=entity_df_with_request_data,
features=[
Expand Down Expand Up @@ -590,6 +482,7 @@ def test_historical_features_with_entities_from_query(
"order:order_is_success",
"global_stats:num_rides",
"global_stats:avg_ride_length",
"field_mapping:feature_name",
],
full_feature_names=full_feature_names,
)
Expand All @@ -615,6 +508,8 @@ def test_historical_features_with_entities_from_query(
feature_views["location"],
datasets["global"],
feature_views["global"],
datasets["field_mapping"],
feature_views["field_mapping"],
datasets["entity"],
event_timestamp,
full_feature_names,
Expand Down Expand Up @@ -676,6 +571,7 @@ def test_historical_features_persisting(
"order:order_is_success",
"global_stats:num_rides",
"global_stats:avg_ride_length",
"field_mapping:feature_name",
],
full_feature_names=full_feature_names,
)
Expand All @@ -699,6 +595,8 @@ def test_historical_features_persisting(
feature_views["location"],
datasets["global"],
feature_views["global"],
datasets["field_mapping"],
feature_views["field_mapping"],
entity_df,
event_timestamp,
full_feature_names,
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.