Skip to content

Commit b943382

Browse files
chore: Fix ODFV decorator (#2527)
* Enforce kwargs for ODFV decorator and switch from `features` to `schema`
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix ODFV decorators in docs
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix ODFV decorators in Java
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix ODFV references in ui
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix ODFV decorator in integration tests
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
1 parent 27e7ae8 commit b943382

File tree

10 files changed

+204
-95
lines changed

10 files changed

+204
-95
lines changed

docs/getting-started/concepts/feature-view.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ Feature names must be unique within a [feature view](feature-view.md#feature-vie
138138
On demand feature views allows users to use existing features and request time data (features only available at request time) to transform and create new features. Users define python transformation logic which is executed in both historical retrieval and online retrieval paths:
139139

140140
```python
141+
from feast import Field, Float64, RequestSource
142+
141143
# Define a request data source which encodes features / information only
142144
# available at request time (e.g. part of the user initiated HTTP request)
143145
input_request = RequestSource(
@@ -150,13 +152,13 @@ input_request = RequestSource(
150152

151153
# Use the input data and feature view features to create new features
152154
@on_demand_feature_view(
153-
inputs={
155+
sources={
154156
'driver_hourly_stats': driver_hourly_stats_view,
155157
'vals_to_add': input_request
156158
},
157-
features=[
158-
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
159-
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
159+
schema=[
160+
Field(name='conv_rate_plus_val1', dtype=Float64),
161+
Field(name='conv_rate_plus_val2', dtype=Float64)
160162
]
161163
)
162164
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:

docs/reference/alpha-on-demand-feature-view.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ See [https://github.com/feast-dev/on-demand-feature-views-demo](https://github.c
2828
We register `RequestDataSource` inputs and the transform in `on_demand_feature_view`:
2929

3030
```python
31+
from feast import Field, Float64, RequestSource
32+
3133
# Define a request data source which encodes features / information only
3234
# available at request time (e.g. part of the user initiated HTTP request)
3335
input_request = RequestDataSource(
@@ -40,13 +42,13 @@ input_request = RequestDataSource(
4042

4143
# Use the input data and feature view features to create new features
4244
@on_demand_feature_view(
43-
inputs={
45+
sources={
4446
'driver_hourly_stats': driver_hourly_stats_view,
4547
'vals_to_add': input_request
4648
},
47-
features=[
48-
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
49-
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
49+
schema=[
50+
Field(name='conv_rate_plus_val1', dtype=Float64),
51+
Field(name='conv_rate_plus_val2', dtype=Float64)
5052
]
5153
)
5254
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:

docs/tutorials/validating-historical-features.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ pyarrow.parquet.write_table(entities_2019_table, "entities.parquet")
107107
import pyarrow.parquet
108108
import pandas as pd
109109

110-
from feast import Feature, FeatureView, Entity, FeatureStore, Field, Float64, Int64
110+
from feast import FeatureView, Entity, FeatureStore, Field, Float64, Int64
111111
from feast.value_type import ValueType
112112
from feast.data_format import ParquetFormat
113113
from feast.on_demand_feature_view import on_demand_feature_view
@@ -153,11 +153,11 @@ trips_stats_fv = FeatureView(
153153

154154
```python
155155
@on_demand_feature_view(
156-
features=[
157-
Feature("avg_fare", ValueType.DOUBLE),
158-
Feature("avg_speed", ValueType.DOUBLE),
159-
Feature("avg_trip_seconds", ValueType.DOUBLE),
160-
Feature("earned_per_hour", ValueType.DOUBLE),
156+
schema=[
157+
Field("avg_fare", Float64),
158+
Field("avg_speed", Float64),
159+
Field("avg_trip_seconds", Float64),
160+
Field("earned_per_hour", Float64),
161161
],
162162
inputs={
163163
"stats": trips_stats_fv

examples/java-demo/feature_repo/driver_repo.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import pandas as pd
2-
from feast import Entity, Feature, FeatureView, FileSource, ValueType
32
from feast.data_source import RequestSource
3+
from feast.field import Field
44
from feast.on_demand_feature_view import on_demand_feature_view
55
from feast.request_feature_view import RequestFeatureView
6+
from feast.types import Float32, Float64, Int64, String
67
from google.protobuf.duration_pb2 import Duration
78

9+
from feast import Entity, Feature, FeatureView, FileSource, ValueType
10+
811
driver_hourly_stats = FileSource(
912
path="data/driver_stats_with_string.parquet",
1013
timestamp_field="event_timestamp",
@@ -15,11 +18,11 @@
1518
name="driver_hourly_stats",
1619
entities=["driver_id"],
1720
ttl=Duration(seconds=86400000),
18-
features=[
19-
Feature(name="conv_rate", dtype=ValueType.FLOAT),
20-
Feature(name="acc_rate", dtype=ValueType.FLOAT),
21-
Feature(name="avg_daily_trips", dtype=ValueType.INT64),
22-
Feature(name="string_feature", dtype=ValueType.STRING),
21+
schema=[
22+
Field(name="conv_rate", dtype=Float32),
23+
Field(name="acc_rate", dtype=Float32),
24+
Field(name="avg_daily_trips", dtype=Int64),
25+
Field(name="string_feature", dtype=String),
2326
],
2427
online=True,
2528
batch_source=driver_hourly_stats,
@@ -40,9 +43,9 @@
4043
"driver_hourly_stats": driver_hourly_stats_view,
4144
"vals_to_add": input_request,
4245
},
43-
features=[
44-
Feature(name="conv_rate_plus_val1", dtype=ValueType.DOUBLE),
45-
Feature(name="conv_rate_plus_val2", dtype=ValueType.DOUBLE),
46+
schema=[
47+
Field(name="conv_rate_plus_val1", dtype=Float64),
48+
Field(name="conv_rate_plus_val2", dtype=Float64),
4649
],
4750
)
4851
def transformed_conv_rate(inputs: pd.DataFrame) -> pd.DataFrame:
Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
import pandas as pd
2-
3-
from google.protobuf.duration_pb2 import Duration
4-
5-
from feast.value_type import ValueType
6-
from feast.feature import Feature
7-
from feast.feature_view import FeatureView
2+
from feast.data_source import RequestSource
83
from feast.entity import Entity
94
from feast.feature_service import FeatureService
10-
from feast.data_source import RequestSource
5+
from feast.feature_view import FeatureView
6+
from feast.field import Field
117
from feast.on_demand_feature_view import on_demand_feature_view
12-
from feast import FileSource
8+
from feast.types import Float32, Float64, Int64
9+
from feast.value_type import ValueType
10+
from google.protobuf.duration_pb2 import Duration
1311

12+
from feast import FileSource
1413

1514
file_path = "driver_stats.parquet"
1615
driver_hourly_stats = FileSource(
@@ -30,10 +29,10 @@
3029
name="driver_hourly_stats",
3130
entities=["driver_id"],
3231
ttl=Duration(seconds=86400 * 7),
33-
features=[
34-
Feature(name="conv_rate", dtype=ValueType.DOUBLE),
35-
Feature(name="acc_rate", dtype=ValueType.FLOAT),
36-
Feature(name="avg_daily_trips", dtype=ValueType.INT64),
32+
schema=[
33+
Field(name="conv_rate", dtype=Float64),
34+
Field(name="acc_rate", dtype=Float32),
35+
Field(name="avg_daily_trips", dtype=Int64),
3736
],
3837
online=True,
3938
batch_source=driver_hourly_stats,
@@ -43,56 +42,45 @@
4342

4443
input_request = RequestSource(
4544
name="vals_to_add",
46-
schema={
47-
"val_to_add": ValueType.INT64,
48-
"val_to_add_2": ValueType.INT64
49-
}
45+
schema={"val_to_add": ValueType.INT64, "val_to_add_2": ValueType.INT64},
5046
)
5147

5248

5349
@on_demand_feature_view(
54-
sources={
55-
'driver_hourly_stats': driver_hourly_stats_view,
56-
'vals_to_add': input_request
57-
},
58-
features=[
59-
Feature(name='conv_rate_plus_val1', dtype=ValueType.DOUBLE),
60-
Feature(name='conv_rate_plus_val2', dtype=ValueType.DOUBLE)
61-
]
50+
sources={
51+
"driver_hourly_stats": driver_hourly_stats_view,
52+
"vals_to_add": input_request,
53+
},
54+
schema=[
55+
Field(name="conv_rate_plus_val1", dtype=Float64),
56+
Field(name="conv_rate_plus_val2", dtype=Float64),
57+
],
6258
)
6359
def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame:
6460
df = pd.DataFrame()
65-
df['conv_rate_plus_val1'] = (features_df['conv_rate'] + features_df['val_to_add'])
66-
df['conv_rate_plus_val2'] = (features_df['conv_rate'] + features_df['val_to_add_2'])
61+
df["conv_rate_plus_val1"] = features_df["conv_rate"] + features_df["val_to_add"]
62+
df["conv_rate_plus_val2"] = features_df["conv_rate"] + features_df["val_to_add_2"]
6763
return df
6864

6965

7066
generated_data_source = FileSource(
71-
path="benchmark_data.parquet",
72-
timestamp_field="event_timestamp",
67+
path="benchmark_data.parquet", timestamp_field="event_timestamp",
7368
)
7469

75-
entity = Entity(
76-
name="entity",
77-
value_type=ValueType.STRING,
78-
)
70+
entity = Entity(name="entity", value_type=ValueType.STRING,)
7971

8072
benchmark_feature_views = [
8173
FeatureView(
8274
name=f"feature_view_{i}",
8375
entities=["entity"],
8476
ttl=Duration(seconds=86400),
85-
features=[
86-
Feature(name=f"feature_{10 * i + j}", dtype=ValueType.INT64)
87-
for j in range(10)
88-
],
77+
schema=[Field(name=f"feature_{10 * i + j}", dtype=Int64) for j in range(10)],
8978
online=True,
9079
batch_source=generated_data_source,
9180
)
9281
for i in range(25)
9382
]
9483

9584
benchmark_feature_service = FeatureService(
96-
name=f"benchmark_feature_service",
97-
features=benchmark_feature_views,
85+
name=f"benchmark_feature_service", features=benchmark_feature_views,
9886
)

java/serving/src/test/resources/docker-compose/feast10/materialize.py

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
import pandas as pd
2-
import numpy as np
3-
41
from datetime import datetime, timedelta
5-
from feast import FeatureStore
62

7-
from definitions import driver_hourly_stats_view, driver, entity,\
8-
benchmark_feature_service, benchmark_feature_views, transformed_conv_rate
3+
import numpy as np
4+
import pandas as pd
5+
from definitions import (
6+
benchmark_feature_service,
7+
benchmark_feature_views,
8+
driver,
9+
driver_hourly_stats_view,
10+
entity,
11+
transformed_conv_rate,
12+
)
913

14+
from feast import FeatureStore
1015

1116
print("Running materialize.py")
1217

@@ -21,7 +26,9 @@
2126
df["avg_daily_trips"] = np.arange(0, 1000, 100)
2227

2328
# some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status
24-
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(lambda days: timedelta(days=days))
29+
df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map(
30+
lambda days: timedelta(days=days)
31+
)
2532

2633
# Store data in parquet files. Parquet is convenient for local development mode. For
2734
# production, you can use your favorite DWH, such as BigQuery. See Feast documentation
@@ -41,21 +48,27 @@ def generate_data(num_rows: int, num_features: int, destination: str) -> pd.Data
4148
for column in features:
4249
df[column] = np.random.randint(1, num_rows, num_rows)
4350

44-
df["entity"] = "key-" + \
45-
pd.Series(np.arange(1, num_rows + 1)).astype(pd.StringDtype())
51+
df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype(
52+
pd.StringDtype()
53+
)
4654

4755
df.to_parquet(destination)
4856

4957

50-
generate_data(10**3, 250, "benchmark_data.parquet")
58+
generate_data(10 ** 3, 250, "benchmark_data.parquet")
5159

5260

5361
fs = FeatureStore(".")
54-
fs.apply([driver_hourly_stats_view,
55-
transformed_conv_rate,
56-
driver,
57-
entity, benchmark_feature_service,
58-
*benchmark_feature_views])
62+
fs.apply(
63+
[
64+
driver_hourly_stats_view,
65+
transformed_conv_rate,
66+
driver,
67+
entity,
68+
benchmark_feature_service,
69+
*benchmark_feature_views,
70+
]
71+
)
5972

6073
now = datetime.now()
6174
fs.materialize(start, now)

0 commit comments

Comments (0)