Skip to content

Commit 7b759e3

Browse files
committed
Add value_type parameter back to entities
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
1 parent 857876b commit 7b759e3

File tree

5 files changed

+115
-3
lines changed

5 files changed

+115
-3
lines changed

sdk/python/feast/entity.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def __init__(
5858
*,
5959
name: str,
6060
join_keys: Optional[List[str]] = None,
61+
value_type: Optional[ValueType] = None,
6162
description: str = "",
6263
tags: Optional[Dict[str, str]] = None,
6364
owner: str = "",
@@ -70,6 +71,8 @@ def __init__(
7071
join_keys (optional): A list of properties that uniquely identifies different entities
7172
within the collection. This currently only supports a list of size one, but is
7273
intended to eventually support multiple join keys.
74+
value_type (optional): The type of the entity, such as string or float. If not specified,
75+
it will be inferred from the schema of the underlying data source.
7376
description (optional): A human-readable description.
7477
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
7578
owner (optional): The owner of the entity, typically the email of the primary maintainer.
@@ -78,7 +81,7 @@ def __init__(
7881
ValueError: Parameters are specified incorrectly.
7982
"""
8083
self.name = name
81-
self.value_type = ValueType.UNKNOWN
84+
self.value_type = value_type or ValueType.UNKNOWN
8285

8386
if join_keys and len(join_keys) > 1:
8487
# TODO(felixwang9817): When multiple join keys are supported, add a `join_keys` attribute

sdk/python/feast/feature_view.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,9 @@
3535
from feast.protos.feast.core.FeatureView_pb2 import (
3636
MaterializationInterval as MaterializationIntervalProto,
3737
)
38+
from feast.types import from_value_type
3839
from feast.usage import log_exceptions
40+
from feast.value_type import ValueType
3941

4042
warnings.simplefilter("once", DeprecationWarning)
4143

@@ -115,6 +117,7 @@ def __init__(
115117
If a stream source, the source should contain a batch_source for backfills & batch materialization.
116118
schema (optional): The schema of the feature view, including feature, timestamp,
117119
and entity columns.
120+
# TODO: clarify that schema is only useful here...
118121
entities (optional): The list of entities with which this group of features is associated.
119122
ttl (optional): The amount of time this group of features lives. A ttl of 0 indicates that
120123
this group of features lives forever. Note that large ttl's or a ttl of 0
@@ -160,9 +163,29 @@ def __init__(
160163
for entity in entities:
161164
join_keys.append(entity.join_key)
162165

166+
# Ensure that entities have unique join keys.
167+
if len(set(join_keys)) < len(join_keys):
168+
raise ValueError(
169+
"A feature view should not have entities that share a join key."
170+
)
171+
163172
for field in self.schema:
164173
if field.name in join_keys:
165174
self.entity_columns.append(field)
175+
176+
# Confirm that the inferred type matches the specified entity type, if it exists.
177+
matching_entities = (
178+
[e for e in entities if e.join_key == field.name]
179+
if entities
180+
else []
181+
)
182+
assert len(matching_entities) == 1
183+
entity = matching_entities[0]
184+
if entity.value_type != ValueType.UNKNOWN:
185+
if from_value_type(entity.value_type) != field.dtype:
186+
raise ValueError(
187+
f"Entity {entity.name} has type {entity.value_type}, which does not match the inferred type {field.dtype}."
188+
)
166189
else:
167190
features.append(field)
168191

sdk/python/feast/inference.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,7 @@ def update_feature_views_with_inferred_features_and_entities(
135135
if field.name not in [feature.name for feature in fv.features]:
136136
fv.features.append(field)
137137

138-
# Since the `value_type` parameter has not yet been fully deprecated for
139-
# entities, we respect the `value_type` attribute if it still exists.
138+
# Respect the `value_type` attribute of the entity, if it is specified.
140139
for entity_name in fv.entities:
141140
entity = entity_name_to_entity_map[entity_name]
142141
if (

sdk/python/tests/unit/infra/test_inference_unit_tests.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from feast.on_demand_feature_view import on_demand_feature_view
1616
from feast.repo_config import RepoConfig
1717
from feast.types import Float32, Float64, Int64, String, UnixTimestamp
18+
from feast.value_type import ValueType
1819
from tests.utils.data_source_test_creator import prep_file_source
1920

2021

@@ -216,6 +217,78 @@ def test_feature_view_inference_respects_basic_inference():
216217
assert len(feature_view_2.entity_columns) == 2
217218

218219

220+
def test_feature_view_inference_on_entity_value_types():
221+
"""
222+
Tests that feature view inference correctly uses the entity `value_type` attribute.
223+
"""
224+
entity1 = Entity(
225+
name="test1", join_keys=["id_join_key"], value_type=ValueType.INT64
226+
)
227+
file_source = FileSource(path="some path")
228+
feature_view_1 = FeatureView(
229+
name="test1",
230+
entities=[entity1],
231+
schema=[Field(name="int64_col", dtype=Int64)],
232+
source=file_source,
233+
)
234+
235+
assert len(feature_view_1.schema) == 1
236+
assert len(feature_view_1.features) == 1
237+
assert len(feature_view_1.entity_columns) == 0
238+
239+
update_feature_views_with_inferred_features_and_entities(
240+
[feature_view_1],
241+
[entity1],
242+
RepoConfig(
243+
provider="local", project="test", entity_key_serialization_version=2
244+
),
245+
)
246+
247+
# The schema is only used as a parameter, and is therefore not updated during inference.
248+
assert len(feature_view_1.schema) == 1
249+
250+
# Since there is already a feature specified, additional features are not inferred.
251+
assert len(feature_view_1.features) == 1
252+
253+
# The single entity column is inferred correctly and has the expected type.
254+
assert len(feature_view_1.entity_columns) == 1
255+
assert feature_view_1.entity_columns[0].dtype == Int64
256+
257+
258+
def test_conflicting_entity_value_types():
259+
"""
260+
Tests that an error is thrown when the entity value types conflict.
261+
"""
262+
entity1 = Entity(
263+
name="test1", join_keys=["id_join_key"], value_type=ValueType.INT64
264+
)
265+
file_source = FileSource(path="some path")
266+
267+
with pytest.raises(ValueError):
268+
_ = FeatureView(
269+
name="test1",
270+
entities=[entity1],
271+
schema=[
272+
Field(name="int64_col", dtype=Int64),
273+
Field(
274+
name="id_join_key", dtype=Float64
275+
), # Conflicts with the defined entity
276+
],
277+
source=file_source,
278+
)
279+
280+
# There should be no error here.
281+
_ = FeatureView(
282+
name="test1",
283+
entities=[entity1],
284+
schema=[
285+
Field(name="int64_col", dtype=Int64),
286+
Field(name="id_join_key", dtype=Int64), # Matches the defined entity
287+
],
288+
source=file_source,
289+
)
290+
291+
219292
def test_feature_view_inference_on_entity_columns(simple_dataset_1):
220293
"""
221294
Tests that feature view inference correctly infers entity columns.

sdk/python/tests/unit/test_feature_views.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@
1515
from feast.types import Float32
1616

1717

18+
def test_create_feature_view_with_conflicting_entities():
19+
user1 = Entity(name="user1", join_keys=["user_id"])
20+
user2 = Entity(name="user2", join_keys=["user_id"])
21+
batch_source = FileSource(path="some path")
22+
23+
with pytest.raises(ValueError):
24+
_ = FeatureView(
25+
name="test",
26+
entities=[user1, user2],
27+
ttl=timedelta(days=30),
28+
source=batch_source,
29+
)
30+
31+
1832
def test_create_batch_feature_view():
1933
batch_source = FileSource(path="some path")
2034
BatchFeatureView(

0 commit comments

Comments
 (0)