Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
39733b2
Switch `entities` from List[str] to List[Entity]
felixwang9817 Apr 25, 2022
a6a675b
Remove `value_type` from SDK
felixwang9817 Apr 26, 2022
10523c7
Remove `value_type` from tests
felixwang9817 Apr 26, 2022
705e952
Deprecate `value_type` parameter for Entity
felixwang9817 May 2, 2022
afbb5dd
Add fields for entities to avoid type inference after removing `value…
felixwang9817 Apr 26, 2022
c66976c
Fix Go
felixwang9817 May 2, 2022
00bb4d8
Fix type inference
felixwang9817 May 3, 2022
bc88d5c
Another fix
felixwang9817 May 3, 2022
a4d8e44
Another fix
felixwang9817 May 3, 2022
3dfa9d6
Rename Entities to EntityNames in go
felixwang9817 May 3, 2022
49b01b4
Rename lookup
felixwang9817 May 3, 2022
ac31bb7
Rename Feature to Field
felixwang9817 May 3, 2022
a2158e7
Clean up inference
felixwang9817 May 3, 2022
aad0805
Refactor
felixwang9817 May 3, 2022
21d7e55
Use old `value_type` attribute if it still exists
felixwang9817 May 3, 2022
dc750e8
Refactor
felixwang9817 May 3, 2022
6432029
Add TODO
felixwang9817 May 4, 2022
9808789
Another fix
felixwang9817 May 11, 2022
6c92dc2
Fix test
felixwang9817 May 11, 2022
2be1e40
Add pytest.ini file to suppress pytest warnings about markers
felixwang9817 May 11, 2022
9b341b7
Fix type test
felixwang9817 May 11, 2022
2ed0384
Fix type test
felixwang9817 May 13, 2022
7c5b80d
Modify entity and feature inference to occur separately and add tests
felixwang9817 May 13, 2022
0f129d3
Lint
felixwang9817 May 13, 2022
18fbb0d
Refactor inference to pass lint
felixwang9817 May 13, 2022
7a32b01
Fix Java
felixwang9817 May 13, 2022
6c4e9d6
Another fix
felixwang9817 May 13, 2022
d23ee23
Fix ODFV repo
felixwang9817 May 17, 2022
8eaa65c
Switch deprecation version from 0.22 to 0.23
felixwang9817 May 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Deprecate value_type parameter for Entity
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
  • Loading branch information
felixwang9817 committed May 17, 2022
commit 705e9528fd161ca522876b0cdad536ef0568c36b
10 changes: 6 additions & 4 deletions protos/feast/core/FeatureView.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ message FeatureView {
FeatureViewMeta meta = 2;
}

// Next available id: 12
// Next available id: 13
// TODO(adchia): refactor common fields from this and ODFV into separate metadata proto
message FeatureViewSpec {
// Name of the feature view. Must be unique. Not updated.
Expand All @@ -44,13 +44,15 @@ message FeatureViewSpec {
// Name of Feast project that this feature view belongs to.
string project = 2;

// List names of entities to associate with the Features defined in this
// Feature View. Not updatable.
// List of names of entities associated with this feature view.
repeated string entities = 3;

// List of specifications for each field defined as part of this feature view.
// List of specifications for each feature defined as part of this feature view.
repeated FeatureSpecV2 features = 4;

// List of specifications for each entity defined as part of this feature view.
repeated FeatureSpecV2 entity_columns = 12;

// Description of the feature view.
string description = 10;

Expand Down
29 changes: 24 additions & 5 deletions sdk/python/feast/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class Entity:

Attributes:
name: The unique name of the entity.
value_type: The type of the entity, such as string or float.
value_type (deprecated): The type of the entity, such as string or float.
join_key: A property that uniquely identifies different entities within the
collection. The join_key property is typically used for joining entities
with their associated features. If not specified, defaults to the name.
Expand Down Expand Up @@ -60,7 +60,7 @@ def __init__(
self,
*args,
name: Optional[str] = None,
value_type: ValueType = ValueType.UNKNOWN,
value_type: Optional[ValueType] = None,
description: str = "",
join_key: Optional[str] = None,
tags: Optional[Dict[str, str]] = None,
Expand All @@ -72,7 +72,7 @@ def __init__(

Args:
name: The unique name of the entity.
value_type: The type of the entity, such as string or float.
value_type (deprecated): The type of the entity, such as string or float.
description: A human-readable description.
join_key (deprecated): A property that uniquely identifies different entities within the
collection. The join_key property is typically used for joining entities
Expand Down Expand Up @@ -104,8 +104,23 @@ def __init__(
if not self.name:
raise ValueError("Name needs to be specified")

self.value_type = value_type
if value_type:
warnings.warn(
(
"The `value_type` parameter is being deprecated. Instead, the type of an entity "
"should be specified as a Field in the schema of a feature view. Feast 0.22 and "
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit 0.23

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit 0.23

"onwards will not support the `value_type` parameter. The `entities` parameter of "
"feature views should also be changed to a List[Entity] instead of a List[str]; if "
"this is not done, entity columns will be mistakenly interpreted as feature columns."
),
DeprecationWarning,
)
self.value_type = value_type or ValueType.UNKNOWN

# For now, both the `join_key` and `join_keys` attributes are set correctly,
# so both are usable.
# TODO(felixwang9817): Remove the usage of `join_key` throughout the codebase
# when the usage of `join_key` as a parameter is removed.
if join_key:
warnings.warn(
(
Expand All @@ -125,6 +140,8 @@ def __init__(
self.join_key = join_keys[0]
else:
self.join_key = join_key if join_key else self.name
if not self.join_keys:
self.join_keys = [self.join_key]
self.description = description
self.tags = tags if tags is not None else {}
self.owner = owner
Expand Down Expand Up @@ -153,6 +170,9 @@ def __eq__(self, other):
def __str__(self):
return str(MessageToJson(self.to_proto()))

def __lt__(self, other):
return self.name < other.name

def is_valid(self):
"""
Validates the state of this entity locally.
Expand Down Expand Up @@ -210,7 +230,6 @@ def to_proto(self) -> EntityProto:

spec = EntitySpecProto(
name=self.name,
value_type=self.value_type.value,
join_key=self.join_key,
description=self.description,
tags=self.tags,
Expand Down
12 changes: 8 additions & 4 deletions sdk/python/feast/feature_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from feast.protos.feast.core.FeatureService_pb2 import (
LoggingConfig as LoggingConfigProto,
)
from feast.types import from_value_type

if TYPE_CHECKING:
from feast import FeatureService
Expand Down Expand Up @@ -86,9 +85,14 @@ def get_schema(self, registry: "Registry") -> pa.Schema:
join_key = projection.join_key_map.get(
entity.join_key, entity.join_key
)
fields[join_key] = FEAST_TYPE_TO_ARROW_TYPE[
from_value_type(entity.value_type)
]
entity_columns = list(
filter(
lambda x: x.name == join_key, feature_view.entity_columns
)
)
assert len(entity_columns) == 1
entity_column = entity_columns[0]
fields[join_key] = FEAST_TYPE_TO_ARROW_TYPE[entity_column.dtype]

for feature in projection.features:
fields[
Expand Down
20 changes: 8 additions & 12 deletions sdk/python/feast/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@
)
from feast.inference import (
update_data_sources_with_inferred_event_timestamp_col,
update_entities_with_inferred_types_from_feature_views,
update_feature_views_with_inferred_features,
update_feature_views_with_inferred_features_and_entities,
)
from feast.infra.infra_object import Infra
from feast.infra.provider import Provider, RetrievalJob, get_provider
Expand Down Expand Up @@ -480,10 +479,6 @@ def _make_inferences(
feature_services_to_update: List[FeatureService],
):
"""Makes inferences for entities, feature views, odfvs, and feature services."""
update_entities_with_inferred_types_from_feature_views(
entities_to_update, views_to_update, self.config
)

update_data_sources_with_inferred_event_timestamp_col(
data_sources_to_update, self.config
)
Expand All @@ -494,7 +489,7 @@ def _make_inferences(

# New feature views may reference previously applied entities.
entities = self._list_entities()
update_feature_views_with_inferred_features(
update_feature_views_with_inferred_features_and_entities(
views_to_update, entities + entities_to_update, self.config
)

Expand Down Expand Up @@ -695,6 +690,9 @@ def apply(

data_sources_to_update = list(data_sources_set_to_update)

# Handle all entityless feature views by using DUMMY_ENTITY as a placeholder entity.
entities_to_update.append(DUMMY_ENTITY)

# Validate all feature views and make inferences.
self._validate_all_feature_views(
views_to_update, odfvs_to_update, request_views_to_update
Expand All @@ -707,9 +705,6 @@ def apply(
services_to_update,
)

# Handle all entityless feature views by using DUMMY_ENTITY as a placeholder entity.
entities_to_update.append(DUMMY_ENTITY)

# Add all objects to the registry and update the provider's infrastructure.
for ds in data_sources_to_update:
self._registry.apply_data_source(ds, project=self.project, commit=False)
Expand Down Expand Up @@ -1567,7 +1562,6 @@ def _get_entity_maps(
entity_type_map: Dict[str, ValueType] = {}
for entity in entities:
entity_name_to_join_key_map[entity.name] = entity.join_key
entity_type_map[entity.name] = entity.value_type
for feature_view in feature_views:
for entity_name in feature_view.entities:
entity = self._registry.get_entity(
Expand All @@ -1582,7 +1576,9 @@ def _get_entity_maps(
entity.join_key, entity.join_key
)
entity_name_to_join_key_map[entity_name] = join_key
entity_type_map[join_key] = entity.value_type
for entity_field in feature_view.entity_columns:
entity_type_map[entity_field.name] = entity_field.dtype.to_value_type()

return (
entity_name_to_join_key_map,
entity_type_map,
Expand Down
90 changes: 69 additions & 21 deletions sdk/python/feast/feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class FeatureView(BaseFeatureView):

Attributes:
name: The unique name of the feature view.
entities: The list of entities with which this group of features is associated.
entities: The list of names of entities that this feature view is associated with.
ttl: The amount of time this group of features lives. A ttl of 0 indicates that
this group of features lives forever. Note that large ttl's or a ttl of 0
can result in extremely computationally intensive queries.
Expand All @@ -62,9 +62,11 @@ class FeatureView(BaseFeatureView):
stream_source (optional): The stream source of data where this group of features
is stored. This is deprecated in favor of `source`.
schema: The schema of the feature view, including feature, timestamp, and entity
columns.
features: The list of features defined as part of this feature view. Each
feature should also be included in the schema.
columns. If not specified, can be inferred from the underlying data source.
entity_columns: The list of entity columns contained in the schema. If not specified,
can be inferred from the underlying data source.
features: The list of feature columns contained in the schema. If not specified,
can be inferred from the underlying data source.
online: A boolean indicating whether online retrieval is enabled for this feature
view.
description: A human-readable description.
Expand All @@ -81,6 +83,7 @@ class FeatureView(BaseFeatureView):
batch_source: DataSource
stream_source: Optional[DataSource]
schema: List[Field]
entity_columns: List[Field]
features: List[Field]
online: bool
description: str
Expand Down Expand Up @@ -126,14 +129,15 @@ def __init__(
owner (optional): The owner of the feature view, typically the email of the
primary maintainer.
schema (optional): The schema of the feature view, including feature, timestamp,
and entity columns.
and entity columns. If entity columns are included in the schema, a List[Entity]
must be passed to `entities` instead of a List[str]; otherwise, the entity columns
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not the worst, but seems like we should able to avoid this for users by doing some entity name lookup no? Or is the idea that entity names are no longer unique

will be mistakenly interpreted as feature columns.
source (optional): The source of data for this group of features. May be a stream source, or a batch source.
If a stream source, the source should contain a batch_source for backfills & batch materialization.

Raises:
ValueError: A field mapping conflicts with an Entity or a Feature.
"""

positional_attributes = ["name", "entities", "ttl"]

_name = name
Expand Down Expand Up @@ -164,11 +168,21 @@ def __init__(
raise ValueError("feature view name needs to be specified")

self.name = _name

self.entities = (
[e.name if isinstance(e, Entity) else e for e in _entities]
if _entities
else [DUMMY_ENTITY_NAME]
)
if _entities and isinstance(_entities[0], str):
warnings.warn(
(
"The `entities` parameter should be a list of `Entity` objects. "
"Feast 0.22 and onwards will not support passing in a list of "
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit 0.23

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit 0.23

"strings to define entities."
),
DeprecationWarning,
)

self._initialize_sources(_name, batch_source, stream_source, source)

Expand Down Expand Up @@ -203,14 +217,31 @@ def __init__(
_schema = [Field.from_feature(feature) for feature in features]
self.schema = _schema

# TODO(felixwang9817): Infer which fields in the schema are features, timestamps,
# and entities. For right now we assume that all fields are features, since the
# current `features` parameter only accepts feature columns.
_features = _schema
# If a user has added entity fields to schema, then they should also have switched
# to using a List[Entity], in which case entity and feature columns can be separated
# here. Conversely, if the user is still using a List[str], they must not have added
# added entity fields, in which case we can set the `features` attribute directly
# equal to the schema.
# TODO(felixwang9817): Use old value_type if it exists.
_features: List[Field] = []
self.entity_columns = []
if _entities and len(_entities) > 0 and isinstance(_entities[0], str):
_features = _schema
else:
join_keys = []
if _entities:
for entity in _entities:
if isinstance(entity, Entity):
join_keys += entity.join_keys

for field in _schema:
if field.name in join_keys:
self.entity_columns.append(field)
else:
_features.append(field)

cols = [entity for entity in self.entities] + [
field.name for field in _features
]
# TODO(felixwang9817): Add more robust validation of features.
cols = [field.name for field in _schema]
for col in cols:
if (
self.batch_source.field_mapping is not None
Expand Down Expand Up @@ -273,14 +304,20 @@ def __hash__(self):
def __copy__(self):
fv = FeatureView(
name=self.name,
entities=self.entities,
ttl=self.ttl,
source=self.batch_source,
stream_source=self.stream_source,
schema=self.schema,
tags=self.tags,
online=self.online,
)

# This is deliberately set outside of the FV initialization to avoid the deprecation warning.
# TODO(felixwang9817): Move this into the FV initialization when the deprecation warning
# is removed.
fv.entities = self.entities
fv.features = copy.copy(self.features)
fv.entity_columns = copy.copy(self.entity_columns)
fv.projection = copy.copy(self.projection)
fv.entities = self.entities
return fv
Expand All @@ -300,12 +337,16 @@ def __eq__(self, other):
or self.online != other.online
or self.batch_source != other.batch_source
or self.stream_source != other.stream_source
or self.schema != other.schema
):
return False

return True

@property
def join_keys(self) -> List[str]:
"""Returns a list of all the join keys."""
return [entity.name for entity in self.entity_columns]

def ensure_valid(self):
"""
Validates the state of this feature view locally.
Expand Down Expand Up @@ -391,7 +432,8 @@ def to_proto(self) -> FeatureViewProto:
spec = FeatureViewSpecProto(
name=self.name,
entities=self.entities,
features=[field.to_proto() for field in self.schema],
entity_columns=[field.to_proto() for field in self.entity_columns],
features=[field.to_proto() for field in self.features],
description=self.description,
tags=self.tags,
owner=self.owner,
Expand Down Expand Up @@ -422,11 +464,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto):
)
feature_view = cls(
name=feature_view_proto.spec.name,
entities=[entity for entity in feature_view_proto.spec.entities],
schema=[
Field.from_proto(field_proto)
for field_proto in feature_view_proto.spec.features
],
entities=feature_view_proto.spec.entities,
description=feature_view_proto.spec.description,
tags=dict(feature_view_proto.spec.tags),
owner=feature_view_proto.spec.owner,
Expand All @@ -441,6 +479,16 @@ def from_proto(cls, feature_view_proto: FeatureViewProto):
if stream_source:
feature_view.stream_source = stream_source

# Instead of passing in a schema, we set the features and entity columns.
feature_view.features = [
Field.from_proto(field_proto)
for field_proto in feature_view_proto.spec.features
]
feature_view.entity_columns = [
Field.from_proto(field_proto)
for field_proto in feature_view_proto.spec.entity_columns
]

# FeatureViewProjections are not saved in the FeatureView proto.
# Create the default projection.
feature_view.projection = FeatureViewProjection.from_definition(feature_view)
Expand Down
Loading