-
Notifications
You must be signed in to change notification settings - Fork 1.3k
chore: Deprecate value type #2673
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
39733b2
a6a675b
10523c7
705e952
afbb5dd
c66976c
00bb4d8
bc88d5c
a4d8e44
3dfa9d6
49b01b4
ac31bb7
a2158e7
aad0805
21d7e55
dc750e8
6432029
9808789
6c92dc2
2be1e40
9b341b7
2ed0384
7c5b80d
0f129d3
18fbb0d
7a32b01
6c4e9d6
d23ee23
8eaa65c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
value_type parameter for Entity
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,7 @@ class Entity: | |
|
|
||
| Attributes: | ||
| name: The unique name of the entity. | ||
| value_type: The type of the entity, such as string or float. | ||
| value_type (deprecated): The type of the entity, such as string or float. | ||
| join_key: A property that uniquely identifies different entities within the | ||
| collection. The join_key property is typically used for joining entities | ||
| with their associated features. If not specified, defaults to the name. | ||
|
|
@@ -60,7 +60,7 @@ def __init__( | |
| self, | ||
| *args, | ||
| name: Optional[str] = None, | ||
| value_type: ValueType = ValueType.UNKNOWN, | ||
| value_type: Optional[ValueType] = None, | ||
| description: str = "", | ||
| join_key: Optional[str] = None, | ||
| tags: Optional[Dict[str, str]] = None, | ||
|
|
@@ -72,7 +72,7 @@ def __init__( | |
|
|
||
| Args: | ||
| name: The unique name of the entity. | ||
| value_type: The type of the entity, such as string or float. | ||
| value_type (deprecated): The type of the entity, such as string or float. | ||
| description: A human-readable description. | ||
| join_key (deprecated): A property that uniquely identifies different entities within the | ||
| collection. The join_key property is typically used for joining entities | ||
|
|
@@ -104,8 +104,23 @@ def __init__( | |
| if not self.name: | ||
| raise ValueError("Name needs to be specified") | ||
|
|
||
| self.value_type = value_type | ||
| if value_type: | ||
| warnings.warn( | ||
| ( | ||
| "The `value_type` parameter is being deprecated. Instead, the type of an entity " | ||
| "should be specified as a Field in the schema of a feature view. Feast 0.22 and " | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit 0.23 |
||
| "onwards will not support the `value_type` parameter. The `entities` parameter of " | ||
| "feature views should also be changed to a List[Entity] instead of a List[str]; if " | ||
| "this is not done, entity columns will be mistakenly interpreted as feature columns." | ||
| ), | ||
| DeprecationWarning, | ||
| ) | ||
| self.value_type = value_type or ValueType.UNKNOWN | ||
|
|
||
| # For now, both the `join_key` and `join_keys` attributes are set correctly, | ||
| # so both are usable. | ||
| # TODO(felixwang9817): Remove the usage of `join_key` throughout the codebase | ||
| # when the usage of `join_key` as a parameter is removed. | ||
| if join_key: | ||
| warnings.warn( | ||
| ( | ||
|
|
@@ -125,6 +140,8 @@ def __init__( | |
| self.join_key = join_keys[0] | ||
| else: | ||
| self.join_key = join_key if join_key else self.name | ||
| if not self.join_keys: | ||
| self.join_keys = [self.join_key] | ||
| self.description = description | ||
| self.tags = tags if tags is not None else {} | ||
| self.owner = owner | ||
|
|
@@ -153,6 +170,9 @@ def __eq__(self, other): | |
| def __str__(self): | ||
| return str(MessageToJson(self.to_proto())) | ||
|
|
||
| def __lt__(self, other): | ||
| return self.name < other.name | ||
|
|
||
| def is_valid(self): | ||
| """ | ||
| Validates the state of this entity locally. | ||
|
|
@@ -210,7 +230,6 @@ def to_proto(self) -> EntityProto: | |
|
|
||
| spec = EntitySpecProto( | ||
| name=self.name, | ||
| value_type=self.value_type.value, | ||
| join_key=self.join_key, | ||
| description=self.description, | ||
| tags=self.tags, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -52,7 +52,7 @@ class FeatureView(BaseFeatureView): | |
|
|
||
| Attributes: | ||
| name: The unique name of the feature view. | ||
| entities: The list of entities with which this group of features is associated. | ||
| entities: The list of names of entities that this feature view is associated with. | ||
| ttl: The amount of time this group of features lives. A ttl of 0 indicates that | ||
| this group of features lives forever. Note that large ttl's or a ttl of 0 | ||
| can result in extremely computationally intensive queries. | ||
|
|
@@ -62,9 +62,11 @@ class FeatureView(BaseFeatureView): | |
| stream_source (optional): The stream source of data where this group of features | ||
| is stored. This is deprecated in favor of `source`. | ||
| schema: The schema of the feature view, including feature, timestamp, and entity | ||
| columns. | ||
| features: The list of features defined as part of this feature view. Each | ||
| feature should also be included in the schema. | ||
| columns. If not specified, can be inferred from the underlying data source. | ||
| entity_columns: The list of entity columns contained in the schema. If not specified, | ||
| can be inferred from the underlying data source. | ||
| features: The list of feature columns contained in the schema. If not specified, | ||
| can be inferred from the underlying data source. | ||
| online: A boolean indicating whether online retrieval is enabled for this feature | ||
| view. | ||
| description: A human-readable description. | ||
|
|
@@ -81,6 +83,7 @@ class FeatureView(BaseFeatureView): | |
| batch_source: DataSource | ||
| stream_source: Optional[DataSource] | ||
| schema: List[Field] | ||
| entity_columns: List[Field] | ||
| features: List[Field] | ||
| online: bool | ||
| description: str | ||
|
|
@@ -126,14 +129,15 @@ def __init__( | |
| owner (optional): The owner of the feature view, typically the email of the | ||
| primary maintainer. | ||
| schema (optional): The schema of the feature view, including feature, timestamp, | ||
| and entity columns. | ||
| and entity columns. If entity columns are included in the schema, a List[Entity] | ||
| must be passed to `entities` instead of a List[str]; otherwise, the entity columns | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not the worst, but seems like we should able to avoid this for users by doing some entity name lookup no? Or is the idea that entity names are no longer unique |
||
| will be mistakenly interpreted as feature columns. | ||
| source (optional): The source of data for this group of features. May be a stream source, or a batch source. | ||
| If a stream source, the source should contain a batch_source for backfills & batch materialization. | ||
|
|
||
| Raises: | ||
| ValueError: A field mapping conflicts with an Entity or a Feature. | ||
| """ | ||
|
|
||
| positional_attributes = ["name", "entities", "ttl"] | ||
|
|
||
| _name = name | ||
|
|
@@ -164,11 +168,21 @@ def __init__( | |
| raise ValueError("feature view name needs to be specified") | ||
|
|
||
| self.name = _name | ||
|
|
||
| self.entities = ( | ||
| [e.name if isinstance(e, Entity) else e for e in _entities] | ||
| if _entities | ||
| else [DUMMY_ENTITY_NAME] | ||
| ) | ||
| if _entities and isinstance(_entities[0], str): | ||
| warnings.warn( | ||
| ( | ||
| "The `entities` parameter should be a list of `Entity` objects. " | ||
| "Feast 0.22 and onwards will not support passing in a list of " | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit 0.23
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit 0.23 |
||
| "strings to define entities." | ||
| ), | ||
| DeprecationWarning, | ||
| ) | ||
|
|
||
| self._initialize_sources(_name, batch_source, stream_source, source) | ||
|
|
||
|
|
@@ -203,14 +217,31 @@ def __init__( | |
| _schema = [Field.from_feature(feature) for feature in features] | ||
| self.schema = _schema | ||
|
|
||
| # TODO(felixwang9817): Infer which fields in the schema are features, timestamps, | ||
| # and entities. For right now we assume that all fields are features, since the | ||
| # current `features` parameter only accepts feature columns. | ||
| _features = _schema | ||
| # If a user has added entity fields to schema, then they should also have switched | ||
| # to using a List[Entity], in which case entity and feature columns can be separated | ||
| # here. Conversely, if the user is still using a List[str], they must not have added | ||
| # added entity fields, in which case we can set the `features` attribute directly | ||
| # equal to the schema. | ||
| # TODO(felixwang9817): Use old value_type if it exists. | ||
| _features: List[Field] = [] | ||
| self.entity_columns = [] | ||
| if _entities and len(_entities) > 0 and isinstance(_entities[0], str): | ||
| _features = _schema | ||
| else: | ||
| join_keys = [] | ||
| if _entities: | ||
| for entity in _entities: | ||
| if isinstance(entity, Entity): | ||
| join_keys += entity.join_keys | ||
|
|
||
| for field in _schema: | ||
| if field.name in join_keys: | ||
| self.entity_columns.append(field) | ||
| else: | ||
| _features.append(field) | ||
|
|
||
| cols = [entity for entity in self.entities] + [ | ||
| field.name for field in _features | ||
| ] | ||
| # TODO(felixwang9817): Add more robust validation of features. | ||
| cols = [field.name for field in _schema] | ||
| for col in cols: | ||
| if ( | ||
| self.batch_source.field_mapping is not None | ||
|
|
@@ -273,14 +304,20 @@ def __hash__(self): | |
| def __copy__(self): | ||
| fv = FeatureView( | ||
| name=self.name, | ||
| entities=self.entities, | ||
| ttl=self.ttl, | ||
| source=self.batch_source, | ||
| stream_source=self.stream_source, | ||
| schema=self.schema, | ||
| tags=self.tags, | ||
| online=self.online, | ||
| ) | ||
|
|
||
| # This is deliberately set outside of the FV initialization to avoid the deprecation warning. | ||
| # TODO(felixwang9817): Move this into the FV initialization when the deprecation warning | ||
| # is removed. | ||
| fv.entities = self.entities | ||
| fv.features = copy.copy(self.features) | ||
| fv.entity_columns = copy.copy(self.entity_columns) | ||
| fv.projection = copy.copy(self.projection) | ||
| fv.entities = self.entities | ||
| return fv | ||
|
|
@@ -300,12 +337,16 @@ def __eq__(self, other): | |
| or self.online != other.online | ||
| or self.batch_source != other.batch_source | ||
| or self.stream_source != other.stream_source | ||
| or self.schema != other.schema | ||
| ): | ||
| return False | ||
|
|
||
| return True | ||
|
|
||
| @property | ||
| def join_keys(self) -> List[str]: | ||
| """Returns a list of all the join keys.""" | ||
| return [entity.name for entity in self.entity_columns] | ||
|
|
||
| def ensure_valid(self): | ||
| """ | ||
| Validates the state of this feature view locally. | ||
|
|
@@ -391,7 +432,8 @@ def to_proto(self) -> FeatureViewProto: | |
| spec = FeatureViewSpecProto( | ||
| name=self.name, | ||
| entities=self.entities, | ||
| features=[field.to_proto() for field in self.schema], | ||
| entity_columns=[field.to_proto() for field in self.entity_columns], | ||
| features=[field.to_proto() for field in self.features], | ||
| description=self.description, | ||
| tags=self.tags, | ||
| owner=self.owner, | ||
|
|
@@ -422,11 +464,7 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): | |
| ) | ||
| feature_view = cls( | ||
| name=feature_view_proto.spec.name, | ||
| entities=[entity for entity in feature_view_proto.spec.entities], | ||
| schema=[ | ||
| Field.from_proto(field_proto) | ||
| for field_proto in feature_view_proto.spec.features | ||
| ], | ||
| entities=feature_view_proto.spec.entities, | ||
| description=feature_view_proto.spec.description, | ||
| tags=dict(feature_view_proto.spec.tags), | ||
| owner=feature_view_proto.spec.owner, | ||
|
|
@@ -441,6 +479,16 @@ def from_proto(cls, feature_view_proto: FeatureViewProto): | |
| if stream_source: | ||
| feature_view.stream_source = stream_source | ||
|
|
||
| # Instead of passing in a schema, we set the features and entity columns. | ||
| feature_view.features = [ | ||
| Field.from_proto(field_proto) | ||
| for field_proto in feature_view_proto.spec.features | ||
| ] | ||
| feature_view.entity_columns = [ | ||
| Field.from_proto(field_proto) | ||
| for field_proto in feature_view_proto.spec.entity_columns | ||
| ] | ||
|
|
||
| # FeatureViewProjections are not saved in the FeatureView proto. | ||
| # Create the default projection. | ||
| feature_view.projection = FeatureViewProjection.from_definition(feature_view) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit 0.23
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed