Skip to content

Commit 6fd0874

Browse files
Add support for labels on feature sets (feast-dev#707)
* Update Python SDK to support labels
* Format Python code
* Fix equality comparison for FeatureSet
* Fix bug where labels returned presence, and initialize labels to an empty dict by default
* Propagate KeyErrors for labels and fields, and update e2e tests to pass labels in the constructor

Co-authored-by: Joost Rothweiler <=>
1 parent 1e12d3f commit 6fd0874

File tree

6 files changed

+187
-19
lines changed

6 files changed

+187
-19
lines changed

sdk/python/feast/feature.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def to_proto(self) -> FeatureProto:
2828
return FeatureProto(
2929
name=self.name,
3030
value_type=value_type,
31+
labels=self.labels,
3132
presence=self.presence,
3233
group_presence=self.group_presence,
3334
shape=self.shape,
@@ -57,7 +58,9 @@ def from_proto(cls, feature_proto: FeatureProto):
5758
Feature object
5859
"""
5960
feature = cls(
60-
name=feature_proto.name, dtype=ValueType(feature_proto.value_type),
61+
name=feature_proto.name,
62+
dtype=ValueType(feature_proto.value_type),
63+
labels=feature_proto.labels,
6164
)
6265
feature.update_presence_constraints(feature_proto)
6366
feature.update_shape_type(feature_proto)

sdk/python/feast/feature_set.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
import warnings
1515
from collections import OrderedDict
16-
from typing import Dict, List, Optional
16+
from typing import Dict, List, MutableMapping, Optional
1717

1818
import pandas as pd
1919
from google.protobuf import json_format
@@ -56,6 +56,7 @@ def __init__(
5656
entities: List[Entity] = None,
5757
source: Source = None,
5858
max_age: Optional[Duration] = None,
59+
labels: Optional[MutableMapping[str, str]] = None,
5960
):
6061
self._name = name
6162
self._project = project
@@ -68,6 +69,10 @@ def __init__(
6869
self._source = None
6970
else:
7071
self._source = source
72+
if labels is None:
73+
self._labels = OrderedDict()
74+
else:
75+
self._labels = labels
7176
self._max_age = max_age
7277
self._status = None
7378
self._created_timestamp = None
@@ -84,7 +89,8 @@ def __eq__(self, other):
8489
return False
8590

8691
if (
87-
self.name != other.name
92+
self.labels != other.labels
93+
or self.name != other.name
8894
or self.project != other.project
8995
or self.max_age != other.max_age
9096
):
@@ -217,6 +223,21 @@ def max_age(self, max_age):
217223
"""
218224
self._max_age = max_age
219225

226+
@property
227+
def labels(self):
228+
"""
229+
Returns the labels of this feature set. This is the user defined metadata
230+
defined as a dictionary.
231+
"""
232+
return self._labels
233+
234+
@labels.setter
235+
def labels(self, labels: MutableMapping[str, str]):
236+
"""
237+
Set the labels for this feature set
238+
"""
239+
self._labels = labels
240+
220241
@property
221242
def status(self):
222243
"""
@@ -245,6 +266,18 @@ def created_timestamp(self, created_timestamp):
245266
"""
246267
self._created_timestamp = created_timestamp
247268

269+
def set_label(self, key: str, value: str):
270+
"""
271+
Sets the label value for a given key
272+
"""
273+
self.labels[key] = value
274+
275+
def remove_label(self, key: str):
276+
"""
277+
Removes a label based on key
278+
"""
279+
del self.labels[key]
280+
248281
def add(self, resource):
249282
"""
250283
Adds a resource (Feature, Entity) to this Feature Set.
@@ -279,11 +312,7 @@ def drop(self, name: str):
279312
Args:
280313
name: Name of Feature or Entity to be removed
281314
"""
282-
if name not in self._fields:
283-
raise ValueError("Could not find field " + name + ", no action taken")
284-
if name in self._fields:
285-
del self._fields[name]
286-
return
315+
del self._fields[name]
287316

288317
def _add_fields(self, fields: List[Field]):
289318
"""
@@ -796,6 +825,7 @@ def from_proto(cls, feature_set_proto: FeatureSetProto):
796825
and feature_set_proto.spec.max_age.nanos == 0
797826
else feature_set_proto.spec.max_age
798827
),
828+
labels=feature_set_proto.spec.labels,
799829
source=(
800830
None
801831
if feature_set_proto.spec.source.type == 0
@@ -825,6 +855,7 @@ def to_proto(self) -> FeatureSetProto:
825855
name=self.name,
826856
project=self.project,
827857
max_age=self.max_age,
858+
labels=self.labels,
828859
source=self.source.to_proto() if self.source is not None else None,
829860
features=[
830861
field.to_proto()

sdk/python/feast/field.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
from typing import Union
14+
from collections import OrderedDict
15+
from typing import MutableMapping, Optional, Union
1516

1617
from feast.core.FeatureSet_pb2 import EntitySpec, FeatureSpec
1718
from feast.value_type import ValueType
@@ -24,11 +25,20 @@ class Field:
2425
features.
2526
"""
2627

27-
def __init__(self, name: str, dtype: ValueType):
28+
def __init__(
29+
self,
30+
name: str,
31+
dtype: ValueType,
32+
labels: Optional[MutableMapping[str, str]] = None,
33+
):
2834
self._name = name
2935
if not isinstance(dtype, ValueType):
3036
raise ValueError("dtype is not a valid ValueType")
3137
self._dtype = dtype
38+
if labels is None:
39+
self._labels = OrderedDict()
40+
else:
41+
self._labels = labels
3242
self._presence = None
3343
self._group_presence = None
3444
self._shape = None
@@ -47,7 +57,11 @@ def __init__(self, name: str, dtype: ValueType):
4757
self._time_of_day_domain = None
4858

4959
def __eq__(self, other):
50-
if self.name != other.name or self.dtype != other.dtype:
60+
if (
61+
self.name != other.name
62+
or self.dtype != other.dtype
63+
or self.labels != other.labels
64+
):
5165
return False
5266
return True
5367

@@ -65,6 +79,13 @@ def dtype(self) -> ValueType:
6579
"""
6680
return self._dtype
6781

82+
@property
83+
def labels(self) -> MutableMapping[str, str]:
84+
"""
85+
Getter for labels of this field
86+
"""
87+
return self._labels
88+
6889
@property
6990
def presence(self) -> schema_pb2.FeaturePresence:
7091
"""

sdk/python/tests/test_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def test_get_feature_set(self, mocked_client, mocker):
276276
spec=FeatureSetSpecProto(
277277
name="my_feature_set",
278278
max_age=Duration(seconds=3600),
279+
labels={"key1": "val1", "key2": "val2"},
279280
features=[
280281
FeatureSpecProto(
281282
name="my_feature_1",
@@ -308,6 +309,10 @@ def test_get_feature_set(self, mocked_client, mocker):
308309

309310
assert (
310311
feature_set.name == "my_feature_set"
312+
and "key1" in feature_set.labels
313+
and feature_set.labels["key1"] == "val1"
314+
and "key2" in feature_set.labels
315+
and feature_set.labels["key2"] == "val2"
311316
and feature_set.fields["my_feature_1"].name == "my_feature_1"
312317
and feature_set.fields["my_feature_1"].dtype == ValueType.FLOAT
313318
and feature_set.fields["my_entity_1"].name == "my_entity_1"

sdk/python/tests/test_feature_set.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import pathlib
16+
from collections import OrderedDict
1617
from concurrent import futures
1718
from datetime import datetime
1819

@@ -62,7 +63,7 @@ def test_add_remove_features_success(self):
6263
assert len(fs.features) == 1 and fs.features[0].name == "my-feature-2"
6364

6465
def test_remove_feature_failure(self):
65-
with pytest.raises(ValueError):
66+
with pytest.raises(KeyError):
6667
fs = FeatureSet("my-feature-set")
6768
fs.drop(name="my-feature-1")
6869

@@ -287,6 +288,98 @@ def make_tfx_schema_domain_info_inline(schema):
287288
feature.int_domain.MergeFrom(domain_ref_to_int_domain[domain_ref])
288289

289290

291+
def test_feature_set_class_contains_labels():
292+
fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"})
293+
assert "key1" in fs.labels.keys() and fs.labels["key1"] == "val1"
294+
assert "key2" in fs.labels.keys() and fs.labels["key2"] == "val2"
295+
296+
297+
def test_feature_class_contains_labels():
298+
fs = FeatureSet("my-feature-set", labels={"key1": "val1", "key2": "val2"})
299+
fs.add(
300+
Feature(
301+
name="my-feature-1",
302+
dtype=ValueType.INT64,
303+
labels={"feature_key1": "feature_val1"},
304+
)
305+
)
306+
assert "feature_key1" in fs.features[0].labels.keys()
307+
assert fs.features[0].labels["feature_key1"] == "feature_val1"
308+
309+
310+
def test_feature_set_without_labels_empty_dict():
311+
fs = FeatureSet("my-feature-set")
312+
assert fs.labels == OrderedDict()
313+
assert len(fs.labels) == 0
314+
315+
316+
def test_feature_without_labels_empty_dict():
317+
f = Feature("my feature", dtype=ValueType.INT64)
318+
assert f.labels == OrderedDict()
319+
assert len(f.labels) == 0
320+
321+
322+
def test_set_label_feature_set():
323+
fs = FeatureSet("my-feature-set")
324+
fs.set_label("k1", "v1")
325+
assert fs.labels["k1"] == "v1"
326+
327+
328+
def test_set_labels_overwrites_existing():
329+
fs = FeatureSet("my-feature-set")
330+
fs.set_label("k1", "v1")
331+
fs.set_label("k1", "v2")
332+
assert fs.labels["k1"] == "v2"
333+
334+
335+
def test_remove_labels_empty_failure():
336+
fs = FeatureSet("my-feature-set")
337+
with pytest.raises(KeyError):
338+
fs.remove_label("key1")
339+
340+
341+
def test_remove_labels_invalid_key_failure():
342+
fs = FeatureSet("my-feature-set")
343+
fs.set_label("k1", "v1")
344+
with pytest.raises(KeyError):
345+
fs.remove_label("key1")
346+
347+
348+
def test_unequal_feature_based_on_labels():
349+
f1 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
350+
f2 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
351+
assert f1 == f2
352+
f3 = Feature(name="feature-1", dtype=ValueType.INT64)
353+
assert f1 != f3
354+
f4 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "notv1"})
355+
assert f1 != f4
356+
357+
358+
def test_unequal_feature_set_based_on_labels():
359+
fs1 = FeatureSet("my-feature-set")
360+
fs2 = FeatureSet("my-feature-set")
361+
assert fs1 == fs2
362+
fs1.set_label("k1", "v1")
363+
fs2.set_label("k1", "v1")
364+
assert fs1 == fs2
365+
fs2.set_label("k1", "unequal")
366+
assert not fs1 == fs2
367+
368+
369+
def test_unequal_feature_set_other_has_no_labels():
370+
fs1 = FeatureSet("my-feature-set")
371+
fs2 = FeatureSet("my-feature-set")
372+
assert fs1 == fs2
373+
fs1.set_label("k1", "v1")
374+
assert not fs1 == fs2
375+
376+
377+
def test_unequal_feature_other_has_no_labels():
378+
f1 = Feature(name="feature-1", dtype=ValueType.INT64, labels={"k1": "v1"})
379+
f2 = Feature(name="feature-1", dtype=ValueType.INT64)
380+
assert f1 != f2
381+
382+
290383
class TestFeatureSetRef:
291384
def test_from_feature_set(self):
292385
feature_set = FeatureSet("test", "test")

tests/e2e/redis/basic-ingest-redis-serving.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -688,11 +688,16 @@ def get_feature_set(self, core_service_stub, name, project):
688688
@pytest.mark.run(order=51)
689689
def test_register_feature_set_with_labels(self, core_service_stub):
690690
feature_set_name = "test_feature_set_labels"
691-
feature_set_proto = FeatureSet(feature_set_name, PROJECT_NAME).to_proto()
692-
feature_set_proto.spec.labels[self.LABEL_KEY] = self.LABEL_VALUE
691+
feature_set_proto = FeatureSet(
692+
name=feature_set_name,
693+
project=PROJECT_NAME,
694+
labels={self.LABEL_KEY: self.LABEL_VALUE},
695+
).to_proto()
693696
self.apply_feature_set(core_service_stub, feature_set_proto)
694697

695-
retrieved_feature_set = self.get_feature_set(core_service_stub, feature_set_name, PROJECT_NAME)
698+
retrieved_feature_set = self.get_feature_set(
699+
core_service_stub, feature_set_name, PROJECT_NAME
700+
)
696701

697702
assert self.LABEL_KEY in retrieved_feature_set.spec.labels
698703
assert retrieved_feature_set.spec.labels[self.LABEL_KEY] == self.LABEL_VALUE
@@ -701,12 +706,22 @@ def test_register_feature_set_with_labels(self, core_service_stub):
701706
@pytest.mark.run(order=52)
702707
def test_register_feature_with_labels(self, core_service_stub):
703708
feature_set_name = "test_feature_labels"
704-
feature_set_proto = FeatureSet(feature_set_name, PROJECT_NAME, features=[Feature("rating", ValueType.INT64)]) \
705-
.to_proto()
706-
feature_set_proto.spec.features[0].labels[self.LABEL_KEY] = self.LABEL_VALUE
709+
feature_set_proto = FeatureSet(
710+
name=feature_set_name,
711+
project=PROJECT_NAME,
712+
features=[
713+
Feature(
714+
name="rating",
715+
dtype=ValueType.INT64,
716+
labels={self.LABEL_KEY: self.LABEL_VALUE},
717+
)
718+
],
719+
).to_proto()
707720
self.apply_feature_set(core_service_stub, feature_set_proto)
708721

709-
retrieved_feature_set = self.get_feature_set(core_service_stub, feature_set_name, PROJECT_NAME)
722+
retrieved_feature_set = self.get_feature_set(
723+
core_service_stub, feature_set_name, PROJECT_NAME
724+
)
710725
retrieved_feature = retrieved_feature_set.spec.features[0]
711726

712727
assert self.LABEL_KEY in retrieved_feature.labels

0 commit comments

Comments
 (0)