Skip to content

Commit 7d177b6

Browse files
authored
Handle case where_LIST type is empty (feast-dev#1703)
* Add test for empty `list` Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Refactor how types are tested Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Test the type of all elements in list features Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Fix issue with empty list features Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Remove `python_type_to_feast_value_type` from use in tests Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Infer type from all data Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Add one non-empty element to empty list test datasets Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Accept empty lists in tests Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Use `ValueType.UNKNOWN` instead of `None` Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Handle mix of `null` and `non-null` values better Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Handle entity row type inference when Protobuf values used Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Fix typo Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Make test config generate more clear Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Add TODO Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Be more strict about online entity type consistency Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Rename variable to be more precise Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> * Add `TODO: Add test where all lists are empty` Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com>
1 parent 21f1ef7 commit 7d177b6

File tree

5 files changed

+186
-91
lines changed

5 files changed

+186
-91
lines changed

sdk/python/feast/feature.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def __init__(
3939
self._name = name
4040
if not isinstance(dtype, ValueType):
4141
raise ValueError("dtype is not a valid ValueType")
42+
if dtype is ValueType.UNKNOWN:
43+
raise ValueError(f"dtype cannot be {dtype}")
4244
self._dtype = dtype
4345
if labels is None:
4446
self._labels = dict()

sdk/python/feast/online_response.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Any, Dict, List, cast
15+
from collections import defaultdict
16+
from typing import Any, Dict, List, Optional, cast
1617

1718
import pandas as pd
1819

@@ -23,10 +24,12 @@
2324
)
2425
from feast.protos.feast.types.Value_pb2 import Value as Value
2526
from feast.type_map import (
27+
_proto_str_to_value_type,
2628
_python_value_to_proto_value,
2729
feast_value_type_to_python_type,
28-
python_type_to_feast_value_type,
30+
python_values_to_feast_value_type,
2931
)
32+
from feast.value_type import ValueType
3033

3134

3235
class OnlineResponse:
@@ -93,26 +96,47 @@ def _infer_online_entity_rows(
9396

9497
entity_rows_dicts = cast(List[Dict[str, Any]], entity_rows)
9598
entity_row_list = []
96-
entity_type_map = dict()
99+
entity_type_map: Dict[str, Optional[ValueType]] = dict()
100+
entity_python_values_map = defaultdict(list)
101+
102+
# Flatten keys-value dicts into lists for type inference
103+
for entity in entity_rows_dicts:
104+
for key, value in entity.items():
105+
if isinstance(value, Value):
106+
inferred_type = _proto_str_to_value_type(str(value.WhichOneof("val")))
107+
# If any ProtoValues were present their types must all be the same
108+
if key in entity_type_map and entity_type_map.get(key) != inferred_type:
109+
raise TypeError(
110+
f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed."
111+
)
112+
entity_type_map[key] = inferred_type
113+
else:
114+
entity_python_values_map[key].append(value)
115+
116+
# Loop over all entities to infer dtype first in case of empty lists or nulls
117+
for key, values in entity_python_values_map.items():
118+
inferred_type = python_values_to_feast_value_type(key, values)
119+
120+
# If any ProtoValues were present their types must match the inferred type
121+
if key in entity_type_map and entity_type_map.get(key) != inferred_type:
122+
raise TypeError(
123+
f"Input entity {key} has mixed types, {entity_type_map.get(key)} and {inferred_type}. That is not allowed."
124+
)
125+
126+
entity_type_map[key] = inferred_type
97127

98128
for entity in entity_rows_dicts:
99129
fields = {}
100130
for key, value in entity.items():
101-
# Allow for feast.types.Value
131+
if key not in entity_type_map:
132+
raise ValueError(
133+
f"field {key} cannot have all null values for type inference."
134+
)
135+
102136
if isinstance(value, Value):
103137
proto_value = value
104138
else:
105-
# Infer the specific type for this row
106-
current_dtype = python_type_to_feast_value_type(name=key, value=value)
107-
108-
if key not in entity_type_map:
109-
entity_type_map[key] = current_dtype
110-
else:
111-
if current_dtype != entity_type_map[key]:
112-
raise TypeError(
113-
f"Input entity {key} has mixed types, {current_dtype} and {entity_type_map[key]}. That is not allowed. "
114-
)
115-
proto_value = _python_value_to_proto_value(current_dtype, value)
139+
proto_value = _python_value_to_proto_value(entity_type_map[key], value)
116140
fields[key] = proto_value
117141
entity_row_list.append(GetOnlineFeaturesRequestV2.EntityRow(fields=fields))
118142
return entity_row_list

sdk/python/feast/type_map.py

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,28 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any:
4848
field_value_dict = MessageToDict(field_value_proto)
4949

5050
for k, v in field_value_dict.items():
51+
if "List" in k:
52+
val = v.get("val", [])
53+
else:
54+
val = v
55+
5156
if k == "int64Val":
52-
return int(v)
57+
return int(val)
5358
if k == "bytesVal":
54-
return bytes(v)
59+
return bytes(val)
5560
if (k == "int64ListVal") or (k == "int32ListVal"):
56-
return [int(item) for item in v["val"]]
61+
return [int(item) for item in val]
5762
if (k == "floatListVal") or (k == "doubleListVal"):
58-
return [float(item) for item in v["val"]]
63+
return [float(item) for item in val]
5964
if k == "stringListVal":
60-
return [str(item) for item in v["val"]]
65+
return [str(item) for item in val]
6166
if k == "bytesListVal":
62-
return [bytes(item) for item in v["val"]]
67+
return [bytes(item) for item in val]
6368
if k == "boolListVal":
64-
return [bool(item) for item in v["val"]]
69+
return [bool(item) for item in val]
6570

6671
if k in ["int32Val", "floatVal", "doubleVal", "stringVal", "boolVal"]:
67-
return v
72+
return val
6873
else:
6974
raise TypeError(
7075
f"Casting to Python native type for type {k} failed. "
@@ -144,9 +149,9 @@ def python_type_to_feast_value_type(
144149
common_item_value_type = None
145150
for item in list_items:
146151
if isinstance(item, ProtoValue):
147-
current_item_value_type = _proto_str_to_value_type(
148-
str(item.WhichOneof("val"))
149-
)
152+
current_item_value_type: Optional[
153+
ValueType
154+
] = _proto_str_to_value_type(str(item.WhichOneof("val")))
150155
else:
151156
# Get the type from the current item, only one level deep
152157
current_item_value_type = python_type_to_feast_value_type(
@@ -164,9 +169,7 @@ def python_type_to_feast_value_type(
164169
)
165170
common_item_value_type = current_item_value_type
166171
if common_item_value_type is None:
167-
raise ValueError(
168-
f"field {name} cannot have null values for type inference."
169-
)
172+
return ValueType.UNKNOWN
170173
return ValueType[common_item_value_type.name + "_LIST"]
171174
else:
172175
assert value
@@ -180,6 +183,28 @@ def python_type_to_feast_value_type(
180183
return type_map[value.dtype.__str__()]
181184

182185

186+
def python_values_to_feast_value_type(name: str, values: Any, recurse: bool = True):
187+
inferred_dtype = ValueType.UNKNOWN
188+
for row in values:
189+
current_dtype = python_type_to_feast_value_type(
190+
name, value=row, recurse=recurse
191+
)
192+
193+
if inferred_dtype is ValueType.UNKNOWN:
194+
inferred_dtype = current_dtype
195+
else:
196+
if current_dtype != inferred_dtype and current_dtype != ValueType.UNKNOWN:
197+
raise TypeError(
198+
f"Input entity {name} has mixed types, {current_dtype} and {inferred_dtype}. That is not allowed. "
199+
)
200+
if inferred_dtype is ValueType.UNKNOWN:
201+
raise ValueError(
202+
f"field {name} cannot have all null values for type inference."
203+
)
204+
205+
return inferred_dtype
206+
207+
183208
def _type_err(item, dtype):
184209
raise ValueError(f'Value "{item}" is of type {type(item)} not of type {dtype}')
185210

sdk/python/tests/data/data_creator.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@ def create_dataset(
1111
entity_type: ValueType = ValueType.INT32,
1212
feature_dtype: str = None,
1313
feature_is_list: bool = False,
14+
list_has_empty_list: bool = False,
1415
) -> pd.DataFrame:
1516
now = datetime.now().replace(microsecond=0, second=0, minute=0)
1617
ts = pd.Timestamp(now).round("ms")
1718
data = {
1819
"driver_id": get_entities_for_value_type(entity_type),
19-
"value": get_feature_values_for_dtype(feature_dtype, feature_is_list),
20+
"value": get_feature_values_for_dtype(
21+
feature_dtype, feature_is_list, list_has_empty_list
22+
),
2023
"ts_1": [
2124
ts - timedelta(hours=4),
2225
ts,
@@ -44,7 +47,9 @@ def get_entities_for_value_type(value_type: ValueType) -> List:
4447
return value_type_map[value_type]
4548

4649

47-
def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
50+
def get_feature_values_for_dtype(
51+
dtype: str, is_list: bool, has_empty_list: bool
52+
) -> List:
4853
if dtype is None:
4954
return [0.1, None, 0.3, 4, 5]
5055
# TODO(adchia): for int columns, consider having a better error when dealing with None values (pandas int dfs can't
@@ -57,8 +62,11 @@ def get_feature_values_for_dtype(dtype: str, is_list: bool) -> List:
5762
"bool": [True, None, False, True, False],
5863
}
5964
non_list_val = dtype_map[dtype]
60-
# Duplicate the value once if this is a list
6165
if is_list:
66+
# TODO: Add test where all lists are empty and type inference is expected to fail.
67+
if has_empty_list:
68+
# Need at least one non-empty element for type inference
69+
return [[] for n in non_list_val[:-1]] + [non_list_val[-1:]]
6270
return [[n, n] if n is not None else None for n in non_list_val]
6371
else:
6472
return non_list_val

0 commit comments

Comments
 (0)