From 15d33a82e10b95b452e309e65d05d7f0c56a42da Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 27 Jan 2022 10:20:42 +0000 Subject: [PATCH 1/4] Refactor `UNIX_TIMESTAMP` conversion Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> --- sdk/python/feast/type_map.py | 61 +++++++++++++++++------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 74c4cb17ed..64e09868cc 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -13,10 +13,10 @@ # limitations under the License. import re -from datetime import datetime -from typing import Any, Dict, List, Optional, Set, Sized, Tuple, Type +from typing import Any, Dict, List, Optional, Sequence, Set, Sized, Tuple, Type, cast import numpy as np +import numpy.typing as npt import pandas as pd import pyarrow from google.protobuf.timestamp_pb2 import Timestamp @@ -240,6 +240,24 @@ def _type_err(item, dtype): } +def _python_datetime_to_int_timestamp(values: Sequence[Any]) -> Sequence[int]: + # Fast path for Numpy array. + if isinstance(values, np.ndarray) and isinstance(values.dtype, np.datetime64): + return cast(npt.NDArray[np.int_], values.astype("datetime64[s]").astype("int")) + + int_timestamps = [] + for sub_value in values: + if isinstance(sub_value, datetime): + int_timestamps.append(int(sub_value.timestamp())) + elif isinstance(sub_value, Timestamp): + int_timestamps.append(int(sub_value.ToSeconds())) + elif isinstance(sub_value, np.datetime64): + int_timestamps.append(sub_value.astype("datetime64[s]").astype("int")) + else: + int_timestamps.append(int(sub_value)) + return int_timestamps + + def _python_value_to_proto_value( feast_value_type: ValueType, values: List[Any] ) -> List[ProtoValue]: @@ -275,22 +293,13 @@ def _python_value_to_proto_value( raise _type_err(first_invalid, valid_types[0]) if feast_value_type == ValueType.UNIX_TIMESTAMP_LIST: - converted_values = [] - for value in values: - converted_sub_values = [] - for sub_value in value: - if isinstance(sub_value, datetime): - converted_sub_values.append(int(sub_value.timestamp())) - elif isinstance(sub_value, Timestamp): - converted_sub_values.append(int(sub_value.ToSeconds())) - elif isinstance(sub_value, np.datetime64): - converted_sub_values.append( - sub_value.astype("datetime64[s]").astype("int") - ) - else: - converted_sub_values.append(sub_value) - converted_values.append(converted_sub_values) - values = converted_values + int_timestamps_lists = ( + _python_datetime_to_int_timestamp(value) for value in values + ) + return [ + ProtoValue(unix_timestamp_list_val=Int64List(val=ts)) + for ts in int_timestamps_lists + ] return [ ProtoValue(**{field_name: proto_type(val=value)}) # type: ignore @@ -302,20 +311,8 @@ def _python_value_to_proto_value( # Handle scalar types below else: if feast_value_type == ValueType.UNIX_TIMESTAMP: - if isinstance(sample, datetime): - return [ - ProtoValue(int64_val=int(value.timestamp())) for value in values - ] - elif isinstance(sample, Timestamp): - return [ - ProtoValue(int64_val=int(value.ToSeconds())) for value in values - ] - elif isinstance(sample, np.datetime64): - return [ - ProtoValue(int64_val=value.astype("datetime64[s]").astype("int")) - for value in values - ] - return [ProtoValue(int64_val=int(value)) for value in values] + int_timestamps = _python_datetime_to_int_timestamp(values) + return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE: ( From 0f7f9b23d71bae35688a716f6855da377bf363a8 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 27 Jan 2022 10:21:23 +0000 Subject: [PATCH 2/4] Return `UNIX_TIMESTAMP` types as `datetime` to user Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> --- sdk/python/feast/type_map.py | 10 ++++++++++ .../integration/registration/test_universal_types.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 64e09868cc..93e7cfb23e 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -13,6 +13,7 @@ # limitations under the License. import re +from datetime import datetime, timezone from typing import Any, Dict, List, Optional, Sequence, Set, Sized, Tuple, Type, cast import numpy as np @@ -49,8 +50,17 @@ def feast_value_type_to_python_type(field_value_proto: ProtoValue) -> Any: if val_attr is None: return None val = getattr(field_value_proto, val_attr) + + # If it's a _LIST type extract the list. if hasattr(val, "val"): val = list(val.val) + + # Convert UNIX_TIMESTAMP values to `datetime` + if val_attr == "unix_timestamp_list_val": + val = [datetime.fromtimestamp(v, tz=timezone.utc) for v in val] + elif val_attr == "unix_timestamp_val": + val = datetime.fromtimestamp(val, tz=timezone.utc) + return val diff --git a/sdk/python/tests/integration/registration/test_universal_types.py b/sdk/python/tests/integration/registration/test_universal_types.py index 8cb21e6384..5c782306e6 100644 --- a/sdk/python/tests/integration/registration/test_universal_types.py +++ b/sdk/python/tests/integration/registration/test_universal_types.py @@ -234,7 +234,7 @@ def test_feature_get_online_features_types_match(online_types_test_fixtures): "float": float, "string": str, "bool": bool, - "datetime": int, + "datetime": datetime, } expected_dtype = feature_list_dtype_to_expected_online_response_value_type[ config.feature_dtype From b9be843f427144c939fd75c63ad2d92a608f6d6b Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 27 Jan 2022 10:53:47 +0000 Subject: [PATCH 3/4] Fix linting errors Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> --- sdk/python/feast/type_map.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 93e7cfb23e..9790e701e6 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -14,10 +14,21 @@ import re from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Sequence, Set, Sized, Tuple, Type, cast +from typing import ( + Any, + Dict, + List, + Optional, + Sequence, + Set, + Sized, + Tuple, + Type, + Union, + cast, +) import numpy as np -import numpy.typing as npt import pandas as pd import pyarrow from google.protobuf.timestamp_pb2 import Timestamp @@ -250,10 +261,14 @@ def _type_err(item, dtype): } -def _python_datetime_to_int_timestamp(values: Sequence[Any]) -> Sequence[int]: +def _python_datetime_to_int_timestamp( + values: Sequence[Any], +) -> Sequence[Union[int, np.int_]]: # Fast path for Numpy array. if isinstance(values, np.ndarray) and isinstance(values.dtype, np.datetime64): - return cast(npt.NDArray[np.int_], values.astype("datetime64[s]").astype("int")) + if values.ndim != 1: + raise ValueError("Only 1 dimensional arrays are supported.") + return cast(Sequence[np.int_], values.astype("datetime64[s]").astype(np.int_)) int_timestamps = [] for sub_value in values: @@ -262,7 +277,7 @@ def _python_datetime_to_int_timestamp(values: Sequence[Any]) -> Sequence[int]: elif isinstance(sub_value, Timestamp): int_timestamps.append(int(sub_value.ToSeconds())) elif isinstance(sub_value, np.datetime64): - int_timestamps.append(sub_value.astype("datetime64[s]").astype("int")) + int_timestamps.append(sub_value.astype("datetime64[s]").astype(np.int_)) else: int_timestamps.append(int(sub_value)) return int_timestamps @@ -307,7 +322,8 @@ def _python_value_to_proto_value( _python_datetime_to_int_timestamp(value) for value in values ) return [ - ProtoValue(unix_timestamp_list_val=Int64List(val=ts)) + # ProtoValue does actually accept `np.int_` but the typing complains. + ProtoValue(unix_timestamp_list_val=Int64List(val=ts)) # type: ignore for ts in int_timestamps_lists ] @@ -322,7 +338,8 @@ def _python_value_to_proto_value( else: if feast_value_type == ValueType.UNIX_TIMESTAMP: int_timestamps = _python_datetime_to_int_timestamp(values) - return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] + # ProtoValue does actually accept `np.int_` but the typing complains. + return [ProtoValue(unix_timestamp_val=ts) for ts in int_timestamps] # type: ignore if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE: ( From 899e4e376a342be8660af43659d3a7b17db98b70 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Thu, 27 Jan 2022 11:13:11 +0000 Subject: [PATCH 4/4] Rename variable to something more sensible Signed-off-by: Judah Rand <17158624+judahrand@users.noreply.github.com> --- sdk/python/feast/type_map.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 9790e701e6..599be85fdf 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -271,15 +271,15 @@ def _python_datetime_to_int_timestamp( return cast(Sequence[np.int_], values.astype("datetime64[s]").astype(np.int_)) int_timestamps = [] - for sub_value in values: - if isinstance(sub_value, datetime): - int_timestamps.append(int(sub_value.timestamp())) - elif isinstance(sub_value, Timestamp): - int_timestamps.append(int(sub_value.ToSeconds())) - elif isinstance(sub_value, np.datetime64): - int_timestamps.append(sub_value.astype("datetime64[s]").astype(np.int_)) + for value in values: + if isinstance(value, datetime): + int_timestamps.append(int(value.timestamp())) + elif isinstance(value, Timestamp): + int_timestamps.append(int(value.ToSeconds())) + elif isinstance(value, np.datetime64): + int_timestamps.append(value.astype("datetime64[s]").astype(np.int_)) else: - int_timestamps.append(int(sub_value)) + int_timestamps.append(int(value)) return int_timestamps