1414
1515import re
1616from datetime import datetime
17- from typing import Any , Dict , List , Optional , Set , Tuple , Type
17+ from typing import Any , Dict , List , Optional , Set , Sized , Tuple , Type
1818
1919import numpy as np
2020import pandas as pd
@@ -238,50 +238,59 @@ def _type_err(item, dtype):
238238}
239239
240240
241- def _python_value_to_proto_value (feast_value_type : ValueType , value : Any ) -> ProtoValue :
241+ def _python_value_to_proto_value (
242+ feast_value_type : ValueType , values : List [Any ]
243+ ) -> List [ProtoValue ]:
242244 """
243245 Converts a Python (native, pandas) value to a Feast Proto Value based
244246 on a provided value type
245247
246248 Args:
247249 feast_value_type: The target value type
248- value: Value that will be converted
250+ values: List of Values that will be converted
249251
250252 Returns:
251- Feast Value Proto
253+ List of Feast Value Proto
252254 """
255+ # ToDo: make a better sample for type checks (more than one element)
256+ sample = next (filter (_non_empty_value , values ), None ) # first not empty value
257+ if sample is None :
258+ # all input values are None or empty lists
259+ return [ProtoValue ()] * len (values )
260+
253261 # Detect list type and handle separately
254262 if "list" in feast_value_type .name .lower ():
255263 # Feature can be list but None is still valid
256- if value is None :
257- return ProtoValue ()
258-
259264 if feast_value_type in PYTHON_LIST_VALUE_TYPE_TO_PROTO_VALUE :
260265 proto_type , field_name , valid_types = PYTHON_LIST_VALUE_TYPE_TO_PROTO_VALUE [
261266 feast_value_type
262267 ]
263- f = {
264- field_name : proto_type (
265- val = [
266- item
267- if type (item ) in valid_types
268- else _type_err (item , valid_types [0 ])
269- for item in value
270- ]
268+
269+ if not all (type (item ) in valid_types for item in sample ):
270+ first_invalid = next (
271+ item for item in sample if type (item ) not in valid_types
271272 )
272- }
273- return ProtoValue (** f )
273+ raise _type_err (first_invalid , valid_types [0 ])
274+
275+ return [
276+ ProtoValue (** {field_name : proto_type (val = value )})
277+ if value is not None
278+ else ProtoValue ()
279+ for value in values
280+ ]
281+
274282 # Handle scalar types below
275283 else :
276- if pd .isnull (value ):
277- return ProtoValue ()
278-
279284 if feast_value_type == ValueType .UNIX_TIMESTAMP :
280- if isinstance (value , datetime ):
281- return ProtoValue (int64_val = int (value .timestamp ()))
282- elif isinstance (value , Timestamp ):
283- return ProtoValue (int64_val = int (value .ToSeconds ()))
284- return ProtoValue (int64_val = int (value ))
285+ if isinstance (sample , datetime ):
286+ return [
287+ ProtoValue (int64_val = int (value .timestamp ())) for value in values
288+ ]
289+ elif isinstance (sample , Timestamp ):
290+ return [
291+ ProtoValue (int64_val = int (value .ToSeconds ())) for value in values
292+ ]
293+ return [ProtoValue (int64_val = int (value )) for value in values ]
285294
286295 if feast_value_type in PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE :
287296 (
@@ -290,27 +299,37 @@ def _python_value_to_proto_value(feast_value_type: ValueType, value: Any) -> Pro
290299 valid_scalar_types ,
291300 ) = PYTHON_SCALAR_VALUE_TYPE_TO_PROTO_VALUE [feast_value_type ]
292301 if valid_scalar_types :
293- assert type (value ) in valid_scalar_types
294- kwargs = {field_name : func (value )}
295- return ProtoValue (** kwargs )
302+ assert type (sample ) in valid_scalar_types
296303
297- raise Exception (f"Unsupported data type: ${ str (type (value ))} " )
304+ return [
305+ ProtoValue (** {field_name : func (value )})
306+ if not pd .isnull (value )
307+ else ProtoValue ()
308+ for value in values
309+ ]
298310
311+ raise Exception (f"Unsupported data type: ${ str (type (values [0 ]))} " )
299312
def python_values_to_proto_values(
    values: List[Any], feature_type: ValueType = ValueType.UNKNOWN
) -> List[ProtoValue]:
    """
    Converts a list of Python (native, pandas) values to Feast Proto Values,
    inferring the value type from the data when none is provided

    Args:
        values: List of Values that will be converted
        feature_type: The target value type. When left as ValueType.UNKNOWN
            the type is inferred from the first non empty element of values

    Returns:
        List of Feast Value Proto, as returned by _python_value_to_proto_value

    Raises:
        TypeError: If the value type is still ValueType.UNKNOWN after the
            inference step (e.g. every value is None or an empty list)
    """
    value_type = feature_type

    # Only look for a sample when the caller did not provide a concrete type.
    if feature_type == ValueType.UNKNOWN:
        sample = next(filter(_non_empty_value, values), None)  # first not empty value
        if sample is not None:
            # _non_empty_value never selects an empty list / array, so the
            # sample always carries at least one element to infer from and
            # no separate "len(sample) == 0" case is needed here.
            value_type = python_type_to_feast_value_type("", sample)

    if value_type == ValueType.UNKNOWN:
        raise TypeError("Couldn't infer value type from empty value")

    return _python_value_to_proto_value(value_type, values)
314333
315334
316335def _proto_value_to_value_type (proto_value : ProtoValue ) -> ValueType :
@@ -453,3 +472,15 @@ def pa_to_redshift_value_type(pa_type: pyarrow.DataType) -> str:
453472 }
454473
455474 return type_map [pa_type_as_str ]
475+
476+
477+ def _non_empty_value (value : Any ) -> bool :
478+ """
479+ Check that there's enough data we can use for type inference.
480+ If primitive type - just checking that it's not None
481+ If iterable - checking that there's some elements (len > 0)
482+ String is special case: "" - empty string is considered non empty
483+ """
484+ return value is not None and (
485+ not isinstance (value , Sized ) or len (value ) > 0 or isinstance (value , str )
486+ )
0 commit comments