Skip to content

Commit ecce3cf

Browse files
fix: base64-encode bytes_val and bytes_list_val for JSON serialization
bytes_val returned raw bytes which cause json.dumps TypeError when JSONResponse serializes the response. Base64-encode these values to match standard protobuf JSON encoding for bytes fields. Fixes Nikhil's review comment: bytes_val is still problem since you are using JSONResponse. Signed-off-by: abhijeet-dhumal <abhijeetdhumal652@gmail.com>
1 parent 388f89b commit ecce3cf

2 files changed

Lines changed: 33 additions & 16 deletions

File tree

sdk/python/feast/feature_server_utils.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
Values are serialized as native Python types (not wrapped dicts).
55
"""
66

7+
import base64
78
import logging
89
from datetime import datetime, timezone
910
from typing import Any, Dict, Optional
@@ -67,14 +68,18 @@ def convert_response_to_dict(response: GetOnlineFeaturesResponse) -> Dict[str, A
6768

6869

6970
def _value_to_native(v: Value) -> Optional[Any]:
70-
"""Convert a Value proto to native Python type (matches proto_json.patch() format).
71+
"""Convert a Value proto to a JSON-serializable Python type.
7172
72-
Note: proto_json.patch() modifies MessageToDict to return raw bytes instead of
73-
base64 encoding, so we return raw bytes here to match that behavior.
73+
bytes_val and bytes_list_val are base64-encoded (RFC 4648) so that
74+
JSONResponse can serialize them without TypeError. This matches standard
75+
protobuf JSON encoding for bytes fields and is safe for all HTTP clients.
7476
"""
7577
which = v.WhichOneof("val")
7678
if which is None or which == "null_val":
7779
return None
80+
# bytes must be base64-encoded for JSON serialization
81+
elif which == "bytes_val":
82+
return base64.b64encode(v.bytes_val).decode("ascii")
7883
# RepeatedValue — nested Values that must be recursively converted
7984
elif which in ("list_val", "set_val"):
8085
return [_value_to_native(nested) for nested in getattr(v, which).val]
@@ -94,13 +99,17 @@ def _value_to_native(v: Value) -> Optional[Any]:
9499
"scalar_map_val is not yet supported by convert_response_to_dict; value will be None"
95100
)
96101
return None
97-
# All simple list/set types (.val is a repeated scalar field)
102+
# bytes_list_val / bytes_set_val — base64-encode each element
103+
elif which in ("bytes_list_val", "bytes_set_val"):
104+
return [base64.b64encode(b).decode("ascii") for b in getattr(v, which).val]
105+
# All other list/set types have scalar .val fields
98106
elif "_list_" in which or "_set_" in which:
99107
return list(getattr(v, which).val)
100108
else:
101109
return getattr(v, which)
102110

103111

112+
104113
def _timestamp_to_str(ts) -> str:
105114
"""Convert protobuf Timestamp to RFC 3339 format with Z suffix.
106115

sdk/python/tests/unit/test_feature_server_utils.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Related issue: https://github.com/feast-dev/feast/issues/6013
2222
"""
2323

24+
import base64
2425
import json
2526
import time
2627

@@ -104,7 +105,7 @@ def test_bytes_val(self):
104105
data = b"\x00\x01\x02\x03"
105106
v = Value(bytes_val=data)
106107
result = _value_to_native(v)
107-
assert result == data
108+
assert result == base64.b64encode(data).decode("ascii")
108109

109110
def test_double_list_val(self):
110111
v = Value(double_list_val=DoubleList(val=[1.1, 2.2, 3.3]))
@@ -144,7 +145,10 @@ def test_unix_timestamp_val(self):
144145
def test_bytes_list_val(self):
145146
v = Value(bytes_list_val=BytesList(val=[b"\x00\x01", b"\x02\x03"]))
146147
result = _value_to_native(v)
147-
assert result == [b"\x00\x01", b"\x02\x03"]
148+
assert result == [
149+
base64.b64encode(b"\x00\x01").decode("ascii"),
150+
base64.b64encode(b"\x02\x03").decode("ascii"),
151+
]
148152

149153
def test_unix_timestamp_list_val(self):
150154
v = Value(unix_timestamp_list_val=Int64List(val=[1609459200, 1609545600]))
@@ -458,7 +462,6 @@ def test_double_val_precision(self):
458462
the only difference is how many trailing digits appear in the JSON string.
459463
This is an intentional trade-off for speed and is safe for all ML feature values.
460464
"""
461-
import json
462465
import struct
463466

464467
# Use a value with many significant digits
@@ -496,20 +499,25 @@ def test_set_types_return_flat_list(self):
496499
assert isinstance(values[0], list), "set type should be a flat list"
497500
assert set(values[0]) == {"a", "b", "c"}
498501

499-
def test_bytes_values_match_patched_message_to_dict(self):
500-
"""Ensure bytes serialization matches proto_json.patch() format (raw bytes)."""
502+
def test_bytes_val_is_base64_encoded(self):
503+
"""bytes_val is base64-encoded so JSONResponse can serialize it.
504+
505+
This intentionally differs from proto_json.patch() which returns raw bytes.
506+
Raw bytes are not JSON-serializable; base64 is the standard protobuf JSON
507+
encoding for bytes fields and is safe for all HTTP clients.
508+
"""
501509
response = GetOnlineFeaturesResponse()
502510
fv = response.results.add()
503-
fv.values.append(Value(bytes_val=b"\x00\x01\x02\xff"))
511+
data = b"\x00\x01\x02\xff"
512+
fv.values.append(Value(bytes_val=data))
504513
fv.statuses.append(FieldStatus.PRESENT)
505514

506-
fast_result = convert_response_to_dict(response)
507-
standard_result = MessageToDict(response, preserving_proto_field_name=True)
515+
result = convert_response_to_dict(response)
516+
encoded = result["results"][0]["values"][0]
508517

509-
assert (
510-
fast_result["results"][0]["values"]
511-
== standard_result["results"][0]["values"]
512-
)
518+
assert encoded == base64.b64encode(data).decode("ascii")
519+
# Must be JSON-serializable
520+
assert json.dumps(encoded)
513521

514522

515523
class TestJsonSerializability:

0 commit comments

Comments
 (0)