Skip to content

Commit a2ec95e

Browse files
committed
fix: None values handled gracefully
Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com>
1 parent 862efd6 commit a2ec95e

5 files changed

Lines changed: 181 additions & 7 deletions

File tree

sdk/python/feast/infra/offline_stores/dask.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
JOB_COLUMNS,
4848
JOB_PK,
4949
normalize_monitoring_row,
50+
opt_float,
5051
)
5152
from feast.on_demand_feature_view import OnDemandFeatureView
5253
from feast.repo_config import FeastConfigBaseModel, RepoConfig
@@ -840,8 +841,8 @@ def _dask_compute_numeric_metrics(
840841
return result
841842

842843
float_array = pc.cast(valid, pyarrow.float64())
843-
result["mean"] = pc.mean(float_array).as_py() # type: ignore[attr-defined]
844-
result["stddev"] = pc.stddev(float_array, ddof=1).as_py() # type: ignore[attr-defined]
844+
result["mean"] = opt_float(pc.mean(float_array).as_py()) # type: ignore[attr-defined]
845+
result["stddev"] = opt_float(pc.stddev(float_array, ddof=1).as_py()) # type: ignore[attr-defined]
845846

846847
min_max = pc.min_max(float_array) # type: ignore[attr-defined]
847848
result["min_val"] = min_max["min"].as_py()

sdk/python/feast/monitoring/metrics_calculator.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import math
23
from typing import Dict, List, Optional, Tuple
34

45
import numpy as np
@@ -9,6 +10,17 @@
910

1011
logger = logging.getLogger(__name__)
1112

13+
14+
def _safe_float(val):
    """Coerce *val* to a finite float, or return None.

    ``None`` is passed through unchanged.  Any other value is converted with
    ``float()``; if the converted number is NaN or +/-Inf the result is
    ``None``, otherwise the converted float is returned.
    """
    if val is None:
        return None
    number = float(val)
    # isfinite() is False for exactly NaN, +Inf and -Inf.
    return number if math.isfinite(number) else None
22+
23+
1224
_NUMERIC_TYPES = {
1325
PrimitiveFeastType.INT32,
1426
PrimitiveFeastType.INT64,
@@ -83,8 +95,8 @@ def compute_numeric(self, array: pa.Array) -> Dict:
8395
return result
8496

8597
float_array = pc.cast(valid, pa.float64())
86-
result["mean"] = pc.mean(float_array).as_py() # type: ignore[attr-defined]
87-
result["stddev"] = pc.stddev(float_array, ddof=1).as_py() # type: ignore[attr-defined]
98+
result["mean"] = _safe_float(pc.mean(float_array).as_py()) # type: ignore[attr-defined]
99+
result["stddev"] = _safe_float(pc.stddev(float_array, ddof=1).as_py()) # type: ignore[attr-defined]
88100

89101
min_max = pc.min_max(float_array) # type: ignore[attr-defined]
90102
result["min_val"] = min_max["min"].as_py()

sdk/python/feast/monitoring/monitoring_service.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import math
23
import time
34
from collections import defaultdict
45
from datetime import date, datetime, timedelta, timezone
@@ -28,6 +29,32 @@
2829
"quarterly": timedelta(days=90),
2930
}
3031

32+
# Keys of a monitoring row that carry float statistics and are therefore
# checked for NaN/Inf by ``_sanitize_floats``.
_FLOAT_FIELDS = frozenset(
    {
        "null_rate",
        "mean",
        "stddev",
        "min_val",
        "max_val",
        "p50",
        "p75",
        "p90",
        "p95",
        "p99",
        "avg_null_rate",
        "max_null_rate",
    }
)


def _sanitize_floats(row: Dict[str, Any]) -> Dict[str, Any]:
    """Null out non-finite statistics in a monitoring row.

    For every key listed in ``_FLOAT_FIELDS`` whose value is a ``float`` equal
    to NaN, +Inf or -Inf, the value is replaced with ``None``.  Keys outside
    ``_FLOAT_FIELDS``, missing keys and non-float values are left untouched.

    The dict is modified in place and the same object is returned.
    """
    for name in _FLOAT_FIELDS:
        current = row.get(name)
        if not isinstance(current, float):
            continue
        if not math.isfinite(current):
            row[name] = None
    return row
57+
3158

3259
class MonitoringService:
3360
def __init__(self, store: "FeatureStore"):
@@ -524,14 +551,15 @@ def _query(
524551
end_date=None,
525552
):
526553
self._ensure_monitoring_tables()
527-
return self._get_offline_store().query_monitoring_metrics(
554+
rows = self._get_offline_store().query_monitoring_metrics(
528555
config=self._store.config,
529556
project=project,
530557
metric_type=metric_type,
531558
filters=filters,
532559
start_date=start_date,
533560
end_date=end_date,
534561
)
562+
return [_sanitize_floats(r) for r in rows]
535563

536564
def get_feature_metrics(
537565
self,

sdk/python/feast/monitoring/monitoring_utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import json
9+
import math
910
from datetime import date, datetime
1011
from typing import Any, Dict, List, Optional, Tuple
1112

@@ -152,8 +153,13 @@ def monitoring_table_meta(
152153

153154

154155
def opt_float(val: Any) -> Optional[float]:
    """Convert *val* to a finite float, mapping None/NaN/Inf to None.

    ``None`` is returned as-is.  Otherwise the value is passed to ``float()``
    and the result is returned unless it is NaN or +/-Inf, in which case
    ``None`` is returned instead.
    """
    if val is None:
        return None
    as_float = float(val)
    if not math.isfinite(as_float):
        return None
    return as_float
157163

158164

159165
def empty_numeric_metric(feature_name: str) -> Dict[str, Any]:
@@ -206,10 +212,17 @@ def empty_categorical_metric(feature_name: str) -> Dict[str, Any]:
206212
def normalize_monitoring_row(record: Dict[str, Any]) -> Dict[str, Any]:
207213
"""Normalize a monitoring metric dict for JSON serialization.
208214
215+
- Replaces float NaN / Inf with None (not JSON-serializable).
209216
- Parses ``histogram`` from JSON string if needed.
210217
- Converts ``metric_date`` / ``computed_at`` to ISO strings.
211218
- Normalizes ``is_baseline`` to Python bool.
212219
"""
220+
import math
221+
222+
for key, val in record.items():
223+
if isinstance(val, float) and (math.isnan(val) or math.isinf(val)):
224+
record[key] = None
225+
213226
hist = record.get("histogram")
214227
if isinstance(hist, str):
215228
try:

sdk/python/tests/unit/monitoring/test_metrics_calculator.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
import json
2+
import math
3+
14
import pyarrow as pa
25
import pytest
36

47
from feast.monitoring.metrics_calculator import MetricsCalculator
8+
from feast.monitoring.monitoring_utils import opt_float
59
from feast.types import PrimitiveFeastType
610

711

@@ -80,6 +84,7 @@ def test_single_value(self):
8084
assert result["mean"] == 42.0
8185
assert result["min_val"] == 42.0
8286
assert result["max_val"] == 42.0
87+
assert result["stddev"] is None # STDDEV_SAMP of 1 value is NaN → None
8388

8489
def test_histogram_bin_count(self):
8590
calc = _make_calc(bins=5)
@@ -167,3 +172,118 @@ def test_missing_column_skipped(self):
167172

168173
assert len(results) == 1
169174
assert results[0]["feature_name"] == "age"
175+
176+
177+
class TestNaNSanitization:
    """Verify that NaN/Inf values never leak into metric results."""

    def test_opt_float_none(self):
        assert opt_float(None) is None

    def test_opt_float_normal(self):
        assert opt_float(3.14) == pytest.approx(3.14)

    def test_opt_float_nan(self):
        assert opt_float(float("nan")) is None

    def test_opt_float_inf(self):
        assert opt_float(float("inf")) is None

    def test_opt_float_neg_inf(self):
        assert opt_float(float("-inf")) is None

    def test_opt_float_zero(self):
        assert opt_float(0) == 0.0

    def test_opt_float_integer(self):
        assert opt_float(42) == 42.0

    def test_single_value_stddev_is_none_not_nan(self):
        """Sample stddev (ddof=1) of a single value is undefined; expect None."""
        calc = _make_calc()
        arr = pa.array([7.0])
        result = calc.compute_numeric(arr)

        assert result["stddev"] is None
        assert result["mean"] == pytest.approx(7.0)

    def test_two_values_stddev_is_valid(self):
        calc = _make_calc()
        arr = pa.array([4.0, 6.0])
        result = calc.compute_numeric(arr)

        assert result["stddev"] is not None
        assert result["stddev"] == pytest.approx(math.sqrt(2.0))

    def test_all_numeric_results_json_serializable(self):
        """Every field in a numeric result must be strict JSON (no NaN/Inf)."""
        calc = _make_calc(bins=5)
        for test_data in [
            [42.0],  # single value
            [1.0, 2.0],  # two values
            [1.0, None, 3.0],  # with nulls
            list(range(100)),  # many values
        ]:
            arr = pa.array(test_data, type=pa.float64())
            result = calc.compute_numeric(arr)
            # allow_nan=False is required: with the default (True) json.dumps
            # silently emits NaN/Infinity tokens instead of raising ValueError.
            json.dumps(result, allow_nan=False)

    def test_all_categorical_results_json_serializable(self):
        calc = _make_calc()
        for test_data in [
            ["a", "b", "a"],
            ["x", None, "y"],
            [None, None],
        ]:
            arr = pa.array(test_data, type=pa.string())
            result = calc.compute_categorical(arr)
            # Strict mode so a stray NaN/Inf actually fails the test.
            json.dumps(result, allow_nan=False)

    def test_sanitize_floats_cleans_nan(self):
        from feast.monitoring.monitoring_service import _sanitize_floats

        row = {
            "feature_name": "test",
            "mean": float("nan"),
            "stddev": float("inf"),
            "null_rate": float("-inf"),
            "min_val": 1.0,
            "max_val": 10.0,
            "p50": 5.0,
            "p75": None,
            "row_count": 100,
        }
        result = _sanitize_floats(row)

        assert result["mean"] is None
        assert result["stddev"] is None
        assert result["null_rate"] is None
        assert result["min_val"] == 1.0
        assert result["max_val"] == 10.0
        assert result["p50"] == 5.0
        assert result["p75"] is None
        assert result["row_count"] == 100  # non-float fields untouched
        assert result["feature_name"] == "test"
        # Strict mode: raises ValueError if any NaN/Inf survived sanitization.
        json.dumps(result, allow_nan=False)

    def test_sanitize_floats_preserves_valid_values(self):
        from feast.monitoring.monitoring_service import _sanitize_floats

        row = {
            "mean": 5.5,
            "stddev": 2.3,
            "null_rate": 0.0,
            "min_val": 0.0,
            "max_val": 10.0,
            "p50": 5.0,
            "p75": 7.5,
            "p90": 9.0,
            "p95": 9.5,
            "p99": 9.9,
            "avg_null_rate": 0.05,
            "max_null_rate": 0.1,
        }
        # _sanitize_floats mutates and returns the same dict, so snapshot the
        # expected values first; comparing against ``row`` afterwards would
        # compare the result with itself and could never fail.
        expected = dict(row)
        result = _sanitize_floats(row)

        assert result == expected

0 commit comments

Comments
 (0)