fix: None values handled gracefully

jyejare · jyejare · commit a2ec95eb47ca · 2026-05-20T20:50:23.000+05:30
Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com>
diff --git a/sdk/python/feast/infra/offline_stores/dask.py b/sdk/python/feast/infra/offline_stores/dask.py
@@ -47,6 +47,7 @@
     JOB_COLUMNS,
     JOB_PK,
     normalize_monitoring_row,
+    opt_float,
 )
 from feast.on_demand_feature_view import OnDemandFeatureView
 from feast.repo_config import FeastConfigBaseModel, RepoConfig
@@ -840,8 +841,8 @@ def _dask_compute_numeric_metrics(
         return result
 
     float_array = pc.cast(valid, pyarrow.float64())
-    result["mean"] = pc.mean(float_array).as_py()  # type: ignore[attr-defined]
-    result["stddev"] = pc.stddev(float_array, ddof=1).as_py()  # type: ignore[attr-defined]
+    result["mean"] = opt_float(pc.mean(float_array).as_py())  # type: ignore[attr-defined]
+    result["stddev"] = opt_float(pc.stddev(float_array, ddof=1).as_py())  # type: ignore[attr-defined]
 
     min_max = pc.min_max(float_array)  # type: ignore[attr-defined]
     result["min_val"] = min_max["min"].as_py()
diff --git a/sdk/python/feast/monitoring/metrics_calculator.py b/sdk/python/feast/monitoring/metrics_calculator.py
@@ -1,4 +1,5 @@
 import logging
+import math
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
@@ -9,6 +10,17 @@
 
 logger = logging.getLogger(__name__)
 
+
+def _safe_float(val):
+    """Return None for None/NaN/Inf, otherwise float."""
+    if val is None:
+        return None
+    f = float(val)
+    if math.isnan(f) or math.isinf(f):
+        return None
+    return f
+
+
 _NUMERIC_TYPES = {
     PrimitiveFeastType.INT32,
     PrimitiveFeastType.INT64,
@@ -83,8 +95,8 @@ def compute_numeric(self, array: pa.Array) -> Dict:
             return result
 
         float_array = pc.cast(valid, pa.float64())
-        result["mean"] = pc.mean(float_array).as_py()  # type: ignore[attr-defined]
-        result["stddev"] = pc.stddev(float_array, ddof=1).as_py()  # type: ignore[attr-defined]
+        result["mean"] = _safe_float(pc.mean(float_array).as_py())  # type: ignore[attr-defined]
+        result["stddev"] = _safe_float(pc.stddev(float_array, ddof=1).as_py())  # type: ignore[attr-defined]
 
         min_max = pc.min_max(float_array)  # type: ignore[attr-defined]
         result["min_val"] = min_max["min"].as_py()
diff --git a/sdk/python/feast/monitoring/monitoring_service.py b/sdk/python/feast/monitoring/monitoring_service.py
@@ -1,4 +1,5 @@
 import logging
+import math
 import time
 from collections import defaultdict
 from datetime import date, datetime, timedelta, timezone
@@ -28,6 +29,32 @@
     "quarterly": timedelta(days=90),
 }
 
+_FLOAT_FIELDS = frozenset(
+    {
+        "null_rate",
+        "mean",
+        "stddev",
+        "min_val",
+        "max_val",
+        "p50",
+        "p75",
+        "p90",
+        "p95",
+        "p99",
+        "avg_null_rate",
+        "max_null_rate",
+    }
+)
+
+
+def _sanitize_floats(row: Dict[str, Any]) -> Dict[str, Any]:
+    """Replace NaN/Inf float values with None so JSON serialization succeeds."""
+    for key in _FLOAT_FIELDS:
+        val = row.get(key)
+        if isinstance(val, float) and (math.isnan(val) or math.isinf(val)):
+            row[key] = None
+    return row
+
 
 class MonitoringService:
     def __init__(self, store: "FeatureStore"):
@@ -524,14 +551,15 @@ def _query(
         end_date=None,
     ):
         self._ensure_monitoring_tables()
-        return self._get_offline_store().query_monitoring_metrics(
+        rows = self._get_offline_store().query_monitoring_metrics(
             config=self._store.config,
             project=project,
             metric_type=metric_type,
             filters=filters,
             start_date=start_date,
             end_date=end_date,
         )
+        return [_sanitize_floats(r) for r in rows]
 
     def get_feature_metrics(
         self,
diff --git a/sdk/python/feast/monitoring/monitoring_utils.py b/sdk/python/feast/monitoring/monitoring_utils.py
@@ -6,6 +6,7 @@
 """
 
 import json
+import math
 from datetime import date, datetime
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -152,8 +153,13 @@ def monitoring_table_meta(
 
 
 def opt_float(val: Any) -> Optional[float]:
-    """Safely cast a value to float, returning None if input is None."""
-    return float(val) if val is not None else None
+    """Safely cast a value to float, returning None for None/NaN/Inf."""
+    if val is None:
+        return None
+    f = float(val)
+    if math.isnan(f) or math.isinf(f):
+        return None
+    return f
 
 
 def empty_numeric_metric(feature_name: str) -> Dict[str, Any]:
@@ -206,10 +212,17 @@ def empty_categorical_metric(feature_name: str) -> Dict[str, Any]:
 def normalize_monitoring_row(record: Dict[str, Any]) -> Dict[str, Any]:
     """Normalize a monitoring metric dict for JSON serialization.
 
+    - Replaces float NaN / Inf with None (not JSON-serializable).
     - Parses ``histogram`` from JSON string if needed.
     - Converts ``metric_date`` / ``computed_at`` to ISO strings.
     - Normalizes ``is_baseline`` to Python bool.
     """
+    import math
+
+    for key, val in record.items():
+        if isinstance(val, float) and (math.isnan(val) or math.isinf(val)):
+            record[key] = None
+
     hist = record.get("histogram")
     if isinstance(hist, str):
         try:
diff --git a/sdk/python/tests/unit/monitoring/test_metrics_calculator.py b/sdk/python/tests/unit/monitoring/test_metrics_calculator.py
@@ -1,7 +1,11 @@
+import json
+import math
+
 import pyarrow as pa
 import pytest
 
 from feast.monitoring.metrics_calculator import MetricsCalculator
+from feast.monitoring.monitoring_utils import opt_float
 from feast.types import PrimitiveFeastType
 
 
@@ -80,6 +84,7 @@ def test_single_value(self):
         assert result["mean"] == 42.0
         assert result["min_val"] == 42.0
         assert result["max_val"] == 42.0
+        assert result["stddev"] is None  # STDDEV_SAMP of 1 value is NaN → None
 
     def test_histogram_bin_count(self):
         calc = _make_calc(bins=5)
@@ -167,3 +172,118 @@ def test_missing_column_skipped(self):
 
         assert len(results) == 1
         assert results[0]["feature_name"] == "age"
+
+
+class TestNaNSanitization:
+    """Verify that NaN/Inf values never leak into metric results."""
+
+    def test_opt_float_none(self):
+        assert opt_float(None) is None
+
+    def test_opt_float_normal(self):
+        assert opt_float(3.14) == pytest.approx(3.14)
+
+    def test_opt_float_nan(self):
+        assert opt_float(float("nan")) is None
+
+    def test_opt_float_inf(self):
+        assert opt_float(float("inf")) is None
+
+    def test_opt_float_neg_inf(self):
+        assert opt_float(float("-inf")) is None
+
+    def test_opt_float_zero(self):
+        assert opt_float(0) == 0.0
+
+    def test_opt_float_integer(self):
+        assert opt_float(42) == 42.0
+
+    def test_single_value_stddev_is_none_not_nan(self):
+        """pc.stddev(ddof=1) on a single value returns NaN; we must convert to None."""
+        calc = _make_calc()
+        arr = pa.array([7.0])
+        result = calc.compute_numeric(arr)
+
+        assert result["stddev"] is None
+        assert result["mean"] == pytest.approx(7.0)
+
+    def test_two_values_stddev_is_valid(self):
+        calc = _make_calc()
+        arr = pa.array([4.0, 6.0])
+        result = calc.compute_numeric(arr)
+
+        assert result["stddev"] is not None
+        assert result["stddev"] == pytest.approx(math.sqrt(2.0))
+
+    def test_all_numeric_results_json_serializable(self):
+        """Every field in a numeric result must be JSON-serializable (no NaN/Inf)."""
+        calc = _make_calc(bins=5)
+        for test_data in [
+            [42.0],  # single value
+            [1.0, 2.0],  # two values
+            [1.0, None, 3.0],  # with nulls
+            list(range(100)),  # many values
+        ]:
+            arr = pa.array(test_data, type=pa.float64())
+            result = calc.compute_numeric(arr)
+            json.dumps(result)  # raises ValueError if NaN/Inf present
+
+    def test_all_categorical_results_json_serializable(self):
+        calc = _make_calc()
+        for test_data in [
+            ["a", "b", "a"],
+            ["x", None, "y"],
+            [None, None],
+        ]:
+            arr = pa.array(test_data, type=pa.string())
+            result = calc.compute_categorical(arr)
+            json.dumps(result)
+
+    def test_sanitize_floats_cleans_nan(self):
+        from feast.monitoring.monitoring_service import _sanitize_floats
+
+        row = {
+            "feature_name": "test",
+            "mean": float("nan"),
+            "stddev": float("inf"),
+            "null_rate": float("-inf"),
+            "min_val": 1.0,
+            "max_val": 10.0,
+            "p50": 5.0,
+            "p75": None,
+            "row_count": 100,
+        }
+        result = _sanitize_floats(row)
+
+        assert result["mean"] is None
+        assert result["stddev"] is None
+        assert result["null_rate"] is None
+        assert result["min_val"] == 1.0
+        assert result["max_val"] == 10.0
+        assert result["p50"] == 5.0
+        assert result["p75"] is None
+        assert result["row_count"] == 100  # non-float fields untouched
+        assert result["feature_name"] == "test"
+        json.dumps(result)
+
+    def test_sanitize_floats_preserves_valid_values(self):
+        from feast.monitoring.monitoring_service import _sanitize_floats
+
+        row = {
+            "mean": 5.5,
+            "stddev": 2.3,
+            "null_rate": 0.0,
+            "min_val": 0.0,
+            "max_val": 10.0,
+            "p50": 5.0,
+            "p75": 7.5,
+            "p90": 9.0,
+            "p95": 9.5,
+            "p99": 9.9,
+            "avg_null_rate": 0.05,
+            "max_null_rate": 0.1,
+        }
+        result = _sanitize_floats(row)
+
+        for key, val in row.items():
+            assert result[key] == val