|
| 1 | +import json |
| 2 | +import math |
| 3 | + |
1 | 4 | import pyarrow as pa |
2 | 5 | import pytest |
3 | 6 |
|
4 | 7 | from feast.monitoring.metrics_calculator import MetricsCalculator |
| 8 | +from feast.monitoring.monitoring_utils import opt_float |
5 | 9 | from feast.types import PrimitiveFeastType |
6 | 10 |
|
7 | 11 |
|
@@ -80,6 +84,7 @@ def test_single_value(self): |
80 | 84 | assert result["mean"] == 42.0 |
81 | 85 | assert result["min_val"] == 42.0 |
82 | 86 | assert result["max_val"] == 42.0 |
| 87 | + assert result["stddev"] is None # STDDEV_SAMP of 1 value is NaN → None |
83 | 88 |
|
84 | 89 | def test_histogram_bin_count(self): |
85 | 90 | calc = _make_calc(bins=5) |
@@ -167,3 +172,118 @@ def test_missing_column_skipped(self): |
167 | 172 |
|
168 | 173 | assert len(results) == 1 |
169 | 174 | assert results[0]["feature_name"] == "age" |
| 175 | + |
| 176 | + |
class TestNaNSanitization:
    """Verify that NaN/Inf values never leak into metric results.

    JSON-serializability checks call ``json.dumps(..., allow_nan=False)``.
    With the default ``allow_nan=True`` the encoder silently writes ``NaN`` /
    ``Infinity`` tokens instead of raising, so a plain ``json.dumps(result)``
    would pass even when non-finite floats are present.
    """

    def test_opt_float_none(self):
        """``None`` is passed through unchanged."""
        assert opt_float(None) is None

    def test_opt_float_normal(self):
        """A finite float is returned as-is."""
        assert opt_float(3.14) == pytest.approx(3.14)

    def test_opt_float_nan(self):
        """NaN is mapped to ``None``."""
        assert opt_float(float("nan")) is None

    def test_opt_float_inf(self):
        """Positive infinity is mapped to ``None``."""
        assert opt_float(float("inf")) is None

    def test_opt_float_neg_inf(self):
        """Negative infinity is mapped to ``None``."""
        assert opt_float(float("-inf")) is None

    def test_opt_float_zero(self):
        """Zero is a valid value and must not be treated as missing."""
        assert opt_float(0) == 0.0

    def test_opt_float_integer(self):
        """Integer input compares equal to the corresponding float."""
        assert opt_float(42) == 42.0

    def test_single_value_stddev_is_none_not_nan(self):
        """Sample stddev of a single value is undefined; it must surface as None."""
        calc = _make_calc()
        arr = pa.array([7.0])
        result = calc.compute_numeric(arr)

        assert result["stddev"] is None
        assert result["mean"] == pytest.approx(7.0)

    def test_two_values_stddev_is_valid(self):
        """Two values give a well-defined sample stddev (sqrt(2) for 4 and 6)."""
        calc = _make_calc()
        arr = pa.array([4.0, 6.0])
        result = calc.compute_numeric(arr)

        assert result["stddev"] is not None
        assert result["stddev"] == pytest.approx(math.sqrt(2.0))

    def test_all_numeric_results_json_serializable(self):
        """Every field in a numeric result must be JSON-serializable (no NaN/Inf)."""
        calc = _make_calc(bins=5)
        for test_data in [
            [42.0],  # single value
            [1.0, 2.0],  # two values
            [1.0, None, 3.0],  # with nulls
            list(range(100)),  # many values
        ]:
            arr = pa.array(test_data, type=pa.float64())
            result = calc.compute_numeric(arr)
            # allow_nan=False makes the encoder raise ValueError on NaN/Inf;
            # the default would emit them silently and the check would be a no-op.
            json.dumps(result, allow_nan=False)

    def test_all_categorical_results_json_serializable(self):
        """Every field in a categorical result must be strictly JSON-serializable."""
        calc = _make_calc()
        for test_data in [
            ["a", "b", "a"],
            ["x", None, "y"],
            [None, None],
        ]:
            arr = pa.array(test_data, type=pa.string())
            result = calc.compute_categorical(arr)
            # Strict mode: raise ValueError if any non-finite float is present.
            json.dumps(result, allow_nan=False)

    def test_sanitize_floats_cleans_nan(self):
        """Non-finite floats become ``None``; every other field is left untouched."""
        from feast.monitoring.monitoring_service import _sanitize_floats

        row = {
            "feature_name": "test",
            "mean": float("nan"),
            "stddev": float("inf"),
            "null_rate": float("-inf"),
            "min_val": 1.0,
            "max_val": 10.0,
            "p50": 5.0,
            "p75": None,
            "row_count": 100,
        }
        result = _sanitize_floats(row)

        assert result["mean"] is None
        assert result["stddev"] is None
        assert result["null_rate"] is None
        assert result["min_val"] == 1.0
        assert result["max_val"] == 10.0
        assert result["p50"] == 5.0
        assert result["p75"] is None
        assert result["row_count"] == 100  # non-float fields untouched
        assert result["feature_name"] == "test"
        # Strict mode so a leftover NaN/Inf fails the test instead of being
        # serialized as a non-standard JSON token.
        json.dumps(result, allow_nan=False)

    def test_sanitize_floats_preserves_valid_values(self):
        """Finite values pass through ``_sanitize_floats`` unchanged."""
        from feast.monitoring.monitoring_service import _sanitize_floats

        row = {
            "mean": 5.5,
            "stddev": 2.3,
            "null_rate": 0.0,
            "min_val": 0.0,
            "max_val": 10.0,
            "p50": 5.0,
            "p75": 7.5,
            "p90": 9.0,
            "p95": 9.5,
            "p99": 9.9,
            "avg_null_rate": 0.05,
            "max_null_rate": 0.1,
        }
        result = _sanitize_floats(row)

        for key, val in row.items():
            assert result[key] == val
0 commit comments