From a9f31305627a2717564e3977ba3ace074cb5de41 Mon Sep 17 00:00:00 2001 From: Samson Gebre Date: Wed, 27 May 2026 21:39:07 -0700 Subject: [PATCH 1/2] Enhance error messaging: append innererror details to top-level error messages for better debugging; update precision defaults for float attributes; add tests for innererror handling and MultiIndex DataFrames. --- .../Dataverse/data/_batch_base.py | 8 +++ src/PowerPlatform/Dataverse/data/_odata.py | 8 +++ .../Dataverse/data/_odata_base.py | 29 +++++++-- .../Dataverse/operations/records.py | 7 ++- src/PowerPlatform/Dataverse/utils/_pandas.py | 7 +++ tests/unit/data/test_batch_edge_cases.py | 22 +++++++ tests/unit/data/test_odata_internal.py | 49 +++++++++++++++ tests/unit/data/test_sql_guardrails.py | 60 +++++++++++++++++++ tests/unit/test_pandas_helpers.py | 12 ++++ 9 files changed, 196 insertions(+), 6 deletions(-) diff --git a/src/PowerPlatform/Dataverse/data/_batch_base.py b/src/PowerPlatform/Dataverse/data/_batch_base.py index 46b247de..e6fa5faa 100644 --- a/src/PowerPlatform/Dataverse/data/_batch_base.py +++ b/src/PowerPlatform/Dataverse/data/_batch_base.py @@ -399,6 +399,14 @@ def _raise_top_level_batch_error(response: Any) -> None: error = payload.get("error", {}) service_error_code = error.get("code") or None message: str = error.get("message") or response.text or "Unexpected non-multipart response from $batch" + # Dataverse nests a more specific message (e.g. offending field/dtype) + # under error.innererror.message. Append it so users see actionable + # detail without inspecting the raw payload. + innererror = error.get("innererror") if isinstance(error, dict) else None + if isinstance(innererror, dict): + inner_msg = innererror.get("message") + if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != message: + message = f"{message}: {inner_msg.strip()}" except Exception: message = (getattr(response, "text", None) or "") or "Unexpected non-multipart response from $batch" raise HttpError( diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py index bc92b973..03a9f367 100644 --- a/src/PowerPlatform/Dataverse/data/_odata.py +++ b/src/PowerPlatform/Dataverse/data/_odata.py @@ -144,6 +144,14 @@ def _request(self, method: str, url: str, *, expected: tuple[int, ...] = _DEFAUL imsg = inner.get("message") if isinstance(imsg, str) and imsg.strip(): msg = imsg.strip() + # Dataverse nests a more specific message (e.g. offending + # field/dtype) under error.innererror.message. Append it so + # users don't have to chase the wire payload to debug. + innererror = inner.get("innererror") + if isinstance(innererror, dict): + inner_msg = innererror.get("message") + if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != msg: + msg = f"{msg}: {inner_msg.strip()}" else: imsg2 = data.get("message") if isinstance(imsg2, str) and imsg2.strip(): diff --git a/src/PowerPlatform/Dataverse/data/_odata_base.py b/src/PowerPlatform/Dataverse/data/_odata_base.py index 700c5a42..a7daaa2a 100644 --- a/src/PowerPlatform/Dataverse/data/_odata_base.py +++ b/src/PowerPlatform/Dataverse/data/_odata_base.py @@ -511,7 +511,10 @@ def _attribute_payload( "RequiredLevel": {"Value": "None"}, "MinValue": -100000000000.0, "MaxValue": 100000000000.0, - "Precision": 2, + # Default to the max DoubleAttributeMetadata.Precision (5) so + # round-trips of typical scientific/ML floats don't silently + # truncate; "float" implies precision, unlike money/decimal. + "Precision": 5, } if dtype_l in ("datetime", "date"): return { @@ -768,10 +771,13 @@ def _build_get_relationship(self, schema_name: str) -> _RawRequest: # ----------------------- SQL guardrail patterns -------------------- _SQL_WRITE_RE = re.compile( - r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b", + r"\b(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b", re.IGNORECASE, ) _SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL) + # Mask single-quoted string literals (with '' escape) before keyword scans + # so values like 'DELETE ME' or ';' inside a WHERE clause don't false-positive. + _SQL_STRING_LITERAL_RE = re.compile(r"'(?:[^']|'')*'") _SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE) _SQL_IMPLICIT_CROSS_JOIN_RE = re.compile( r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+", @@ -831,9 +837,24 @@ def _sql_guardrails(self, sql: str) -> str: """ # --- BLOCKED (save server round-trip) --- - # 1. Block writes (strip SQL comments first to catch comment-prefixed writes) + # Strip comments and mask string literals before keyword/semicolon scans + # so write keywords and ';' tucked inside string values don't trip the + # checks, while ones hidden by comments or statement stacking do. sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip() - if self._SQL_WRITE_RE.search(sql_no_comments): + sql_scan = self._SQL_STRING_LITERAL_RE.sub("''", sql_no_comments) + + # 1a. Block statement stacking (semicolons separating statements). + if ";" in sql_scan: + raise ValidationError( + "Multiple SQL statements are not supported. The Dataverse SQL " + "endpoint accepts exactly one SELECT statement per call. " + "Remove the ';' and submit each query separately.", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 1b. Block writes anywhere in the query (not just at the start), + # to catch writes hidden after comments or earlier SELECTs. + if self._SQL_WRITE_RE.search(sql_scan): raise ValidationError( "SQL endpoint is read-only. Use client.records or " "client.dataframe for write operations " diff --git a/src/PowerPlatform/Dataverse/operations/records.py b/src/PowerPlatform/Dataverse/operations/records.py index c9c66119..9d69823c 100644 --- a/src/PowerPlatform/Dataverse/operations/records.py +++ b/src/PowerPlatform/Dataverse/operations/records.py @@ -431,8 +431,11 @@ def get( """ if record_id is not None: warnings.warn( - "'records.get()' with a record_id is deprecated; " - "use 'client.records.retrieve(table, record_id)' instead.", + "'records.get(table, record_id)' is deprecated. " + "For a single record by ID, use " + "'client.records.retrieve(table, record_id)'. " + "For a filtered list, use " + "'client.records.list(table, filter=...)'.", DeprecationWarning, stacklevel=2, ) diff --git a/src/PowerPlatform/Dataverse/utils/_pandas.py b/src/PowerPlatform/Dataverse/utils/_pandas.py index 4ebd01a7..c71df49b 100644 --- a/src/PowerPlatform/Dataverse/utils/_pandas.py +++ b/src/PowerPlatform/Dataverse/utils/_pandas.py @@ -39,7 +39,14 @@ def dataframe_to_records(df: pd.DataFrame, na_as_null: bool = False) -> List[Dic :param df: Input DataFrame. :param na_as_null: When False (default), missing values are omitted from each dict. When True, missing values are included as None (sends null to Dataverse, clearing the field). + :raises ValueError: If ``df`` has ``MultiIndex`` columns. Tuple column keys + do not round-trip through Dataverse's JSON encoder and produce a + confusing error far from the call site. Flatten first. """ + if isinstance(df.columns, pd.MultiIndex): + raise ValueError( + "MultiIndex columns are not supported. Flatten via " "df.columns = df.columns.to_flat_index() first." + ) records = [] for row in df.to_dict(orient="records"): clean = {} diff --git a/tests/unit/data/test_batch_edge_cases.py b/tests/unit/data/test_batch_edge_cases.py index 01b4d629..02befe9b 100644 --- a/tests/unit/data/test_batch_edge_cases.py +++ b/tests/unit/data/test_batch_edge_cases.py @@ -417,6 +417,28 @@ def test_error_code_preserved_as_service_error_code(self): _raise_top_level_batch_error(mock_resp) self.assertEqual(ctx.exception.details.get("service_error_code"), "0x80040220") + def test_innererror_message_appended_to_top_level(self): + """error.innererror.message surfaces alongside the vague top-level + payload error, so the offending field reaches the user.""" + mock_resp = MagicMock() + mock_resp.status_code = 400 + mock_resp.json.return_value = { + "error": { + "code": "0x80060891", + "message": "Error identified in Payload provided by the user for Entity :contacts", + "innererror": { + "message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.", + "type": "System.Exception", + }, + } + } + mock_resp.text = json.dumps(mock_resp.json.return_value) + + with self.assertRaises(HttpError) as ctx: + _raise_top_level_batch_error(mock_resp) + self.assertIn("Error identified in Payload", str(ctx.exception)) + self.assertIn("birthdate", str(ctx.exception)) + # --------------------------------------------------------------------------- # 8. Batch response without continue-on-error (first failure stops) diff --git a/tests/unit/data/test_odata_internal.py b/tests/unit/data/test_odata_internal.py index ff738528..6f1cd106 100644 --- a/tests/unit/data/test_odata_internal.py +++ b/tests/unit/data/test_odata_internal.py @@ -636,6 +636,41 @@ def test_retry_after_int_stored_in_details(self): self.client._request("get", "http://example.com/test") self.assertEqual(ctx.exception.details.get("retry_after"), 30) + def test_innererror_message_appended_to_top_level(self): + """error.innererror.message is appended to the top-level message so the + offending field/dtype reaches the user instead of being hidden in the + wire payload.""" + response = self._make_raw_response( + 400, + json_data={ + "error": { + "code": "0x80060891", + "message": "Error identified in Payload provided by the user for Entity :contacts", + "innererror": { + "message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.", + "type": "System.Exception", + }, + } + }, + ) + self.client._raw_request = MagicMock(return_value=response) + with self.assertRaises(HttpError) as ctx: + self.client._request("post", "http://example.com/contacts") + self.assertIn("Error identified in Payload", str(ctx.exception)) + self.assertIn("birthdate", str(ctx.exception)) + self.assertIn("DateTime value with offset", str(ctx.exception)) + + def test_no_innererror_message_unchanged(self): + """Absence of innererror leaves the top-level message untouched.""" + response = self._make_raw_response( + 400, + json_data={"error": {"code": "0x0", "message": "Bad request"}}, + ) + self.client._raw_request = MagicMock(return_value=response) + with self.assertRaises(HttpError) as ctx: + self.client._request("get", "http://example.com/test") + self.assertEqual(ctx.exception.message, "Bad request") + class TestCreateMultiple(unittest.TestCase): """Unit tests for _ODataClient._create_multiple.""" @@ -1558,6 +1593,20 @@ def test_double_dtype_alias(self): result = self.od._attribute_payload("new_Score", "double") self.assertIn("Double", result["@odata.type"]) + def test_float_dtype_precision_default_is_5(self): + """'float' must default to the max Double precision (5) so values + like 2.718 round-trip without silent truncation. Regression guard + for the previous Precision=2 default.""" + result = self.od._attribute_payload("new_Score", "float") + self.assertEqual(result["Precision"], 5) + + def test_decimal_dtype_precision_unchanged_at_2(self): + """'decimal'/'money' should keep Precision=2 because that matches + currency semantics. Regression guard so a future cleanup doesn't + quietly broaden this along with float.""" + result = self.od._attribute_payload("new_Revenue", "decimal") + self.assertEqual(result["Precision"], 2) + def test_datetime_dtype(self): """'datetime' produces DateTimeAttributeMetadata.""" result = self.od._attribute_payload("new_CreatedDate", "datetime") diff --git a/tests/unit/data/test_sql_guardrails.py b/tests/unit/data/test_sql_guardrails.py index 6ddf3111..d3d2125b 100644 --- a/tests/unit/data/test_sql_guardrails.py +++ b/tests/unit/data/test_sql_guardrails.py @@ -92,6 +92,66 @@ def test_comment_prefixed_writes_blocked(self, sql): with pytest.raises(ValidationError, match="read-only"): c._sql_guardrails(sql) + @pytest.mark.parametrize( + "sql", + [ + # Statement-stacked writes after a SELECT and a semicolon + "SELECT name FROM account; DROP TABLE account", + # Write hidden after a line comment that strips a newline + "SELECT name FROM account -- \nINSERT INTO account(name) VALUES('x')", + # Write after a malformed/nested block-comment dance + "/* /* nested */ */ DELETE FROM account", + ], + ) + def test_writes_hidden_after_select_or_comments_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError): + c._sql_guardrails(sql) + + @pytest.mark.parametrize( + "sql", + [ + "SELECT name FROM account; SELECT name FROM contact", + "SELECT name FROM account;", + ], + ) + def test_semicolon_stacking_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError, match="Multiple SQL statements"): + c._sql_guardrails(sql) + + @pytest.mark.parametrize( + "sql", + [ + # Semicolon inside a string literal must not trip the check + "SELECT TOP 10 name FROM account WHERE name = ';'", + # Write keywords inside string literals must not trip the check + "SELECT TOP 10 name FROM account WHERE name = 'DROP ME'", + "SELECT TOP 10 name FROM account WHERE name = 'INSERT; DELETE'", + ], + ) + def test_keywords_and_semicolons_inside_string_literals_allowed(self, sql): + c = _client() + result = c._sql_guardrails(sql) + assert "SELECT" in result + + @pytest.mark.parametrize( + "sql", + [ + # ZWSP, ZWNJ, ZWJ, BOM before a write keyword must still be blocked. + # Python's \s does not match these, so an ^\s* anchor would miss + # them; the \b-based scan must catch them at the word boundary. + "​INSERT INTO account(name) VALUES('x')", + "‌DELETE FROM account", + "‍DROP TABLE account", + "UPDATE account SET name = 'x'", + ], + ) + def test_zero_width_prefixed_writes_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError, match="read-only"): + c._sql_guardrails(sql) + # =================================================================== # 1b. Block server-rejected SQL patterns (save round-trip) diff --git a/tests/unit/test_pandas_helpers.py b/tests/unit/test_pandas_helpers.py index f6c0c2fc..de234e9c 100644 --- a/tests/unit/test_pandas_helpers.py +++ b/tests/unit/test_pandas_helpers.py @@ -189,6 +189,18 @@ def test_empty_dataframe(self): result = dataframe_to_records(df) self.assertEqual(result, []) + def test_multiindex_columns_raises(self): + """MultiIndex columns produce tuple keys that don't survive JSON + serialization. Reject up-front with a clear message instead of + letting a TypeError surface deep in the HTTP layer. + """ + cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")]) + df = pd.DataFrame([["x", "y"]], columns=cols) + with self.assertRaises(ValueError) as ctx: + dataframe_to_records(df) + self.assertIn("MultiIndex", str(ctx.exception)) + self.assertIn("to_flat_index", str(ctx.exception)) + def test_mixed_types(self): """DataFrame with mixed types (str, int, float, None, Timestamp) converts correctly.""" ts = pd.Timestamp("2024-06-01") From a0974c3638efc6b95a5a80ba577945bd70c4a77a Mon Sep 17 00:00:00 2001 From: Samson Gebre Date: Wed, 27 May 2026 21:57:15 -0700 Subject: [PATCH 2/2] Fix error message --- src/PowerPlatform/Dataverse/utils/_pandas.py | 5 +++-- tests/unit/test_pandas_helpers.py | 15 +++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/PowerPlatform/Dataverse/utils/_pandas.py b/src/PowerPlatform/Dataverse/utils/_pandas.py index c71df49b..cb0676fe 100644 --- a/src/PowerPlatform/Dataverse/utils/_pandas.py +++ b/src/PowerPlatform/Dataverse/utils/_pandas.py @@ -41,11 +41,12 @@ def dataframe_to_records(df: pd.DataFrame, na_as_null: bool = False) -> List[Dic When True, missing values are included as None (sends null to Dataverse, clearing the field). :raises ValueError: If ``df`` has ``MultiIndex`` columns. Tuple column keys do not round-trip through Dataverse's JSON encoder and produce a - confusing error far from the call site. Flatten first. + confusing error far from the call site. Flatten to string column namesfirst. """ if isinstance(df.columns, pd.MultiIndex): raise ValueError( - "MultiIndex columns are not supported. Flatten via " "df.columns = df.columns.to_flat_index() first." + "MultiIndex columns are not supported. Flatten to string column names first, " + "for example: df.columns = ['_'.join(map(str, col)) for col in df.columns.to_flat_index()]." ) records = [] for row in df.to_dict(orient="records"): diff --git a/tests/unit/test_pandas_helpers.py b/tests/unit/test_pandas_helpers.py index de234e9c..ccbb5c7f 100644 --- a/tests/unit/test_pandas_helpers.py +++ b/tests/unit/test_pandas_helpers.py @@ -198,8 +198,19 @@ def test_multiindex_columns_raises(self): df = pd.DataFrame([["x", "y"]], columns=cols) with self.assertRaises(ValueError) as ctx: dataframe_to_records(df) - self.assertIn("MultiIndex", str(ctx.exception)) - self.assertIn("to_flat_index", str(ctx.exception)) + msg = str(ctx.exception) + self.assertIn("MultiIndex", msg) + # The remediation must produce string column names, not tuples. + self.assertIn("'_'.join(map(str, col)) for col in df.columns.to_flat_index()", msg) + + def test_multiindex_remediation_actually_works(self): + """The exact remediation in the error message must produce a + DataFrame that dataframe_to_records can serialize.""" + cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")]) + df = pd.DataFrame([["x", "y"]], columns=cols) + df.columns = ["_".join(map(str, col)) for col in df.columns.to_flat_index()] + records = dataframe_to_records(df) + self.assertEqual(records, [{"a_firstname": "x", "a_lastname": "y"}]) def test_mixed_types(self): """DataFrame with mixed types (str, int, float, None, Timestamp) converts correctly."""