diff --git a/src/PowerPlatform/Dataverse/data/_batch_base.py b/src/PowerPlatform/Dataverse/data/_batch_base.py index 46b247de..e6fa5faa 100644 --- a/src/PowerPlatform/Dataverse/data/_batch_base.py +++ b/src/PowerPlatform/Dataverse/data/_batch_base.py @@ -399,6 +399,14 @@ def _raise_top_level_batch_error(response: Any) -> None: error = payload.get("error", {}) service_error_code = error.get("code") or None message: str = error.get("message") or response.text or "Unexpected non-multipart response from $batch" + # Dataverse nests a more specific message (e.g. offending field/dtype) + # under error.innererror.message. Append it so users see actionable + # detail without inspecting the raw payload. + innererror = error.get("innererror") if isinstance(error, dict) else None + if isinstance(innererror, dict): + inner_msg = innererror.get("message") + if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != message: + message = f"{message}: {inner_msg.strip()}" except Exception: message = (getattr(response, "text", None) or "") or "Unexpected non-multipart response from $batch" raise HttpError( diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py index bc92b973..03a9f367 100644 --- a/src/PowerPlatform/Dataverse/data/_odata.py +++ b/src/PowerPlatform/Dataverse/data/_odata.py @@ -144,6 +144,14 @@ def _request(self, method: str, url: str, *, expected: tuple[int, ...] = _DEFAUL imsg = inner.get("message") if isinstance(imsg, str) and imsg.strip(): msg = imsg.strip() + # Dataverse nests a more specific message (e.g. offending + # field/dtype) under error.innererror.message. Append it so + # users don't have to chase the wire payload to debug. + innererror = inner.get("innererror") + if isinstance(innererror, dict): + inner_msg = innererror.get("message") + if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != msg: + msg = f"{msg}: {inner_msg.strip()}" else: imsg2 = data.get("message") if isinstance(imsg2, str) and imsg2.strip(): diff --git a/src/PowerPlatform/Dataverse/data/_odata_base.py b/src/PowerPlatform/Dataverse/data/_odata_base.py index 3e27e285..182b307b 100644 --- a/src/PowerPlatform/Dataverse/data/_odata_base.py +++ b/src/PowerPlatform/Dataverse/data/_odata_base.py @@ -515,7 +515,10 @@ def _attribute_payload( "RequiredLevel": {"Value": "None"}, "MinValue": -100000000000.0, "MaxValue": 100000000000.0, - "Precision": 2, + # Default to the max DoubleAttributeMetadata.Precision (5) so + # round-trips of typical scientific/ML floats don't silently + # truncate; "float" implies precision, unlike money/decimal. + "Precision": 5, } if dtype_l in ("datetime", "date"): return { @@ -772,10 +775,13 @@ def _build_get_relationship(self, schema_name: str) -> _RawRequest: # ----------------------- SQL guardrail patterns -------------------- _SQL_WRITE_RE = re.compile( - r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b", + r"\b(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b", re.IGNORECASE, ) _SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL) + # Mask single-quoted string literals (with '' escape) before keyword scans + # so values like 'DELETE ME' or ';' inside a WHERE clause don't false-positive. + _SQL_STRING_LITERAL_RE = re.compile(r"'(?:[^']|'')*'") _SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE) _SQL_IMPLICIT_CROSS_JOIN_RE = re.compile( r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+", @@ -835,9 +841,24 @@ def _sql_guardrails(self, sql: str) -> str: """ # --- BLOCKED (save server round-trip) --- - # 1. Block writes (strip SQL comments first to catch comment-prefixed writes) + # Strip comments and mask string literals before keyword/semicolon scans + # so write keywords and ';' tucked inside string values don't trip the + # checks, while ones hidden by comments or statement stacking do. sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip() - if self._SQL_WRITE_RE.search(sql_no_comments): + sql_scan = self._SQL_STRING_LITERAL_RE.sub("''", sql_no_comments) + + # 1a. Block statement stacking (semicolons separating statements). + if ";" in sql_scan: + raise ValidationError( + "Multiple SQL statements are not supported. The Dataverse SQL " + "endpoint accepts exactly one SELECT statement per call. " + "Remove the ';' and submit each query separately.", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 1b. Block writes anywhere in the query (not just at the start), + # to catch writes hidden after comments or earlier SELECTs. + if self._SQL_WRITE_RE.search(sql_scan): raise ValidationError( "SQL endpoint is read-only. Use client.records or " "client.dataframe for write operations " diff --git a/src/PowerPlatform/Dataverse/operations/records.py b/src/PowerPlatform/Dataverse/operations/records.py index 0a9f7b4c..0abae926 100644 --- a/src/PowerPlatform/Dataverse/operations/records.py +++ b/src/PowerPlatform/Dataverse/operations/records.py @@ -431,8 +431,11 @@ def get( """ if record_id is not None: warnings.warn( - "'records.get()' with a record_id is deprecated; " - "use 'client.records.retrieve(table, record_id)' instead.", + "'records.get(table, record_id)' is deprecated. " + "For a single record by ID, use " + "'client.records.retrieve(table, record_id)'. " + "For a filtered list, use " + "'client.records.list(table, filter=...)'.", DeprecationWarning, stacklevel=2, ) diff --git a/src/PowerPlatform/Dataverse/utils/_pandas.py b/src/PowerPlatform/Dataverse/utils/_pandas.py index 4ebd01a7..cb0676fe 100644 --- a/src/PowerPlatform/Dataverse/utils/_pandas.py +++ b/src/PowerPlatform/Dataverse/utils/_pandas.py @@ -39,7 +39,15 @@ def dataframe_to_records(df: pd.DataFrame, na_as_null: bool = False) -> List[Dic :param df: Input DataFrame. :param na_as_null: When False (default), missing values are omitted from each dict. When True, missing values are included as None (sends null to Dataverse, clearing the field). + :raises ValueError: If ``df`` has ``MultiIndex`` columns. Tuple column keys + do not round-trip through Dataverse's JSON encoder and produce a + confusing error far from the call site. Flatten to string column namesfirst. """ + if isinstance(df.columns, pd.MultiIndex): + raise ValueError( + "MultiIndex columns are not supported. Flatten to string column names first, " + "for example: df.columns = ['_'.join(map(str, col)) for col in df.columns.to_flat_index()]." + ) records = [] for row in df.to_dict(orient="records"): clean = {} diff --git a/tests/unit/data/test_batch_edge_cases.py b/tests/unit/data/test_batch_edge_cases.py index 01b4d629..02befe9b 100644 --- a/tests/unit/data/test_batch_edge_cases.py +++ b/tests/unit/data/test_batch_edge_cases.py @@ -417,6 +417,28 @@ def test_error_code_preserved_as_service_error_code(self): _raise_top_level_batch_error(mock_resp) self.assertEqual(ctx.exception.details.get("service_error_code"), "0x80040220") + def test_innererror_message_appended_to_top_level(self): + """error.innererror.message surfaces alongside the vague top-level + payload error, so the offending field reaches the user.""" + mock_resp = MagicMock() + mock_resp.status_code = 400 + mock_resp.json.return_value = { + "error": { + "code": "0x80060891", + "message": "Error identified in Payload provided by the user for Entity :contacts", + "innererror": { + "message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.", + "type": "System.Exception", + }, + } + } + mock_resp.text = json.dumps(mock_resp.json.return_value) + + with self.assertRaises(HttpError) as ctx: + _raise_top_level_batch_error(mock_resp) + self.assertIn("Error identified in Payload", str(ctx.exception)) + self.assertIn("birthdate", str(ctx.exception)) + # --------------------------------------------------------------------------- # 8. Batch response without continue-on-error (first failure stops) diff --git a/tests/unit/data/test_odata_internal.py b/tests/unit/data/test_odata_internal.py index ff738528..6f1cd106 100644 --- a/tests/unit/data/test_odata_internal.py +++ b/tests/unit/data/test_odata_internal.py @@ -636,6 +636,41 @@ def test_retry_after_int_stored_in_details(self): self.client._request("get", "http://example.com/test") self.assertEqual(ctx.exception.details.get("retry_after"), 30) + def test_innererror_message_appended_to_top_level(self): + """error.innererror.message is appended to the top-level message so the + offending field/dtype reaches the user instead of being hidden in the + wire payload.""" + response = self._make_raw_response( + 400, + json_data={ + "error": { + "code": "0x80060891", + "message": "Error identified in Payload provided by the user for Entity :contacts", + "innererror": { + "message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.", + "type": "System.Exception", + }, + } + }, + ) + self.client._raw_request = MagicMock(return_value=response) + with self.assertRaises(HttpError) as ctx: + self.client._request("post", "http://example.com/contacts") + self.assertIn("Error identified in Payload", str(ctx.exception)) + self.assertIn("birthdate", str(ctx.exception)) + self.assertIn("DateTime value with offset", str(ctx.exception)) + + def test_no_innererror_message_unchanged(self): + """Absence of innererror leaves the top-level message untouched.""" + response = self._make_raw_response( + 400, + json_data={"error": {"code": "0x0", "message": "Bad request"}}, + ) + self.client._raw_request = MagicMock(return_value=response) + with self.assertRaises(HttpError) as ctx: + self.client._request("get", "http://example.com/test") + self.assertEqual(ctx.exception.message, "Bad request") + class TestCreateMultiple(unittest.TestCase): """Unit tests for _ODataClient._create_multiple.""" @@ -1558,6 +1593,20 @@ def test_double_dtype_alias(self): result = self.od._attribute_payload("new_Score", "double") self.assertIn("Double", result["@odata.type"]) + def test_float_dtype_precision_default_is_5(self): + """'float' must default to the max Double precision (5) so values + like 2.718 round-trip without silent truncation. Regression guard + for the previous Precision=2 default.""" + result = self.od._attribute_payload("new_Score", "float") + self.assertEqual(result["Precision"], 5) + + def test_decimal_dtype_precision_unchanged_at_2(self): + """'decimal'/'money' should keep Precision=2 because that matches + currency semantics. Regression guard so a future cleanup doesn't + quietly broaden this along with float.""" + result = self.od._attribute_payload("new_Revenue", "decimal") + self.assertEqual(result["Precision"], 2) + def test_datetime_dtype(self): """'datetime' produces DateTimeAttributeMetadata.""" result = self.od._attribute_payload("new_CreatedDate", "datetime") diff --git a/tests/unit/data/test_sql_guardrails.py b/tests/unit/data/test_sql_guardrails.py index 6ddf3111..d3d2125b 100644 --- a/tests/unit/data/test_sql_guardrails.py +++ b/tests/unit/data/test_sql_guardrails.py @@ -92,6 +92,66 @@ def test_comment_prefixed_writes_blocked(self, sql): with pytest.raises(ValidationError, match="read-only"): c._sql_guardrails(sql) + @pytest.mark.parametrize( + "sql", + [ + # Statement-stacked writes after a SELECT and a semicolon + "SELECT name FROM account; DROP TABLE account", + # Write hidden after a line comment that strips a newline + "SELECT name FROM account -- \nINSERT INTO account(name) VALUES('x')", + # Write after a malformed/nested block-comment dance + "/* /* nested */ */ DELETE FROM account", + ], + ) + def test_writes_hidden_after_select_or_comments_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError): + c._sql_guardrails(sql) + + @pytest.mark.parametrize( + "sql", + [ + "SELECT name FROM account; SELECT name FROM contact", + "SELECT name FROM account;", + ], + ) + def test_semicolon_stacking_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError, match="Multiple SQL statements"): + c._sql_guardrails(sql) + + @pytest.mark.parametrize( + "sql", + [ + # Semicolon inside a string literal must not trip the check + "SELECT TOP 10 name FROM account WHERE name = ';'", + # Write keywords inside string literals must not trip the check + "SELECT TOP 10 name FROM account WHERE name = 'DROP ME'", + "SELECT TOP 10 name FROM account WHERE name = 'INSERT; DELETE'", + ], + ) + def test_keywords_and_semicolons_inside_string_literals_allowed(self, sql): + c = _client() + result = c._sql_guardrails(sql) + assert "SELECT" in result + + @pytest.mark.parametrize( + "sql", + [ + # ZWSP, ZWNJ, ZWJ, BOM before a write keyword must still be blocked. + # Python's \s does not match these, so an ^\s* anchor would miss + # them; the \b-based scan must catch them at the word boundary. + "​INSERT INTO account(name) VALUES('x')", + "‌DELETE FROM account", + "‍DROP TABLE account", + "UPDATE account SET name = 'x'", + ], + ) + def test_zero_width_prefixed_writes_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError, match="read-only"): + c._sql_guardrails(sql) + # =================================================================== # 1b. Block server-rejected SQL patterns (save round-trip) diff --git a/tests/unit/test_pandas_helpers.py b/tests/unit/test_pandas_helpers.py index f6c0c2fc..ccbb5c7f 100644 --- a/tests/unit/test_pandas_helpers.py +++ b/tests/unit/test_pandas_helpers.py @@ -189,6 +189,29 @@ def test_empty_dataframe(self): result = dataframe_to_records(df) self.assertEqual(result, []) + def test_multiindex_columns_raises(self): + """MultiIndex columns produce tuple keys that don't survive JSON + serialization. Reject up-front with a clear message instead of + letting a TypeError surface deep in the HTTP layer. + """ + cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")]) + df = pd.DataFrame([["x", "y"]], columns=cols) + with self.assertRaises(ValueError) as ctx: + dataframe_to_records(df) + msg = str(ctx.exception) + self.assertIn("MultiIndex", msg) + # The remediation must produce string column names, not tuples. + self.assertIn("'_'.join(map(str, col)) for col in df.columns.to_flat_index()", msg) + + def test_multiindex_remediation_actually_works(self): + """The exact remediation in the error message must produce a + DataFrame that dataframe_to_records can serialize.""" + cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")]) + df = pd.DataFrame([["x", "y"]], columns=cols) + df.columns = ["_".join(map(str, col)) for col in df.columns.to_flat_index()] + records = dataframe_to_records(df) + self.assertEqual(records, [{"a_firstname": "x", "a_lastname": "y"}]) + def test_mixed_types(self): """DataFrame with mixed types (str, int, float, None, Timestamp) converts correctly.""" ts = pd.Timestamp("2024-06-01")