Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/PowerPlatform/Dataverse/data/_batch_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,14 @@ def _raise_top_level_batch_error(response: Any) -> None:
error = payload.get("error", {})
service_error_code = error.get("code") or None
message: str = error.get("message") or response.text or "Unexpected non-multipart response from $batch"
# Dataverse nests a more specific message (e.g. offending field/dtype)
# under error.innererror.message. Append it so users see actionable
# detail without inspecting the raw payload.
innererror = error.get("innererror") if isinstance(error, dict) else None
if isinstance(innererror, dict):
inner_msg = innererror.get("message")
if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != message:
message = f"{message}: {inner_msg.strip()}"
except Exception:
message = (getattr(response, "text", None) or "") or "Unexpected non-multipart response from $batch"
raise HttpError(
Expand Down
8 changes: 8 additions & 0 deletions src/PowerPlatform/Dataverse/data/_odata.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,14 @@ def _request(self, method: str, url: str, *, expected: tuple[int, ...] = _DEFAUL
imsg = inner.get("message")
if isinstance(imsg, str) and imsg.strip():
msg = imsg.strip()
# Dataverse nests a more specific message (e.g. offending
# field/dtype) under error.innererror.message. Append it so
# users don't have to chase the wire payload to debug.
innererror = inner.get("innererror")
if isinstance(innererror, dict):
inner_msg = innererror.get("message")
if isinstance(inner_msg, str) and inner_msg.strip() and inner_msg.strip() != msg:
msg = f"{msg}: {inner_msg.strip()}"
else:
imsg2 = data.get("message")
if isinstance(imsg2, str) and imsg2.strip():
Expand Down
29 changes: 25 additions & 4 deletions src/PowerPlatform/Dataverse/data/_odata_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,10 @@ def _attribute_payload(
"RequiredLevel": {"Value": "None"},
"MinValue": -100000000000.0,
"MaxValue": 100000000000.0,
"Precision": 2,
# Default to the max DoubleAttributeMetadata.Precision (5) so
# round-trips of typical scientific/ML floats don't silently
# truncate; "float" implies precision, unlike money/decimal.
"Precision": 5,
}
if dtype_l in ("datetime", "date"):
return {
Expand Down Expand Up @@ -772,10 +775,13 @@ def _build_get_relationship(self, schema_name: str) -> _RawRequest:

# ----------------------- SQL guardrail patterns --------------------
_SQL_WRITE_RE = re.compile(
r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b",
r"\b(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b",
re.IGNORECASE,
)
_SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL)
# Mask single-quoted string literals (with '' escape) before keyword scans
# so values like 'DELETE ME' or ';' inside a WHERE clause don't false-positive.
_SQL_STRING_LITERAL_RE = re.compile(r"'(?:[^']|'')*'")
_SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE)
_SQL_IMPLICIT_CROSS_JOIN_RE = re.compile(
r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+",
Expand Down Expand Up @@ -835,9 +841,24 @@ def _sql_guardrails(self, sql: str) -> str:
"""
# --- BLOCKED (save server round-trip) ---

# 1. Block writes (strip SQL comments first to catch comment-prefixed writes)
# Strip comments and mask string literals before keyword/semicolon scans
# so write keywords and ';' tucked inside string values don't trip the
# checks, while ones hidden by comments or statement stacking do.
sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip()
if self._SQL_WRITE_RE.search(sql_no_comments):
sql_scan = self._SQL_STRING_LITERAL_RE.sub("''", sql_no_comments)

# 1a. Block statement stacking (semicolons separating statements).
if ";" in sql_scan:
raise ValidationError(
"Multiple SQL statements are not supported. The Dataverse SQL "
"endpoint accepts exactly one SELECT statement per call. "
"Remove the ';' and submit each query separately.",
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
)

# 1b. Block writes anywhere in the query (not just at the start),
# to catch writes hidden after comments or earlier SELECTs.
if self._SQL_WRITE_RE.search(sql_scan):
raise ValidationError(
"SQL endpoint is read-only. Use client.records or "
"client.dataframe for write operations "
Expand Down
7 changes: 5 additions & 2 deletions src/PowerPlatform/Dataverse/operations/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,11 @@ def get(
"""
if record_id is not None:
warnings.warn(
"'records.get()' with a record_id is deprecated; "
"use 'client.records.retrieve(table, record_id)' instead.",
"'records.get(table, record_id)' is deprecated. "
"For a single record by ID, use "
"'client.records.retrieve(table, record_id)'. "
"For a filtered list, use "
"'client.records.list(table, filter=...)'.",
DeprecationWarning,
stacklevel=2,
)
Expand Down
8 changes: 8 additions & 0 deletions src/PowerPlatform/Dataverse/utils/_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,15 @@ def dataframe_to_records(df: pd.DataFrame, na_as_null: bool = False) -> List[Dic
:param df: Input DataFrame.
:param na_as_null: When False (default), missing values are omitted from each dict.
When True, missing values are included as None (sends null to Dataverse, clearing the field).
:raises ValueError: If ``df`` has ``MultiIndex`` columns. Tuple column keys
do not round-trip through Dataverse's JSON encoder and produce a
confusing error far from the call site. Flatten to string column namesfirst.
"""
if isinstance(df.columns, pd.MultiIndex):
raise ValueError(
"MultiIndex columns are not supported. Flatten to string column names first, "
"for example: df.columns = ['_'.join(map(str, col)) for col in df.columns.to_flat_index()]."
)
records = []
for row in df.to_dict(orient="records"):
clean = {}
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/data/test_batch_edge_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,28 @@ def test_error_code_preserved_as_service_error_code(self):
_raise_top_level_batch_error(mock_resp)
self.assertEqual(ctx.exception.details.get("service_error_code"), "0x80040220")

def test_innererror_message_appended_to_top_level(self):
"""error.innererror.message surfaces alongside the vague top-level
payload error, so the offending field reaches the user."""
mock_resp = MagicMock()
mock_resp.status_code = 400
mock_resp.json.return_value = {
"error": {
"code": "0x80060891",
"message": "Error identified in Payload provided by the user for Entity :contacts",
"innererror": {
"message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.",
"type": "System.Exception",
},
}
}
mock_resp.text = json.dumps(mock_resp.json.return_value)

with self.assertRaises(HttpError) as ctx:
_raise_top_level_batch_error(mock_resp)
self.assertIn("Error identified in Payload", str(ctx.exception))
self.assertIn("birthdate", str(ctx.exception))


# ---------------------------------------------------------------------------
# 8. Batch response without continue-on-error (first failure stops)
Expand Down
49 changes: 49 additions & 0 deletions tests/unit/data/test_odata_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,41 @@ def test_retry_after_int_stored_in_details(self):
self.client._request("get", "http://example.com/test")
self.assertEqual(ctx.exception.details.get("retry_after"), 30)

def test_innererror_message_appended_to_top_level(self):
"""error.innererror.message is appended to the top-level message so the
offending field/dtype reaches the user instead of being hidden in the
wire payload."""
response = self._make_raw_response(
400,
json_data={
"error": {
"code": "0x80060891",
"message": "Error identified in Payload provided by the user for Entity :contacts",
"innererror": {
"message": "Field 'birthdate' is a Date type and cannot accept a DateTime value with offset.",
"type": "System.Exception",
},
}
},
)
self.client._raw_request = MagicMock(return_value=response)
with self.assertRaises(HttpError) as ctx:
self.client._request("post", "http://example.com/contacts")
self.assertIn("Error identified in Payload", str(ctx.exception))
self.assertIn("birthdate", str(ctx.exception))
self.assertIn("DateTime value with offset", str(ctx.exception))

def test_no_innererror_message_unchanged(self):
"""Absence of innererror leaves the top-level message untouched."""
response = self._make_raw_response(
400,
json_data={"error": {"code": "0x0", "message": "Bad request"}},
)
self.client._raw_request = MagicMock(return_value=response)
with self.assertRaises(HttpError) as ctx:
self.client._request("get", "http://example.com/test")
self.assertEqual(ctx.exception.message, "Bad request")


class TestCreateMultiple(unittest.TestCase):
"""Unit tests for _ODataClient._create_multiple."""
Expand Down Expand Up @@ -1558,6 +1593,20 @@ def test_double_dtype_alias(self):
result = self.od._attribute_payload("new_Score", "double")
self.assertIn("Double", result["@odata.type"])

def test_float_dtype_precision_default_is_5(self):
"""'float' must default to the max Double precision (5) so values
like 2.718 round-trip without silent truncation. Regression guard
for the previous Precision=2 default."""
result = self.od._attribute_payload("new_Score", "float")
self.assertEqual(result["Precision"], 5)

def test_decimal_dtype_precision_unchanged_at_2(self):
"""'decimal'/'money' should keep Precision=2 because that matches
currency semantics. Regression guard so a future cleanup doesn't
quietly broaden this along with float."""
result = self.od._attribute_payload("new_Revenue", "decimal")
self.assertEqual(result["Precision"], 2)

def test_datetime_dtype(self):
"""'datetime' produces DateTimeAttributeMetadata."""
result = self.od._attribute_payload("new_CreatedDate", "datetime")
Expand Down
60 changes: 60 additions & 0 deletions tests/unit/data/test_sql_guardrails.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,66 @@ def test_comment_prefixed_writes_blocked(self, sql):
with pytest.raises(ValidationError, match="read-only"):
c._sql_guardrails(sql)

@pytest.mark.parametrize(
"sql",
[
# Statement-stacked writes after a SELECT and a semicolon
"SELECT name FROM account; DROP TABLE account",
# Write hidden after a line comment that strips a newline
"SELECT name FROM account -- \nINSERT INTO account(name) VALUES('x')",
# Write after a malformed/nested block-comment dance
"/* /* nested */ */ DELETE FROM account",
],
)
def test_writes_hidden_after_select_or_comments_blocked(self, sql):
c = _client()
with pytest.raises(ValidationError):
c._sql_guardrails(sql)

@pytest.mark.parametrize(
"sql",
[
"SELECT name FROM account; SELECT name FROM contact",
"SELECT name FROM account;",
],
)
def test_semicolon_stacking_blocked(self, sql):
c = _client()
with pytest.raises(ValidationError, match="Multiple SQL statements"):
c._sql_guardrails(sql)

@pytest.mark.parametrize(
"sql",
[
# Semicolon inside a string literal must not trip the check
"SELECT TOP 10 name FROM account WHERE name = ';'",
# Write keywords inside string literals must not trip the check
"SELECT TOP 10 name FROM account WHERE name = 'DROP ME'",
"SELECT TOP 10 name FROM account WHERE name = 'INSERT; DELETE'",
],
)
def test_keywords_and_semicolons_inside_string_literals_allowed(self, sql):
c = _client()
result = c._sql_guardrails(sql)
assert "SELECT" in result

@pytest.mark.parametrize(
"sql",
[
# ZWSP, ZWNJ, ZWJ, BOM before a write keyword must still be blocked.
# Python's \s does not match these, so an ^\s* anchor would miss
# them; the \b-based scan must catch them at the word boundary.
"​INSERT INTO account(name) VALUES('x')",
"‌DELETE FROM account",
"‍DROP TABLE account",
"UPDATE account SET name = 'x'",
],
)
def test_zero_width_prefixed_writes_blocked(self, sql):
c = _client()
with pytest.raises(ValidationError, match="read-only"):
c._sql_guardrails(sql)


# ===================================================================
# 1b. Block server-rejected SQL patterns (save round-trip)
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,29 @@ def test_empty_dataframe(self):
result = dataframe_to_records(df)
self.assertEqual(result, [])

def test_multiindex_columns_raises(self):
"""MultiIndex columns produce tuple keys that don't survive JSON
serialization. Reject up-front with a clear message instead of
letting a TypeError surface deep in the HTTP layer.
"""
cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")])
df = pd.DataFrame([["x", "y"]], columns=cols)
with self.assertRaises(ValueError) as ctx:
dataframe_to_records(df)
msg = str(ctx.exception)
self.assertIn("MultiIndex", msg)
# The remediation must produce string column names, not tuples.
self.assertIn("'_'.join(map(str, col)) for col in df.columns.to_flat_index()", msg)

def test_multiindex_remediation_actually_works(self):
"""The exact remediation in the error message must produce a
DataFrame that dataframe_to_records can serialize."""
cols = pd.MultiIndex.from_tuples([("a", "firstname"), ("a", "lastname")])
df = pd.DataFrame([["x", "y"]], columns=cols)
df.columns = ["_".join(map(str, col)) for col in df.columns.to_flat_index()]
records = dataframe_to_records(df)
self.assertEqual(records, [{"a_firstname": "x", "a_lastname": "y"}])

def test_mixed_types(self):
"""DataFrame with mixed types (str, int, float, None, Timestamp) converts correctly."""
ts = pd.Timestamp("2024-06-01")
Expand Down
Loading