Skip to content

Commit 817aac3

Browse files
authored
fix(python): add support for bytes parameters in BLOB fields (#5864)
* fix(python): add support for bytes parameters in BLOB fields

  The Python API was failing with `AttributeError` when `bytes` objects were passed directly as parameters for BLOB fields, because the type system did not recognize Python `bytes` objects as valid BLOB inputs. The implementation uses `PyBytes_AsString()` and `PyBytes_Size()` from the Python C API to safely extract the binary data, preserving null bytes and ensuring correct round-trip conversion.

  Fixes #5859

* Clarify BLOB max size comment in test data

  Replaced the ambiguous `"max"` comment with a clearer note indicating the documented maximum BLOB size (4KB), and added a direct link to the official docs for reference.
1 parent 15c3dd9 commit 817aac3

5 files changed

Lines changed: 186 additions & 0 deletions

File tree

src_cpp/include/py_conversion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ enum class PythonObjectType : uint8_t {
1818
Datetime,
1919
Date,
2020
String,
21+
Bytes,
2122
List,
2223
UUID,
2324
Dict,

src_cpp/py_connection.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,8 @@ static LogicalType pyLogicalType(const py::handle& val) {
383383
return LogicalType::DECIMAL(precision, -exponent);
384384
} else if (py::isinstance<py::str>(val)) {
385385
return LogicalType::STRING();
386+
} else if (py::isinstance<py::bytes>(val)) {
387+
return LogicalType::BLOB();
386388
} else if (py::isinstance(val, datetime_datetime)) {
387389
return LogicalType::TIMESTAMP();
388390
} else if (py::isinstance(val, datetime_date)) {
@@ -571,6 +573,13 @@ Value PyConnection::transformPythonValueAs(const py::handle& val, const LogicalT
571573
} else {
572574
return Value::createValue<std::string>(py::str(val));
573575
}
576+
case LogicalTypeID::BLOB: {
577+
auto bytes = py::cast<py::bytes>(val);
578+
const char* data = PyBytes_AsString(bytes.ptr());
579+
Py_ssize_t size = PyBytes_Size(bytes.ptr());
580+
std::string blobStr(data, size);
581+
return Value(LogicalType::BLOB(), blobStr);
582+
}
574583
case LogicalTypeID::TIMESTAMP: {
575584
// LCOV_EXCL_START
576585
if (!py::isinstance(val, datetime_datetime)) {

src_cpp/py_conversion.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ PythonObjectType getPythonObjectType(py::handle& ele) {
3232
return PythonObjectType::Date;
3333
} else if (py::isinstance<py::str>(ele)) {
3434
return PythonObjectType::String;
35+
} else if (py::isinstance<py::bytes>(ele)) {
36+
return PythonObjectType::Bytes;
3537
} else if (py::isinstance<py::list>(ele)) {
3638
return PythonObjectType::List;
3739
} else if (py::isinstance(ele, uuid)) {
@@ -173,6 +175,14 @@ void transformPythonValue(common::ValueVector* outputVector, uint64_t pos, py::h
173175
outputVector->setNull(pos, false /* isNull */);
174176
common::StringVector::addString(outputVector, pos, ele.cast<std::string>());
175177
} break;
178+
case PythonObjectType::Bytes: {
179+
outputVector->setNull(pos, false /* isNull */);
180+
auto bytes = py::cast<py::bytes>(ele);
181+
const char* data = PyBytes_AsString(bytes.ptr());
182+
Py_ssize_t size = PyBytes_Size(bytes.ptr());
183+
std::string blobStr(data, size);
184+
outputVector->setValue(pos, blobStr);
185+
} break;
176186
case PythonObjectType::List: {
177187
transformListValue(outputVector, pos, ele);
178188
} break;

src_cpp/py_udf.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ static LogicalType getLogicalTypeNonNested(const py::handle& ele) {
101101
return LogicalType::DOUBLE();
102102
} else if (ele.is(py::type::of(py::str()))) {
103103
return LogicalType::STRING();
104+
} else if (ele.is(py::type::of(py::bytes()))) {
105+
return LogicalType::BLOB();
104106
} else if (ele.is(py::type::of(datetime_val))) {
105107
return LogicalType::TIMESTAMP();
106108
} else if (ele.is(py::type::of(date_val))) {

test/test_blob_parameter.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
from __future__ import annotations
2+
3+
import re
4+
5+
import pytest
6+
from type_aliases import ConnDB
7+
8+
9+
def test_bytes_param(conn_db_empty: ConnDB) -> None:
    """Insert several bytes payloads (and NULL) as query parameters and read them back in order."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE tab(id SERIAL PRIMARY KEY, data BLOB)")

    text = "Hello 🌍"
    payloads = [
        b"\x00\x01\x02\x03\xff",  # includes a NUL byte and a non-ASCII byte
        b"testing",
        b"A" * 4096,  # max size: 4KB, see https://docs.kuzudb.com/cypher/data-types/#blob
        b"",  # empty
        None,  # null
        text.encode("utf-8"),  # str converted to bytes
    ]
    for payload in payloads:
        connection.execute("CREATE (t:tab {data: $data})", {"data": payload})

    # SERIAL ids follow insertion order, so ORDER BY t.id yields the payloads in list order.
    rows = connection.execute("MATCH (t:tab) RETURN t.data ORDER BY t.id")
    for stored in payloads[:-1]:
        assert rows.get_next() == [stored]
    # The last row is compared after decoding back to the original str.
    assert rows.get_next()[0].decode("utf-8") == text
    rows.close()
39+
40+
41+
def test_bytes_param_backwards_compatibility(conn_db_empty: ConnDB) -> None:
    """Check that a `\\xHH`-escaped str passed through BLOB($string) still produces the bytes."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE tab(id SERIAL PRIMARY KEY, data BLOB)")

    raw = b"backwards_compatibility"
    # Spell every byte as a literal backslash-x escape, e.g. b"a" -> "\\x61".
    escaped = "".join([f"\\x{b:02x}" for b in raw])

    assert isinstance(escaped, str)
    assert re.match(r"^(\\x[0-9a-f]{2})+$", escaped)

    connection.execute("CREATE (t:tab {data: BLOB($string)})", {"string": escaped})

    rows = connection.execute("MATCH (t:tab) RETURN t.data")
    assert rows.get_next() == [raw]
    rows.close()
56+
57+
58+
def test_bytes_param_where_clause(conn_db_empty: ConnDB) -> None:
    """Filter on a BLOB column with a bytes parameter and expect exactly one matching row."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE tab(id INT64 PRIMARY KEY, data BLOB)")

    needle = b"where_clause"
    inserts = (
        ("CREATE (t:tab {id: 0, data: $data})", b"some data"),
        ("CREATE (t:tab {id: 1, data: $data})", needle),
        ("CREATE (t:tab {id: 2, data: $data})", b"some other data"),
    )
    for statement, payload in inserts:
        connection.execute(statement, {"data": payload})

    matches = connection.execute(
        "MATCH (t:tab) WHERE t.data = $search RETURN t.id",
        {"search": needle},
    )
    # Only the row with id 1 carries the searched payload.
    assert matches.get_next() == [1]
    assert not matches.has_next()
    matches.close()
72+
73+
74+
def test_bytes_param_update(conn_db_empty: ConnDB) -> None:
    """Overwrite a BLOB property through SET with a bytes parameter and read the new value."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE tab(id SERIAL PRIMARY KEY, data BLOB)")

    before, after = b"initial", b"updated"

    connection.execute("CREATE (t:tab {data: $data})", {"data": before})
    connection.execute("MATCH (t:tab) SET t.data = $new_data", {"new_data": after})

    rows = connection.execute("MATCH (t:tab) RETURN t.data")
    assert rows.get_next() == [after]
    rows.close()
87+
88+
89+
def test_bytes_param_mixed_types(conn_db_empty: ConnDB) -> None:
    """Pass a bytes parameter alongside str and float parameters in one statement."""
    connection, _db = conn_db_empty
    connection.execute(
        "CREATE NODE TABLE tab(id SERIAL PRIMARY KEY, data BLOB, name STRING, value DOUBLE)"
    )

    blob, label, number = b"data", "name", 3.14

    connection.execute(
        "CREATE (t:tab {data: $data, name: $name, value: $value})",
        {"data": blob, "name": label, "value": number},
    )

    rows = connection.execute("MATCH (t:tab) RETURN t.data, t.name, t.value")
    assert rows.get_next() == [blob, label, number]
    rows.close()
103+
104+
105+
def test_bytes_param_relationship(conn_db_empty: ConnDB) -> None:
    """Store a bytes parameter in a BLOB property of a relationship and read it back."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE person(name STRING PRIMARY KEY)")
    connection.execute("CREATE REL TABLE rel(FROM person TO person, data BLOB)")

    # Two endpoints for the relationship.
    connection.execute("CREATE (p:person {name: 'Alice'})")
    connection.execute("CREATE (p:person {name: 'Bob'})")

    payload = b"relationship"
    create_rel = """
        MATCH (p1:person {name: 'Alice'}), (p2:person {name: 'Bob'})
        CREATE (p1)-[r:rel {data: $data}]->(p2)
        """
    connection.execute(create_rel, {"data": payload})

    rows = connection.execute("MATCH ()-[r:rel]->() RETURN r.data")
    assert rows.get_next() == [payload]
    rows.close()
126+
127+
128+
def test_bytes_param_invalid_types(conn_db_empty: ConnDB) -> None:
    """Non-bytes parameters for a BLOB property must be rejected with a binder exception."""
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE tab(id SERIAL PRIMARY KEY, data BLOB)")

    # Each entry pairs a rejected Python value with the type name expected in the error message.
    rejected = (
        ("test", "STRING"),
        ("", "STRING"),
        (1, "INT8"),
        (256, "INT16"),
        ([1, 2], "INT8[]"),
        (True, "BOOL"),
    )

    for value, real in rejected:
        msg = f"Binder exception: Expression $data has data type {real} but expected BLOB. Implicit cast is not supported."
        # The message contains regex metacharacters ($, ., []), so it is escaped before matching.
        pattern = re.escape(msg)
        with pytest.raises(RuntimeError, match=pattern):
            connection.execute("CREATE (t:tab {data: $data})", {"data": value})
145+
146+
147+
def test_bytes_param_udf(conn_db_empty: ConnDB) -> None:
    """Register a BLOB -> BLOB Python UDF and call it with a bytes parameter and with NULL."""
    conn, _ = conn_db_empty

    def reverse_bytes(data: bytes) -> bytes:
        # Return the input bytes in reverse order.
        return data[::-1]

    conn.create_function("reverse_bytes", reverse_bytes, ["BLOB"], "BLOB")

    data = b"hello"
    expected = b"olleh"

    result = conn.execute("RETURN reverse_bytes($data)", {"data": data})
    assert result.get_next() == [expected]
    # Close this result explicitly before the name is rebound, matching how every
    # other query result in this file is released.
    result.close()

    result = conn.execute("RETURN reverse_bytes(NULL)")
    assert result.get_next() == [None]
    result.close()

0 commit comments

Comments
 (0)