Skip to content

Commit 2f4a2d4

Browse files
committed
Take more care with empty and negative sizes of binary data
1 parent ddebc44 commit 2f4a2d4

6 files changed

Lines changed: 74 additions & 114 deletions

File tree

cassandra/cython_protocol_handler.pyx

Lines changed: 0 additions & 73 deletions
This file was deleted.

cassandra/deserializers.pxd

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,26 @@
33
from cassandra.buffer cimport Buffer
44

55
cdef class Deserializer:
6+
# The cqltypes._CassandraType corresponding to this deserializer
7+
cdef object cqltype
8+
9+
# Strings may be empty, whereas other values may not be.
10+
# Other values may be NULL, in which case the integer length
11+
# of the binary data is negative. However, non-string types
12+
# may also return a zero length for legacy reasons
13+
# (see http://code.metager.de/source/xref/apache/cassandra/doc/native_protocol_v3.spec
14+
# paragraph 6)
15+
cdef bint empty_binary_ok
16+
617
cdef deserialize(self, Buffer *buf, int protocol_version)
718
# cdef deserialize(self, CString byts, protocol_version)
19+
20+
21+
cdef inline object from_binary(Deserializer deserializer,
22+
Buffer *buf,
23+
int protocol_version):
24+
if buf.size <= 0 and not deserializer.empty_binary_ok:
25+
return _ret_empty(deserializer, buf.size)
26+
return deserializer.deserialize(buf, protocol_version)
27+
28+
cdef _ret_empty(Deserializer deserializer, Py_ssize_t buf_size)

cassandra/deserializers.pyx

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ from cassandra import util
2121

2222

2323
cdef class Deserializer:
24+
"""Cython-based deserializer class for a cqltype"""
25+
26+
def __init__(self, cqltype):
27+
self.cqltype = cqltype
28+
self.empty_binary_ok = False
29+
2430
cdef deserialize(self, Buffer *buf, int protocol_version):
2531
raise NotImplementedError
2632

@@ -144,25 +150,21 @@ cdef class DesVarcharType(DesUTF8Type):
144150

145151
cdef class _DesParameterizedType(Deserializer):
146152

147-
cdef object cqltype
148-
cdef object adapter
149153
cdef object subtypes
150154
cdef Deserializer[::1] deserializers
151155
cdef Py_ssize_t subtypes_len
152156

153157
def __init__(self, cqltype):
154-
assert cqltype.subtypes and len(cqltype.subtypes) == 1
155-
self.cqltype = cqltype
156-
self.adapter = cqltype.adapter
158+
super().__init__(cqltype)
157159
self.subtypes = cqltype.subtypes
158160
self.deserializers = make_deserializers(cqltype.subtypes)
159161

160162

161163
cdef class _DesSingleParamType(_DesParameterizedType):
162-
163164
cdef Deserializer deserializer
164165

165166
def __init__(self, cqltype):
167+
assert cqltype.subtypes and len(cqltype.subtypes) == 1, cqltype.subtypes
166168
super().__init__(cqltype)
167169
self.deserializer = self.deserializers[0]
168170

@@ -182,7 +184,7 @@ cdef class DesListType(_DesSingleParamType):
182184
result = _deserialize_list_or_set[uint16_t](
183185
v2_and_below, buf, protocol_version, self.deserializer)
184186

185-
return self.adapter(result)
187+
return self.cqltype.adapter(result)
186188

187189

188190
DesSetType = DesListType
@@ -214,7 +216,7 @@ cdef list _deserialize_list_or_set(itemlen_t dummy_version,
214216
sub_buf.ptr = buf.ptr + p
215217
sub_buf.size = itemlen
216218
p += itemlen
217-
result.append(deserializer.deserialize(&sub_buf, protocol_version))
219+
result.append(from_binary(deserializer, &sub_buf, protocol_version))
218220

219221
return result
220222

@@ -284,8 +286,8 @@ cdef _deserialize_map(itemlen_t dummy_version,
284286
val_buf.ptr = buf.ptr + p
285287
val_buf.size = val_len
286288
p += val_len
287-
key = key_deserializer.deserialize(&key_buf, protocol_version)
288-
val = val_deserializer.deserialize(&val_buf, protocol_version)
289+
key = from_binary(key_deserializer, &key_buf, protocol_version)
290+
val = from_binary(val_deserializer, &val_buf, protocol_version)
289291
themap._insert_unchecked(key, to_bytes(&key_buf), val)
290292

291293
return themap
@@ -316,7 +318,7 @@ cdef class DesTupleType(_DesParameterizedType):
316318
item_buf.ptr = buf.ptr + p
317319
item_buf.size = itemlen
318320
deserializer = self.deserializers[i]
319-
item = deserializer.deserialize(&item_buf, protocol_version)
321+
item = from_binary(deserializer, &item_buf, protocol_version)
320322
p += itemlen
321323

322324
tuple_set(res, i, item)
@@ -355,7 +357,7 @@ cdef class DesCompositeType(_DesParameterizedType):
355357
buf.ptr = buf.ptr + 2 + element_length + 1
356358
buf.size = buf.size - (2 + element_length + 1)
357359
deserializer = self.deserializers[i]
358-
item = deserializer.deserialize(&elem_buf, protocol_version)
360+
item = from_binary(deserializer, &elem_buf, protocol_version)
359361
tuple_set(res, i, item)
360362

361363
return res
@@ -366,12 +368,26 @@ DesDynamicCompositeType = DesCompositeType
366368

367369
cdef class DesReversedType(_DesSingleParamType):
368370
cdef deserialize(self, Buffer *buf, int protocol_version):
369-
return self.deserializer.deserialize(buf, protocol_version)
371+
return from_binary(self.deserializer, buf, protocol_version)
370372

371373

372374
cdef class DesFrozenType(_DesSingleParamType):
373375
cdef deserialize(self, Buffer *buf, int protocol_version):
374-
return self.deserializer.deserialize(buf, protocol_version)
376+
return from_binary(self.deserializer, buf, protocol_version)
377+
378+
#--------------------------------------------------------------------------
379+
380+
cdef _ret_empty(Deserializer deserializer, Py_ssize_t buf_size):
381+
"""
382+
Decide whether to return None or EMPTY when a buffer size is
383+
zero or negative. This is used by from_binary in deserializers.pxd.
384+
"""
385+
if buf_size < 0:
386+
return None
387+
elif deserializer.cqltype.support_empty_values:
388+
return cqltypes.EMPTY
389+
else:
390+
return None
375391

376392
#--------------------------------------------------------------------------
377393
# Generic deserialization
@@ -381,11 +397,6 @@ cdef class GenericDeserializer(Deserializer):
381397
Wrap a generic datatype for deserialization
382398
"""
383399

384-
cdef object cqltype
385-
386-
def __init__(self, cqltype):
387-
self.cqltype = cqltype
388-
389400
cdef deserialize(self, Buffer *buf, int protocol_version):
390401
return self.cqltype.deserialize(to_bytes(buf), protocol_version)
391402

@@ -401,31 +412,33 @@ def make_deserializers(cqltypes):
401412
cpdef Deserializer find_deserializer(cqltype):
402413
"""Find a deserializer for a cqltype"""
403414
name = 'Des' + cqltype.__name__
415+
404416
if name in globals():
405-
deserializer_cls = globals()[name]
406-
deserializer_cls()
417+
cls = globals()[name]
407418
elif issubclass(cqltype, cqltypes.ListType):
408-
return DesListType
419+
cls = DesListType
409420
elif issubclass(cqltype, cqltypes.SetType):
410-
return DesSetType
421+
cls = DesSetType
411422
elif issubclass(cqltype, cqltypes.MapType):
412-
return DesMapType
423+
cls = DesMapType
413424
elif issubclass(cqltype, cqltypes.UserType):
414425
# UserType is a subclass of TupleType, so should precede it
415-
return DesUserType
426+
cls = DesUserType
416427
elif issubclass(cqltype, cqltypes.TupleType):
417-
return DesTupleType
428+
cls = DesTupleType
418429
elif issubclass(cqltype, cqltypes.DynamicCompositeType):
419430
# DynamicCompositeType is a subclass of CompositeType, so should precede it
420-
return DesDynamicCompositeType
431+
cls = DesDynamicCompositeType
421432
elif issubclass(cqltype, cqltypes.CompositeType):
422-
return DesCompositeType
433+
cls = DesCompositeType
423434
elif issubclass(cqltype, cqltypes.ReversedType):
424-
return DesReversedType
435+
cls = DesReversedType
425436
elif issubclass(cqltype, cqltypes.FrozenType):
426-
return DesFrozenType
437+
cls = DesFrozenType
438+
else:
439+
cls = GenericDeserializer
427440

428-
return GenericDeserializer(cqltype)
441+
return cls(cqltype)
429442

430443

431444
def obj_array(list objs):

cassandra/ioutils.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ cdef inline int get_buf(BytesIOReader reader, Buffer *buf_out) except -1:
1212
"""
1313
cdef Py_ssize_t raw_val_size = read_int(reader)
1414
if raw_val_size < 0:
15-
raise ValueError("Expected positive item size")
15+
raw_val_size = 0
1616

1717
buf_out.ptr = reader.read(raw_val_size)
1818
buf_out.size = raw_val_size

cassandra/numpyparser.pyx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ from libc.stdint cimport uint64_t
1717
from cpython.ref cimport Py_INCREF, PyObject
1818

1919
from cassandra.bytesio cimport BytesIOReader
20-
from cassandra.deserializers cimport Deserializer
20+
from cassandra.deserializers cimport Deserializer, from_binary
2121
from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser
2222
from cassandra import cqltypes
2323
from cassandra.util import is_little_endian
@@ -125,9 +125,11 @@ cdef inline int unpack_row(
125125
get_buf(reader, &buf)
126126
arr = arrays[i]
127127

128+
if buf.size == 0:
129+
raise ValueError("Cannot handle NULL value")
128130
if arr.is_object:
129131
deserializer = desc.deserializers[i]
130-
val = deserializer.deserialize(&buf, desc.protocol_version)
132+
val = from_binary(deserializer, &buf, desc.protocol_version)
131133
Py_INCREF(val)
132134
(<PyObject **> arr.buf_ptr)[0] = <PyObject *> val
133135
else:

cassandra/objparser.pyx

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
include "ioutils.pyx"
22

33
from cassandra.bytesio cimport BytesIOReader
4-
from cassandra.deserializers cimport Deserializer
4+
from cassandra.deserializers cimport Deserializer, from_binary
55
from cassandra.parsing cimport ParseDesc, ColumnParser, RowParser
66
from cassandra.tuple cimport tuple_new, tuple_set
77

@@ -49,12 +49,9 @@ cdef class TupleRowParser(RowParser):
4949
# Read the next few bytes
5050
get_buf(reader, &buf)
5151

52-
if buf.size == 0:
53-
val = None
54-
else:
55-
# Deserialize bytes to python object
56-
deserializer = desc.deserializers[i]
57-
val = deserializer.deserialize(&buf, desc.protocol_version)
52+
# Deserialize bytes to python object
53+
deserializer = desc.deserializers[i]
54+
val = from_binary(deserializer, &buf, desc.protocol_version)
5855

5956
# Insert new object into tuple
6057
tuple_set(res, i, val)

0 commit comments

Comments
 (0)