Skip to content
Merged
Next Next commit
Improve list() pre-sizing for inputs with known lengths
The list() constructor isn't taking full advantage of known input
lengths or length hints. This commit makes the constructor
pre-size and not over-allocate when the input size is known or
can be reasonably estimated.

A new test is added to test_list.py and a test needed to be modified
in test_descr.py as it assumed that there is only one call to
__length_hint__() in the list constructor.
  • Loading branch information
pablogsal committed Apr 17, 2018
commit 76028798d0ddef4e6d88c0313db691592bcdd58a
2 changes: 1 addition & 1 deletion Lib/test/test_descr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2028,7 +2028,7 @@ class X(Checker):
setattr(X, attr, obj)
setattr(X, name, SpecialDescr(meth_impl))
runner(X())
self.assertEqual(record, [1], name)
self.assertGreaterEqual(record.count(1), 1, name)

class X(Checker):
pass
Expand Down
9 changes: 9 additions & 0 deletions Lib/test/test_list.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from test import list_tests
from test.support import cpython_only
import pickle
import unittest

Expand Down Expand Up @@ -157,5 +158,13 @@ class L(list): pass
with self.assertRaises(TypeError):
(3,) + L([1,2])

@cpython_only
def test_preallocation(self):
iterable = [0] * 10
iter_size = sys.getsizeof(iterable)

self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))
self.assertEqual(iter_size, sys.getsizeof(list(range(10))))

if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The list constructor will pre-size and not over-allocate when the input size
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

length, not size.

is known or can be reasonably estimated.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"or can be reasonably estimated" I don't understand this part.

33 changes: 33 additions & 0 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,32 @@ list_resize(PyListObject *self, Py_ssize_t newsize)
return 0;
}

static int
list_preallocate_exact(PyListObject *self, Py_ssize_t size)
{
PyObject **items;
size_t allocated, num_allocated_bytes;

allocated = (size_t)size;
if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) {
PyErr_NoMemory();
return -1;
}

if (size == 0) {
allocated = 0;
}
num_allocated_bytes = allocated * sizeof(PyObject *);
items = (PyObject **)PyMem_Malloc(num_allocated_bytes);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would not be easier to use PyMem_New?

if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
Comment thread
vstinner marked this conversation as resolved.
self->allocated = allocated;
return 0;
}

/* Debug statistic to compare allocations with reuse through the free list */
#undef SHOW_ALLOC_COUNT
#ifdef SHOW_ALLOC_COUNT
Expand Down Expand Up @@ -2649,6 +2675,13 @@ list___init___impl(PyListObject *self, PyObject *iterable)
(void)_list_clear(self);
}
if (iterable != NULL) {
Py_ssize_t iter_len = PyObject_LengthHint(iterable, 0);
if (iter_len == -1){
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: ...) { (add a space)

PyErr_Clear();
}
if (iter_len > 0 && list_preallocate_exact(self, iter_len)) {
return -1;
}
PyObject *rv = list_extend(self, iterable);
if (rv == NULL)
return -1;
Expand Down