Skip to content

Commit ff72951

Browse files
committed
ARROW-1053: [Python] Remove unnecessary Py_INCREF in PyBuffer causing memory leak
cc @BryanCutler

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #704 from wesm/ARROW-1053 and squashes the following commits:

2f90337 [Wes McKinney] Remove unnecessary Py_INCREF in PyBuffer causing memory leak
1 parent 37cdc6e commit ff72951

2 files changed

Lines changed: 26 additions & 2 deletions

File tree

cpp/src/arrow/python/common.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0), obj_(nullptr) {
5555
size_ = buffer->len;
5656
capacity_ = buffer->len;
5757
is_mutable_ = false;
58-
Py_INCREF(obj_);
5958
}
6059
}
6160

python/scripts/test_leak.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import numpy as np
2222
import memory_profiler
2323
import gc
24+
import io
2425

2526

2627
def leak():
@@ -32,4 +33,28 @@ def leak():
3233
table.to_pandas()
3334
gc.collect()
3435

35-
leak()
36+
# leak()
37+
38+
39+
def leak2():
40+
data = [pa.array(np.concatenate([np.random.randn(100000)] * 10))]
41+
table = pa.Table.from_arrays(data, ['foo'])
42+
while True:
43+
print('calling to_pandas')
44+
print('memory_usage: {0}'.format(memory_profiler.memory_usage()))
45+
df = table.to_pandas()
46+
47+
batch = pa.RecordBatch.from_pandas(df)
48+
49+
sink = io.BytesIO()
50+
writer = pa.RecordBatchFileWriter(sink, batch.schema)
51+
writer.write_batch(batch)
52+
writer.close()
53+
54+
buf_reader = pa.BufferReader(sink.getvalue())
55+
reader = pa.open_file(buf_reader)
56+
reader.read_all()
57+
58+
gc.collect()
59+
60+
leak2()

0 commit comments

Comments
 (0)