Skip to content

Commit d8fdf6a

Browse files
author
lars.gustaebel
committed
Issue #2058: Remove the buf attribute and add __slots__ to the
TarInfo class in order to reduce tarfile's memory usage.

git-svn-id: http://svn.python.org/projects/python/branches/py3k@62337 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 475632b commit d8fdf6a

2 files changed

Lines changed: 46 additions & 34 deletions

File tree

Lib/tarfile.py

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,7 @@ def __init__(self, tarfile, tarinfo):
767767
self.fileobj = _FileInFile(tarfile.fileobj,
768768
tarinfo.offset_data,
769769
tarinfo.size,
770-
getattr(tarinfo, "sparse", None))
770+
tarinfo.sparse)
771771
self.name = tarinfo.name
772772
self.mode = "r"
773773
self.closed = False
@@ -906,6 +906,12 @@ class TarInfo(object):
906906
usually created internally.
907907
"""
908908

909+
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
910+
"chksum", "type", "linkname", "uname", "gname",
911+
"devmajor", "devminor",
912+
"offset", "offset_data", "pax_headers", "sparse",
913+
"tarfile", "_sparse_structs", "_link_target")
914+
909915
def __init__(self, name=""):
910916
"""Construct a TarInfo object. name is the optional name
911917
of the member.
@@ -927,6 +933,7 @@ def __init__(self, name=""):
927933
self.offset = 0 # the tar header starts here
928934
self.offset_data = 0 # the file's data starts here
929935

936+
self.sparse = None # sparse member information
930937
self.pax_headers = {} # pax header information
931938

932939
# In pax headers the "name" and "linkname" field are called
@@ -1181,7 +1188,6 @@ def frombuf(cls, buf, encoding, errors):
11811188
raise HeaderError("bad checksum")
11821189

11831190
obj = cls()
1184-
obj.buf = buf
11851191
obj.name = nts(buf[0:100], encoding, errors)
11861192
obj.mode = nti(buf[100:108])
11871193
obj.uid = nti(buf[108:116])
@@ -1202,6 +1208,24 @@ def frombuf(cls, buf, encoding, errors):
12021208
if obj.type == AREGTYPE and obj.name.endswith("/"):
12031209
obj.type = DIRTYPE
12041210

1211+
# The old GNU sparse format occupies some of the unused
1212+
# space in the buffer for up to 4 sparse structures.
1213+
# Save them for later processing in _proc_sparse().
1214+
if obj.type == GNUTYPE_SPARSE:
1215+
pos = 386
1216+
structs = []
1217+
for i in range(4):
1218+
try:
1219+
offset = nti(buf[pos:pos + 12])
1220+
numbytes = nti(buf[pos + 12:pos + 24])
1221+
except ValueError:
1222+
break
1223+
structs.append((offset, numbytes))
1224+
pos += 24
1225+
isextended = bool(buf[482])
1226+
origsize = nti(buf[483:495])
1227+
obj._sparse_structs = (structs, isextended, origsize)
1228+
12051229
# Remove redundant slashes from directories.
12061230
if obj.isdir():
12071231
obj.name = obj.name.rstrip("/")
@@ -1288,31 +1312,11 @@ def _proc_gnulong(self, tarfile):
12881312
def _proc_sparse(self, tarfile):
12891313
"""Process a GNU sparse header plus extra headers.
12901314
"""
1291-
buf = self.buf
1292-
sp = _ringbuffer()
1293-
pos = 386
1294-
lastpos = 0
1295-
realpos = 0
1296-
# There are 4 possible sparse structs in the
1297-
# first header.
1298-
for i in range(4):
1299-
try:
1300-
offset = nti(buf[pos:pos + 12])
1301-
numbytes = nti(buf[pos + 12:pos + 24])
1302-
except ValueError:
1303-
break
1304-
if offset > lastpos:
1305-
sp.append(_hole(lastpos, offset - lastpos))
1306-
sp.append(_data(offset, numbytes, realpos))
1307-
realpos += numbytes
1308-
lastpos = offset + numbytes
1309-
pos += 24
1310-
1311-
isextended = bool(buf[482])
1312-
origsize = nti(buf[483:495])
1315+
# We already collected some sparse structures in frombuf().
1316+
structs, isextended, origsize = self._sparse_structs
1317+
del self._sparse_structs
13131318

1314-
# If the isextended flag is given,
1315-
# there are extra headers to process.
1319+
# Collect sparse structures from extended header blocks.
13161320
while isextended:
13171321
buf = tarfile.fileobj.read(BLOCKSIZE)
13181322
pos = 0
@@ -1322,18 +1326,23 @@ def _proc_sparse(self, tarfile):
13221326
numbytes = nti(buf[pos + 12:pos + 24])
13231327
except ValueError:
13241328
break
1325-
if offset > lastpos:
1326-
sp.append(_hole(lastpos, offset - lastpos))
1327-
sp.append(_data(offset, numbytes, realpos))
1328-
realpos += numbytes
1329-
lastpos = offset + numbytes
1329+
structs.append((offset, numbytes))
13301330
pos += 24
13311331
isextended = bool(buf[504])
13321332

1333+
# Transform the sparse structures to something we can use
1334+
# in ExFileObject.
1335+
self.sparse = _ringbuffer()
1336+
lastpos = 0
1337+
realpos = 0
1338+
for offset, numbytes in structs:
1339+
if offset > lastpos:
1340+
self.sparse.append(_hole(lastpos, offset - lastpos))
1341+
self.sparse.append(_data(offset, numbytes, realpos))
1342+
realpos += numbytes
1343+
lastpos = offset + numbytes
13331344
if lastpos < origsize:
1334-
sp.append(_hole(lastpos, origsize - lastpos))
1335-
1336-
self.sparse = sp
1345+
self.sparse.append(_hole(lastpos, origsize - lastpos))
13371346

13381347
self.offset_data = tarfile.fileobj.tell()
13391348
tarfile.offset = self.offset_data + self._block(self.size)

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ Extension Modules
2929
Library
3030
-------
3131

32+
- Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo
33+
class in order to reduce tarfile's memory usage.
34+
3235
- Bug #2606: Avoid calling .sort() on a dict_keys object.
3336

3437
- The bundled libffi copy is now in sync with the recently released

0 commit comments

Comments
 (0)