Skip to content

Commit ea5712c

Browse files
author
loewis
committed
Patch #918101: Add tarfile open mode r|* for auto-detection of the
stream compression; add, for symmetry reasons, r:* as a synonym of r. git-svn-id: http://svn.python.org/projects/python/trunk@38581 6015fed2-1504-0410-9fe1-9d1591cc4771
1 parent 75e8013 commit ea5712c

4 files changed

Lines changed: 98 additions & 33 deletions

File tree

Doc/lib/libtarfile.tex

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ \section{\module{tarfile} --- Read and write tar archive files}
3232
it defaults to \code{'r'}. Here is a full list of mode combinations:
3333

3434
\begin{tableii}{c|l}{code}{mode}{action}
35-
\lineii{'r'}{Open for reading with transparent compression (recommended).}
35+
\lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
3636
\lineii{'r:'}{Open for reading exclusively without compression.}
3737
\lineii{'r:gz'}{Open for reading with gzip compression.}
3838
\lineii{'r:bz2'}{Open for reading with bzip2 compression.}
@@ -65,6 +65,7 @@ \section{\module{tarfile} --- Read and write tar archive files}
6565
(section~\ref{tar-examples}). The currently possible modes:
6666

6767
\begin{tableii}{c|l}{code}{Mode}{Action}
68+
\lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
6869
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
6970
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
7071
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}

Lib/tarfile.py

Lines changed: 66 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -274,24 +274,30 @@ class _Stream:
274274
_Stream is intended to be used only internally.
275275
"""
276276

277-
def __init__(self, name, mode, type, fileobj, bufsize):
277+
def __init__(self, name, mode, comptype, fileobj, bufsize):
278278
"""Construct a _Stream object.
279279
"""
280280
self._extfileobj = True
281281
if fileobj is None:
282282
fileobj = _LowLevelFile(name, mode)
283283
self._extfileobj = False
284284

285-
self.name = name or ""
286-
self.mode = mode
287-
self.type = type
288-
self.fileobj = fileobj
289-
self.bufsize = bufsize
290-
self.buf = ""
291-
self.pos = 0L
292-
self.closed = False
293-
294-
if type == "gz":
285+
if comptype == '*':
286+
# Enable transparent compression detection for the
287+
# stream interface
288+
fileobj = _StreamProxy(fileobj)
289+
comptype = fileobj.getcomptype()
290+
291+
self.name = name or ""
292+
self.mode = mode
293+
self.comptype = comptype
294+
self.fileobj = fileobj
295+
self.bufsize = bufsize
296+
self.buf = ""
297+
self.pos = 0L
298+
self.closed = False
299+
300+
if comptype == "gz":
295301
try:
296302
import zlib
297303
except ImportError:
@@ -303,7 +309,7 @@ def __init__(self, name, mode, type, fileobj, bufsize):
303309
else:
304310
self._init_write_gz()
305311

306-
if type == "bz2":
312+
if comptype == "bz2":
307313
try:
308314
import bz2
309315
except ImportError:
@@ -315,7 +321,7 @@ def __init__(self, name, mode, type, fileobj, bufsize):
315321
self.cmp = bz2.BZ2Compressor()
316322

317323
def __del__(self):
318-
if not self.closed:
324+
if hasattr(self, "closed") and not self.closed:
319325
self.close()
320326

321327
def _init_write_gz(self):
@@ -334,10 +340,10 @@ def _init_write_gz(self):
334340
def write(self, s):
335341
"""Write string s to the stream.
336342
"""
337-
if self.type == "gz":
343+
if self.comptype == "gz":
338344
self.crc = self.zlib.crc32(s, self.crc)
339345
self.pos += len(s)
340-
if self.type != "tar":
346+
if self.comptype != "tar":
341347
s = self.cmp.compress(s)
342348
self.__write(s)
343349

@@ -357,12 +363,16 @@ def close(self):
357363
if self.closed:
358364
return
359365

360-
if self.mode == "w" and self.type != "tar":
366+
if self.mode == "w" and self.comptype != "tar":
361367
self.buf += self.cmp.flush()
368+
362369
if self.mode == "w" and self.buf:
370+
blocks, remainder = divmod(len(self.buf), self.bufsize)
371+
if remainder > 0:
372+
self.buf += NUL * (self.bufsize - remainder)
363373
self.fileobj.write(self.buf)
364374
self.buf = ""
365-
if self.type == "gz":
375+
if self.comptype == "gz":
366376
self.fileobj.write(struct.pack("<l", self.crc))
367377
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
368378

@@ -441,7 +451,7 @@ def read(self, size=None):
441451
def _read(self, size):
442452
"""Return size bytes from the stream.
443453
"""
444-
if self.type == "tar":
454+
if self.comptype == "tar":
445455
return self.__read(size)
446456

447457
c = len(self.dbuf)
@@ -474,6 +484,30 @@ def __read(self, size):
474484
return t[:size]
475485
# class _Stream
476486

487+
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The proxy reads the first BLOCKSIZE bytes of the wrapped file
       object up front, so that getcomptype() can inspect them, and
       then hands exactly those bytes back on the first read() call.
       Nothing is lost even though the underlying object may not be
       seekable.
    """

    def __init__(self, fileobj):
        """Wrap fileobj and buffer its first BLOCKSIZE bytes."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the buffered first block, ignoring size.

           This method runs only once: it rebinds self.read to the
           wrapped object's read(), so every later call goes straight
           to the underlying file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" based on the buffered bytes."""
        # gzip: two magic bytes followed by the deflate method byte.
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # bzip2: "BZh", then one block-size digit ('1'..'9'), then the
        # block magic "1AY&SY".  The digit must not be pinned to '9',
        # otherwise streams written with a compression level other
        # than 9 are mistaken for uncompressed tar data.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class _StreamProxy
510+
477511
#------------------------
478512
# Extraction file object
479513
#------------------------
@@ -879,14 +913,16 @@ def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
879913
an appropriate TarFile class.
880914
881915
mode:
882-
'r' open for reading with transparent compression
916+
'r' or 'r:*' open for reading with transparent compression
883917
'r:' open for reading exclusively uncompressed
884918
'r:gz' open for reading with gzip compression
885919
'r:bz2' open for reading with bzip2 compression
886920
'a' or 'a:' open for appending
887921
'w' or 'w:' open for writing without compression
888922
'w:gz' open for writing with gzip compression
889923
'w:bz2' open for writing with bzip2 compression
924+
925+
'r|*' open a stream of tar blocks with transparent compression
890926
'r|' open an uncompressed stream of tar blocks for reading
891927
'r|gz' open a gzip compressed stream of tar blocks
892928
'r|bz2' open a bzip2 compressed stream of tar blocks
@@ -898,7 +934,17 @@ def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
898934
if not name and not fileobj:
899935
raise ValueError, "nothing to open"
900936

901-
if ":" in mode:
937+
if mode in ("r", "r:*"):
938+
# Find out which *open() is appropriate for opening the file.
939+
for comptype in cls.OPEN_METH:
940+
func = getattr(cls, cls.OPEN_METH[comptype])
941+
try:
942+
return func(name, "r", fileobj)
943+
except (ReadError, CompressionError):
944+
continue
945+
raise ReadError, "file could not be opened successfully"
946+
947+
elif ":" in mode:
902948
filemode, comptype = mode.split(":", 1)
903949
filemode = filemode or "r"
904950
comptype = comptype or "tar"
@@ -924,16 +970,6 @@ def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
924970
t._extfileobj = False
925971
return t
926972

927-
elif mode == "r":
928-
# Find out which *open() is appropriate for opening the file.
929-
for comptype in cls.OPEN_METH:
930-
func = getattr(cls, cls.OPEN_METH[comptype])
931-
try:
932-
return func(name, "r", fileobj)
933-
except (ReadError, CompressionError):
934-
continue
935-
raise ReadError, "file could not be opened successfully"
936-
937973
elif mode in "aw":
938974
return cls.taropen(name, mode, fileobj)
939975

Lib/test/test_tarfile.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,18 @@ def test_stream(self):
181181

182182
stream.close()
183183

184+
class ReadAsteriskTest(ReadTest):
    """Run the inherited ReadTest cases against an archive opened
       with a "*" compression specifier appended to the mode.
    """

    def setUp(self):
        # The open mode is <mode><sep>"*"; mode, sep and comp are
        # expected to be supplied as attributes by the base classes.
        wildcard_mode = "".join([self.mode, self.sep, "*"])
        self.tar = tarfile.open(tarname(self.comp), wildcard_mode)
189+
190+
class ReadStreamAsteriskTest(ReadStreamTest):
    """Run the inherited ReadStreamTest cases against an archive
       opened with a "*" compression specifier appended to the mode.
    """

    def setUp(self):
        # The open mode is <mode><sep>"*"; mode, sep and comp are
        # expected to be supplied as attributes by the base classes.
        wildcard_mode = "".join([self.mode, self.sep, "*"])
        self.tar = tarfile.open(tarname(self.comp), wildcard_mode)
195+
184196
class WriteTest(BaseTest):
185197
mode = 'w'
186198

@@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest):
336348
comp = "gz"
337349
class WriteStreamTestGzip(WriteStreamTest):
338350
comp = "gz"
351+
class ReadAsteriskTestGzip(ReadAsteriskTest):
352+
comp = "gz"
353+
class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
354+
comp = "gz"
355+
339356

340357
# Filemode test cases
341358

@@ -355,6 +372,10 @@ class WriteTestBzip2(WriteTest):
355372
comp = "bz2"
356373
class WriteStreamTestBzip2(WriteStreamTestGzip):
357374
comp = "bz2"
375+
class ReadAsteriskTestBzip2(ReadAsteriskTest):
376+
comp = "bz2"
377+
class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest):
378+
comp = "bz2"
358379

359380
# If importing gzip failed, discard the Gzip TestCases.
360381
if not gzip:
@@ -375,6 +396,8 @@ def test_main():
375396
FileModeTest,
376397
ReadTest,
377398
ReadStreamTest,
399+
ReadAsteriskTest,
400+
ReadStreamAsteriskTest,
378401
WriteTest,
379402
WriteStreamTest,
380403
WriteGNULongTest,
@@ -386,13 +409,15 @@ def test_main():
386409
if gzip:
387410
tests.extend([
388411
ReadTestGzip, ReadStreamTestGzip,
389-
WriteTestGzip, WriteStreamTestGzip
412+
WriteTestGzip, WriteStreamTestGzip,
413+
ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip
390414
])
391415

392416
if bz2:
393417
tests.extend([
394418
ReadTestBzip2, ReadStreamTestBzip2,
395-
WriteTestBzip2, WriteStreamTestBzip2
419+
WriteTestBzip2, WriteStreamTestBzip2,
420+
ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2
396421
])
397422
try:
398423
test_support.run_unittest(*tests)

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ Extension Modules
7878
Library
7979
-------
8080

81+
- Patch #918101: Add tarfile open mode r|* for auto-detection of the
82+
stream compression; add, for symmetry reasons, r:* as a synonym of r.
83+
8184
- Patch #1043890: Add extractall method to tarfile.
8285

8386
- Patch #1075887: Don't require MSVC in distutils if there is nothing

0 commit comments

Comments
 (0)