Skip to content

Commit 3b4f159

Browse files
Issue #19920: TarFile.list() no longer fails when it outputs a listing
containing non-encodable characters. Added tests for TarFile.list(). Based on patch by Vajrasky Kok.
1 parent 1812bd4 commit 3b4f159

3 files changed

Lines changed: 100 additions & 11 deletions

File tree

Lib/tarfile.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@ def filemode(mode):
281281
DeprecationWarning, 2)
282282
return stat.filemode(mode)
283283

284+
def _safe_print(s):
285+
encoding = getattr(sys.stdout, 'encoding', None)
286+
if encoding is not None:
287+
s = s.encode(encoding, 'backslashreplace').decode(encoding)
288+
print(s, end=' ')
289+
284290

285291
class TarError(Exception):
286292
"""Base exception."""
@@ -1870,24 +1876,24 @@ def list(self, verbose=True):
18701876

18711877
for tarinfo in self:
18721878
if verbose:
1873-
print(stat.filemode(tarinfo.mode), end=' ')
1874-
print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1875-
tarinfo.gname or tarinfo.gid), end=' ')
1879+
_safe_print(stat.filemode(tarinfo.mode))
1880+
_safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1881+
tarinfo.gname or tarinfo.gid))
18761882
if tarinfo.ischr() or tarinfo.isblk():
1877-
print("%10s" % ("%d,%d" \
1878-
% (tarinfo.devmajor, tarinfo.devminor)), end=' ')
1883+
_safe_print("%10s" %
1884+
("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
18791885
else:
1880-
print("%10d" % tarinfo.size, end=' ')
1881-
print("%d-%02d-%02d %02d:%02d:%02d" \
1882-
% time.localtime(tarinfo.mtime)[:6], end=' ')
1886+
_safe_print("%10d" % tarinfo.size)
1887+
_safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1888+
% time.localtime(tarinfo.mtime)[:6])
18831889

1884-
print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ')
1890+
_safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
18851891

18861892
if verbose:
18871893
if tarinfo.issym():
1888-
print("->", tarinfo.linkname, end=' ')
1894+
_safe_print("-> " + tarinfo.linkname)
18891895
if tarinfo.islnk():
1890-
print("link to", tarinfo.linkname, end=' ')
1896+
_safe_print("link to " + tarinfo.linkname)
18911897
print()
18921898

18931899
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):

Lib/test/test_tarfile.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,84 @@ class LzmaUstarReadTest(LzmaTest, UstarReadTest):
217217
pass
218218

219219

220+
class ListTest(ReadTest, unittest.TestCase):
221+
222+
# Override setUp to use default encoding (UTF-8)
223+
def setUp(self):
224+
self.tar = tarfile.open(self.tarname, mode=self.mode)
225+
226+
def test_list(self):
227+
tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
228+
with support.swap_attr(sys, 'stdout', tio):
229+
self.tar.list(verbose=False)
230+
out = tio.detach().getvalue()
231+
self.assertIn(b'ustar/conttype', out)
232+
self.assertIn(b'ustar/regtype', out)
233+
self.assertIn(b'ustar/lnktype', out)
234+
self.assertIn(b'ustar' + (b'/12345' * 40) + b'67/longname', out)
235+
self.assertIn(b'./ustar/linktest2/symtype', out)
236+
self.assertIn(b'./ustar/linktest2/lnktype', out)
237+
# Make sure it puts trailing slash for directory
238+
self.assertIn(b'ustar/dirtype/', out)
239+
self.assertIn(b'ustar/dirtype-with-size/', out)
240+
# Make sure it is able to print unencodable characters
241+
self.assertIn(br'ustar/umlauts-'
242+
br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
243+
self.assertIn(br'misc/regtype-hpux-signed-chksum-'
244+
br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
245+
self.assertIn(br'misc/regtype-old-v7-signed-chksum-'
246+
br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf', out)
247+
self.assertIn(br'pax/bad-pax-\udce4\udcf6\udcfc', out)
248+
self.assertIn(br'pax/hdrcharset-\udce4\udcf6\udcfc', out)
249+
# Make sure it prints files separated by one newline without any
250+
# 'ls -l'-like accessories if verbose flag is not being used
251+
# ...
252+
# ustar/conttype
253+
# ustar/regtype
254+
# ...
255+
self.assertRegex(out, br'ustar/conttype ?\r?\n'
256+
br'ustar/regtype ?\r?\n')
257+
# Make sure it does not print the source of link without verbose flag
258+
self.assertNotIn(b'link to', out)
259+
self.assertNotIn(b'->', out)
260+
261+
def test_list_verbose(self):
262+
tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
263+
with support.swap_attr(sys, 'stdout', tio):
264+
self.tar.list(verbose=True)
265+
out = tio.detach().getvalue()
266+
# Make sure it prints files separated by one newline with 'ls -l'-like
267+
# accessories if verbose flag is being used
268+
# ...
269+
# ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/conttype
270+
# ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/regtype
271+
# ...
272+
self.assertRegex(out, (br'-rw-r--r-- tarfile/tarfile\s+7011 '
273+
br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
274+
br'ustar/\w+type ?\r?\n') * 2)
275+
# Make sure it prints the source of link with verbose flag
276+
self.assertIn(b'ustar/symtype -> regtype', out)
277+
self.assertIn(b'./ustar/linktest2/symtype -> ../linktest1/regtype', out)
278+
self.assertIn(b'./ustar/linktest2/lnktype link to '
279+
b'./ustar/linktest1/regtype', out)
280+
self.assertIn(b'gnu' + (b'/123' * 125) + b'/longlink link to gnu' +
281+
(b'/123' * 125) + b'/longname', out)
282+
self.assertIn(b'pax' + (b'/123' * 125) + b'/longlink link to pax' +
283+
(b'/123' * 125) + b'/longname', out)
284+
285+
286+
class GzipListTest(GzipTest, ListTest):
287+
pass
288+
289+
290+
class Bz2ListTest(Bz2Test, ListTest):
291+
pass
292+
293+
294+
class LzmaListTest(LzmaTest, ListTest):
295+
pass
296+
297+
220298
class CommonReadTest(ReadTest):
221299

222300
def test_empty_tarfile(self):

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ Core and Builtins
4545
Library
4646
-------
4747

48+
- Issue #19920: TarFile.list() no longer fails when it outputs a listing
49+
containing non-encodable characters. Based on patch by Vajrasky Kok.
50+
4851
- Issue #20515: Fix NULL pointer dereference introduced by issue #20368.
4952

5053
- Issue #19186: Restore namespacing of expat symbols inside the pyexpat module.
@@ -321,6 +324,8 @@ IDLE
321324
Tests
322325
-----
323326

327+
- Issue #19920: Added tests for TarFile.list(). Based on patch by Vajrasky Kok.
328+
324329
- Issue #19990: Added tests for the imghdr module. Based on patch by
325330
Claudiu Popa.
326331

0 commit comments

Comments
 (0)