From cab12eadaad7802900e9bd6029e7578837c20439 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Fri, 13 Aug 2021 22:57:00 -0400 Subject: [PATCH 1/7] bpo-39039: tarfile raises descriptive exception from zlib.error * during tarfile parsing, a zlib error indicates invalid data * tarfile.open now raises a descriptive exception from the zlib error * this makes it clear to the user that they may be trying to open a corrupted tar file --- Lib/tarfile.py | 3 +++ .../next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 18d415adf544184..ea471964f417ce2 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -46,6 +46,7 @@ import struct import copy import re +import zlib try: import pwd @@ -2349,6 +2350,8 @@ def next(self): raise ReadError(str(e)) from None except SubsequentHeaderError as e: raise ReadError(str(e)) from None + except zlib.error as e: + raise ExtractError('archive may be corrupted or invalid') from e break if tarinfo is not None: diff --git a/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst b/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst new file mode 100644 index 000000000000000..e8a9941154b1203 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst @@ -0,0 +1,3 @@ +tarfile.open raises :exc:`~tarfile.ExtractError` when a zlib error occurs +during file extraction. The message provides a user-friendly notice that the +tar file may be corrupt or invalid. From 84726b95aab776cb6e137d890185e3488ca39a88 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Mon, 16 Aug 2021 23:54:02 -0400 Subject: [PATCH 2/7] temp: revert changes so far; return to "clean slate" --- Lib/tarfile.py | 3 --- .../next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst | 3 --- 2 files changed, 6 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index ea471964f417ce2..18d415adf544184 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -46,7 +46,6 @@ import struct import copy import re -import zlib try: import pwd @@ -2350,8 +2349,6 @@ def next(self): raise ReadError(str(e)) from None except SubsequentHeaderError as e: raise ReadError(str(e)) from None - except zlib.error as e: - raise ExtractError('archive may be corrupted or invalid') from e break if tarinfo is not None: diff --git a/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst b/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst deleted file mode 100644 index e8a9941154b1203..000000000000000 --- a/Misc/NEWS.d/next/Library/2021-08-13-22-56-21.bpo-39039.7eFlk0.rst +++ /dev/null @@ -1,3 +0,0 @@ -tarfile.open raises :exc:`~tarfile.ExtractError` when a zlib error occurs -during file extraction. The message provides a user-friendly notice that the -tar file may be corrupt or invalid. From 6123c186216ac5400d64e175b0c6242321bf3446 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Wed, 18 Aug 2021 10:36:24 -0400 Subject: [PATCH 3/7] first revision; no test yet --- Lib/tarfile.py | 9 +++++++++ .../Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst | 2 ++ 2 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 18d415adf544184..6898dbeada0d2df 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2349,6 +2349,15 @@ def next(self): raise ReadError(str(e)) from None except SubsequentHeaderError as e: raise ReadError(str(e)) from None + except Exception as e: + try: + import zlib + if isinstance(e, zlib.error): + raise ReadError(f'zlib error: {e}') + else: + raise e + except ImportError: + raise e break if tarinfo is not None: diff --git a/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst b/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst new file mode 100644 index 000000000000000..7250055c2a4a9e4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst @@ -0,0 +1,2 @@ +tarfile.open raises :exc:`~tarfile.ReadError` when a zlib error occurs +during file extraction. From 325202a2b666a2dbd0f6e64f27c6258e14a1a77c Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 19 Aug 2021 22:41:16 -0400 Subject: [PATCH 4/7] draft: introduce test --- Lib/tarfile.py | 21 ++++++++++++--------- Lib/test/test_tarfile.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 6898dbeada0d2df..fce437868e1d5e8 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2349,15 +2349,18 @@ def next(self): raise ReadError(str(e)) from None except SubsequentHeaderError as e: raise ReadError(str(e)) from None - except Exception as e: - try: - import zlib - if isinstance(e, zlib.error): - raise ReadError(f'zlib error: {e}') - else: - raise e - except ImportError: - raise e + + # I am commenting out the fix in this commit to demonstrate that + # the test fails without it + # except Exception as e: + # try: + # import zlib + # if isinstance(e, zlib.error): + # raise ReadError(f'zlib error: {e}') + # else: + # raise e + # except ImportError: + # raise e break if tarinfo is not None: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index cfdda24a269f560..57f50199035fe87 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -19,6 +19,10 @@ import gzip except ImportError: gzip = None +try: + import zlib +except ImportError: + zlib = None try: import bz2 except ImportError: @@ -687,6 +691,16 @@ def test_parallel_iteration(self): self.assertEqual(m1.offset, m2.offset) self.assertEqual(m1.get_info(), m2.get_info()) + @unittest.skipIf(not zlib, "requires zlib") + def test_zlib_error_does_not_leak(self): + # bpo-39039: tarfile.open allowed zlib exceptions to bubble up when + # parsing certain types of invalid data + with unittest.mock.patch("tarfile.TarInfo.fromtarfile") as mock: + mock.side_effect = zlib.error + with self.assertRaises(tarfile.ReadError): + tarfile.open(self.tarname) + + class MiscReadTest(MiscReadTestBase, unittest.TestCase): test_fail_comp = None From 4711fd2848582250517747de14c6c9a588b51215 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 19 Aug 2021 22:42:03 -0400 Subject: [PATCH 5/7] draft: restore fix --- Lib/tarfile.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index fce437868e1d5e8..6898dbeada0d2df 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2349,18 +2349,15 @@ def next(self): raise ReadError(str(e)) from None except SubsequentHeaderError as e: raise ReadError(str(e)) from None - - # I am commenting out the fix in this commit to demonstrate that - # the test fails without it - # except Exception as e: - # try: - # import zlib - # if isinstance(e, zlib.error): - # raise ReadError(f'zlib error: {e}') - # else: - # raise e - # except ImportError: - # raise e + except Exception as e: + try: + import zlib + if isinstance(e, zlib.error): + raise ReadError(f'zlib error: {e}') + else: + raise e + except ImportError: + raise e break if tarinfo is not None: From eb60bbe68ff20ca9eb0482deebe6d3ee3d6987a0 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Thu, 19 Aug 2021 23:00:44 -0400 Subject: [PATCH 6/7] check if zlib is None, not "not zlib" --- Lib/test/test_tarfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 57f50199035fe87..e4b5c52bf1eaf4a 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -691,7 +691,7 @@ def test_parallel_iteration(self): self.assertEqual(m1.offset, m2.offset) self.assertEqual(m1.get_info(), m2.get_info()) - @unittest.skipIf(not zlib, "requires zlib") + @unittest.skipIf(zlib is None, "requires zlib") def test_zlib_error_does_not_leak(self): # bpo-39039: tarfile.open allowed zlib exceptions to bubble up when # parsing certain types of invalid data From 4cc0c4c34df1a24e51c320ed10b04e762e97c471 Mon Sep 17 00:00:00 2001 From: Jack DeVries Date: Sun, 22 Aug 2021 15:21:37 -0400 Subject: [PATCH 7/7] raise ReadError from None --- Lib/tarfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 6898dbeada0d2df..c1ee1222e09b5af 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2353,7 +2353,7 @@ def next(self): try: import zlib if isinstance(e, zlib.error): - raise ReadError(f'zlib error: {e}') + raise ReadError(f'zlib error: {e}') from None else: raise e except ImportError: