From b612154f6ca4733446685cca6d0701e29f020465 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Wed, 18 Mar 2026 14:54:37 -0500 Subject: [PATCH 1/6] Pass filter_function to TarFile._extract_one() during .extract() --- Lib/tarfile.py | 3 +- Lib/test/test_tarfile.py | 100 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7f0b0b3c632573..f0c73db9e19c0e 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2495,7 +2495,8 @@ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, tarinfo, unfiltered = self._get_extract_tarinfo( member, filter_function, path) if tarinfo is not None: - self._extract_one(tarinfo, path, set_attrs, numeric_owner) + self._extract_one(tarinfo, path, set_attrs, numeric_owner, + filter_function=filter_function) def _get_extract_tarinfo(self, member, filter_function, path): """Get (filtered, unfiltered) TarInfos from *member* diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index f2babaacc27d96..e35316095b1013 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2,6 +2,7 @@ import sys import os import io +import tempfile from hashlib import sha256 from contextlib import contextmanager, ExitStack from random import Random @@ -4304,6 +4305,105 @@ def test_chmod_outside_dir(self): st_mode = cc.outerdir.stat().st_mode self.assertNotEqual(st_mode & 0o777, 0o777) + @symlink_test + def test_chown_links_on_extract(self): + for link_type in (tarfile.SYMTYPE, tarfile.LNKTYPE): + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode='w') as tf: + # Create a regular file entry with uid/gid + info = tarfile.TarInfo(name="test.txt") + info.uid = 1337 + info.gid = 1337 + info.uname = "" + info.gname = "" + info.mode = 0o755 + tf.addfile(info, io.BytesIO(b"")) + + # Create a link to the normal file. 
+ link_info = tarfile.TarInfo(name="link") + link_info.type = link_type + link_info.linkname = "test.txt" + link_info.uid = 1337 + link_info.gid = 1337 + link_info.uname = "" + link_info.gname = "" + link_info.mode = 0o644 + tf.addfile(link_info) + + buf.seek(0) + with ( + self.subTest(f"type={link_type!r}"), + tempfile.TemporaryDirectory() as tmpdir, + tarfile.open(fileobj=buf) as tar, + unittest.mock.patch("os.chown") as mock_chown, + unittest.mock.patch("os.lchown") as mock_lchown, + unittest.mock.patch("os.geteuid") as mock_geteuid, + ): + # Set UID to 0 so chown() is attempted. + mock_geteuid.return_value = 0 + tar.extract("link", path=tmpdir, filter='data') + extract_path = os.path.join(tmpdir, "link") + + if link_type == tarfile.SYMTYPE: + mock_chown.assert_not_called() + mock_lchown.assert_called_once_with(extract_path, -1, -1) + else: + mock_chown.assert_has_calls([ + unittest.mock.call(extract_path, -1, -1), + unittest.mock.call(extract_path, -1, -1) + ]) + mock_lchown.assert_not_called() + + @symlink_test + def test_chown_links_on_extractall(self): + for link_type in (tarfile.SYMTYPE, tarfile.LNKTYPE): + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode='w') as tf: + # Create a regular file entry with uid/gid + info = tarfile.TarInfo(name="test.txt") + info.uid = 1337 + info.gid = 1337 + info.uname = "" + info.gname = "" + info.mode = 0o755 + tf.addfile(info, io.BytesIO(b"")) + + # Create a link to the normal file. 
+ link_info = tarfile.TarInfo(name="link") + link_info.type = link_type + link_info.linkname = "test.txt" + link_info.uid = 1337 + link_info.gid = 1337 + link_info.uname = "" + link_info.gname = "" + link_info.mode = 0o644 + tf.addfile(link_info) + + buf.seek(0) + with ( + self.subTest(f"type={link_type!r}"), + tempfile.TemporaryDirectory() as tmpdir, + tarfile.open(fileobj=buf) as tar, + unittest.mock.patch("os.chown") as mock_chown, + unittest.mock.patch("os.lchown") as mock_lchown, + unittest.mock.patch("os.geteuid") as mock_geteuid, + ): + # Set UID to 0 so chown() is attempted. + mock_geteuid.return_value = 0 + tar.extractall(path=tmpdir, filter='data') + extract_link_path = os.path.join(tmpdir, "link") + extract_file_path = os.path.join(tmpdir, "test.txt") + + if link_type == tarfile.SYMTYPE: + mock_chown.assert_called_once_with(extract_file_path, -1, -1) + mock_lchown.assert_called_once_with(extract_link_path, -1, -1) + else: + mock_chown.assert_has_calls([ + unittest.mock.call(extract_file_path, -1, -1), + unittest.mock.call(extract_link_path, -1, -1) + ]) + mock_lchown.assert_not_called() + def test_link_fallback_normalizes(self): # Make sure hardlink fallbacks work for non-normalized paths for all # filters From 5edbc2ea14c4cc91e457f09686e13545fe69e49a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jun 2026 13:37:49 +0200 Subject: [PATCH 2/6] Use os_helper for temporary directory --- Lib/test/test_tarfile.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index e35316095b1013..c697e59e3ab66e 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2,7 +2,6 @@ import sys import os import io -import tempfile from hashlib import sha256 from contextlib import contextmanager, ExitStack from random import Random @@ -4333,7 +4332,7 @@ def test_chown_links_on_extract(self): buf.seek(0) with ( self.subTest(f"type={link_type!r}"), - tempfile.TemporaryDirectory() 
as tmpdir, + os_helper.temp_dir() as tmpdir, tarfile.open(fileobj=buf) as tar, unittest.mock.patch("os.chown") as mock_chown, unittest.mock.patch("os.lchown") as mock_lchown, @@ -4382,7 +4381,7 @@ def test_chown_links_on_extractall(self): buf.seek(0) with ( self.subTest(f"type={link_type!r}"), - tempfile.TemporaryDirectory() as tmpdir, + os_helper.temp_dir() as tmpdir, tarfile.open(fileobj=buf) as tar, unittest.mock.patch("os.chown") as mock_chown, unittest.mock.patch("os.lchown") as mock_lchown, From 4318869913d426acb1eb7bd0ea58b3181dd38d15 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jun 2026 13:39:48 +0200 Subject: [PATCH 3/6] Add test for filtering the hardlink target --- Lib/test/test_tarfile.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index c697e59e3ab66e..a1534a1722f9b8 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4403,6 +4403,25 @@ def test_chown_links_on_extractall(self): ]) mock_lchown.assert_not_called() + def test_extract_filters_target(self): + # Test that when extract() falls back to extracting (rather than + # linking) a hardlink target, it filters the target. 
+ with ArchiveMaker() as arc: + arc.add("target") + arc.add("link", hardlink_to="target") + def testing_filter(member, path): + if member.name == 'target': + # target: set read-only + return member.replace(mode=stat.S_IRUSR) + # link: don't overwrite the mode + return member.replace(mode=None) + tempdir = pathlib.Path(TEMPDIR) / 'extract' + with os_helper.temp_dir(tempdir), arc.open() as tar: + tar.extract("link", path=tempdir, filter=testing_filter) + path = tempdir / 'link' + if os_helper.can_chmod(): + self.assertFalse(path.stat().st_mode & stat.S_IWUSR) + def test_link_fallback_normalizes(self): # Make sure hardlink fallbacks work for non-normalized paths for all # filters From afc0beca6b5e86b11bd2bfecb25c841d15b1093e Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jun 2026 13:52:47 +0200 Subject: [PATCH 4/6] Use helpers for tests --- Lib/test/test_tarfile.py | 158 ++++++++++++++++----------------------- 1 file changed, 63 insertions(+), 95 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index d07583a2dd96ca..0837951baf2a99 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4516,103 +4516,71 @@ def test_chmod_outside_dir(self): self.assertNotEqual(st_mode & 0o777, 0o777) @symlink_test - def test_chown_links_on_extract(self): - for link_type in (tarfile.SYMTYPE, tarfile.LNKTYPE): - buf = io.BytesIO() - with tarfile.open(fileobj=buf, mode='w') as tf: - # Create a regular file entry with uid/gid - info = tarfile.TarInfo(name="test.txt") - info.uid = 1337 - info.gid = 1337 - info.uname = "" - info.gname = "" - info.mode = 0o755 - tf.addfile(info, io.BytesIO(b"")) - - # Create a link to the normal file. 
- link_info = tarfile.TarInfo(name="link") - link_info.type = link_type - link_info.linkname = "test.txt" - link_info.uid = 1337 - link_info.gid = 1337 - link_info.uname = "" - link_info.gname = "" - link_info.mode = 0o644 - tf.addfile(link_info) - - buf.seek(0) - with ( - self.subTest(f"type={link_type!r}"), - os_helper.temp_dir() as tmpdir, - tarfile.open(fileobj=buf) as tar, - unittest.mock.patch("os.chown") as mock_chown, - unittest.mock.patch("os.lchown") as mock_lchown, - unittest.mock.patch("os.geteuid") as mock_geteuid, - ): - # Set UID to 0 so chown() is attempted. - mock_geteuid.return_value = 0 - tar.extract("link", path=tmpdir, filter='data') - extract_path = os.path.join(tmpdir, "link") - - if link_type == tarfile.SYMTYPE: - mock_chown.assert_not_called() - mock_lchown.assert_called_once_with(extract_path, -1, -1) - else: - mock_chown.assert_has_calls([ - unittest.mock.call(extract_path, -1, -1), - unittest.mock.call(extract_path, -1, -1) - ]) - mock_lchown.assert_not_called() + @support.subTests('link_type', (tarfile.SYMTYPE, tarfile.LNKTYPE)) + def test_chown_links_on_extract(self, link_type): + with ArchiveMaker() as arc: + arc.add("test.txt", + uid=1337, gid=1337, uname="", gname="", mode='-rwxr-xr-x') + arc.add("link", + type=link_type, + linkname='test.txt', + uid=1337, gid=1337, uname="", gname="", mode='-rwxr-xr-x') + + with ( + os_helper.temp_dir() as tmpdir, + arc.open() as tar, + unittest.mock.patch("os.chown") as mock_chown, + unittest.mock.patch("os.lchown") as mock_lchown, + unittest.mock.patch("os.geteuid") as mock_geteuid, + ): + # Set UID to 0 so chown() is attempted. 
+ mock_geteuid.return_value = 0 + tar.extract("link", path=tmpdir, filter='data') + extract_path = os.path.join(tmpdir, "link") + + if link_type == tarfile.SYMTYPE: + mock_chown.assert_not_called() + mock_lchown.assert_called_once_with(extract_path, -1, -1) + else: + mock_chown.assert_has_calls([ + unittest.mock.call(extract_path, -1, -1), + unittest.mock.call(extract_path, -1, -1) + ]) + mock_lchown.assert_not_called() @symlink_test - def test_chown_links_on_extractall(self): - for link_type in (tarfile.SYMTYPE, tarfile.LNKTYPE): - buf = io.BytesIO() - with tarfile.open(fileobj=buf, mode='w') as tf: - # Create a regular file entry with uid/gid - info = tarfile.TarInfo(name="test.txt") - info.uid = 1337 - info.gid = 1337 - info.uname = "" - info.gname = "" - info.mode = 0o755 - tf.addfile(info, io.BytesIO(b"")) - - # Create a link to the normal file. - link_info = tarfile.TarInfo(name="link") - link_info.type = link_type - link_info.linkname = "test.txt" - link_info.uid = 1337 - link_info.gid = 1337 - link_info.uname = "" - link_info.gname = "" - link_info.mode = 0o644 - tf.addfile(link_info) - - buf.seek(0) - with ( - self.subTest(f"type={link_type!r}"), - os_helper.temp_dir() as tmpdir, - tarfile.open(fileobj=buf) as tar, - unittest.mock.patch("os.chown") as mock_chown, - unittest.mock.patch("os.lchown") as mock_lchown, - unittest.mock.patch("os.geteuid") as mock_geteuid, - ): - # Set UID to 0 so chown() is attempted. 
- mock_geteuid.return_value = 0 - tar.extractall(path=tmpdir, filter='data') - extract_link_path = os.path.join(tmpdir, "link") - extract_file_path = os.path.join(tmpdir, "test.txt") - - if link_type == tarfile.SYMTYPE: - mock_chown.assert_called_once_with(extract_file_path, -1, -1) - mock_lchown.assert_called_once_with(extract_link_path, -1, -1) - else: - mock_chown.assert_has_calls([ - unittest.mock.call(extract_file_path, -1, -1), - unittest.mock.call(extract_link_path, -1, -1) - ]) - mock_lchown.assert_not_called() + @support.subTests('link_type', (tarfile.SYMTYPE, tarfile.LNKTYPE)) + def test_chown_links_on_extractall(self, link_type): + with ArchiveMaker() as arc: + arc.add("test.txt", + uid=1337, gid=1337, uname="", gname="", mode='-rwxr-xr-x') + arc.add("link", + type=link_type, + linkname='test.txt', + uid=1337, gid=1337, uname="", gname="", mode='-rwxr-xr-x') + + with ( + os_helper.temp_dir() as tmpdir, + arc.open() as tar, + unittest.mock.patch("os.chown") as mock_chown, + unittest.mock.patch("os.lchown") as mock_lchown, + unittest.mock.patch("os.geteuid") as mock_geteuid, + ): + # Set UID to 0 so chown() is attempted. 
+ mock_geteuid.return_value = 0 + tar.extractall(path=tmpdir, filter='data') + extract_link_path = os.path.join(tmpdir, "link") + extract_file_path = os.path.join(tmpdir, "test.txt") + + if link_type == tarfile.SYMTYPE: + mock_chown.assert_called_once_with(extract_file_path, -1, -1) + mock_lchown.assert_called_once_with(extract_link_path, -1, -1) + else: + mock_chown.assert_has_calls([ + unittest.mock.call(extract_file_path, -1, -1), + unittest.mock.call(extract_link_path, -1, -1) + ]) + mock_lchown.assert_not_called() def test_extract_filters_target(self): # Test that when extract() falls back to extracting (rather than From b873e058a66306c2fad9893a39ba8007e6900473 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jun 2026 14:19:45 +0200 Subject: [PATCH 5/6] Add blurb --- .../next/Library/2026-06-23-14-19-30.gh-issue-151987.8mNIMf.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-06-23-14-19-30.gh-issue-151987.8mNIMf.rst diff --git a/Misc/NEWS.d/next/Library/2026-06-23-14-19-30.gh-issue-151987.8mNIMf.rst b/Misc/NEWS.d/next/Library/2026-06-23-14-19-30.gh-issue-151987.8mNIMf.rst new file mode 100644 index 00000000000000..9eea7b32c4d2b4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-23-14-19-30.gh-issue-151987.8mNIMf.rst @@ -0,0 +1,2 @@ +The :meth:`tarfile.TarFile.extract` method now applies the given filter when +it extracts a link target from the archive as a fallback. 
From c2831a9737da5e4fb4e3486e0a8ed571a39889c0 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 23 Jun 2026 15:10:04 +0200 Subject: [PATCH 6/6] Skip test if mocked functions are unavailable --- Lib/test/test_tarfile.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 0837951baf2a99..50fa30756bf053 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4516,6 +4516,9 @@ def test_chmod_outside_dir(self): self.assertNotEqual(st_mode & 0o777, 0o777) @symlink_test + @unittest.skipUnless(hasattr(os, 'chown'), "missing os.chown") + @unittest.skipUnless(hasattr(os, 'lchown'), "missing os.lchown") + @unittest.skipUnless(hasattr(os, 'geteuid'), "missing os.geteuid") @support.subTests('link_type', (tarfile.SYMTYPE, tarfile.LNKTYPE)) def test_chown_links_on_extract(self, link_type): with ArchiveMaker() as arc: @@ -4549,6 +4552,9 @@ def test_chown_links_on_extract(self, link_type): mock_lchown.assert_not_called() @symlink_test + @unittest.skipUnless(hasattr(os, 'chown'), "missing os.chown") + @unittest.skipUnless(hasattr(os, 'lchown'), "missing os.lchown") + @unittest.skipUnless(hasattr(os, 'geteuid'), "missing os.geteuid") @support.subTests('link_type', (tarfile.SYMTYPE, tarfile.LNKTYPE)) def test_chown_links_on_extractall(self, link_type): with ArchiveMaker() as arc: