From 190b3ef2cbf7ccae03c2a47ffe36ea8be2966a97 Mon Sep 17 00:00:00 2001 From: Grant Herman Date: Tue, 19 May 2026 19:22:28 -0400 Subject: [PATCH] fix(150075): adding tarinfo offset info before and after reading the file Signed-off-by: Grant Herman --- Lib/tarfile.py | 5 ++++- Lib/test/test_tarfile.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 87500c726ce9a8..5c39a84b5ce855 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -2360,11 +2360,14 @@ def addfile(self, tarinfo, fileobj=None): raise ValueError("fileobj not provided for non zero-size regular file") tarinfo = copy.copy(tarinfo) - + # Record the archive offset at which this member's header starts. + tarinfo.offset = self.offset buf = tarinfo.tobuf(self.format, self.encoding, self.errors) self.fileobj.write(buf) self.offset += len(buf) + # The member's data starts just past the header; record that offset below. bufsize=self.copybufsize + tarinfo.offset_data = self.offset # If there's data to follow, append it. if fileobj is not None: copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 02fd9620bcf33d..ca778bd12ab801 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1456,6 +1456,29 @@ class WriteTest(WriteTestBase, unittest.TestCase): prefix = "w:" + def test_addfile_sets_offsets(self): + # gh-150075: addfile() must set offset and offset_data on the + # TarInfo stored in the archive so they match a subsequent read. 
+ data = b"data" + + with tarfile.open(tmpname, self.mode) as tar: + t1 = tarfile.TarInfo("test1.txt") + t1.size = len(data) + tar.addfile(t1, io.BytesIO(data)) + + t2 = tarfile.TarInfo("test2.txt") + t2.size = len(data) + tar.addfile(t2, io.BytesIO(data)) + + write_members = tar.getmembers() + + with tarfile.open(tmpname) as tar: + read_members = tar.getmembers() + + for w, r in zip(write_members, read_members): + self.assertEqual(w.offset, r.offset) + self.assertEqual(w.offset_data, r.offset_data) + def test_100_char_name(self): # The name field in a tar header stores strings of at most 100 chars. # If a string is shorter than 100 chars it has to be padded with '\0',