Skip to content

Commit f99c8d7

Browse files
Adding tarfile member sanitization to extractall() (#153)
1 parent 5bd7b91 commit f99c8d7

1 file changed

Lines changed: 23 additions & 1 deletion

File tree

tutorials/ner_tweets/scripts/decompress.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,29 @@ def main(src: Path, dest: Path):
1717
"""
1818
if tarfile.is_tarfile(src):
1919
with tarfile.open(src, "r:gz") as input_file:
20-
input_file.extractall(dest)
20+
21+
import os
22+
23+
def is_within_directory(directory, target):
    """Return True if *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalized with ``os.path.abspath``
    (so ``..`` segments are collapsed) before being compared. The check is
    purely lexical: symbolic links on disk are not resolved.

    Args:
        directory: Path of the directory that must contain *target*.
        target: Path to test.

    Returns:
        bool: True when *target* is inside (or equal to) *directory*.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    # commonpath compares whole path components. commonprefix compares
    # character by character, so "/data/out" would wrongly be treated as a
    # parent of "/data/output-evil/x".
    try:
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths cannot share a parent (e.g. different
        # drives on Windows); the target is then certainly outside.
        return False

    return common == abs_directory
31+
32+
def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting members that escape it.

    Every member of the archive is validated before anything is written:
    its own name, and for symbolic or hard links also the link target,
    must resolve to a location inside *path*.

    Args:
        tar: An open ``tarfile.TarFile``.
        path: Destination directory.
        members: Optional subset of members, passed through to
            ``TarFile.extractall``. Validation always covers the whole
            archive regardless of this subset.
        numeric_owner: Passed through to ``TarFile.extractall``.

    Raises:
        Exception: If a member name or link target points outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        if member.issym() or member.islnk():
            # A symlink target is relative to the directory holding the
            # link; a hard-link target is relative to the archive root.
            # A link pointing outside the destination would let a later
            # member be written through it.
            if member.issym():
                link_base = os.path.dirname(member_path)
            else:
                link_base = path
            link_target = os.path.join(link_base, member.linkname)
            if not is_within_directory(path, link_target):
                raise Exception("Attempted Path Traversal in Tar File")

    # NOTE(review): on Python versions that support extraction filters,
    # passing filter="data" to extractall() adds further hardening.
    tar.extractall(path, members, numeric_owner=numeric_owner)
40+
41+
42+
safe_extract(input_file, dest)
2143
msg.good(f"Decompressed {src} into {dest}")
2244
elif src.suffix == ".gz":
2345
with gzip.open(src, "rb") as input_file:

0 commit comments

Comments
 (0)