Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
17f02bb
GH-89727: Fix `shutil.rmtree()` recursion error on deep trees.
barneygale Apr 1, 2023
48dc5db
Delay urllib import
barneygale Apr 1, 2023
6e92b27
Fix up filenames in exceptions
barneygale Apr 1, 2023
c042220
Restore `shutil._rmtree_islink()`
barneygale Apr 1, 2023
e268e89
Tidy up exception handling
barneygale Apr 1, 2023
3f40cb0
Better compatibility with shutil tests.
barneygale Apr 1, 2023
b59bda8
Move main implementation into pathlib
barneygale Apr 1, 2023
c174376
Make `_fwalk()` private, tidy up naming.
barneygale Apr 1, 2023
0e8d8e6
Fix tests
barneygale Apr 1, 2023
fea58ab
Fix missing filename in exception, tidy up more naming.
barneygale Apr 1, 2023
bff0f5e
More cleanup
barneygale Apr 1, 2023
3b21684
Even more tidy-up
barneygale Apr 1, 2023
5abee55
Reduce diff a bit
barneygale Apr 2, 2023
57173d9
Performance improvements
barneygale Apr 2, 2023
58070d9
Simplify rmtree (h/t eryksun)
barneygale Apr 2, 2023
d61baa1
More performance tweaks
barneygale Apr 2, 2023
e983dd7
Make `error._func` private, as it's only intended for `shutil.rmtree()`
barneygale Apr 3, 2023
5387d32
Improve tests
barneygale Apr 3, 2023
75541fb
Improve performance
barneygale Apr 3, 2023
698db60
Merge branch 'main' into pathlib-fwalk
barneygale Apr 3, 2023
74cdd6c
Supply relative paths to `rmdir()` in fd-based walk.
barneygale Apr 7, 2023
c17d07c
Handle rmdir() and unlink() not accepting dir_fd
barneygale Apr 7, 2023
b4f120c
Move rmtree() implementation back into shutil.
barneygale Apr 12, 2023
e856ccb
Restore missing dir_fd check
barneygale Apr 12, 2023
d1e349c
Remove unnecessary change to test
barneygale Apr 12, 2023
63e55e1
Reduce memory usage a bit.
barneygale Apr 12, 2023
f7c5352
Reduce diff
barneygale Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-89727: Fix shutil.rmtree() recursion error on deep trees.
Add a `follow_junctions` argument to `pathlib.Path.walk()`. When set to
false, directory junctions are treated as files.

Add an `fwalk()` method to `pathlib.Path`, analogous to `os.fwalk()`.

Implement `shutil.rmtree()` using `pathlib.Path.walk()` and `fwalk()`.
  • Loading branch information
barneygale committed Apr 1, 2023
commit 17f02bbe06fc43e20dc706f66232cf9f1b5116cd
143 changes: 107 additions & 36 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,89 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
return


#
# Walking helpers
#


class _WalkAction:
WALK = object()
YIELD = object()
CLOSE = object()


_supports_fwalk = (
{os.stat, os.open} <= os.supports_dir_fd and
{os.stat, os.scandir} <= os.supports_fd)


def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions):
action, value = actions.pop()

if action is _WalkAction.WALK:
path, dir_fd, orig_st = value
dirstats = None
dirnames = []
filenames = []
if use_fd:
name = path if dir_fd is None else path.name
if not follow_symlinks:
if top_down:
orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd)
else:
dirstats = []
fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd)
actions.append((_WalkAction.CLOSE, fd))
if orig_st and not os.path.samestat(orig_st, os.stat(fd)):
# This can only happen if a directory is replaced with a symlink during the walk.
return
result = path, dirnames, filenames, fd
try:
scandir_it = os.scandir(fd)
except OSError as exc:
exc.filename = str(path)
raise exc
else:
fd = None
result = path, dirnames, filenames
scandir_it = path._scandir()

with scandir_it:
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks) and (
follow_junctions or not entry.is_junction())
if is_dir:
if dirstats is not None:
dirstats.append(entry.stat(follow_symlinks=False))
dirnames.append(entry.name)
else:
filenames.append(entry.name)

except OSError:
filenames.append(entry.name)

if top_down:
yield result
else:
actions.append((_WalkAction.YIELD, result))

if dirstats:
actions += [
(_WalkAction.WALK, (path._make_child_relpath(dirname), fd, dirstat))
for dirname, dirstat in zip(reversed(dirnames), reversed(dirstats))]
else:
actions += [
(_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None))
for dirname in reversed(dirnames)]

elif action is _WalkAction.YIELD:
yield value
elif action is _WalkAction.CLOSE:
os.close(value)
else:
raise AssertionError(f"unknown walk action: {action}")

#
# Public API
#
Expand Down Expand Up @@ -1194,50 +1277,38 @@ def expanduser(self):

return self

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junctions=True):
"""Walk the directory tree from this directory, similar to os.walk()."""
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
paths = [self]

while paths:
path = paths.pop()
if isinstance(path, tuple):
yield path
continue

# We may not have read permission for self, in which case we can't
# get a list of the files the directory contains. os.walk()
# always suppressed the exception in that instance, rather than
# blow up for a minor reason when (say) a thousand readable
# directories are still left to visit. That logic is copied here.
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks, follow_junctions)
actions = [(_WalkAction.WALK, (self, None, None))]
while actions:
try:
scandir_it = path._scandir()
yield from _walk(top_down, follow_symlinks, follow_junctions, False, actions)
except OSError as error:
if on_error is not None:
on_error(error)
continue

with scandir_it:
dirnames = []
filenames = []
for entry in scandir_it:
if _supports_fwalk:
def fwalk(self, top_down=True, on_error=None, follow_symlinks=False, dir_fd=None):
"""Walk the directory tree from this directory, similar to os.fwalk()."""
sys.audit("pathlib.Path.fwalk", self, on_error, follow_symlinks, dir_fd)
actions = [(_WalkAction.WALK, (self, dir_fd, None))]
try:
while actions:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError:
# Carried over from os.path.isdir().
is_dir = False

if is_dir:
dirnames.append(entry.name)
else:
filenames.append(entry.name)

if top_down:
yield path, dirnames, filenames
else:
paths.append((path, dirnames, filenames))

paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
yield from _walk(top_down, follow_symlinks, True, True, actions)
except OSError as error:
if on_error is not None:
on_error(error)
continue
finally:
for action, value in reversed(actions):
if action is _WalkAction.CLOSE:
try:
os.close(value)
except OSError:
pass


class PosixPath(Path, PurePosixPath):
Expand Down
182 changes: 45 additions & 137 deletions Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import fnmatch
import collections
import errno
import pathlib
import warnings

try:
Expand Down Expand Up @@ -562,111 +563,56 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
ignore_dangling_symlinks=ignore_dangling_symlinks,
dirs_exist_ok=dirs_exist_ok)

if hasattr(os.stat_result, 'st_file_attributes'):
def _rmtree_islink(path):
try:
st = os.lstat(path)
return (stat.S_ISLNK(st.st_mode) or
(st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT
and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT))
except OSError:
return False
else:
def _rmtree_islink(path):
return os.path.islink(path)

# version vulnerable to race conditions
def _rmtree_unsafe(path, onexc):
try:
with os.scandir(path) as scandir_it:
entries = list(scandir_it)
except OSError as err:
onexc(os.scandir, path, err)
entries = []
for entry in entries:
fullname = entry.path
try:
is_dir = entry.is_dir(follow_symlinks=False)
except OSError:
is_dir = False

if is_dir and not entry.is_junction():
def on_walk_error(exc):
onexc(os.scandir, exc.filename, exc)
walker = path.walk(
top_down=False,
follow_symlinks=False,
follow_junctions=False,
on_error=on_walk_error)
for toppath, dirnames, filenames in walker:
for dirname in dirnames:
try:
if entry.is_symlink():
# This can only happen if someone replaces
# a directory with a symlink after the call to
# os.scandir or entry.is_dir above.
raise OSError("Cannot call rmtree on a symbolic link")
except OSError as err:
onexc(os.path.islink, fullname, err)
continue
_rmtree_unsafe(fullname, onexc)
else:
toppath._make_child_relpath(dirname).rmdir()
except OSError as exc:
onexc(os.rmdir, str(toppath / dirname), exc)
for filename in filenames:
try:
os.unlink(fullname)
except OSError as err:
onexc(os.unlink, fullname, err)
toppath._make_child_relpath(filename).unlink()
except OSError as exc:
onexc(os.unlink, str(toppath / filename), exc)
try:
os.rmdir(path)
except OSError as err:
onexc(os.rmdir, path, err)
path.rmdir()
except OSError as exc:
onexc(os.rmdir, str(path), exc)

# Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onexc):
try:
with os.scandir(topfd) as scandir_it:
entries = list(scandir_it)
except OSError as err:
err.filename = path
onexc(os.scandir, path, err)
return
for entry in entries:
fullname = os.path.join(path, entry.name)
try:
is_dir = entry.is_dir(follow_symlinks=False)
except OSError:
is_dir = False
else:
if is_dir:
try:
orig_st = entry.stat(follow_symlinks=False)
is_dir = stat.S_ISDIR(orig_st.st_mode)
except OSError as err:
onexc(os.lstat, fullname, err)
continue
if is_dir:
def _rmtree_safe_fd(path, onexc, dir_fd):
def on_walk_error(exc):
onexc(os.scandir, exc.filename, exc)
walker = path.fwalk(
top_down=False,
follow_symlinks=False,
on_error=on_walk_error,
dir_fd=dir_fd)
for toppath, dirnames, filenames, topfd in walker:
for dirname in dirnames:
try:
dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
dirfd_closed = False
except OSError as err:
onexc(os.open, fullname, err)
else:
try:
if os.path.samestat(orig_st, os.fstat(dirfd)):
_rmtree_safe_fd(dirfd, fullname, onexc)
try:
os.close(dirfd)
dirfd_closed = True
os.rmdir(entry.name, dir_fd=topfd)
except OSError as err:
onexc(os.rmdir, fullname, err)
else:
try:
# This can only happen if someone replaces
# a directory with a symlink after the call to
# os.scandir or stat.S_ISDIR above.
raise OSError("Cannot call rmtree on a symbolic "
"link")
except OSError as err:
onexc(os.path.islink, fullname, err)
finally:
if not dirfd_closed:
os.close(dirfd)
else:
os.rmdir(dirname, dir_fd=topfd)
except OSError as exc:
onexc(os.rmdir, str(toppath / dirname), exc)
for filename in filenames:
try:
os.unlink(entry.name, dir_fd=topfd)
except OSError as err:
onexc(os.unlink, fullname, err)
os.unlink(filename, dir_fd=topfd)
except OSError as exc:
onexc(os.unlink, str(toppath / filename), exc)
try:
os.rmdir(path, dir_fd=dir_fd)
except OSError as exc:
onexc(os.rmdir, str(path), exc)

_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
os.supports_dir_fd and
Expand Down Expand Up @@ -719,52 +665,14 @@ def onexc(*args):
exc_info = type(exc), exc, exc.__traceback__
return onerror(func, path, exc_info)

if isinstance(path, bytes):
path = os.fsdecode(path)
path = pathlib.Path(path)
if _use_fd_functions:
# While the unsafe rmtree works fine on bytes, the fd based does not.
if isinstance(path, bytes):
path = os.fsdecode(path)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
try:
orig_st = os.lstat(path, dir_fd=dir_fd)
except Exception as err:
onexc(os.lstat, path, err)
return
try:
fd = os.open(path, os.O_RDONLY, dir_fd=dir_fd)
fd_closed = False
except Exception as err:
onexc(os.open, path, err)
return
try:
if os.path.samestat(orig_st, os.fstat(fd)):
_rmtree_safe_fd(fd, path, onexc)
try:
os.close(fd)
fd_closed = True
os.rmdir(path, dir_fd=dir_fd)
except OSError as err:
onexc(os.rmdir, path, err)
else:
try:
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError as err:
onexc(os.path.islink, path, err)
finally:
if not fd_closed:
os.close(fd)
_rmtree_safe_fd(path, onexc, dir_fd)
else:
if dir_fd is not None:
Comment thread
barneygale marked this conversation as resolved.
raise NotImplementedError("dir_fd unavailable on this platform")
try:
if _rmtree_islink(path):
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError as err:
onexc(os.path.islink, path, err)
# can't continue even if onexc hook returns
return
return _rmtree_unsafe(path, onexc)

# Allow introspection of whether or not the hardening against symlink
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def onerror(*args):
with self.assertWarns(DeprecationWarning):
shutil.rmtree(link, onerror=onerror)
self.assertEqual(len(errors), 1)
self.assertIs(errors[0][0], os.path.islink)
self.assertIs(errors[0][0], os.rmdir)
self.assertEqual(errors[0][1], link)
self.assertIsInstance(errors[0][2][1], OSError)

Expand All @@ -230,7 +230,7 @@ def onexc(*args):
errors.append(args)
shutil.rmtree(link, onexc=onexc)
self.assertEqual(len(errors), 1)
self.assertIs(errors[0][0], os.path.islink)
self.assertIs(errors[0][0], os.rmdir)
self.assertEqual(errors[0][1], link)
self.assertIsInstance(errors[0][2], OSError)

Expand Down