Skip to content
Closed
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
17f02bb
GH-89727: Fix `shutil.rmtree()` recursion error on deep trees.
barneygale Apr 1, 2023
48dc5db
Delay urllib import
barneygale Apr 1, 2023
6e92b27
Fix up filenames in exceptions
barneygale Apr 1, 2023
c042220
Restore `shutil._rmtree_islink()`
barneygale Apr 1, 2023
e268e89
Tidy up exception handling
barneygale Apr 1, 2023
3f40cb0
Better compatibility with shutil tests.
barneygale Apr 1, 2023
b59bda8
Move main implementation into pathlib
barneygale Apr 1, 2023
c174376
Make `_fwalk()` private, tidy up naming.
barneygale Apr 1, 2023
0e8d8e6
Fix tests
barneygale Apr 1, 2023
fea58ab
Fix missing filename in exception, tidy up more naming.
barneygale Apr 1, 2023
bff0f5e
More cleanup
barneygale Apr 1, 2023
3b21684
Even more tidy-up
barneygale Apr 1, 2023
5abee55
Reduce diff a bit
barneygale Apr 2, 2023
57173d9
Performance improvements
barneygale Apr 2, 2023
58070d9
Simplify rmtree (h/t eryksun)
barneygale Apr 2, 2023
d61baa1
More performance tweaks
barneygale Apr 2, 2023
e983dd7
Make `error._func` private, as it's only intended for `shutil.rmtree()`
barneygale Apr 3, 2023
5387d32
Improve tests
barneygale Apr 3, 2023
75541fb
Improve performance
barneygale Apr 3, 2023
698db60
Merge branch 'main' into pathlib-fwalk
barneygale Apr 3, 2023
74cdd6c
Supply relative paths to `rmdir()` in fd-based walk.
barneygale Apr 7, 2023
c17d07c
Handle rmdir() and unlink() not accepting dir_fd
barneygale Apr 7, 2023
b4f120c
Move rmtree() implementation back into shutil.
barneygale Apr 12, 2023
e856ccb
Restore missing dir_fd check
barneygale Apr 12, 2023
d1e349c
Remove unnecessary change to test
barneygale Apr 12, 2023
63e55e1
Reduce memory usage a bit.
barneygale Apr 12, 2023
f7c5352
Reduce diff
barneygale Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Even more tidy-up
  • Loading branch information
barneygale committed Apr 2, 2023
commit 3b216840cf5e8b407a919564ab6c0ae3b30a4cc3
72 changes: 33 additions & 39 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,6 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
return


#
# Walking helpers
#


class _WalkAction:
WALK = object()
YIELD = object()
Expand All @@ -221,39 +216,23 @@ class _WalkAction:

def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions):
action, value = actions.pop()

if action is _WalkAction.WALK:
path, dir_fd, entry = value
entries = [] if use_fd and not top_down and not follow_symlinks else None
dirnames = []
filenames = []
try:
if use_fd:
name = path if dir_fd is None else path.name
orig_st = None
if not follow_symlinks:
if entry:
orig_st = entry.stat(follow_symlinks=False)
else:
orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd)
fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd)
actions.append((_WalkAction.CLOSE, fd))
if orig_st and not os.path.samestat(orig_st, os.stat(fd)):
error = NotADirectoryError("Will not walk into symbolic link")
error.func = os.path.islink
raise error
scandir_it, fd = path._fscandir(follow_symlinks, actions, dir_fd, entry)
result = path, dirnames, filenames, fd
scandir_it = os.scandir(fd)
else:
fd = None
scandir_it, fd = path._scandir(), None
result = path, dirnames, filenames
scandir_it = path._scandir()
except OSError as error:
if not hasattr(error, 'func'):
error.func = os.scandir
error.filename = str(path)
raise error

with scandir_it:
for entry in scandir_it:
try:
Expand All @@ -265,15 +244,12 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions):
dirnames.append(entry.name)
else:
filenames.append(entry.name)

except OSError:
filenames.append(entry.name)

if top_down:
yield result
else:
actions.append((_WalkAction.YIELD, result))

if entries:
actions += [
(_WalkAction.WALK, (path._make_child_relpath(dirname), fd, entry))
Expand All @@ -282,7 +258,6 @@ def _walk(top_down, follow_symlinks, follow_junctions, use_fd, actions):
actions += [
(_WalkAction.WALK, (path._make_child_relpath(dirname), fd, None))
for dirname in reversed(dirnames)]

elif action is _WalkAction.YIELD:
yield value
elif action is _WalkAction.CLOSE:
Expand Down Expand Up @@ -1301,10 +1276,10 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junct
except OSError as error:
if on_error is not None:
on_error(error)
continue

if _supports_fwalk:
def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None):
"""Walk the directory tree from this directory, similar to os.fwalk()."""
sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd)
actions = [(_WalkAction.WALK, (self, dir_fd, None))]
try:
Expand All @@ -1314,7 +1289,6 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=
except OSError as error:
if on_error is not None:
on_error(error)
continue
finally:
for action, value in reversed(actions):
if action is _WalkAction.CLOSE:
Expand All @@ -1323,6 +1297,25 @@ def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=
except OSError:
pass

def _fscandir(self, follow_symlinks, actions, dir_fd, entry):
name = self if dir_fd is None else self.name
if not follow_symlinks:
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if entry:
orig_st = entry.stat(follow_symlinks=False)
else:
orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd)
fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd)
actions.append((_WalkAction.CLOSE, fd))
if not follow_symlinks and not os.path.samestat(orig_st, os.stat(fd)):
# This can only happen if someone replaces a directory
# with a symbolic link after the call to stat() above.
error = NotADirectoryError("Cannot walk into a symbolic link")
error.func = os.path.islink
raise error
return os.scandir(fd), fd

# Version using fd-based APIs to protect against races
_rmtree.avoids_symlink_attacks = True
def _rmtree_impl(self, on_error, dir_fd):
Expand All @@ -1331,24 +1324,24 @@ def _rmtree_impl(self, on_error, dir_fd):
follow_symlinks=False,
on_error=on_error,
dir_fd=dir_fd)
for toppath, dirnames, filenames, topfd in walker:
for path, dirnames, filenames, fd in walker:
for dirname in dirnames:
try:
os.rmdir(dirname, dir_fd=topfd)
os.rmdir(dirname, dir_fd=fd)
except OSError as error:
if on_error is not None:
error.filename = str(toppath._make_child_relpath(dirname))
error.filename = str(path._make_child_relpath(dirname))
error.func = os.rmdir
on_error(error)
for filename in filenames:
try:
os.unlink(filename, dir_fd=topfd)
os.unlink(filename, dir_fd=fd)
except OSError as error:
if on_error is not None:
error.filename = str(toppath._make_child_relpath(filename))
error.filename = str(path._make_child_relpath(filename))
error.func = os.unlink
on_error(error)
if toppath == self:
if path == self:
try:
os.rmdir(self, dir_fd=dir_fd)
except OSError as error:
Expand All @@ -1372,29 +1365,30 @@ def _rmtree_impl(self, on_error, dir_fd):
if on_error is not None:
error.func = os.path.islink
on_error(error)
# can't continue even if on_error hook returns
return

walker = self.walk(
top_down=False,
follow_symlinks=False,
follow_junctions=False,
on_error=on_error)
for toppath, dirnames, filenames in walker:
for path, dirnames, filenames in walker:
for dirname in dirnames:
try:
toppath._make_child_relpath(dirname).rmdir()
path._make_child_relpath(dirname).rmdir()
except OSError as error:
if on_error is not None:
error.func = os.rmdir
on_error(error)
for filename in filenames:
try:
toppath._make_child_relpath(filename).unlink()
path._make_child_relpath(filename).unlink()
except OSError as error:
if on_error is not None:
error.func = os.unlink
on_error(error)
if toppath == self:
if path == self:
try:
self.rmdir()
except OSError as error:
Expand Down