Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move stack logic to bottom of loop in os.walk
  • Loading branch information
jonburdo committed Jan 3, 2023
commit 04a16a22f5c7cd969c46e9a0c510e66f86bfc432
164 changes: 83 additions & 81 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,40 +341,14 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
"""
sys.audit("os.walk", top, topdown, onerror, followlinks)

parent = b"" if isinstance(top, bytes) else ""
dirs = iter([fspath(top)])
stack = [[parent, dirs]] if topdown else [dirs]
top = fspath(top)
stack = []
islink, join = path.islink, path.join
while stack:
value = stack[-1]
if topdown:
parent, dirs = value
try:
dirname = next(dirs)
except StopIteration:
stack.pop()
continue
top = join(parent, dirname)
# bpo-23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the previous
# "yield"
if not (followlinks or not islink(top)):
continue
else:
if isinstance(value, tuple):
stack.pop()
yield value
continue
try:
top = next(value)
except StopIteration:
stack.pop()
continue

while True:
dirs = []
nondirs = []
walk_dirs = []
cont = False

# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains.
Expand All @@ -386,64 +360,92 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
except OSError as error:
if onerror is not None:
onerror(error)
continue
cont = True
else:
with scandir_it:
while True:
try:
try:
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break

try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider the entry not to
# be a directory, same behaviour as os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude
# symlinks to directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider the
# entry not to be a symbolic link, same behaviour
# as os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if topdown:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Such a high percentage of code in this function is now under either if topdown or the reverse that I'm kind of curious what it would look like to just have separate (internal) functions for topdown vs bottom-up. But this would still increase code duplication significantly, and move the duplicated code/structure further apart, so I suspect it's still better this way.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was actually thinking about this. I just gave it a shot to see. This seems to me like one of those cases where there are enough little differences in logic that it's much cleaner to separate the two. If we didn't support dir modification for topdown or didn't care about performance it might be simpler, but when you have a lot of the same logic interspersed by a lot of little differences it gets messy. In these kinds of situations I also tend to prefer some duplication between functions over one big function with conditions inside of loops.

I also find it easier to look at the two sets of logic separately, but either way works.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think the split version seems fine. Will wait a bit and see if some core devs have opinions.

if not cont:
# Yield before sub-directory traversal if going top down
yield top, dirs, nondirs
# Traverse into sub-directories
stack.append([top, iter(dirs)])

cont = False
with scandir_it:
while True:
try:
try:
entry = next(scandir_it)
except StopIteration:
value, dirs = stack[-1]
except IndexError:
return
for dirname in dirs:
top = join(value, dirname)
# bpo-23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the previous
# "yield"
if followlinks or not islink(top):
break
except OSError as error:
if onerror is not None:
onerror(error)
cont = True
break
else:
stack.pop()
continue
break
else:
if not cont:
# Traverse into sub-directories
stack.append(((top, dirs, nondirs), iter(walk_dirs)))

while True:
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider the entry not to
# be a directory, same behaviour as os.path.isdir().
is_dir = False

if is_dir:
dirs.append(entry.name)
value, dirs = stack[-1]
except IndexError:
return
try:
top = next(dirs)
except StopIteration:
stack.pop()
# Yield after sub-directory traversal if going bottom up
yield value
else:
nondirs.append(entry.name)

if not topdown and is_dir:
# Bottom-up: traverse into sub-directory, but exclude
# symlinks to directories if followlinks is False
if followlinks:
walk_into = True
else:
try:
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider the
# entry not to be a symbolic link, same behaviour
# as os.path.islink().
is_symlink = False
walk_into = not is_symlink

if walk_into:
walk_dirs.append(entry.path)
if cont:
continue

if topdown:
# Yield before sub-directory traversal if going top down
yield top, dirs, nondirs
# Traverse into sub-directories
stack.append([top, iter(dirs)])
else:
# Yield after sub-directory traversal if going bottom up
stack.append((top, dirs, nondirs))
# Traverse into sub-directories
stack.append(iter(walk_dirs))
break

__all__.append("walk")

Expand Down