Skip to content

Commit

Permalink
pythonGH-125413: Add pathlib.Path.dir_entry attribute
Browse files Browse the repository at this point in the history
Add a `Path.dir_entry` attribute. In any path object generated by
`Path.iterdir()`, it stores an `os.DirEntry` object corresponding to the
path; in other cases it is `None`.

This can be used to retrieve the file type and attributes of directory
children without necessarily incurring further system calls.

Under the hood, we use `dir_entry` in our implementations of
`PathBase.glob()`, `PathBase.walk()` and `PathBase.copy()`, the last of
which also provides the implementation of `Path.copy()`, resulting in a
modest speedup when copying local directory trees.
  • Loading branch information
barneygale committed Oct 13, 2024
1 parent cb8e599 commit 3a3780d
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 35 deletions.
23 changes: 23 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,29 @@ Reading directories
raised.


.. attribute:: Path.dir_entry

In a path yielded from :meth:`Path.iterdir`, this attribute stores an
:class:`os.DirEntry` object corresponding to the path; in other cases it is
``None``. This can be used to retrieve the file type and attributes of
directory children without necessarily incurring further system calls::

>>> p = Path('docs')
>>> for child in p.iterdir():
... entry = child.dir_entry
... if entry.is_dir():
... child
...
PosixPath('docs/_templates')
PosixPath('docs/_build')
PosixPath('docs/_static')

For technical reasons, this attribute is also available from
:class:`PurePath` objects, where its value is always ``None``.

.. versionadded:: 3.14


.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)

Glob the given relative *pattern* in the directory represented by this path,
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,12 @@ pathlib

(Contributed by Barney Gale in :gh:`73991`.)

* Add a :attr:`.Path.dir_entry` attribute. In a path object generated by
:meth:`.Path.iterdir`, it stores an :class:`os.DirEntry` object corresponding
to the path; in other cases it is ``None``.

(Contributed by Barney Gale in :gh:`125413`.)


pdb
---
Expand Down
13 changes: 4 additions & 9 deletions Lib/glob.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,12 +364,6 @@ def concat_path(path, text):
"""
raise NotImplementedError

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir().
"""
raise NotImplementedError

# High-level methods

def compile(self, pat):
Expand Down Expand Up @@ -438,6 +432,7 @@ def select_wildcard(path, exists=False):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
if dir_only:
Expand All @@ -446,7 +441,7 @@ def select_wildcard(path, exists=False):
continue
except OSError:
continue
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
Expand Down Expand Up @@ -495,6 +490,7 @@ def select_recursive_step(stack, match_pos):
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
is_dir = False
try:
Expand All @@ -504,7 +500,7 @@ def select_recursive_step(stack, match_pos):
pass

if is_dir or not dir_only:
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
Expand Down Expand Up @@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
parse_entry = operator.attrgetter('path')
concat_path = operator.add

if os.name == 'nt':
Expand Down
33 changes: 18 additions & 15 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,18 +101,13 @@ def scandir(path):
a context manager. This method is called by walk() and glob().
"""
import contextlib
return contextlib.nullcontext(path.iterdir())
return contextlib.nullcontext(child.dir_entry for child in path.iterdir())

@staticmethod
def concat_path(path, text):
"""Appends text to the given path."""
return path.with_segments(path._raw_path + text)

@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir()."""
return entry


class PurePathBase:
"""Base class for pure path objects.
Expand All @@ -132,6 +127,12 @@ class PurePathBase:
# is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',

# The 'dir_entry' slot stores an `os.DirEntry`-like object or `None`.
# It is available for paths generated from `PathBase.iterdir()`. It is
# defined here rather than in `PathBase` to avoid a class layout
# conflict in `Path`.
'dir_entry',
)
parser = ParserBase()
_globber = PathGlobber
Expand All @@ -142,6 +143,7 @@ def __init__(self, path, *paths):
raise TypeError(
f"path should be a str, not {type(self._raw_path).__name__!r}")
self._resolving = False
self.dir_entry = None

def with_segments(self, *pathsegments):
"""Construct a new path object from any number of path-like objects.
Expand Down Expand Up @@ -696,15 +698,16 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
paths.append((path, dirnames, filenames))
try:
for child in path.iterdir():
entry = child.dir_entry
try:
if child.is_dir(follow_symlinks=follow_symlinks):
if entry.is_dir(follow_symlinks=follow_symlinks):
if not top_down:
paths.append(child)
dirnames.append(child.name)
dirnames.append(entry.name)
else:
filenames.append(child.name)
filenames.append(entry.name)
except OSError:
filenames.append(child.name)
filenames.append(entry.name)
except OSError as error:
if on_error is not None:
on_error(error)
Expand Down Expand Up @@ -872,17 +875,17 @@ def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False,
if not isinstance(target, PathBase):
target = self.with_segments(target)
self._ensure_distinct_path(target)
stack = [(self, target)]
stack = [(self, self, target)]
while stack:
src, dst = stack.pop()
if not follow_symlinks and src.is_symlink():
entry, src, dst = stack.pop()
if not follow_symlinks and entry.is_symlink():
dst._symlink_to_target_of(src)
if preserve_metadata:
src._copy_metadata(dst, follow_symlinks=False)
elif src.is_dir():
elif entry.is_dir():
children = src.iterdir()
dst.mkdir(exist_ok=dirs_exist_ok)
stack.extend((child, dst.joinpath(child.name))
stack.extend((child.dir_entry, child, dst.joinpath(child.name))
for child in children)
if preserve_metadata:
src._copy_metadata(dst)
Expand Down
12 changes: 7 additions & 5 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def __init__(self, *args):
paths.append(path)
# Avoid calling super().__init__, as an optimisation
self._raw_paths = paths
self.dir_entry = None

def joinpath(self, *pathsegments):
"""Combine this path with one or several arguments, and return a
Expand Down Expand Up @@ -622,11 +623,12 @@ def iterdir(self):
special entries '.' and '..' are not included.
"""
root_dir = str(self)
with os.scandir(root_dir) as scandir_it:
paths = [entry.path for entry in scandir_it]
if root_dir == '.':
paths = map(self._remove_leading_dot, paths)
return map(self._from_parsed_string, paths)
str_attr = 'name' if root_dir == '.' else 'path'
def parse(entry):
path = self._from_parsed_string(getattr(entry, str_attr))
path.dir_entry = entry
return path
return map(parse, list(os.scandir(root_dir)))

def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
"""Iterate over this subtree and yield all existing files (of any
Expand Down
50 changes: 44 additions & 6 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,21 @@ def close(self):
'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime')


class DummyDirEntry:
    """Lightweight stand-in for a directory entry used by the dummy path tests.

    It records an entry's name together with two precomputed flags and
    answers ``is_symlink()`` / ``is_dir()`` queries from those flags alone.
    """
    __slots__ = ('name', '_is_symlink', '_is_dir')

    def __init__(self, name, is_symlink, is_dir):
        """Store the entry *name* and its symlink/directory flags."""
        self.name, self._is_symlink, self._is_dir = name, is_symlink, is_dir

    def is_symlink(self):
        """Return the stored symlink flag."""
        return self._is_symlink

    def is_dir(self, *, follow_symlinks=True):
        """Return whether the entry counts as a directory.

        An entry flagged as a directory is reported as one unless it is also
        flagged as a symlink and *follow_symlinks* is false.
        """
        if not self._is_dir:
            # Preserve the stored (falsy) flag as the result.
            return self._is_dir
        return follow_symlinks or not self._is_symlink


class DummyPath(PathBase):
"""
Simple implementation of PathBase that keeps files and directories in
Expand Down Expand Up @@ -1492,13 +1507,22 @@ def open(self, mode='r', buffering=-1, encoding=None,
return stream

def iterdir(self):
path = str(self.resolve())
if path in self._files:
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
elif path in self._directories:
return iter([self / name for name in self._directories[path]])
path = self.resolve()
path_str = str(path)
if path_str in self._files:
raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str)
elif path_str in self._directories:
return iter([self._make_dir_child(path, name) for name in self._directories[path_str]])
else:
raise FileNotFoundError(errno.ENOENT, "File not found", path)
raise FileNotFoundError(errno.ENOENT, "File not found", path_str)

def _make_dir_child(self, resolved_self, name):
    """Build the child path *name* of this directory with a dir entry attached.

    *resolved_self* is the already-resolved form of this path; its joined
    string is what gets looked up in ``self._symlinks`` and
    ``self._directories``.  The returned child is joined onto ``self`` (not
    onto *resolved_self*) and carries a ``DummyDirEntry`` in ``dir_entry``.
    """
    child = self.joinpath(name)
    lookup_key = str(resolved_self.joinpath(name))
    if lookup_key in self._symlinks:
        # For a symlink the directory flag is delegated to the child's own
        # is_dir() rather than taken from the directory table.
        symlink_flag = True
        dir_flag = child.is_dir()
    else:
        symlink_flag = False
        dir_flag = lookup_key in self._directories
    child.dir_entry = DummyDirEntry(name, symlink_flag, dir_flag)
    return child

def mkdir(self, mode=0o777, parents=False, exist_ok=False):
path = str(self.parent.resolve() / self.name)
Expand Down Expand Up @@ -2187,6 +2211,20 @@ def test_iterdir_nodir(self):
self.assertIn(cm.exception.errno, (errno.ENOTDIR,
errno.ENOENT, errno.EINVAL))

def test_dir_entry(self):
    """Check that iterdir() children expose a consistent ``dir_entry``.

    A directly constructed path must have ``dir_entry`` set to ``None``;
    every child yielded by ``iterdir()`` must carry an entry whose name and
    type queries agree with the child's own methods.
    """
    root = self.cls(self.base)
    self.assertIsNone(root.dir_entry)
    for item in root.iterdir():
        dir_entry = item.dir_entry
        self.assertIsNotNone(dir_entry)
        self.assertEqual(dir_entry.name, item.name)
        self.assertEqual(dir_entry.is_symlink(), item.is_symlink())
        self.assertEqual(
            dir_entry.is_dir(follow_symlinks=False),
            item.is_dir(follow_symlinks=False),
        )
        if dir_entry.name == 'brokenLinkLoop':
            # NOTE(review): the symlink-following comparison is skipped for
            # this fixture; presumably following the looping link is not
            # comparable between the two APIs -- confirm against the fixture.
            continue
        self.assertEqual(dir_entry.is_dir(), item.is_dir())

def test_glob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(self.base, q) for q in expected })
Expand Down

0 comments on commit 3a3780d

Please sign in to comment.