With some recent changes to `pathlib.PurePath.match()`, it’s become possible to write a variant method that returns the matching path segments, rather than `True`, on successful match. Consider:
>>> from pathlib import PurePath
>>> path = PurePath('/home/barney/cpython/Lib/test/test_pathlib.py')
>>> path.destructure('**/*')
('/home/barney/cpython/Lib/test', 'test_pathlib.py')
>>> path.destructure('**/*', keep_ends=True)
('/home/barney/cpython/Lib/test/', 'test_pathlib.py')
>>> path.destructure('/home/*/**/cpython/**/*.py', keep_ends=True)
('/', 'home/', 'barney/', '', 'cpython/', 'Lib/test/', 'test_pathlib.py')
I think this could be useful for pulling information out of paths, but I’m not sure. Any opinions on this potential feature?
Patch (click to expand...)
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 62406473b6..d51bbc7808 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -145,7 +145,7 @@ def _compile_pattern_lines(pattern_lines, case_sensitive):
# path separators, because the '.' characters in the pattern will
# not match newlines.
part = fnmatch.translate(part)[_FNMATCH_SLICE]
- parts.append(part)
+ parts.append(f'({part})')
# Match the end of the path, always.
parts.append(r'\Z')
flags = re.MULTILINE
@@ -785,6 +785,25 @@ def match(self, path_pattern, *, case_sensitive=None):
else:
raise ValueError("empty pattern")
+ def destructure(self, path_pattern, *, case_sensitive=None, keep_ends=False):
+ if not isinstance(path_pattern, PurePath):
+ path_pattern = self.with_segments(path_pattern)
+ if case_sensitive is None:
+ case_sensitive = _is_case_sensitive(self._flavour)
+ pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
+ match = pattern.match(self._lines)
+ if not match:
+ return None
+ sep = self._flavour.sep
+ trans = _SWAP_SEP_AND_NEWLINE[sep]
+ groups = []
+ for group in match.groups():
+ group = group.translate(trans)
+ if not keep_ends:
+ group = group.rstrip(sep) or group
+ groups.append(group)
+ return tuple(groups)
+
# Subclassing os.PathLike makes isinstance() checks slower,
# which in turn makes Path construction slower. Register instead!