From 440d6e82f499c9a3ff7bbd9a1ea384836cea6852 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 19 Dec 2023 18:29:46 +0000 Subject: [PATCH 1/2] GH-110109: Drop use of new regex features in `pathlib._abc`. A regex group with a `?+` possessive quantifier was used to match empty paths, which were represented as a single dot, preventing the dot from being matched by other wildcards. This quantifier is only available from Python 3.11+, but pathlib's `_abc.py` file will be made available as a PyPI package for Python 3.8+. This commit adds a new private `_pattern_str` property that works like `__str__()` but represents empty paths as `''` rather than `'.'`. This string is used for pattern matching, which removes the need for the possessive group. Improves compatibility with older Python; no other change of behaviour. --- Lib/pathlib/_abc.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 4808d0e61f7038..33946aad74730b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -63,9 +63,6 @@ def _compile_pattern(pat, sep, case_sensitive): flags = re.NOFLAG if case_sensitive else re.IGNORECASE regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep) - # The string representation of an empty path is a single dot ('.'). Empty - # paths shouldn't match wildcards, so we consume it with an atomic group. - regex = r'(\.\Z)?+' + regex return re.compile(regex, flags=flags).match @@ -277,6 +274,14 @@ def __str__(self): self._tail) or '.' return self._str + @property + def _pattern_str(self): + """A string representation of the path, suitable for pattern matching. + This differs from __str__() by representing empty paths as the empty + string '', rather than a dot '.' character.""" + path_str = str(self) + return '' if path_str == '.' else path_str + def as_posix(self): """Return the string representation of the path with forward (/) slashes.""" @@ -528,7 +533,7 @@ def match(self, path_pattern, *, case_sensitive=None): else: raise ValueError("empty pattern") match = _compile_pattern(pattern_str, sep, case_sensitive) - return match(str(self)) is not None + return match(self._pattern_str) is not None @@ -882,9 +887,9 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): paths = _select_recursive(paths, dir_only, follow_symlinks) # Filter out paths that don't match pattern. - prefix_len = len(str(self._make_child_relpath('_'))) - 1 + prefix_len = len(self._make_child_relpath('_')._pattern_str) - 1 match = _compile_pattern(str(path_pattern), sep, case_sensitive) - paths = (path for path in paths if match(str(path), prefix_len)) + paths = (path for path in paths if match(path._pattern_str, prefix_len)) return paths dir_only = part_idx < len(pattern_parts) From 55a75cb9d734feaa1c45cad887c969c8c5be8c15 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 19 Dec 2023 18:38:16 +0000 Subject: [PATCH 2/2] Drop re.NOFLAG while we're in the area (also 3.11+) --- Lib/pathlib/_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 33946aad74730b..56030f8cf9de54 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -61,7 +61,7 @@ def _compile_pattern(pat, sep, case_sensitive): if re is None: import re, glob - flags = re.NOFLAG if case_sensitive else re.IGNORECASE + flags = 0 if case_sensitive else re.IGNORECASE regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep) return re.compile(regex, flags=flags).match