diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index a673b81278ea74..459f6b972129ff 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -350,11 +350,18 @@ the :mod:`glob` module.) .. function:: realpath(path) Return the canonical path of the specified filename, eliminating any symbolic - links encountered in the path (if they are supported by the operating system). + links encountered in the path (if they are supported by the operating + system). + + .. note:: + When symbolic link cycles occur, the returned path will be one member of + the cycle, but no guarantee is made about which member that will be. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. + .. versionchanged:: 3.8 + Symbolic links are now resolved on Windows. .. function:: relpath(path, start=os.curdir) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index c74d687f08fb0a..7ae3d328d5be10 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1858,6 +1858,9 @@ features: .. versionchanged:: 3.6 Accepts a :term:`path-like object` for *src* and *dst*. + .. versionchanged:: 3.8 + On Windows, now opens reparse points that represent another file + (name surrogates). .. function:: mkdir(path, mode=0o777, *, dir_fd=None) @@ -2053,6 +2056,11 @@ features: .. versionchanged:: 3.8 Accepts a :term:`path-like object` and a bytes object on Windows. + .. versionchanged:: 3.8 + Added support for directory junctions, and changed to return the + substitution path (which typically includes ``\\?\`` prefix) rather than + the optional "print name" field that was previously returned. + .. function:: remove(path, *, dir_fd=None) Remove (delete) the file *path*. If *path* is a directory, an @@ -2358,6 +2366,13 @@ features: This method can raise :exc:`OSError`, such as :exc:`PermissionError`, but :exc:`FileNotFoundError` is caught and not raised. + .. versionchanged:: 3.8 + On Windows, now returns ``True`` for directory junctions as well as + symlinks. 
To determine whether the entry is actually a symlink to a + directory or a directory junction, compare + ``entry.stat(follow_symlinks=False).st_reparse_tag`` against + ``stat.IO_REPARSE_TAG_SYMLINK`` or ``stat.IO_REPARSE_TAG_MOUNT_POINT``. + .. method:: stat(\*, follow_symlinks=True) Return a :class:`stat_result` object for this entry. This method @@ -2403,6 +2418,16 @@ features: This function can support :ref:`specifying a file descriptor ` and :ref:`not following symlinks `. + On Windows, passing ``follow_symlinks=False`` will disable following all + types of reparse points, including directory junctions. Otherwise, if the + operating system is unable to follow a reparse point (for example, when it + is a custom reparse point type with no filesystem support), the stat result + for the original link is returned as if ``follow_symlinks=False`` had been + specified. To obtain stat results for the final path in this case, use the + :func:`os.path.realpath` function to resolve the path name as far as + possible and call :func:`lstat` on the result. This does not apply to + dangling symlinks or junction points, which will raise the usual exceptions. + .. index:: module: stat Example:: @@ -2427,6 +2452,14 @@ features: .. versionchanged:: 3.6 Accepts a :term:`path-like object`. + .. versionchanged:: 3.8 + On Windows, all reparse points that can be resolved by the operating + system are now followed, and passing ``follow_symlinks=False`` + disables following all name surrogate reparse points. If the operating + system reaches a reparse point that it is not able to follow, *stat* now + returns the information for the original path as if + ``follow_symlinks=False`` had been specified instead of raising an error. + .. class:: stat_result @@ -2578,7 +2611,7 @@ features: File type. - On Windows systems, the following attribute is also available: + On Windows systems, the following attributes are also available: .. 
attribute:: st_file_attributes @@ -2587,6 +2620,12 @@ features: :c:func:`GetFileInformationByHandle`. See the ``FILE_ATTRIBUTE_*`` constants in the :mod:`stat` module. + .. attribute:: st_reparse_tag + + When :attr:`st_file_attributes` has the ``FILE_ATTRIBUTE_REPARSE_POINT`` + set, this field contains the tag identifying the type of reparse point. + See the ``IO_REPARSE_TAG_*`` constants in the :mod:`stat` module. + The standard module :mod:`stat` defines functions and constants that are useful for extracting information from a :c:type:`stat` structure. (On Windows, some items are filled with dummy values.) @@ -2614,6 +2653,13 @@ features: .. versionadded:: 3.7 Added the :attr:`st_fstype` member to Solaris/derivatives. + .. versionadded:: 3.8 + Added the :attr:`st_reparse_tag` member on Windows. + + .. versionchanged:: 3.8 + On Windows, the :attr:`st_mode` member now identifies directory + junctions as links instead of directories. + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path. The return value is diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 33fac5f6972583..c317f67bf65ad5 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -807,6 +807,13 @@ call fails (for example because the path doesn't exist). ``False`` is also returned if the path doesn't exist; other errors (such as permission errors) are propagated. + .. versionchanged:: 3.8 + On Windows, now returns ``True`` for directory junctions as well as + symlinks. To determine whether the path is actually a symlink to a + directory or a directory junction, compare ``Path.lstat().st_reparse_tag`` + against ``stat.IO_REPARSE_TAG_SYMLINK`` or + ``stat.IO_REPARSE_TAG_MOUNT_POINT``. + .. 
method:: Path.is_socket() diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index eaeee8df81b8e6..a51e068ebd593a 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -304,6 +304,10 @@ Directory and files operations Added a symlink attack resistant version that is used automatically if platform supports fd-based functions. + .. versionchanged:: 3.8 + On Windows, will no longer delete the contents of a directory junction + before removing the junction. + .. attribute:: rmtree.avoids_symlink_attacks Indicates whether the current platform and implementation provides a diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst index c8f6904f9b1f7a..f48a0a9faa6b0c 100644 --- a/Doc/library/stat.rst +++ b/Doc/library/stat.rst @@ -425,3 +425,13 @@ for more detail on the meaning of these constants. FILE_ATTRIBUTE_VIRTUAL .. versionadded:: 3.5 + +On Windows, the following constants are available for comparing against the +``st_reparse_tag`` member returned by :func:`os.lstat`. These are well-known +constants, but are not an exhaustive list. + +.. data:: IO_REPARSE_TAG_SYMLINK + IO_REPARSE_TAG_MOUNT_POINT + IO_REPARSE_TAG_APPEXECLINK + + .. versionadded:: 3.8 diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index e8238251d6ea25..dc2745baada108 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -808,6 +808,19 @@ A new :func:`os.memfd_create` function was added to wrap the ``memfd_create()`` syscall. (Contributed by Zackery Spytz and Christian Heimes in :issue:`26836`.) +On Windows, much of the manual logic for handling reparse points (symlinks) +has been delegated to the operating system. Specifically, :func:`os.stat` +will now traverse anything supported by the operating system, while +:func:`os.lstat` will not traverse anything. The stat result now includes +:attr:`stat_result.st_reparse_tag` for reparse points, and :func:`os.readlink` +is now able to read directory junctions. 
+ +Directory results from :func:`os.scandir` on Windows will now return true for +both :meth:`os.DirEntry.is_symlink` and :meth:`os.DirEntry.is_dir` when the +entry is a directory junction (this would already happen for symbolic links +to directories). To distinguish a directory junction from a symlink, use +``stat(follow_symlinks=False).st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT``. + os.path ------- @@ -824,6 +837,12 @@ characters or bytes unrepresentable at the OS level. environment variable and does not use :envvar:`HOME`, which is not normally set for regular user accounts. +:func:`~os.path.isdir` on Windows no longer returns true for a link to a +non-existent directory. + +:func:`~os.path.realpath` on Windows now resolves reparse points, including +symlinks and directory junctions. + ncurses ------- @@ -909,6 +928,9 @@ format for new archives to improve portability and standards conformance, inherited from the corresponding change to the :mod:`tarfile` module. (Contributed by C.A.M. Gerlach in :issue:`30661`.) +:func:`shutil.rmtree` on Windows now removes directory junctions without +removing their contents first. + ssl --- diff --git a/Include/fileutils.h b/Include/fileutils.h index f081779f8aac95..a9655bbf9a5f05 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -84,6 +84,7 @@ struct _Py_stat_struct { time_t st_ctime; int st_ctime_nsec; unsigned long st_file_attributes; + unsigned long st_reparse_tag; }; #else # define _Py_stat_struct stat diff --git a/Lib/ntpath.py b/Lib/ntpath.py index f3cfabf0238ef5..8cc95d8f9f8ab1 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -519,8 +519,87 @@ def abspath(path): except (OSError, ValueError): return _abspath_fallback(path) -# realpath is a no-op on systems without islink support -realpath = abspath +try: + from nt import _getfinalpathname, readlink as _nt_readlink +except ImportError: + # realpath is a no-op on systems without _getfinalpathname support. 
+ realpath = abspath +else: + def _readlink_deep(path, seen=None): + if seen is None: + seen = set() + + while normcase(path) not in seen: + seen.add(normcase(path)) + try: + path = _nt_readlink(path) + except OSError as ex: + # Stop on file (2) or directory (3) not found, or + # paths that are not reparse points (4390) + if ex.winerror in (2, 3, 4390): + break + raise + except ValueError: + # Stop on reparse points that are not symlinks + break + return path + + def _getfinalpathname_nonstrict(path): + # Allow file (2) or directory (3) not found, invalid syntax (123), + # and symlinks that cannot be followed (1921) + allowed_winerror = 2, 3, 123, 1921 + + # Non-strict algorithm is to find as much of the target directory + # as we can and join the rest. + tail = '' + seen = set() + while path: + try: + path = _readlink_deep(path, seen) + path = _getfinalpathname(path) + return join(path, tail) if tail else path + except OSError as ex: + if ex.winerror not in allowed_winerror: + raise + path, name = split(path) + if path and not name: + return abspath(path + tail) + tail = join(name, tail) if tail else name + return abspath(tail) + + def realpath(path): + path = os.fspath(path) + if isinstance(path, bytes): + prefix = b'\\\\?\\' + unc_prefix = b'\\\\?\\UNC\\' + new_unc_prefix = b'\\\\' + cwd = os.getcwdb() + else: + prefix = '\\\\?\\' + unc_prefix = '\\\\?\\UNC\\' + new_unc_prefix = '\\\\' + cwd = os.getcwd() + had_prefix = path.startswith(prefix) + path = _getfinalpathname_nonstrict(path) + # The path returned by _getfinalpathname will always start with \\?\ - + # strip off that prefix unless it was already provided on the original + # path. + if not had_prefix and path.startswith(prefix): + # For UNC paths, the prefix will actually be \\?\UNC\ + # Handle that case as well. 
+ if path.startswith(unc_prefix): + spath = new_unc_prefix + path[len(unc_prefix):] + else: + spath = path[len(prefix):] + # Ensure that the non-prefixed path resolves to the same path + try: + if _getfinalpathname(spath) == path: + path = spath + except OSError as ex: + pass + return path + + # Win9x family and earlier have no Unicode filename support. supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and sys.getwindowsversion()[3] >= 2) @@ -633,23 +712,6 @@ def commonpath(paths): raise -# determine if two files are in fact the same file -try: - # GetFinalPathNameByHandle is available starting with Windows 6.0. - # Windows XP and non-Windows OS'es will mock _getfinalpathname. - if sys.getwindowsversion()[:2] >= (6, 0): - from nt import _getfinalpathname - else: - raise ImportError -except (AttributeError, ImportError): - # On Windows XP and earlier, two files are the same if their absolute - # pathnames are the same. - # Non-Windows operating systems fake this method with an XP - # approximation. - def _getfinalpathname(f): - return normcase(abspath(f)) - - try: # The genericpath.isdir implementation uses os.stat and checks the mode # attribute to tell whether or not the path is a directory. diff --git a/Lib/shutil.py b/Lib/shutil.py index ab1a7d61897ac0..39f793b5f3bde5 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -452,7 +452,14 @@ def _copytree(entries, src, dst, symlinks, ignore, copy_function, dstname = os.path.join(dst, srcentry.name) srcobj = srcentry if use_srcentry else srcname try: - if srcentry.is_symlink(): + is_symlink = srcentry.is_symlink() + if is_symlink and os.name == 'nt': + # Special check for directory junctions, which appear as + # symlinks but we want to recurse. 
+ lstat = srcentry.stat(follow_symlinks=False) + if lstat.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT: + is_symlink = False + if is_symlink: linkto = os.readlink(srcname) if symlinks: # We can't just leave it to `copy_function` because legacy @@ -537,6 +544,37 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) +if hasattr(stat, 'FILE_ATTRIBUTE_REPARSE_POINT'): + # Special handling for directory junctions to make them behave like + # symlinks for shutil.rmtree, since in general they do not appear as + # regular links. + def _rmtree_isdir(entry): + try: + st = entry.stat(follow_symlinks=False) + return (stat.S_ISDIR(st.st_mode) and not + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + except OSError: + return False + + def _rmtree_islink(path): + try: + st = os.lstat(path) + return (stat.S_ISLNK(st.st_mode) or + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + except OSError: + return False +else: + def _rmtree_isdir(entry): + try: + return entry.is_dir(follow_symlinks=False) + except OSError: + return False + + def _rmtree_islink(path): + return os.path.islink(path) + # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): try: @@ -547,11 +585,7 @@ def _rmtree_unsafe(path, onerror): entries = [] for entry in entries: fullname = entry.path - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - if is_dir: + if _rmtree_isdir(entry): try: if entry.is_symlink(): # This can only happen if someone replaces @@ -681,7 +715,7 @@ def onerror(*args): os.close(fd) else: try: - if os.path.islink(path): + if _rmtree_islink(path): # symlinks to directories are forbidden, see bug #1669 raise OSError("Cannot call rmtree on a symbolic link") except OSError: diff --git 
a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 92d85ecbc4fcfe..74dc8c378e2746 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -7,6 +7,7 @@ from test import support, test_genericpath from tempfile import TemporaryFile + try: import nt except ImportError: @@ -14,6 +15,14 @@ # but for those that require it we import here. nt = None +try: + ntpath._getfinalpathname +except AttributeError: + HAVE_GETFINALPATHNAME = False +else: + HAVE_GETFINALPATHNAME = True + + def tester(fn, wantResult): fn = fn.replace("\\", "\\\\") gotResult = eval(fn) @@ -194,6 +203,189 @@ def test_normpath(self): tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL') tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z') + def test_realpath_curdir(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.')", expected) + tester("ntpath.realpath('./.')", expected) + tester("ntpath.realpath('/'.join(['.'] * 100))", expected) + tester("ntpath.realpath('.\\.')", expected) + tester("ntpath.realpath('\\'.join(['.'] * 100))", expected) + + def test_realpath_pardir(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..')", ntpath.dirname(expected)) + tester("ntpath.realpath('../..')", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50))", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..')", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50))", + ntpath.splitdrive(expected)[0] + '\\') + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_basic(self): + ABSTFN = ntpath.abspath(support.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ABSTFN + "1") + self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + 
self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), + os.fsencode(ABSTFN)) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_relative(self): + ABSTFN = ntpath.abspath(support.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1")) + self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_broken_symlinks(self): + ABSTFN = ntpath.abspath(support.TESTFN) + os.mkdir(ABSTFN) + self.addCleanup(support.rmtree, ABSTFN) + + with support.change_cwd(ABSTFN): + os.mkdir("subdir") + os.chdir("subdir") + os.symlink(".", "recursive") + os.symlink("..", "parent") + os.chdir("..") + os.symlink(".", "self") + os.symlink("missing", "broken") + os.symlink(r"broken\bar", "broken1") + os.symlink(r"self\self\broken", "broken2") + os.symlink(r"subdir\parent\subdir\parent\broken", "broken3") + os.symlink(ABSTFN + r"\broken", "broken4") + os.symlink(r"recursive\..\broken", "broken5") + + self.assertEqual(ntpath.realpath("broken"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath(r"broken\foo"), + ABSTFN + r"\missing\foo") + self.assertEqual(ntpath.realpath(r"broken1"), + ABSTFN + r"\missing\bar") + self.assertEqual(ntpath.realpath(r"broken1\baz"), + ABSTFN + r"\missing\bar\baz") + self.assertEqual(ntpath.realpath("broken2"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken3"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken4"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken5"), + ABSTFN + r"\missing") + + self.assertEqual(ntpath.realpath(b"broken"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(rb"broken\foo"), + os.fsencode(ABSTFN + r"\missing\foo")) + 
self.assertEqual(ntpath.realpath(rb"broken1"), + os.fsencode(ABSTFN + r"\missing\bar")) + self.assertEqual(ntpath.realpath(rb"broken1\baz"), + os.fsencode(ABSTFN + r"\missing\bar\baz")) + self.assertEqual(ntpath.realpath(b"broken2"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(rb"broken3"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(b"broken4"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(b"broken5"), + os.fsencode(ABSTFN + r"\missing")) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_symlink_loops(self): + # Bug #930024, return the path unchanged if we get into an infinite + # symlink loop. + ABSTFN = ntpath.abspath(support.TESTFN) + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + self.addCleanup(support.unlink, ABSTFN + "2") + self.addCleanup(support.unlink, ABSTFN + "y") + self.addCleanup(support.unlink, ABSTFN + "c") + self.addCleanup(support.unlink, ABSTFN + "a") + + P = "\\\\?\\" + + os.symlink(ABSTFN, ABSTFN) + self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN) + + # cycles are non-deterministic as to which path is returned, but + # it will always be the fully resolved path of one member of the cycle + os.symlink(ABSTFN + "1", ABSTFN + "2") + os.symlink(ABSTFN + "2", ABSTFN + "1") + expected = (P + ABSTFN + "1", P + ABSTFN + "2") + self.assertIn(ntpath.realpath(ABSTFN + "1"), expected) + self.assertIn(ntpath.realpath(ABSTFN + "2"), expected) + + self.assertIn(ntpath.realpath(ABSTFN + "1\\x"), + (ntpath.join(r, "x") for r in expected)) + self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."), + ntpath.dirname(ABSTFN)) + self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"), + ntpath.dirname(P + ABSTFN) + "\\x") + os.symlink(ABSTFN + "x", ABSTFN + "y") + self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "y"), + P + ABSTFN + "x") 
+ self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "1"), + expected) + + os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") + self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a") + + os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) + + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") + self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c") + + # Test using relative path as well. + self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_symlink_prefix(self): + ABSTFN = ntpath.abspath(support.TESTFN) + self.addCleanup(support.unlink, ABSTFN + "3") + self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.") + self.addCleanup(support.unlink, ABSTFN + "3link") + self.addCleanup(support.unlink, ABSTFN + "3.link") + + with open(ABSTFN + "3", "wb") as f: + f.write(b'0') + os.symlink(ABSTFN + "3", ABSTFN + "3link") + + with open("\\\\?\\" + ABSTFN + "3.", "wb") as f: + f.write(b'1') + os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link") + + self.assertEqual(ntpath.realpath(ABSTFN + "3link"), + ABSTFN + "3") + self.assertEqual(ntpath.realpath(ABSTFN + "3.link"), + "\\\\?\\" + ABSTFN + "3.") + + # Resolved paths should be usable to open target files + with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f: + self.assertEqual(f.read(), b'0') + with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f: + self.assertEqual(f.read(), b'1') + + # When the prefix is included, it is not stripped + self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"), + "\\\\?\\" + ABSTFN + "3") + self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"), + "\\\\?\\" + ABSTFN + "3.") + def test_expandvars(self): with support.EnvironmentVarGuard() as env: env.clear() @@ -288,11 +480,11 @@ def test_abspath(self): def test_relpath(self): tester('ntpath.relpath("a")', 'a') 
- tester('ntpath.relpath(os.path.abspath("a"))', 'a') + tester('ntpath.relpath(ntpath.abspath("a"))', 'a') tester('ntpath.relpath("a/b")', 'a\\b') tester('ntpath.relpath("../a/b")', '..\\a\\b') with support.temp_cwd(support.TESTFN) as cwd_dir: - currentdir = os.path.basename(cwd_dir) + currentdir = ntpath.basename(cwd_dir) tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a') tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b') tester('ntpath.relpath("a", "b/c")', '..\\..\\a') @@ -417,7 +609,7 @@ def test_ismount(self): # locations below cannot then refer to mount points # drive, path = ntpath.splitdrive(sys.executable) - with support.change_cwd(os.path.dirname(sys.executable)): + with support.change_cwd(ntpath.dirname(sys.executable)): self.assertFalse(ntpath.ismount(drive.lower())) self.assertFalse(ntpath.ismount(drive.upper())) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index b2cd4cca5f21fe..ba9f5c35ae3188 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -8,6 +8,7 @@ import contextlib import decimal import errno +import fnmatch import fractions import itertools import locale @@ -2253,6 +2254,20 @@ class ReadlinkTests(unittest.TestCase): filelinkb = os.fsencode(filelink) filelinkb_target = os.fsencode(filelink_target) + def assertPathEqual(self, left, right): + left = os.path.normcase(left) + right = os.path.normcase(right) + if sys.platform == 'win32': + # Bad practice to blindly strip the prefix as it may be required to + # correctly refer to the file, but we're only comparing paths here. 
+ has_prefix = lambda p: p.startswith( + b'\\\\?\\' if isinstance(p, bytes) else '\\\\?\\') + if has_prefix(left): + left = left[4:] + if has_prefix(right): + right = right[4:] + self.assertEqual(left, right) + def setUp(self): self.assertTrue(os.path.exists(self.filelink_target)) self.assertTrue(os.path.exists(self.filelinkb_target)) @@ -2274,14 +2289,14 @@ def test_pathlike(self): os.symlink(self.filelink_target, self.filelink) self.addCleanup(support.unlink, self.filelink) filelink = FakePath(self.filelink) - self.assertEqual(os.readlink(filelink), self.filelink_target) + self.assertPathEqual(os.readlink(filelink), self.filelink_target) @support.skip_unless_symlink def test_pathlike_bytes(self): os.symlink(self.filelinkb_target, self.filelinkb) self.addCleanup(support.unlink, self.filelinkb) path = os.readlink(FakePath(self.filelinkb)) - self.assertEqual(path, self.filelinkb_target) + self.assertPathEqual(path, self.filelinkb_target) self.assertIsInstance(path, bytes) @support.skip_unless_symlink @@ -2289,7 +2304,7 @@ def test_bytes(self): os.symlink(self.filelinkb_target, self.filelinkb) self.addCleanup(support.unlink, self.filelinkb) path = os.readlink(self.filelinkb) - self.assertEqual(path, self.filelinkb_target) + self.assertPathEqual(path, self.filelinkb_target) self.assertIsInstance(path, bytes) @@ -2348,16 +2363,12 @@ def test_remove_directory_link_to_missing_target(self): # was created with target_is_dir==True. 
os.remove(self.missing_link) - @unittest.skip("currently fails; consider for improvement") def test_isdir_on_directory_link_to_missing_target(self): self._create_missing_dir_link() - # consider having isdir return true for directory links - self.assertTrue(os.path.isdir(self.missing_link)) + self.assertFalse(os.path.isdir(self.missing_link)) - @unittest.skip("currently fails; consider for improvement") def test_rmdir_on_directory_link_to_missing_target(self): self._create_missing_dir_link() - # consider allowing rmdir to remove directory links os.rmdir(self.missing_link) def check_stat(self, link, target): @@ -2453,6 +2464,24 @@ def test_buffer_overflow(self): except OSError: pass + def test_appexeclink(self): + root = os.path.expandvars(r'%LOCALAPPDATA%\Microsoft\WindowsApps') + aliases = [os.path.join(root, a) + for a in fnmatch.filter(os.listdir(root), '*.exe')] + + for alias in aliases: + if support.verbose: + print() + print("Testing with", alias) + st = os.lstat(alias) + self.assertEqual(st, os.stat(alias)) + self.assertFalse(stat.S_ISLNK(st.st_mode)) + self.assertEqual(st.st_reparse_tag, stat.IO_REPARSE_TAG_APPEXECLINK) + # testing the first one we see is sufficient + break + else: + self.skipTest("test requires an app execution alias") + @unittest.skipUnless(sys.platform == "win32", "Win32 specific tests") class Win32JunctionTests(unittest.TestCase): junction = 'junctiontest' @@ -2460,25 +2489,29 @@ class Win32JunctionTests(unittest.TestCase): def setUp(self): assert os.path.exists(self.junction_target) - assert not os.path.exists(self.junction) + assert not os.path.lexists(self.junction) def tearDown(self): - if os.path.exists(self.junction): - # os.rmdir delegates to Windows' RemoveDirectoryW, - # which removes junction points safely. 
- os.rmdir(self.junction) + if os.path.lexists(self.junction): + os.unlink(self.junction) def test_create_junction(self): _winapi.CreateJunction(self.junction_target, self.junction) + self.assertTrue(os.path.lexists(self.junction)) self.assertTrue(os.path.exists(self.junction)) self.assertTrue(os.path.isdir(self.junction)) + self.assertNotEqual(os.stat(self.junction), os.lstat(self.junction)) + self.assertEqual(os.stat(self.junction), os.stat(self.junction_target)) - # Junctions are not recognized as links. + # bpo-37834: Junctions are not recognized as links. self.assertFalse(os.path.islink(self.junction)) + self.assertEqual(os.path.normcase("\\\\?\\" + self.junction_target), + os.path.normcase(os.readlink(self.junction))) def test_unlink_removes_junction(self): _winapi.CreateJunction(self.junction_target, self.junction) self.assertTrue(os.path.exists(self.junction)) + self.assertTrue(os.path.lexists(self.junction)) os.unlink(self.junction) self.assertFalse(os.path.exists(self.junction)) @@ -3358,10 +3391,7 @@ def test_oserror_filename(self): if hasattr(os, "lchmod"): funcs.append((self.filenames, os.lchmod, 0o777)) if hasattr(os, "readlink"): - if sys.platform == "win32": - funcs.append((self.unicode_filenames, os.readlink,)) - else: - funcs.append((self.filenames, os.readlink,)) + funcs.append((self.filenames, os.readlink,)) for filenames, func, *func_args in funcs: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index e209607f22c145..636e3bd9795593 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -42,6 +42,11 @@ except ImportError: UID_GID_SUPPORT = False +try: + import _winapi +except ImportError: + _winapi = None + def _fake_rename(*args, **kwargs): # Pretend the destination path is on a different filesystem. 
raise OSError(getattr(errno, 'EXDEV', 18), "Invalid cross-device link") @@ -226,6 +231,47 @@ def test_rmtree_works_on_symlinks(self): self.assertTrue(os.path.exists(dir3)) self.assertTrue(os.path.exists(file1)) + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_fails_on_junctions(self): + tmp = self.mkdtemp() + dir_ = os.path.join(tmp, 'dir') + os.mkdir(dir_) + link = os.path.join(tmp, 'link') + _winapi.CreateJunction(dir_, link) + self.assertRaises(OSError, shutil.rmtree, link) + self.assertTrue(os.path.exists(dir_)) + self.assertTrue(os.path.lexists(link)) + errors = [] + def onerror(*args): + errors.append(args) + shutil.rmtree(link, onerror=onerror) + self.assertEqual(len(errors), 1) + self.assertIs(errors[0][0], os.path.islink) + self.assertEqual(errors[0][1], link) + self.assertIsInstance(errors[0][2][1], OSError) + + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_works_on_junctions(self): + tmp = self.mkdtemp() + dir1 = os.path.join(tmp, 'dir1') + dir2 = os.path.join(dir1, 'dir2') + dir3 = os.path.join(tmp, 'dir3') + for d in dir1, dir2, dir3: + os.mkdir(d) + file1 = os.path.join(tmp, 'file1') + write_file(file1, 'foo') + link1 = os.path.join(dir1, 'link1') + _winapi.CreateJunction(dir2, link1) + link2 = os.path.join(dir1, 'link2') + _winapi.CreateJunction(dir3, link2) + link3 = os.path.join(dir1, 'link3') + _winapi.CreateJunction(file1, link3) + # make sure junctions are removed but not followed + shutil.rmtree(dir1) + self.assertFalse(os.path.exists(dir1)) + self.assertTrue(os.path.exists(dir3)) + self.assertTrue(os.path.exists(file1)) + def test_rmtree_errors(self): # filename is guaranteed not to exist filename = tempfile.mktemp() @@ -754,8 +800,12 @@ def test_copytree_symlinks(self): src_stat = os.lstat(src_link) shutil.copytree(src_dir, dst_dir, symlinks=True) self.assertTrue(os.path.islink(os.path.join(dst_dir, 'sub', 'link'))) - self.assertEqual(os.readlink(os.path.join(dst_dir, 'sub', 'link')), 
- os.path.join(src_dir, 'file.txt')) + actual = os.readlink(os.path.join(dst_dir, 'sub', 'link')) + # Bad practice to blindly strip the prefix as it may be required to + # correctly refer to the file, but we're only comparing paths here. + if os.name == 'nt' and actual.startswith('\\\\?\\'): + actual = actual[4:] + self.assertEqual(actual, os.path.join(src_dir, 'file.txt')) dst_stat = os.lstat(dst_link) if hasattr(os, 'lchmod'): self.assertEqual(dst_stat.st_mode, src_stat.st_mode) @@ -886,7 +936,6 @@ def custom_cpfun(a, b): shutil.copytree(src, dst, copy_function=custom_cpfun) self.assertEqual(len(flag), 1) - @unittest.skipIf(os.name == 'nt', 'temporarily disabled on Windows') @unittest.skipUnless(hasattr(os, 'link'), 'requires os.link') def test_dont_copy_file_onto_link_to_itself(self): # bug 851123. @@ -941,6 +990,20 @@ def test_rmtree_on_symlink(self): finally: shutil.rmtree(TESTFN, ignore_errors=True) + @unittest.skipUnless(_winapi, 'only relevant on Windows') + def test_rmtree_on_junction(self): + os.mkdir(TESTFN) + try: + src = os.path.join(TESTFN, 'cheese') + dst = os.path.join(TESTFN, 'shop') + os.mkdir(src) + open(os.path.join(src, 'spam'), 'wb').close() + _winapi.CreateJunction(src, dst) + self.assertRaises(OSError, shutil.rmtree, dst) + shutil.rmtree(dst, ignore_errors=True) + finally: + shutil.rmtree(TESTFN, ignore_errors=True) + # Issue #3002: copyfile and copytree block indefinitely on named pipes @unittest.skipUnless(hasattr(os, "mkfifo"), 'requires os.mkfifo()') def test_copyfile_named_pipe(self): @@ -1871,11 +1934,7 @@ def test_move_dangling_symlink(self): dst_link = os.path.join(self.dst_dir, 'quux') shutil.move(dst, dst_link) self.assertTrue(os.path.islink(dst_link)) - # On Windows, os.path.realpath does not follow symlinks (issue #9949) - if os.name == 'nt': - self.assertEqual(os.path.realpath(src), os.readlink(dst_link)) - else: - self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link)) + self.assertEqual(os.path.realpath(src), 
os.path.realpath(dst_link)) @support.skip_unless_symlink @mock_rename diff --git a/Lib/test/test_tools/test_lll.py b/Lib/test/test_tools/test_lll.py index f3fbe961eee55a..b01e2188e1cf20 100644 --- a/Lib/test/test_tools/test_lll.py +++ b/Lib/test/test_tools/test_lll.py @@ -1,6 +1,7 @@ """Tests for the lll script in the Tools/script directory.""" import os +import sys import tempfile from test import support from test.test_tools import skip_if_missing, import_tool @@ -26,12 +27,13 @@ def test_lll_multiple_dirs(self): with support.captured_stdout() as output: self.lll.main([dir1, dir2]) + prefix = '\\\\?\\' if os.name == 'nt' else '' self.assertEqual(output.getvalue(), f'{dir1}:\n' - f'symlink -> {fn1}\n' + f'symlink -> {prefix}{fn1}\n' f'\n' f'{dir2}:\n' - f'symlink -> {fn2}\n' + f'symlink -> {prefix}{fn2}\n' ) diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 9724d9ef57bce4..de93d9539c2cd6 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -394,11 +394,7 @@ def test_devnull(self): with open(os.devnull, "rb") as f: self.assertEqual(f.read(), b"") - # Issue #20541: os.path.exists('nul') is False on Windows - if os.devnull.lower() == 'nul': - self.assertFalse(os.path.exists(os.devnull)) - else: - self.assertTrue(os.path.exists(os.devnull)) + self.assertTrue(os.path.exists(os.devnull)) def do_test_with_pip(self, system_site_packages): rmtree(self.env_dir) diff --git a/Lib/unittest/test/test_discovery.py b/Lib/unittest/test/test_discovery.py index 204043b493b5d2..16e081e1fb76ec 100644 --- a/Lib/unittest/test/test_discovery.py +++ b/Lib/unittest/test/test_discovery.py @@ -723,11 +723,13 @@ class Module(object): original_listdir = os.listdir original_isfile = os.path.isfile original_isdir = os.path.isdir + original_realpath = os.path.realpath def cleanup(): os.listdir = original_listdir os.path.isfile = original_isfile os.path.isdir = original_isdir + os.path.realpath = original_realpath del sys.modules['foo'] if full_path in sys.path: 
sys.path.remove(full_path) @@ -742,6 +744,10 @@ def isdir(_): os.listdir = listdir os.path.isfile = isfile os.path.isdir = isdir + if os.name == 'nt': + # ntpath.realpath may inject path prefixes when failing to + # resolve real files, so we substitute abspath() here instead. + os.path.realpath = os.path.abspath return full_path def test_detect_module_clash(self): diff --git a/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst b/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst new file mode 100644 index 00000000000000..f2a654cac9118f --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2019-08-12-12-00-24.bpo-37834.VB2QVj.rst @@ -0,0 +1,2 @@ +Treat all name surrogate reparse points on Windows as links in :func:`os.lstat`, and +other reparse points as regular files in :func:`os.stat`. diff --git a/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst new file mode 100644 index 00000000000000..e42169a927c718 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst @@ -0,0 +1 @@ +Enable support for following symlinks in :func:`os.path.realpath`. 
diff --git a/Modules/_stat.c b/Modules/_stat.c index f6cb303500cd14..6a3020a00d1142 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -589,6 +589,13 @@ PyInit__stat(void) if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_SYSTEM)) return NULL; if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_TEMPORARY)) return NULL; if (PyModule_AddIntMacro(m, FILE_ATTRIBUTE_VIRTUAL)) return NULL; + + if (PyModule_AddObject(m, "IO_REPARSE_TAG_SYMLINK", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_SYMLINK))) return NULL; + if (PyModule_AddObject(m, "IO_REPARSE_TAG_MOUNT_POINT", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_MOUNT_POINT))) return NULL; + if (PyModule_AddObject(m, "IO_REPARSE_TAG_APPEXECLINK", + PyLong_FromUnsignedLong(IO_REPARSE_TAG_APPEXECLINK))) return NULL; #endif return m; diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index aa1ab79bd96a53..c4ebe3996aa731 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1272,19 +1272,6 @@ os__getfinalpathname(PyObject *module, PyObject *arg) #if defined(MS_WINDOWS) -PyDoc_STRVAR(os__isdir__doc__, -"_isdir($module, path, /)\n" -"--\n" -"\n" -"Return true if the pathname refers to an existing directory."); - -#define OS__ISDIR_METHODDEF \ - {"_isdir", (PyCFunction)os__isdir, METH_O, os__isdir__doc__}, - -#endif /* defined(MS_WINDOWS) */ - -#if defined(MS_WINDOWS) - PyDoc_STRVAR(os__getvolumepathname__doc__, "_getvolumepathname($module, /, path)\n" "--\n" @@ -8274,10 +8261,6 @@ os__remove_dll_directory(PyObject *module, PyObject *const *args, Py_ssize_t nar #define OS__GETFINALPATHNAME_METHODDEF #endif /* !defined(OS__GETFINALPATHNAME_METHODDEF) */ -#ifndef OS__ISDIR_METHODDEF - #define OS__ISDIR_METHODDEF -#endif /* !defined(OS__ISDIR_METHODDEF) */ - #ifndef OS__GETVOLUMEPATHNAME_METHODDEF #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ @@ -8741,4 +8724,4 @@ os__remove_dll_directory(PyObject *module, PyObject *const *args, Py_ssize_t nar 
#ifndef OS__REMOVE_DLL_DIRECTORY_METHODDEF #define OS__REMOVE_DLL_DIRECTORY_METHODDEF #endif /* !defined(OS__REMOVE_DLL_DIRECTORY_METHODDEF) */ -/*[clinic end generated code: output=1e001c855e011720 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b71eff00b91f5e43 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 4f8c074a671670..2302678ccc14ce 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1625,6 +1625,7 @@ win32_wchdir(LPCWSTR path) */ #define HAVE_STAT_NSEC 1 #define HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES 1 +#define HAVE_STRUCT_STAT_ST_REPARSE_TAG 1 static void find_data_to_file_info(WIN32_FIND_DATAW *pFileData, @@ -1658,136 +1659,178 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } -static BOOL -get_target_path(HANDLE hdl, wchar_t **target_path) -{ - int buf_size, result_length; - wchar_t *buf; - - /* We have a good handle to the target, use it to determine - the target path name (then we'll call lstat on it). 
*/ - buf_size = GetFinalPathNameByHandleW(hdl, 0, 0, - VOLUME_NAME_DOS); - if(!buf_size) - return FALSE; - - buf = (wchar_t *)PyMem_RawMalloc((buf_size + 1) * sizeof(wchar_t)); - if (!buf) { - SetLastError(ERROR_OUTOFMEMORY); - return FALSE; - } - - result_length = GetFinalPathNameByHandleW(hdl, - buf, buf_size, VOLUME_NAME_DOS); - - if(!result_length) { - PyMem_RawFree(buf); - return FALSE; - } - - buf[result_length] = 0; - - *target_path = buf; - return TRUE; -} - static int win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { - int code; - HANDLE hFile, hFile2; - BY_HANDLE_FILE_INFORMATION info; - ULONG reparse_tag = 0; - wchar_t *target_path; - const wchar_t *dot; + HANDLE hFile; + BY_HANDLE_FILE_INFORMATION fileInfo; + FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; + DWORD fileType, error; + BOOL isUnhandledTag = FALSE; + int retval = 0; - hFile = CreateFileW( - path, - FILE_READ_ATTRIBUTES, /* desired access */ - 0, /* share mode */ - NULL, /* security attributes */ - OPEN_EXISTING, - /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */ - /* FILE_FLAG_OPEN_REPARSE_POINT does not follow the symlink. - Because of this, calls like GetFinalPathNameByHandle will return - the symlink path again and not the actual final path. */ - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS| - FILE_FLAG_OPEN_REPARSE_POINT, - NULL); + DWORD access = FILE_READ_ATTRIBUTES; + DWORD flags = FILE_FLAG_BACKUP_SEMANTICS; /* Allow opening directories. */ + if (!traverse) { + flags |= FILE_FLAG_OPEN_REPARSE_POINT; + } + hFile = CreateFileW(path, access, 0, NULL, OPEN_EXISTING, flags, NULL); if (hFile == INVALID_HANDLE_VALUE) { - /* Either the target doesn't exist, or we don't have access to - get a handle to it. If the former, we need to return an error. - If the latter, we can use attributes_from_dir. 
*/ - DWORD lastError = GetLastError(); - if (lastError != ERROR_ACCESS_DENIED && - lastError != ERROR_SHARING_VIOLATION) - return -1; - /* Could not get attributes on open file. Fall back to - reading the directory. */ - if (!attributes_from_dir(path, &info, &reparse_tag)) - /* Very strange. This should not fail now */ - return -1; - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (traverse) { - /* Should traverse, but could not open reparse point handle */ - SetLastError(lastError); + /* Either the path doesn't exist, or the caller lacks access. */ + error = GetLastError(); + switch (error) { + case ERROR_ACCESS_DENIED: /* Cannot sync or read attributes. */ + case ERROR_SHARING_VIOLATION: /* It's a paging file. */ + /* Try reading the parent directory. */ + if (!attributes_from_dir(path, &fileInfo, &tagInfo.ReparseTag)) { + /* Cannot read the parent directory. */ + SetLastError(error); return -1; } - } - } else { - if (!GetFileInformationByHandle(hFile, &info)) { - CloseHandle(hFile); - return -1; - } - if (info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (!win32_get_reparse_tag(hFile, &reparse_tag)) { - CloseHandle(hFile); - return -1; + if (fileInfo.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { + if (traverse || + !IsReparseTagNameSurrogate(tagInfo.ReparseTag)) { + /* The stat call has to traverse but cannot, so fail. */ + SetLastError(error); + return -1; + } } - /* Close the outer open file handle now that we're about to - reopen it with different flags. */ - if (!CloseHandle(hFile)) + break; + + case ERROR_INVALID_PARAMETER: + /* \\.\con requires read or write access. */ + hFile = CreateFileW(path, access | GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + OPEN_EXISTING, flags, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + SetLastError(error); return -1; + } + break; + case ERROR_CANT_ACCESS_FILE: + /* bpo37834: open unhandled reparse points if traverse fails. 
*/ if (traverse) { - /* In order to call GetFinalPathNameByHandle we need to open - the file without the reparse handling flag set. */ - hFile2 = CreateFileW( - path, FILE_READ_ATTRIBUTES, FILE_SHARE_READ, - NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS, - NULL); - if (hFile2 == INVALID_HANDLE_VALUE) - return -1; + traverse = FALSE; + isUnhandledTag = TRUE; + hFile = CreateFileW(path, access, 0, NULL, OPEN_EXISTING, + flags | FILE_FLAG_OPEN_REPARSE_POINT, NULL); + } + if (hFile == INVALID_HANDLE_VALUE) { + SetLastError(error); + return -1; + } + break; - if (!get_target_path(hFile2, &target_path)) { - CloseHandle(hFile2); - return -1; - } + default: + return -1; + } + } - if (!CloseHandle(hFile2)) { - return -1; + if (hFile != INVALID_HANDLE_VALUE) { + /* Handle types other than files on disk. */ + fileType = GetFileType(hFile); + if (fileType != FILE_TYPE_DISK) { + if (fileType == FILE_TYPE_UNKNOWN && GetLastError() != 0) { + retval = -1; + goto cleanup; + } + DWORD fileAttributes = GetFileAttributesW(path); + memset(result, 0, sizeof(*result)); + if (fileAttributes != INVALID_FILE_ATTRIBUTES && + fileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + /* \\.\pipe\ or \\.\mailslot\ */ + result->st_mode = _S_IFDIR; + } else if (fileType == FILE_TYPE_CHAR) { + /* \\.\nul */ + result->st_mode = _S_IFCHR; + } else if (fileType == FILE_TYPE_PIPE) { + /* \\.\pipe\spam */ + result->st_mode = _S_IFIFO; + } + /* FILE_TYPE_UNKNOWN, e.g. \\.\mailslot\waitfor.exe\spam */ + goto cleanup; + } + + /* Query the reparse tag, and traverse a non-link. */ + if (!traverse) { + if (!GetFileInformationByHandleEx(hFile, FileAttributeTagInfo, + &tagInfo, sizeof(tagInfo))) { + /* Allow devices that do not support FileAttributeTagInfo. 
*/ + switch (GetLastError()) { + case ERROR_INVALID_PARAMETER: + case ERROR_INVALID_FUNCTION: + case ERROR_NOT_SUPPORTED: + tagInfo.FileAttributes = FILE_ATTRIBUTE_NORMAL; + tagInfo.ReparseTag = 0; + break; + default: + retval = -1; + goto cleanup; } + } else if (tagInfo.FileAttributes & + FILE_ATTRIBUTE_REPARSE_POINT) { + if (IsReparseTagNameSurrogate(tagInfo.ReparseTag)) { + if (isUnhandledTag) { + /* Traversing previously failed for either this link + or its target. */ + SetLastError(ERROR_CANT_ACCESS_FILE); + retval = -1; + goto cleanup; + } + /* Traverse a non-link, but not if traversing already failed + for an unhandled tag. */ + } else if (!isUnhandledTag) { + CloseHandle(hFile); + return win32_xstat_impl(path, result, TRUE); + } + } + } - code = win32_xstat_impl(target_path, result, FALSE); - PyMem_RawFree(target_path); - return code; + if (!GetFileInformationByHandle(hFile, &fileInfo)) { + switch (GetLastError()) { + case ERROR_INVALID_PARAMETER: + case ERROR_INVALID_FUNCTION: + case ERROR_NOT_SUPPORTED: + retval = -1; + goto cleanup; } - } else - CloseHandle(hFile); + /* Volumes and physical disks are block devices, e.g. + \\.\C: and \\.\PhysicalDrive0. */ + memset(result, 0, sizeof(*result)); + result->st_mode = 0x6000; /* S_IFBLK */ + goto cleanup; + } } - _Py_attribute_data_to_stat(&info, reparse_tag, result); - /* Set S_IEXEC if it is an .exe, .bat, ... */ - dot = wcsrchr(path, '.'); - if (dot) { - if (_wcsicmp(dot, L".bat") == 0 || _wcsicmp(dot, L".cmd") == 0 || - _wcsicmp(dot, L".exe") == 0 || _wcsicmp(dot, L".com") == 0) - result->st_mode |= 0111; + _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); + + if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. 
A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } } - return 0; + +cleanup: + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + + return retval; } static int @@ -1806,9 +1849,8 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) default does not traverse symlinks and instead returns attributes for the symlink. - Therefore, win32_lstat will get the attributes traditionally, and - win32_stat will first explicitly resolve the symlink target and then will - call win32_lstat on that result. */ + Instead, we will open the file (which *does* traverse symlinks by default) + and GetFileInformationByHandle(). 
*/ static int win32_lstat(const wchar_t* path, struct _Py_stat_struct *result) @@ -1876,6 +1918,9 @@ static PyStructSequence_Field stat_result_fields[] = { #endif #ifdef HAVE_STRUCT_STAT_ST_FSTYPE {"st_fstype", "Type of filesystem"}, +#endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG + {"st_reparse_tag", "Windows reparse tag"}, #endif {0} }; @@ -1928,6 +1973,12 @@ static PyStructSequence_Field stat_result_fields[] = { #define ST_FSTYPE_IDX ST_FILE_ATTRIBUTES_IDX #endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG +#define ST_REPARSE_TAG_IDX (ST_FSTYPE_IDX+1) +#else +#define ST_REPARSE_TAG_IDX ST_FSTYPE_IDX +#endif + static PyStructSequence_Desc stat_result_desc = { "stat_result", /* name */ stat_result__doc__, /* doc */ @@ -2155,6 +2206,10 @@ _pystat_fromstructstat(STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_FSTYPE_IDX, PyUnicode_FromString(st->st_fstype)); #endif +#ifdef HAVE_STRUCT_STAT_ST_REPARSE_TAG + PyStructSequence_SET_ITEM(v, ST_REPARSE_TAG_IDX, + PyLong_FromUnsignedLong(st->st_reparse_tag)); +#endif if (PyErr_Occurred()) { Py_DECREF(v); @@ -3877,8 +3932,9 @@ os__getfinalpathname_impl(PyObject *module, path_t *path) } result = PyUnicode_FromWideChar(target_path, result_length); - if (path->narrow) + if (result && path->narrow) { Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + } cleanup: if (target_path != buf) { @@ -3888,44 +3944,6 @@ os__getfinalpathname_impl(PyObject *module, path_t *path) return result; } -/*[clinic input] -os._isdir - - path as arg: object - / - -Return true if the pathname refers to an existing directory. 
-[clinic start generated code]*/ - -static PyObject * -os__isdir(PyObject *module, PyObject *arg) -/*[clinic end generated code: output=404f334d85d4bf25 input=36cb6785874d479e]*/ -{ - DWORD attributes; - path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0); - - if (!path_converter(arg, &path)) { - if (PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - Py_RETURN_FALSE; - } - return NULL; - } - - Py_BEGIN_ALLOW_THREADS - attributes = GetFileAttributesW(path.wide); - Py_END_ALLOW_THREADS - - path_cleanup(&path); - if (attributes == INVALID_FILE_ATTRIBUTES) - Py_RETURN_FALSE; - - if (attributes & FILE_ATTRIBUTE_DIRECTORY) - Py_RETURN_TRUE; - else - Py_RETURN_FALSE; -} - /*[clinic input] os._getvolumepathname @@ -7796,11 +7814,10 @@ os_readlink_impl(PyObject *module, path_t *path, int dir_fd) return PyBytes_FromStringAndSize(buffer, length); #elif defined(MS_WINDOWS) DWORD n_bytes_returned; - DWORD io_result; + DWORD io_result = 0; HANDLE reparse_point_handle; char target_buffer[_Py_MAXIMUM_REPARSE_DATA_BUFFER_SIZE]; _Py_REPARSE_DATA_BUFFER *rdb = (_Py_REPARSE_DATA_BUFFER *)target_buffer; - const wchar_t *print_name; PyObject *result; /* First get a handle to the reparse point */ @@ -7813,42 +7830,51 @@ os_readlink_impl(PyObject *module, path_t *path, int dir_fd) OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT|FILE_FLAG_BACKUP_SEMANTICS, 0); - Py_END_ALLOW_THREADS - - if (reparse_point_handle == INVALID_HANDLE_VALUE) { - return path_error(path); + if (reparse_point_handle != INVALID_HANDLE_VALUE) { + /* New call DeviceIoControl to read the reparse point */ + io_result = DeviceIoControl( + reparse_point_handle, + FSCTL_GET_REPARSE_POINT, + 0, 0, /* in buffer */ + target_buffer, sizeof(target_buffer), + &n_bytes_returned, + 0 /* we're not using OVERLAPPED_IO */ + ); + CloseHandle(reparse_point_handle); } - - Py_BEGIN_ALLOW_THREADS - /* New call DeviceIoControl to read the reparse point */ - io_result = DeviceIoControl( - reparse_point_handle, - 
FSCTL_GET_REPARSE_POINT, - 0, 0, /* in buffer */ - target_buffer, sizeof(target_buffer), - &n_bytes_returned, - 0 /* we're not using OVERLAPPED_IO */ - ); - CloseHandle(reparse_point_handle); Py_END_ALLOW_THREADS if (io_result == 0) { return path_error(path); } - if (rdb->ReparseTag != IO_REPARSE_TAG_SYMLINK) + wchar_t *name = NULL; + Py_ssize_t nameLen = 0; + if (rdb->ReparseTag == IO_REPARSE_TAG_SYMLINK) { - PyErr_SetString(PyExc_ValueError, - "not a symbolic link"); - return NULL; + name = (wchar_t *)((char*)rdb->SymbolicLinkReparseBuffer.PathBuffer + + rdb->SymbolicLinkReparseBuffer.SubstituteNameOffset); + nameLen = rdb->SymbolicLinkReparseBuffer.SubstituteNameLength / sizeof(wchar_t); } - print_name = (wchar_t *)((char*)rdb->SymbolicLinkReparseBuffer.PathBuffer + - rdb->SymbolicLinkReparseBuffer.PrintNameOffset); - - result = PyUnicode_FromWideChar(print_name, - rdb->SymbolicLinkReparseBuffer.PrintNameLength / sizeof(wchar_t)); - if (path->narrow) { - Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + else if (rdb->ReparseTag == IO_REPARSE_TAG_MOUNT_POINT) + { + name = (wchar_t *)((char*)rdb->MountPointReparseBuffer.PathBuffer + + rdb->MountPointReparseBuffer.SubstituteNameOffset); + nameLen = rdb->MountPointReparseBuffer.SubstituteNameLength / sizeof(wchar_t); + } + else + { + PyErr_SetString(PyExc_ValueError, "not a symbolic link"); + } + if (name) { + if (nameLen > 4 && wcsncmp(name, L"\\??\\", 4) == 0) { + /* Our buffer is mutable, so this is okay */ + name[1] = L'\\'; + } + result = PyUnicode_FromWideChar(name, nameLen); + if (path->narrow) { + Py_SETREF(result, PyUnicode_EncodeFSDefault(result)); + } } return result; #endif @@ -13647,7 +13673,6 @@ static PyMethodDef posix_methods[] = { OS_PATHCONF_METHODDEF OS_ABORT_METHODDEF OS__GETFULLPATHNAME_METHODDEF - OS__ISDIR_METHODDEF OS__GETDISKUSAGE_METHODDEF OS__GETFINALPATHNAME_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index 
55bc1940aeb886..36a3c995a98a92 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -878,7 +878,12 @@ _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); result->st_nlink = info->nNumberOfLinks; result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; - if (reparse_tag == IO_REPARSE_TAG_SYMLINK) { + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag. */ + result->st_reparse_tag = reparse_tag; + if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + reparse_tag == IO_REPARSE_TAG_SYMLINK) { /* first clear the S_IFMT bits */ result->st_mode ^= (result->st_mode & S_IFMT); /* now set the bits that make this a symlink */