From f5b146b0950f91c4b5ebc5b208a4d3ad9fff92cf Mon Sep 17 00:00:00 2001 From: Alastair Houghton Date: Fri, 6 Sep 2024 15:31:56 +0100 Subject: [PATCH 1/3] [Windows][Concurrency] Use the same clock as Dispatch. The Concurrency runtime calculates deadlines for scheduling itself using `swift_get_time()`; unfortunately, on Windows that was using `QueryPerformanceCounter()`, while Dispatch uses `QueryInterruptTimePrecise()`. The problem with that is that the two do not necessarily correspond *at all*. In general `QueryPerformanceCounter()` may be using any of a number of hardware timers depending on the machine on which we're running. In the VM I was testing on, the two differed by 20ms, but the worst case is that they are completely unrelated, in which case `Task.sleep()` will wait essentially a random amount of time. rdar://135413803 --- stdlib/public/Concurrency/CMakeLists.txt | 5 ++- stdlib/public/Concurrency/Clock.cpp | 45 +++++++++++++++++------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/stdlib/public/Concurrency/CMakeLists.txt b/stdlib/public/Concurrency/CMakeLists.txt index 4d13ee095d671..cbb2a261ed49a 100644 --- a/stdlib/public/Concurrency/CMakeLists.txt +++ b/stdlib/public/Concurrency/CMakeLists.txt @@ -15,7 +15,10 @@ set(SWIFT_RUNTIME_CONCURRENCY_SWIFT_FLAGS -I${CMAKE_CURRENT_SOURCE_DIR}/Internal set(swift_concurrency_private_link_libraries) if(CMAKE_SYSTEM_NAME STREQUAL "Windows") - list(APPEND swift_concurrency_private_link_libraries Synchronization) + list(APPEND swift_concurrency_private_link_libraries + Synchronization + mincore.lib # For QueryInterruptTime() + ) endif() set(swift_concurrency_incorporate_object_libraries_so swiftThreading) diff --git a/stdlib/public/Concurrency/Clock.cpp b/stdlib/public/Concurrency/Clock.cpp index 50e4e61b1808f..db65f7201e1d3 100644 --- a/stdlib/public/Concurrency/Clock.cpp +++ b/stdlib/public/Concurrency/Clock.cpp @@ -41,19 +41,36 @@ void swift_get_time( #elif (defined(__OpenBSD__) || 
defined(__FreeBSD__) || defined(__wasi__)) clock_gettime(CLOCK_MONOTONIC, &continuous); #elif defined(_WIN32) - LARGE_INTEGER freq; - QueryPerformanceFrequency(&freq); - LARGE_INTEGER count; - QueryPerformanceCounter(&count); - // Divide count (number of ticks) by frequency (number of ticks per - // second) to get the counter in seconds. We also need to multiply the - // count by 1,000,000,000 to get nanosecond resolution. By multiplying - // first, we maintain high precision. The resulting value is the tick - // count in nanoseconds. Use 128-bit math to avoid overflowing. - auto quadPart = static_cast(count.QuadPart); - auto ns = (quadPart * 1'000'000'000) / freq.QuadPart; - continuous.tv_sec = ns / 1'000'000'000; - continuous.tv_nsec = ns % 1'000'000'000; + // This needs to match what swift-corelibs-libdispatch does + + // QueryInterruptTimePrecise() was added in Windows 10 and is, as + // the name suggests, more precise than QueryInterruptTime(). + // Unfortunately, the symbol is not listed in any .lib file in the SDK and + // must be looked up dynamically at runtime even if our minimum deployment + // target is Windows 10. + typedef decltype(QueryInterruptTimePrecise) *QueryITP_FP; + static QueryITP_FP queryITP = nullptr; + static swift::once_t onceToken; + swift::once(onceToken, [] { + if (HMODULE hKernelBase = GetModuleHandleW(L"KernelBase.dll")) { + queryITP = reinterpret_cast<QueryITP_FP>( + GetProcAddress(hKernelBase, "QueryInterruptTimePrecise") + ); + } + }); + + // Call whichever API is available. Both output a value measured in 100ns + // units. We must divide the output by 10,000,000 to get a value in + // seconds and multiply the remainder by 100 to get nanoseconds. + ULONGLONG interruptTime; + if (queryITP) { + (* queryITP)(&interruptTime); + } else { + // Fall back to the older, less precise API.
+ (void)QueryInterruptTime(&interruptTime); + } + continuous.tv_sec = interruptTime / 10'000'000; + continuous.tv_nsec = (interruptTime % 10'000'000) * 100; #else #error Missing platform continuous time definition #endif @@ -72,6 +89,8 @@ void swift_get_time( #elif (defined(__OpenBSD__) || defined(__FreeBSD__)) clock_gettime(CLOCK_UPTIME, &suspending); #elif defined(_WIN32) + // This needs to match what swift-corelibs-libdispatch does + // QueryUnbiasedInterruptTimePrecise() was added in Windows 10 and is, as // the name suggests, more precise than QueryUnbiasedInterruptTime(). // Unfortunately, the symbol is not listed in any .lib file in the SDK and From 86c643ddc875898db2f532bbb29439b41d8f067e Mon Sep 17 00:00:00 2001 From: Alastair Houghton Date: Fri, 6 Sep 2024 15:51:55 +0100 Subject: [PATCH 2/3] [Concurrency][Windows] Also update swift_get_clock_res(). I should have updated this to match as well. rdar://135413803 --- stdlib/public/Concurrency/Clock.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stdlib/public/Concurrency/Clock.cpp b/stdlib/public/Concurrency/Clock.cpp index db65f7201e1d3..c46ff8bd148d5 100644 --- a/stdlib/public/Concurrency/Clock.cpp +++ b/stdlib/public/Concurrency/Clock.cpp @@ -147,10 +147,8 @@ switch (clock_id) { #elif (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__wasi__)) clock_getres(CLOCK_MONOTONIC, &continuous); #elif defined(_WIN32) - LARGE_INTEGER freq; - QueryPerformanceFrequency(&freq); continuous.tv_sec = 0; - continuous.tv_nsec = 1'000'000'000 / freq.QuadPart; + continuous.tv_nsec = 100; #else #error Missing platform continuous time definition #endif From 8e91ae48e641fbf45f27bf7d9cf89e2a573d381e Mon Sep 17 00:00:00 2001 From: Alastair Houghton Date: Fri, 6 Sep 2024 17:16:27 +0100 Subject: [PATCH 3/3] [Concurrency][Windows] Remove use of `GetProcAddress()`. Since we're linking `mincore.lib`, we don't need to use `GetProcAddress()` to find `Query[Unbiased]InterruptTimePrecise()`. 
rdar://135413803 --- stdlib/public/Concurrency/Clock.cpp | 50 +++-------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) diff --git a/stdlib/public/Concurrency/Clock.cpp b/stdlib/public/Concurrency/Clock.cpp index c46ff8bd148d5..2c9e43c8f4cdd 100644 --- a/stdlib/public/Concurrency/Clock.cpp +++ b/stdlib/public/Concurrency/Clock.cpp @@ -43,32 +43,11 @@ void swift_get_time( #elif defined(_WIN32) // This needs to match what swift-corelibs-libdispatch does - // QueryInterruptTimePrecise() was added in Windows 10 and is, as - // the name suggests, more precise than QueryInterruptTime(). - // Unfortunately, the symbol is not listed in any .lib file in the SDK and - // must be looked up dynamically at runtime even if our minimum deployment - // target is Windows 10. - typedef decltype(QueryInterruptTimePrecise) *QueryITP_FP; - static QueryITP_FP queryITP = nullptr; - static swift::once_t onceToken; - swift::once(onceToken, [] { - if (HMODULE hKernelBase = GetModuleHandleW(L"KernelBase.dll")) { - queryITP = reinterpret_cast<QueryITP_FP>( - GetProcAddress(hKernelBase, "QueryInterruptTimePrecise") - ); - } - }); - - // Call whichever API is available. Both output a value measured in 100ns + // QueryInterruptTimePrecise() outputs a value measured in 100ns // units. We must divide the output by 10,000,000 to get a value in // seconds and multiply the remainder by 100 to get nanoseconds. ULONGLONG interruptTime; - if (queryITP) { - (* queryITP)(&interruptTime); - } else { - // Fall back to the older, less precise API.
- (void)QueryInterruptTime(&interruptTime); - } + (void)QueryInterruptTimePrecise(&interruptTime); continuous.tv_sec = interruptTime / 10'000'000; continuous.tv_nsec = (interruptTime % 10'000'000) * 100; #else @@ -91,32 +70,11 @@ void swift_get_time( #elif defined(_WIN32) // This needs to match what swift-corelibs-libdispatch does - // QueryUnbiasedInterruptTimePrecise() was added in Windows 10 and is, as - // the name suggests, more precise than QueryUnbiasedInterruptTime(). - // Unfortunately, the symbol is not listed in any .lib file in the SDK and - // must be looked up dynamically at runtime even if our minimum deployment - // target is Windows 10. - typedef decltype(QueryUnbiasedInterruptTimePrecise) *QueryUITP_FP; - static QueryUITP_FP queryUITP = nullptr; - static swift::once_t onceToken; - swift::once(onceToken, [] { - if (HMODULE hKernelBase = GetModuleHandleW(L"KernelBase.dll")) { - queryUITP = reinterpret_cast<QueryUITP_FP>( - GetProcAddress(hKernelBase, "QueryUnbiasedInterruptTimePrecise") - ); - } - }); - - // Call whichever API is available. Both output a value measured in 100ns + // QueryUnbiasedInterruptTimePrecise() outputs a value measured in 100ns // units. We must divide the output by 10,000,000 to get a value in // seconds and multiply the remainder by 100 to get nanoseconds. ULONGLONG unbiasedTime; - if (queryUITP) { - (* queryUITP)(&unbiasedTime); - } else { - // Fall back to the older, less precise API. - (void)QueryUnbiasedInterruptTime(&unbiasedTime); - } + (void)QueryUnbiasedInterruptTimePrecise(&unbiasedTime); suspending.tv_sec = unbiasedTime / 10'000'000; suspending.tv_nsec = (unbiasedTime % 10'000'000) * 100; #else