From bff14532825e7517882ca913738347059f73cf7f Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 1 Mar 2023 21:06:19 -0500 Subject: [PATCH] core_timing: Use higher precision sleeps on Windows The precision of sleep_for and wait_for is limited to 1-1.5ms on Windows. Using SleepForOneTick() allows us to sleep for exactly one interval of the current timer resolution. This allows us to take advantage of systems that have a timer resolution of 0.5ms to reduce CPU overhead in the event loop. --- src/common/wall_clock.cpp | 5 ++++ src/common/wall_clock.h | 3 +++ src/core/core_timing.cpp | 55 +++++++++++++++++++++++---------------- src/core/core_timing.h | 6 ++++- src/video_core/gpu.cpp | 2 +- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 6d972d136..817e71d52 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -81,4 +81,9 @@ std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, #endif +std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { + return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +} + } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 828a523a8..157ec5eae 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -55,4 +55,7 @@ private: [[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency); +[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency); + } // namespace Common diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3a63b52e3..742cfb996 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -6,6 +6,10 @@ #include #include +#ifdef _WIN32 +#include "common/windows/timer_resolution.h" +#endif + #include "common/microprofile.h" #include 
"core/core_timing.h" #include "core/core_timing_util.h" @@ -38,7 +42,8 @@ struct CoreTiming::Event { }; CoreTiming::CoreTiming() - : clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} + : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, + event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} CoreTiming::~CoreTiming() { Reset(); @@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetCPUTicks() const { - if (is_multicore) { - return clock->GetCPUCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetCPUCycles(); } return ticks; } u64 CoreTiming::GetClockTicks() const { - if (is_multicore) { - return clock->GetClockCycles(); + if (is_multicore) [[likely]] { + return cpu_clock->GetClockCycles(); } return CpuCyclesToClockCycles(ticks); } @@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() { const auto next_time = Advance(); if (next_time) { // There are more events left in the queue, wait until the next event. - const auto wait_time = *next_time - GetGlobalTimeNs().count(); + auto wait_time = *next_time - GetGlobalTimeNs().count(); if (wait_time > 0) { #ifdef _WIN32 - // Assume a timer resolution of 1ms. - static constexpr s64 TimerResolutionNS = 1000000; + const auto timer_resolution_ns = + Common::Windows::GetCurrentTimerResolution().count(); - // Sleep in discrete intervals of the timer resolution, and spin the rest. - const auto sleep_time = wait_time - (wait_time % TimerResolutionNS); - if (sleep_time > 0) { - event.WaitFor(std::chrono::nanoseconds(sleep_time)); - } + while (!paused && !event.IsSet() && wait_time > 0) { + wait_time = *next_time - GetGlobalTimeNs().count(); - while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) { - // Yield to reduce thread starvation. 
- std::this_thread::yield(); + if (wait_time >= timer_resolution_ns) { + Common::Windows::SleepForOneTick(); + } else { + std::this_thread::yield(); + } } if (event.IsSet()) { @@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() { } paused_set = true; - clock->Pause(true); + event_clock->Pause(true); /* NOTE(review): only event_clock is paused here; cpu_clock, which GetCPUTicks(), GetClockTicks() and GetCPUTimeNs() read, is not paused anywhere in this patch - confirm that is intended. */ pause_event.Wait(); - clock->Pause(false); + event_clock->Pause(false); } } @@ -303,16 +307,23 @@ void CoreTiming::Reset() { has_started = false; } +std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { + if (is_multicore) [[likely]] { + return cpu_clock->GetTimeNS(); + } + return CyclesToNs(ticks); +} + std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { - if (is_multicore) { - return clock->GetTimeNS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeNS(); } return CyclesToNs(ticks); } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { - if (is_multicore) { - return clock->GetTimeUS(); + if (is_multicore) [[likely]] { + return event_clock->GetTimeUS(); } return CyclesToUs(ticks); } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index da366637b..4b89c0c39 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -122,6 +122,9 @@ public: /// Returns current time in emulated in Clock cycles u64 GetClockTicks() const; + /// Returns the current time in nanoseconds as read from the CPU clock (cpu_clock) in multicore mode, otherwise converted from ticks; GetGlobalTimeNs() reads the event clock instead. + std::chrono::nanoseconds GetCPUTimeNs() const; + /// Returns current time in microseconds.
std::chrono::microseconds GetGlobalTimeUs() const; @@ -139,7 +142,8 @@ private: void Reset(); - std::unique_ptr clock; + std::unique_ptr cpu_clock; + std::unique_ptr event_clock; s64 global_timer = 0; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7024a19cf..2e7f9c5ed 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -197,7 +197,7 @@ struct GPU::Impl { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { nanoseconds /= 256; }