core_timing: Use higher precision sleeps on Windows
The precision of sleep_for and wait_for is limited to 1-1.5 ms on Windows. Using SleepForOneTick() instead lets us sleep for exactly one interval of the current timer resolution, so on systems that have a timer resolution of 0.5 ms we can take advantage of it to reduce CPU overhead in the event loop.
This commit is contained in:
parent
7e353082ac
commit
bff1453282
5 changed files with 47 additions and 24 deletions
|
@ -81,4 +81,9 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
|
||||||
|
u64 emulated_clock_frequency) {
|
||||||
|
return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -55,4 +55,7 @@ private:
|
||||||
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
|
||||||
u64 emulated_clock_frequency);
|
u64 emulated_clock_frequency);
|
||||||
|
|
||||||
|
[[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency,
|
||||||
|
u64 emulated_clock_frequency);
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -6,6 +6,10 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include "common/windows/timer_resolution.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "core/core_timing_util.h"
|
#include "core/core_timing_util.h"
|
||||||
|
@ -38,7 +42,8 @@ struct CoreTiming::Event {
|
||||||
};
|
};
|
||||||
|
|
||||||
CoreTiming::CoreTiming()
|
CoreTiming::CoreTiming()
|
||||||
: clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
: cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)},
|
||||||
|
event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {}
|
||||||
|
|
||||||
CoreTiming::~CoreTiming() {
|
CoreTiming::~CoreTiming() {
|
||||||
Reset();
|
Reset();
|
||||||
|
@ -185,15 +190,15 @@ void CoreTiming::ResetTicks() {
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 CoreTiming::GetCPUTicks() const {
|
u64 CoreTiming::GetCPUTicks() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetCPUCycles();
|
return cpu_clock->GetCPUCycles();
|
||||||
}
|
}
|
||||||
return ticks;
|
return ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 CoreTiming::GetClockTicks() const {
|
u64 CoreTiming::GetClockTicks() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetClockCycles();
|
return cpu_clock->GetClockCycles();
|
||||||
}
|
}
|
||||||
return CpuCyclesToClockCycles(ticks);
|
return CpuCyclesToClockCycles(ticks);
|
||||||
}
|
}
|
||||||
|
@ -252,21 +257,20 @@ void CoreTiming::ThreadLoop() {
|
||||||
const auto next_time = Advance();
|
const auto next_time = Advance();
|
||||||
if (next_time) {
|
if (next_time) {
|
||||||
// There are more events left in the queue, wait until the next event.
|
// There are more events left in the queue, wait until the next event.
|
||||||
const auto wait_time = *next_time - GetGlobalTimeNs().count();
|
auto wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (wait_time > 0) {
|
if (wait_time > 0) {
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
// Assume a timer resolution of 1ms.
|
const auto timer_resolution_ns =
|
||||||
static constexpr s64 TimerResolutionNS = 1000000;
|
Common::Windows::GetCurrentTimerResolution().count();
|
||||||
|
|
||||||
// Sleep in discrete intervals of the timer resolution, and spin the rest.
|
while (!paused && !event.IsSet() && wait_time > 0) {
|
||||||
const auto sleep_time = wait_time - (wait_time % TimerResolutionNS);
|
wait_time = *next_time - GetGlobalTimeNs().count();
|
||||||
if (sleep_time > 0) {
|
|
||||||
event.WaitFor(std::chrono::nanoseconds(sleep_time));
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!paused && !event.IsSet() && GetGlobalTimeNs().count() < *next_time) {
|
if (wait_time >= timer_resolution_ns) {
|
||||||
// Yield to reduce thread starvation.
|
Common::Windows::SleepForOneTick();
|
||||||
std::this_thread::yield();
|
} else {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event.IsSet()) {
|
if (event.IsSet()) {
|
||||||
|
@ -285,9 +289,9 @@ void CoreTiming::ThreadLoop() {
|
||||||
}
|
}
|
||||||
|
|
||||||
paused_set = true;
|
paused_set = true;
|
||||||
clock->Pause(true);
|
event_clock->Pause(true);
|
||||||
pause_event.Wait();
|
pause_event.Wait();
|
||||||
clock->Pause(false);
|
event_clock->Pause(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -303,16 +307,23 @@ void CoreTiming::Reset() {
|
||||||
has_started = false;
|
has_started = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const {
|
||||||
|
if (is_multicore) [[likely]] {
|
||||||
|
return cpu_clock->GetTimeNS();
|
||||||
|
}
|
||||||
|
return CyclesToNs(ticks);
|
||||||
|
}
|
||||||
|
|
||||||
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
|
std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetTimeNS();
|
return event_clock->GetTimeNS();
|
||||||
}
|
}
|
||||||
return CyclesToNs(ticks);
|
return CyclesToNs(ticks);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
|
std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
|
||||||
if (is_multicore) {
|
if (is_multicore) [[likely]] {
|
||||||
return clock->GetTimeUS();
|
return event_clock->GetTimeUS();
|
||||||
}
|
}
|
||||||
return CyclesToUs(ticks);
|
return CyclesToUs(ticks);
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,6 +122,9 @@ public:
|
||||||
/// Returns the current emulated time in Clock cycles
|
/// Returns the current emulated time in Clock cycles
|
||||||
u64 GetClockTicks() const;
|
u64 GetClockTicks() const;
|
||||||
|
|
||||||
|
/// Returns current time in nanoseconds.
|
||||||
|
std::chrono::nanoseconds GetCPUTimeNs() const;
|
||||||
|
|
||||||
/// Returns current time in microseconds.
|
/// Returns current time in microseconds.
|
||||||
std::chrono::microseconds GetGlobalTimeUs() const;
|
std::chrono::microseconds GetGlobalTimeUs() const;
|
||||||
|
|
||||||
|
@ -139,7 +142,8 @@ private:
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
std::unique_ptr<Common::WallClock> clock;
|
std::unique_ptr<Common::WallClock> cpu_clock;
|
||||||
|
std::unique_ptr<Common::WallClock> event_clock;
|
||||||
|
|
||||||
s64 global_timer = 0;
|
s64 global_timer = 0;
|
||||||
|
|
||||||
|
|
|
@ -197,7 +197,7 @@ struct GPU::Impl {
|
||||||
constexpr u64 gpu_ticks_num = 384;
|
constexpr u64 gpu_ticks_num = 384;
|
||||||
constexpr u64 gpu_ticks_den = 625;
|
constexpr u64 gpu_ticks_den = 625;
|
||||||
|
|
||||||
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
|
u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
|
||||||
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
||||||
nanoseconds /= 256;
|
nanoseconds /= 256;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue