Shader/Pipeline Cache: Use VAddr instead of physical memory for addressing.

This commit is contained in:
Fernando Sahmkow 2020-04-05 19:18:00 -04:00
parent 3dd5c07454
commit ea535d9470
7 changed files with 62 additions and 87 deletions

View file

@ -18,22 +18,14 @@
class RasterizerCacheObject { class RasterizerCacheObject {
public: public:
explicit RasterizerCacheObject(const u8* host_ptr) explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
: host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
virtual ~RasterizerCacheObject(); virtual ~RasterizerCacheObject();
CacheAddr GetCacheAddr() const { VAddr GetCpuAddr() const {
return cache_addr; return cpu_addr;
} }
const u8* GetHostPtr() const {
return host_ptr;
}
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetCpuAddr() const = 0;
/// Gets the size of the shader in guest memory, required for cache management /// Gets the size of the shader in guest memory, required for cache management
virtual std::size_t GetSizeInBytes() const = 0; virtual std::size_t GetSizeInBytes() const = 0;
@ -68,8 +60,7 @@ private:
bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_registered{}; ///< Whether the object is currently registered with the cache
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
const u8* host_ptr{}; ///< Pointer to the memory backing this cached region VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
}; };
template <class T> template <class T>
@ -80,7 +71,7 @@ public:
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
/// Write any cached resources overlapping the specified region back to memory /// Write any cached resources overlapping the specified region back to memory
void FlushRegion(CacheAddr addr, std::size_t size) { void FlushRegion(VAddr addr, std::size_t size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -90,7 +81,7 @@ public:
} }
/// Mark the specified region as being invalidated /// Mark the specified region as being invalidated
void InvalidateRegion(CacheAddr addr, u64 size) { void InvalidateRegion(VAddr addr, u64 size) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
const auto& objects{GetSortedObjectsFromRegion(addr, size)}; const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@ -114,27 +105,20 @@ public:
protected: protected:
/// Tries to get an object from the cache with the specified cache address /// Tries to get an object from the cache with the specified cache address
T TryGet(CacheAddr addr) const { T TryGet(VAddr addr) const {
const auto iter = map_cache.find(addr); const auto iter = map_cache.find(addr);
if (iter != map_cache.end()) if (iter != map_cache.end())
return iter->second; return iter->second;
return nullptr; return nullptr;
} }
T TryGet(const void* addr) const {
const auto iter = map_cache.find(ToCacheAddr(addr));
if (iter != map_cache.end())
return iter->second;
return nullptr;
}
/// Register an object into the cache /// Register an object into the cache
virtual void Register(const T& object) { virtual void Register(const T& object) {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
object->SetIsRegistered(true); object->SetIsRegistered(true);
interval_cache.add({GetInterval(object), ObjectSet{object}}); interval_cache.add({GetInterval(object), ObjectSet{object}});
map_cache.insert({object->GetCacheAddr(), object}); map_cache.insert({object->GetCpuAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
} }
@ -144,7 +128,7 @@ protected:
object->SetIsRegistered(false); object->SetIsRegistered(false);
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
const CacheAddr addr = object->GetCacheAddr(); const VAddr addr = object->GetCpuAddr();
interval_cache.subtract({GetInterval(object), ObjectSet{object}}); interval_cache.subtract({GetInterval(object), ObjectSet{object}});
map_cache.erase(addr); map_cache.erase(addr);
} }
@ -173,7 +157,7 @@ protected:
private: private:
/// Returns a list of cached objects from the specified memory region, ordered by access time /// Returns a list of cached objects from the specified memory region, ordered by access time
std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
@ -197,13 +181,13 @@ private:
} }
using ObjectSet = std::set<T>; using ObjectSet = std::set<T>;
using ObjectCache = std::unordered_map<CacheAddr, T>; using ObjectCache = std::unordered_map<VAddr, T>;
using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
using ObjectInterval = typename IntervalCache::interval_type; using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) { static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetCacheAddr(), return ObjectInterval::right_open(object->GetCpuAddr(),
object->GetCacheAddr() + object->GetSizeInBytes()); object->GetCpuAddr() + object->GetSizeInBytes());
} }
ObjectCache map_cache; ObjectCache map_cache;

View file

@ -671,9 +671,8 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
if (!addr || !size) { if (!addr || !size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(addr, size); texture_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(cache_addr, size); shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }

View file

@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace } // Anonymous namespace
CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program) ShaderEntries entries, std::shared_ptr<OGLProgram> program)
: RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} size_in_bytes{size_in_bytes}, program{std::move(program)} {}
CachedShader::~CachedShader() = default; CachedShader::~CachedShader() = default;
@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
entry.bindless_samplers = registry->GetBindlessSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry)); params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, return std::shared_ptr<CachedShader>(new CachedShader(
size_in_bytes, std::move(registry), params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
MakeEntries(ir), std::move(program)));
} }
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
entry.bindless_samplers = registry->GetBindlessSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers();
params.disk_cache.SaveEntry(std::move(entry)); params.disk_cache.SaveEntry(std::move(entry));
return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, return std::shared_ptr<CachedShader>(new CachedShader(
size_in_bytes, std::move(registry), params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
MakeEntries(ir), std::move(program)));
} }
Shader CachedShader::CreateFromCache(const ShaderParameters& params, Shader CachedShader::CreateFromCache(const ShaderParameters& params,
const PrecompiledShader& precompiled_shader, const PrecompiledShader& precompiled_shader,
std::size_t size_in_bytes) { std::size_t size_in_bytes) {
return std::shared_ptr<CachedShader>(new CachedShader( return std::shared_ptr<CachedShader>(
params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
precompiled_shader.entries, precompiled_shader.program)); precompiled_shader.entries, precompiled_shader.program));
} }
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const GPUVAddr address{GetShaderAddress(system, program)}; const GPUVAddr address{GetShaderAddress(system, program)};
// Look up shader in the cache based on address // Look up shader in the cache based on address
const auto host_ptr{memory_manager.GetPointer(address)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
Shader shader{TryGet(host_ptr)}; Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
if (shader) { if (shader) {
return last_shaders[static_cast<std::size_t>(program)] = shader; return last_shaders[static_cast<std::size_t>(program)] = shader;
} }
const auto host_ptr{memory_manager.GetPointer(address)};
// No shader found - create a new one // No shader found - create a new one
ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
ProgramCode code_b; ProgramCode code_b;
@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier( const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier); const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) { if (found == runtime_cache.end()) {
@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto& memory_manager{system.GPU().MemoryManager()}; auto& memory_manager{system.GPU().MemoryManager()};
const auto host_ptr{memory_manager.GetPointer(code_addr)}; const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
auto kernel = TryGet(host_ptr);
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (kernel) { if (kernel) {
return kernel; return kernel;
} }
const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one // No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
const ShaderParameters params{system, disk_cache, device, const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier}; *cpu_addr, host_ptr, unique_identifier};
const auto found = runtime_cache.find(unique_identifier); const auto found = runtime_cache.find(unique_identifier);
if (found == runtime_cache.end()) { if (found == runtime_cache.end()) {

View file

@ -65,11 +65,6 @@ public:
/// Gets the GL program handle for the shader /// Gets the GL program handle for the shader
GLuint GetHandle() const; GLuint GetHandle() const;
/// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
/// Returns the size in bytes of the shader /// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
return size_in_bytes; return size_in_bytes;
@ -90,13 +85,12 @@ public:
std::size_t size_in_bytes); std::size_t size_in_bytes);
private: private:
explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
std::shared_ptr<VideoCommon::Shader::Registry> registry, std::shared_ptr<VideoCommon::Shader::Registry> registry,
ShaderEntries entries, std::shared_ptr<OGLProgram> program); ShaderEntries entries, std::shared_ptr<OGLProgram> program);
std::shared_ptr<VideoCommon::Shader::Registry> registry; std::shared_ptr<VideoCommon::Shader::Registry> registry;
ShaderEntries entries; ShaderEntries entries;
VAddr cpu_addr = 0;
std::size_t size_in_bytes = 0; std::size_t size_in_bytes = 0;
std::shared_ptr<OGLProgram> program; std::shared_ptr<OGLProgram> program;
}; };

View file

@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
} // Anonymous namespace } // Anonymous namespace
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
ProgramCode program_code, u32 main_offset) u32 main_offset)
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
shader_ir{this->program_code, main_offset, compiler_settings, registry}, compiler_settings, registry},
entries{GenerateShaderEntries(shader_ir)} {} entries{GenerateShaderEntries(shader_ir)} {}
CachedShader::~CachedShader() = default; CachedShader::~CachedShader() = default;
@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
auto& memory_manager{system.GPU().MemoryManager()}; auto& memory_manager{system.GPU().MemoryManager()};
const GPUVAddr program_addr{GetShaderAddress(system, program)}; const GPUVAddr program_addr{GetShaderAddress(system, program)};
const auto host_ptr{memory_manager.GetPointer(program_addr)}; const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
auto shader = TryGet(host_ptr); ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) { if (!shader) {
const auto host_ptr{memory_manager.GetPointer(program_addr)};
// No shader found - create a new one // No shader found - create a new one
constexpr u32 stage_offset = 10; constexpr u32 stage_offset = 10;
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
host_ptr, std::move(code), stage_offset); std::move(code), stage_offset);
Register(shader); Register(shader);
} }
shaders[index] = std::move(shader); shaders[index] = std::move(shader);
@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
const auto program_addr = key.shader; const auto program_addr = key.shader;
const auto host_ptr = memory_manager.GetPointer(program_addr);
auto shader = TryGet(host_ptr); const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
ASSERT(cpu_addr);
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
if (!shader) { if (!shader) {
// No shader found - create a new one // No shader found - create a new one
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); const auto host_ptr = memory_manager.GetPointer(program_addr);
ASSERT(cpu_addr);
auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
constexpr u32 kernel_main_offset = 0; constexpr u32 kernel_main_offset = 0;
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
program_addr, *cpu_addr, host_ptr, std::move(code), program_addr, *cpu_addr, std::move(code),
kernel_main_offset); kernel_main_offset);
Register(shader); Register(shader);
} }
@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
} }
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
const auto host_ptr = memory_manager.GetPointer(gpu_addr); const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
const auto shader = TryGet(host_ptr); ASSERT(cpu_addr);
const auto shader = TryGet(*cpu_addr);
ASSERT(shader); ASSERT(shader);
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5

View file

@ -113,17 +113,13 @@ namespace Vulkan {
class CachedShader final : public RasterizerCacheObject { class CachedShader final : public RasterizerCacheObject {
public: public:
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
~CachedShader(); ~CachedShader();
GPUVAddr GetGpuAddr() const { GPUVAddr GetGpuAddr() const {
return gpu_addr; return gpu_addr;
} }
VAddr GetCpuAddr() const override {
return cpu_addr;
}
std::size_t GetSizeInBytes() const override { std::size_t GetSizeInBytes() const override {
return program_code.size() * sizeof(u64); return program_code.size() * sizeof(u64);
} }
@ -149,7 +145,6 @@ private:
Tegra::Engines::ShaderType stage); Tegra::Engines::ShaderType stage);
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
VAddr cpu_addr{};
ProgramCode program_code; ProgramCode program_code;
VideoCommon::Shader::Registry registry; VideoCommon::Shader::Registry registry;
VideoCommon::Shader::ShaderIR shader_ir; VideoCommon::Shader::ShaderIR shader_ir;

View file

@ -509,9 +509,8 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
if (!addr || !size) { if (!addr || !size) {
return; return;
} }
CacheAddr cache_addr = ToCacheAddr(system.Memory().GetPointer(addr));
texture_cache.InvalidateRegion(addr, size); texture_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(cache_addr, size); pipeline_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size);
query_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size);
} }