From 3630bfaef332768e08ecc0c34cd4bca83a2579f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Nov 2022 03:07:14 +0100 Subject: [PATCH] RasterizerMemory: Add filtering for flushing/invalidation operations. --- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/cache_types.h | 24 ++++++++ src/video_core/memory_manager.cpp | 60 ++++++++++--------- src/video_core/memory_manager.h | 25 +++++--- src/video_core/rasterizer_interface.h | 13 ++-- .../renderer_null/null_rasterizer.cpp | 8 +-- .../renderer_null/null_rasterizer.h | 12 ++-- .../renderer_opengl/gl_rasterizer.cpp | 57 +++++++++++------- .../renderer_opengl/gl_rasterizer.h | 13 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 52 ++++++++++------ .../renderer_vulkan/vk_rasterizer.h | 13 ++-- src/video_core/texture_cache/texture_cache.h | 3 +- .../texture_cache/texture_cache_base.h | 2 +- 14 files changed, 189 insertions(+), 96 deletions(-) create mode 100644 src/video_core/cache_types.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fd71bf186..aa271a377 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -13,6 +13,7 @@ add_library(video_core STATIC buffer_cache/buffer_base.h buffer_cache/buffer_cache.cpp buffer_cache/buffer_cache.h + cache_types.h cdma_pusher.cpp cdma_pusher.h compatible_formats.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f86edaa3e..bdc0681b7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -208,7 +208,7 @@ public: [[nodiscard]] std::pair GetDrawIndirectBuffer(); - std::mutex mutex; + std::recursive_mutex mutex; Runtime& runtime; private: diff --git a/src/video_core/cache_types.h b/src/video_core/cache_types.h new file mode 100644 index 000000000..1a5db3c55 --- /dev/null +++ b/src/video_core/cache_types.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace VideoCommon { + +enum class CacheType : u32 { + None = 0, + TextureCache = 1 << 0, + QueryCache = 1 << 1, + BufferCache = 1 << 2, + ShaderCache = 1 << 3, + NoTextureCache = QueryCache | BufferCache | ShaderCache, + NoBufferCache = TextureCache | QueryCache | ShaderCache, + NoQueryCache = TextureCache | BufferCache | ShaderCache, + All = TextureCache | QueryCache | BufferCache | ShaderCache, +}; +DECLARE_ENUM_FLAG_OPERATORS(CacheType) + +} // namespace VideoCommon diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 4fcae9909..3a5cdeb39 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -356,8 +356,8 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si } template -void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, - std::size_t size) const { +void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) const { auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { std::memset(dest_buffer, 0, copy_amount); @@ -367,7 +367,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(dest_buffer, physical, copy_amount); @@ -377,7 +377,7 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); @@ -395,18 +395,19 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, MemoryOperation(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages); } -void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); +void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which) const { + ReadBlockImpl(gpu_src_addr, dest_buffer, size, which); } void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, const std::size_t size) const { - ReadBlockImpl(gpu_src_addr, dest_buffer, size); + ReadBlockImpl(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); } template -void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, - std::size_t size) { +void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + [[maybe_unused]] VideoCommon::CacheType which) { auto just_advance = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, std::size_t copy_amount) { src_buffer = static_cast(src_buffer) + copy_amount; @@ -415,7 +416,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } u8* physical = memory.GetPointer(cpu_addr_base); std::memcpy(physical, src_buffer, copy_amount); @@ -425,7 +426,7 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; if constexpr (is_safe) { - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); } if (!IsBigPageContinous(page_index)) [[unlikely]] { memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); @@ -443,16 +444,18 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe MemoryOperation(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages); } -void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); +void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which) { + WriteBlockImpl(gpu_dest_addr, src_buffer, size, which); } void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) { - WriteBlockImpl(gpu_dest_addr, src_buffer, size); + WriteBlockImpl(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); } -void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -460,12 +463,12 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->FlushRegion(cpu_addr_base, copy_amount); + rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); }; auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -475,7 +478,8 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); } -bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { +bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { bool result = false; auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, @@ -484,13 +488,13 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); + result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); return result; }; auto check_short_pages = [&](std::size_t page_index, std::size_t offset, @@ -547,7 +551,8 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) co return kind_map.GetContinousSizeFrom(gpu_addr); } -void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { +void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which) const { auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, [[maybe_unused]] std::size_t copy_amount) {}; @@ -555,12 +560,12 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(page_table[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { const VAddr cpu_addr_base = (static_cast(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; - rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); + rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); }; auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { @@ -570,14 +575,15 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { MemoryOperation(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages); } -void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { +void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, + VideoCommon::CacheType which) { std::vector tmp_buffer(size); - ReadBlock(gpu_src_addr, tmp_buffer.data(), size); + ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); // The output block must be flushed in case it has data modified from the GPU. // Fixes NPC geometry in Zombie Panic in Wonderland DX - FlushRegion(gpu_dest_addr, size); - WriteBlock(gpu_dest_addr, tmp_buffer.data(), size); + FlushRegion(gpu_dest_addr, size, which); + WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 50043a8ae..828e13439 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -12,6 +12,7 @@ #include "common/multi_level_page_table.h" #include "common/range_map.h" #include "common/virtual_buffer.h" +#include "video_core/cache_types.h" #include "video_core/pte_kind.h" namespace VideoCore { @@ -60,9 +61,12 @@ public: * in the Host Memory counterpart. Note: This functions cause Host GPU Memory * Flushes and Invalidations, respectively to each operation. */ - void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; - void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); - void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size); + void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; + void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All); + void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All); /** * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and @@ -105,11 +109,14 @@ public: GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); void Unmap(GPUVAddr gpu_addr, std::size_t size); - void FlushRegion(GPUVAddr gpu_addr, size_t size) const; + void FlushRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; - void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const; + void InvalidateRegion(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; - bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const; + bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) const; size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const; @@ -128,10 +135,12 @@ private: FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; template - void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; + void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, + VideoCommon::CacheType which) const; template - void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); + void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, + VideoCommon::CacheType which); template [[nodiscard]] std::size_t PageEntryIndex(GPUVAddr gpu_addr) const { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 641b95c7c..6d8d2b666 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" #include "common/polyfill_thread.h" +#include "video_core/cache_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" @@ -83,13 +84,16 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(VAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Check if the the specified memory area requires flushing to CPU Memory. - virtual bool MustFlushRegion(VAddr addr, u64 size) = 0; + virtual bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify rasterizer that any caches of the specified region are desync with guest virtual void OnCPUWrite(VAddr addr, u64 size) = 0; @@ -105,7 +109,8 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion( + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; /// Notify the host renderer to wait for previous primitive and compute operations. virtual void WaitForIdle() = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 9734d84bc..2c11345d7 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -39,11 +39,11 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr u32 size) {} void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} void RasterizerNull::FlushAll() {} -void RasterizerNull::FlushRegion(VAddr addr, u64 size) {} -bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size) { +void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} +bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { return false; } -void RasterizerNull::InvalidateRegion(VAddr addr, u64 size) {} +void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} void RasterizerNull::InvalidateGPUCache() {} void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} @@ -61,7 +61,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { } void RasterizerNull::SignalReference() {} void RasterizerNull::ReleaseFences() {} -void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size) {} +void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} void RasterizerNull::WaitForIdle() {} void RasterizerNull::FragmentBarrier() {} void RasterizerNull::TiledCacheBarrier() {} diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index ecf77ba42..2112aa70e 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -38,9 +38,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -50,7 +53,8 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0807d0b88..d58dcedea 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -352,46 +352,60 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - query_cache.FlushRegion(addr, size); -} - -bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - if (!Settings::IsGPULevelHigh()) { - return buffer_cache.IsRegionGpuModified(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.FlushRegion(addr, size); } - return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.IsRegionGpuModified(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.IsRegionGpuModified(addr, size)) { + return true; + } + } + if (!Settings::IsGPULevelHigh()) { + return false; + } + if (bool(which & VideoCommon::CacheType::TextureCache)) { + std::scoped_lock lock{texture_cache.mutex}; + return texture_cache.IsRegionGpuModified(addr, size); + } + return false; +} + +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - { + if (bool(which & VideoCommon::CacheType::BufferCache)) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - shader_cache.InvalidateRegion(addr, size); - query_cache.InvalidateRegion(addr, size); + if (bool(which & VideoCommon::CacheType::ShaderCache)) { + shader_cache.InvalidateRegion(addr, size); + } + if (bool(which & VideoCommon::CacheType::QueryCache)) { + query_cache.InvalidateRegion(addr, size); + } } void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { @@ -458,11 +472,12 @@ void RasterizerOpenGL::ReleaseFences() { fence_manager.WaitPendingFences(); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which) { if (Settings::IsGPULevelExtreme()) { - FlushRegion(addr, size); + FlushRegion(addr, size, which); } - InvalidateRegion(addr, size); + InvalidateRegion(addr, size, which); } void RasterizerOpenGL::WaitForIdle() { @@ -531,7 +546,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si } gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { - std::unique_lock lock{buffer_cache.mutex}; + std::unique_lock lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { buffer_cache.WriteMemory(*cpu_addr, copy_size); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index efd19f880..94e65d64b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -77,9 +77,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -89,7 +92,9 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 143af93c5..463c49f9c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -423,41 +423,58 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.DownloadMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.DownloadMemory(addr, size); } - query_cache.FlushRegion(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.FlushRegion(addr, size); + } } -bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { - std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - return texture_cache.IsRegionGpuModified(addr, size) || - buffer_cache.IsRegionGpuModified(addr, size); +bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { + std::scoped_lock lock{buffer_cache.mutex}; + if (buffer_cache.IsRegionGpuModified(addr, size)) { + return true; + } + } + if (!Settings::IsGPULevelHigh()) { + return false; + } + if (bool(which & VideoCommon::CacheType::TextureCache)) { + std::scoped_lock lock{texture_cache.mutex}; + return texture_cache.IsRegionGpuModified(addr, size); + } + return false; } -void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { if (addr == 0 || size == 0) { return; } - { + if (bool(which & VideoCommon::CacheType::TextureCache)) { std::scoped_lock lock{texture_cache.mutex}; texture_cache.WriteMemory(addr, size); } - { + if ((bool(which & VideoCommon::CacheType::BufferCache))) { std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.WriteMemory(addr, size); } - pipeline_cache.InvalidateRegion(addr, size); - query_cache.InvalidateRegion(addr, size); + if ((bool(which & VideoCommon::CacheType::QueryCache))) { + query_cache.InvalidateRegion(addr, size); + } + if ((bool(which & VideoCommon::CacheType::ShaderCache))) { + pipeline_cache.InvalidateRegion(addr, size); + } } void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { @@ -522,11 +539,12 @@ void RasterizerVulkan::ReleaseFences() { fence_manager.WaitPendingFences(); } -void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which) { if (Settings::IsGPULevelExtreme()) { - FlushRegion(addr, size); + FlushRegion(addr, size, which); } - InvalidateRegion(addr, size); + InvalidateRegion(addr, size, which); } void RasterizerVulkan::WaitForIdle() { @@ -602,7 +620,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si } gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size); { - std::unique_lock lock{buffer_cache.mutex}; + std::unique_lock lock{buffer_cache.mutex}; if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { buffer_cache.WriteMemory(*cpu_addr, copy_size); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 839de6b26..82b28a54a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -73,9 +73,12 @@ public: void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - bool MustFlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + bool MustFlushRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; + void InvalidateRegion(VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void OnCPUWrite(VAddr addr, u64 size) override; void InvalidateGPUCache() override; void UnmapMemory(VAddr addr, u64 size) override; @@ -85,7 +88,9 @@ public: void SignalSyncPoint(u32 value) override; void SignalReference() override; void ReleaseFences() override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion( + VAddr addr, u64 size, + VideoCommon::CacheType which = VideoCommon::CacheType::All) override; void WaitForIdle() override; void FragmentBarrier() override; void TiledCacheBarrier() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 27c82cd20..7fe451b5a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -740,7 +740,8 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(), + VideoCommon::CacheType::NoTextureCache); const auto uploads = FullUploadSwizzles(image.info); runtime.AccelerateImageUpload(image, staging, uploads); return; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4fd677a80..6b2898705 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -203,7 +203,7 @@ public: /// Create channel state. void CreateChannel(Tegra::Control::ChannelState& channel) final override; - std::mutex mutex; + std::recursive_mutex mutex; private: /// Iterate over all page indices in a range