From 18637766efd1ff9a0c22967553983cfda69c96ca Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow
Date: Thu, 17 Nov 2022 16:36:53 +0100
Subject: [PATCH] MacroHLE: Reduce massive calculations on sizing estimation.

---
 src/common/CMakeLists.txt             |   1 +
 src/common/range_map.h                | 139 ++++++++++++++++++++++++++
 src/tests/CMakeLists.txt              |   1 +
 src/tests/common/range_map.cpp        |  70 +++++++++++++
 src/video_core/dma_pusher.cpp         |   3 +-
 src/video_core/engines/maxwell_3d.cpp |  15 +++
 src/video_core/engines/maxwell_3d.h   |   2 +
 src/video_core/memory_manager.cpp     |  91 ++---------------
 src/video_core/memory_manager.h       |  11 +-
 9 files changed, 238 insertions(+), 95 deletions(-)
 create mode 100644 src/common/range_map.h
 create mode 100644 src/tests/common/range_map.cpp

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index eb05e46a8..45332cf95 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -97,6 +97,7 @@ add_library(common STATIC
     point.h
     precompiled_headers.h
     quaternion.h
+    range_map.h
     reader_writer_queue.h
     ring_buffer.h
     ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp
diff --git a/src/common/range_map.h b/src/common/range_map.h
new file mode 100644
index 000000000..993e21643
--- /dev/null
+++ b/src/common/range_map.h
@@ -0,0 +1,139 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <limits>
+#include <map>
+#include <type_traits>
+
+#include "common/common_types.h"
+
+namespace Common {
+
+template <typename KeyTBase, typename ValueT>
+class RangeMap {
+private:
+    using KeyT = std::conditional_t<std::is_signed_v<KeyTBase>, KeyTBase,
+                                    std::make_signed_t<KeyTBase>>;
+
+public:
+    explicit RangeMap(ValueT null_value_) : null_value{null_value_} {
+        container.emplace(std::numeric_limits<KeyT>::min(), null_value);
+    };
+    ~RangeMap() = default;
+
+    void Map(KeyTBase address, KeyTBase address_end, ValueT value) {
+        KeyT new_address = static_cast<KeyT>(address);
+        KeyT new_address_end = static_cast<KeyT>(address_end);
+        if (new_address < 0) {
+            new_address = 0;
+        }
+        if (new_address_end < 0) {
+            new_address_end = 0;
+        }
+        InternalMap(new_address, new_address_end, value);
+    }
+
+    void Unmap(KeyTBase address, KeyTBase address_end) {
+        Map(address, address_end, null_value);
+    }
+
+    [[nodiscard]] size_t GetContinousSizeFrom(KeyTBase address) const {
+        const KeyT new_address = static_cast<KeyT>(address);
+        if (new_address < 0) {
+            return 0;
+        }
+        return ContinousSizeInternal(new_address);
+    }
+
+    [[nodiscard]] ValueT GetValueAt(KeyT address) const {
+        const KeyT new_address = static_cast<KeyT>(address);
+        if (new_address < 0) {
+            return null_value;
+        }
+        return GetValueInternal(new_address);
+    }
+
+private:
+    using MapType = std::map<KeyT, ValueT>;
+    using IteratorType = MapType::iterator;
+    using ConstIteratorType = MapType::const_iterator;
+
+    size_t ContinousSizeInternal(KeyT address) const {
+        const auto it = GetFirstElemnentBeforeOrOn(address);
+        if (it == container.end() || it->second == null_value) {
+            return 0;
+        }
+        const auto it_end = std::next(it);
+        if (it_end == container.end()) {
+            return std::numeric_limits<KeyT>::max() - address;
+        }
+        return it_end->first - address;
+    }
+
+    ValueT GetValueInternal(KeyT address) const {
+        const auto it = GetFirstElemnentBeforeOrOn(address);
+        if (it == container.end()) {
+            return null_value;
+        }
+        return it->second;
+    }
+
+    ConstIteratorType GetFirstElemnentBeforeOrOn(KeyT address) const {
+        auto it = container.lower_bound(address);
+        if (it == container.begin()) {
+            return it;
+        }
+        if (it != container.end() && (it->first == address)) {
+            return it;
+        }
+        --it;
+        return it;
+    }
+
+    ValueT GetFirstValueWithin(KeyT address) {
+        auto it = container.lower_bound(address);
+        if (it == container.begin()) {
+            return it->second;
+        }
+        if (it == container.end()) [[unlikely]] { // this would be a bug
+            return null_value;
+        }
+        --it;
+        return it->second;
+    }
+
+    ValueT GetLastValueWithin(KeyT address) {
+        auto it = container.upper_bound(address);
+        if (it == container.end()) {
+            return null_value;
+        }
+        if (it == container.begin()) [[unlikely]] { // this would be a bug
+            return it->second;
+        }
+        --it;
+        return it->second;
+    }
+
+    void InternalMap(KeyT address, KeyT address_end, ValueT value) {
+        const bool must_add_start = GetFirstValueWithin(address) != value;
+        const ValueT last_value = GetLastValueWithin(address_end);
+        const bool must_add_end = last_value != value;
+        auto it = container.lower_bound(address);
+        const auto it_end = container.upper_bound(address_end);
+        while (it != it_end) {
+            it = container.erase(it);
+        }
+        if (must_add_start) {
+            container.emplace(address, value);
+        }
+        if (must_add_end) {
+            container.emplace(address_end, last_value);
+        }
+    }
+
+    ValueT null_value;
+    MapType container;
+};
+
+} // namespace Common
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 6a4022e45..9b65e79cb 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -7,6 +7,7 @@ add_executable(tests
     common/fibers.cpp
     common/host_memory.cpp
     common/param_package.cpp
+    common/range_map.cpp
     common/ring_buffer.cpp
     common/scratch_buffer.cpp
     common/unique_function.cpp
diff --git a/src/tests/common/range_map.cpp b/src/tests/common/range_map.cpp
new file mode 100644
index 000000000..5a4630a38
--- /dev/null
+++ b/src/tests/common/range_map.cpp
@@ -0,0 +1,70 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <stdexcept>
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "common/range_map.h"
+
+enum class MappedEnum : u32 {
+    Invalid = 0,
+    Valid_1 = 1,
+    Valid_2 = 2,
+    Valid_3 = 3,
+};
+
+TEST_CASE("Range Map: Setup", "[video_core]") {
+    Common::RangeMap<u64, MappedEnum> my_map(MappedEnum::Invalid);
+    my_map.Map(3000, 3500, MappedEnum::Valid_1);
+    my_map.Unmap(3200, 3600);
+    my_map.Map(4000, 4500, MappedEnum::Valid_2);
+    my_map.Map(4200, 4400, MappedEnum::Valid_2);
+    my_map.Map(4200, 4400, MappedEnum::Valid_1);
+    REQUIRE(my_map.GetContinousSizeFrom(4200) == 200);
+    REQUIRE(my_map.GetContinousSizeFrom(3000) == 200);
+    REQUIRE(my_map.GetContinousSizeFrom(2900) == 0);
+
+    REQUIRE(my_map.GetValueAt(2900) == MappedEnum::Invalid);
+    REQUIRE(my_map.GetValueAt(3100) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(3000) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(3200) == MappedEnum::Invalid);
+
+    REQUIRE(my_map.GetValueAt(4199) == MappedEnum::Valid_2);
+    REQUIRE(my_map.GetValueAt(4200) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(4400) == MappedEnum::Valid_2);
+    REQUIRE(my_map.GetValueAt(4500) == MappedEnum::Invalid);
+    REQUIRE(my_map.GetValueAt(4600) == MappedEnum::Invalid);
+
+    my_map.Unmap(0, 6000);
+    for (u64 address = 0; address < 10000; address += 1000) {
+        REQUIRE(my_map.GetContinousSizeFrom(address) == 0);
+    }
+
+    my_map.Map(1000, 3000, MappedEnum::Valid_1);
+    my_map.Map(4000, 5000, MappedEnum::Valid_1);
+    my_map.Map(2500, 4100, MappedEnum::Valid_1);
+    REQUIRE(my_map.GetContinousSizeFrom(1000) == 4000);
+
+    my_map.Map(1000, 3000, MappedEnum::Valid_1);
+    my_map.Map(4000, 5000, MappedEnum::Valid_2);
+    my_map.Map(2500, 4100, MappedEnum::Valid_3);
+    REQUIRE(my_map.GetContinousSizeFrom(1000) == 1500);
+    REQUIRE(my_map.GetContinousSizeFrom(2500) == 1600);
+    REQUIRE(my_map.GetContinousSizeFrom(4100) == 900);
+    REQUIRE(my_map.GetValueAt(900) == MappedEnum::Invalid);
+    REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(2500) == MappedEnum::Valid_3);
+    REQUIRE(my_map.GetValueAt(4100) == MappedEnum::Valid_2);
+    REQUIRE(my_map.GetValueAt(5000) == MappedEnum::Invalid);
+
+    my_map.Map(2000, 6000, MappedEnum::Valid_3);
+    REQUIRE(my_map.GetContinousSizeFrom(1000) == 1000);
+    REQUIRE(my_map.GetContinousSizeFrom(3000) == 3000);
+    REQUIRE(my_map.GetValueAt(1000) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(1999) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(1500) == MappedEnum::Valid_1);
+    REQUIRE(my_map.GetValueAt(2001) == MappedEnum::Valid_3);
+    REQUIRE(my_map.GetValueAt(5999) == MappedEnum::Valid_3);
+    REQUIRE(my_map.GetValueAt(6000) == MappedEnum::Invalid);
+}
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 5ad40abaa..7a82355da 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -75,7 +75,8 @@ bool DmaPusher::Step() {
 
         // Push buffer non-empty, read a word
         command_headers.resize_destructive(command_list_header.size);
-        if (Settings::IsGPULevelExtreme()) {
+        constexpr u32 MacroRegistersStart = 0xE00;
+        if (dma_state.method < MacroRegistersStart) {
             memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
                                      command_list_header.size * sizeof(u32));
         } else {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 50d8a94b1..a9fd6d960 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -157,6 +157,21 @@ void Maxwell3D::RefreshParameters() {
     }
 }
 
+bool Maxwell3D::AnyParametersDirty() {
+    size_t current_index = 0;
+    for (auto& segment : macro_segments) {
+        if (segment.first == 0) {
+            current_index += segment.second;
+            continue;
+        }
+        if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) {
+            return true;
+        }
+        current_index += segment.second;
+    }
+    return false;
+}
+
 u32 Maxwell3D::GetMaxCurrentVertices() {
     u32 num_vertices = 0;
     for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 397e88f67..cd996413c 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3092,6 +3092,8 @@ public:
 
     void RefreshParameters();
 
+    bool AnyParametersDirty();
+
     u32 GetMaxCurrentVertices();
 
     size_t EstimateIndexBufferSize();
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 11e7d225e..4fcae9909 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -25,7 +25,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
       address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
                                            page_bits != big_page_bits ? page_bits : 0},
-      unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
+      kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
+                                      1, std::memory_order_acq_rel)} {
     address_space_size = 1ULL << address_space_bits;
     page_size = 1ULL << page_bits;
     page_mask = page_size - 1ULL;
@@ -41,11 +42,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     big_entries.resize(big_page_table_size / 32, 0);
     big_page_table_cpu.resize(big_page_table_size);
     big_page_continous.resize(big_page_table_size / continous_bits, 0);
-    std::array<PTEKind, 32> kind_valus;
-    kind_valus.fill(PTEKind::INVALID);
-    big_kinds.resize(big_page_table_size / 32, kind_valus);
     entries.resize(page_table_size / 32, 0);
-    kinds.resize(page_table_size / 32, kind_valus);
 }
 
 MemoryManager::~MemoryManager() = default;
@@ -83,38 +80,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
 }
 
 PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
-    auto entry = GetEntry<true>(gpu_addr);
-    if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] {
-        return GetKind<true>(gpu_addr);
-    } else {
-        return GetKind<false>(gpu_addr);
-    }
-}
-
-template <bool is_big_page>
-PTEKind MemoryManager::GetKind(size_t position) const {
-    if constexpr (is_big_page) {
-        position = position >> big_page_bits;
-        const size_t sub_index = position % 32;
-        return big_kinds[position / 32][sub_index];
-    } else {
-        position = position >> page_bits;
-        const size_t sub_index = position % 32;
-        return kinds[position / 32][sub_index];
-    }
-}
-
-template <bool is_big_page>
-void MemoryManager::SetKind(size_t position, PTEKind kind) {
-    if constexpr (is_big_page) {
-        position = position >> big_page_bits;
-        const size_t sub_index = position % 32;
-        big_kinds[position / 32][sub_index] = kind;
-    } else {
-        position = position >> page_bits;
-        const size_t sub_index = position % 32;
-        kinds[position / 32][sub_index] = kind;
-    }
+    return kind_map.GetValueAt(gpu_addr);
 }
 
 inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
@@ -141,7 +107,6 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
         SetEntry<false>(current_gpu_addr, entry_type);
-        SetKind<false>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
@@ -153,6 +118,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
         }
         remaining_size -= page_size;
     }
+    kind_map.Map(gpu_addr, gpu_addr + size, kind);
     return gpu_addr;
 }
 
@@ -164,7 +130,6 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
         SetEntry<true>(current_gpu_addr, entry_type);
-        SetKind<true>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
         }
@@ -193,6 +158,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
         }
         remaining_size -= big_page_size;
     }
+    kind_map.Map(gpu_addr, gpu_addr + size, kind);
     return gpu_addr;
 }
 
@@ -578,52 +544,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
 }
 
 size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
-    PTEKind base_kind = GetPageKind(gpu_addr);
-    if (base_kind == PTEKind::INVALID) {
-        return 0;
-    }
-    size_t range_so_far = 0;
-    bool result{false};
-    auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset,
-                    std::size_t copy_amount) {
-        result = true;
-        return true;
-    };
-    auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
-        PTEKind base_kind_other = GetKind<false>((page_index << page_bits) + offset);
-        if (base_kind != base_kind_other) {
-            result = true;
-            return true;
-        }
-        range_so_far += copy_amount;
-        if (range_so_far >= max_size) {
-            result = true;
-            return true;
-        }
-        return false;
-    };
-    auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
-        PTEKind base_kind_other = GetKind<true>((page_index << big_page_bits) + offset);
-        if (base_kind != base_kind_other) {
-            result = true;
-            return true;
-        }
-        range_so_far += copy_amount;
-        if (range_so_far >= max_size) {
-            result = true;
-            return true;
-        }
-        return false;
-    };
-    auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
-                                 std::size_t copy_amount) {
-        GPUVAddr base = (page_index << big_page_bits) + offset;
-        MemoryOperation<false>(base, copy_amount, short_check, fail, fail);
-        return result;
-    };
-    MemoryOperation<true>(gpu_addr, address_space_size - gpu_addr, big_check, fail,
-                          check_short_pages);
-    return range_so_far;
+    return kind_map.GetContinousSizeFrom(gpu_addr);
 }
 
 void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index ca22520d7..50043a8ae 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,7 @@
 
 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
+#include "common/range_map.h"
 #include "common/virtual_buffer.h"
 #include "video_core/pte_kind.h"
 
@@ -186,16 +187,8 @@ private:
     template <bool is_big_page>
     inline void SetEntry(size_t position, EntryType entry);
 
-    std::vector<std::array<PTEKind, 32>> kinds;
-    std::vector<std::array<PTEKind, 32>> big_kinds;
-
-    template <bool is_big_page>
-    inline PTEKind GetKind(size_t position) const;
-
-    template <bool is_big_page>
-    inline void SetKind(size_t position, PTEKind kind);
-
     Common::MultiLevelPageTable<u32> page_table;
+    Common::RangeMap<GPUVAddr, PTEKind> kind_map;
 
     Common::VirtualBuffer<u32> big_page_table_cpu;
     std::vector<u64> big_page_continous;
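Note for reviewers (not part of the patch): the sketch below is a minimal, self-contained illustration of the idea the patch relies on. PTE kinds are stored once per run of identically-kinded pages in an ordered map keyed by GPU virtual address, with a sentinel entry so every query lands inside some run, so the layout-size query becomes a single ordered-map lookup instead of the old page-by-page walk in GetMemoryLayoutSize. The PTEKind values, the addresses, and the layout_size helper are made up for illustration and do not appear in the patch.

#include <cstdint>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>

// Simplified stand-ins for the real types (PTEKind in yuzu is a much larger enum).
enum class PTEKind : std::uint32_t { INVALID, PITCH };
using GPUVAddr = std::uint64_t;

int main() {
    // Each entry marks where a run of pages with the same kind starts; the run ends
    // where the next entry begins. The sentinel at address 0 plays the same role as
    // the numeric_limits<KeyT>::min() entry inserted by Common::RangeMap's constructor.
    std::map<GPUVAddr, PTEKind> kinds{{0, PTEKind::INVALID}};

    // Map [0x10000, 0x30000) as PITCH: start a run, then restore INVALID at its end.
    kinds.insert_or_assign(0x10000, PTEKind::PITCH);
    kinds.insert_or_assign(0x30000, PTEKind::INVALID);

    // "Continuous size from address": one ordered-map lookup instead of walking
    // every small/big page up to max_size.
    const auto layout_size = [&](GPUVAddr addr) -> std::uint64_t {
        const auto next = kinds.upper_bound(addr); // first run starting after addr
        const auto run = std::prev(next);          // run containing addr (sentinel guarantees it exists)
        if (run->second == PTEKind::INVALID) {
            return 0;
        }
        const GPUVAddr end =
            next != kinds.end() ? next->first : std::numeric_limits<GPUVAddr>::max();
        return end - addr;
    };

    std::cout << std::hex;
    std::cout << layout_size(0x10000) << '\n'; // 20000
    std::cout << layout_size(0x20000) << '\n'; // 10000
    std::cout << layout_size(0x40000) << '\n'; // 0 (unmapped / INVALID)
}

Common::RangeMap goes one step further than this sketch: InternalMap drops redundant boundaries when neighbouring ranges already carry the same value, so repeated overlapping Map/Unmap calls keep the container small and GetContinousSizeFrom stays cheap.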