From 090bc588e5498c6a2cea136d28bfe43354aa2096 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 22 Feb 2023 00:26:07 -0500 Subject: [PATCH] texture_cache: Add async texture decoding --- src/common/scratch_buffer.h | 1 + src/video_core/texture_cache/image_base.h | 3 + src/video_core/texture_cache/texture_cache.h | 69 +++++++++++++++++++ .../texture_cache/texture_cache_base.h | 16 +++++ 4 files changed, 89 insertions(+) diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 1245a5086..26d4e76dc 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -23,6 +23,7 @@ public: buffer{Common::make_unique_for_overwrite(initial_capacity)} {} ~ScratchBuffer() = default; + ScratchBuffer(ScratchBuffer&&) = default; /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will remain intact. diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 620565684..e8fa592d2 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -38,6 +38,9 @@ enum class ImageFlagBits : u32 { Rescaled = 1 << 13, CheckingRescalable = 1 << 14, IsRescalable = 1 << 15, + + AsynchronousDecode = 1 << 16, + IsDecoding = 1 << 17, ///< Is currently being decoded asynchornously. }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e2cbb0b0..4159bc796 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -85,6 +85,11 @@ void TextureCache

::RunGarbageCollector() { } --num_iterations; auto& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::IsDecoding)) { + // This image is still being decoded, deleting it will invalidate the slot + // used by the async decoder thread. + return false; + } const bool must_download = image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); if (!high_priority_mode && @@ -133,6 +138,8 @@ void TextureCache

::TickFrame() { sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); + TickAsyncDecode(); + runtime.TickFrame(); critical_gc = 0; ++frame_tick; @@ -777,6 +784,10 @@ void TextureCache

::RefreshContents(Image& image, ImageId image_id) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); return; } + if (True(image.flags & ImageFlagBits::AsynchronousDecode)) { + QueueAsyncDecode(image, image_id); + return; + } auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); UploadImageContents(image, staging); runtime.InsertUploadMemoryBarrier(); @@ -989,6 +1000,64 @@ u64 TextureCache

::GetScaledImageSizeBytes(const ImageBase& image) { return fitted_size; } +template +void TextureCache

::QueueAsyncDecode(Image& image, ImageId image_id) { + UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted)); + + image.flags |= ImageFlagBits::IsDecoding; + auto decode = std::make_unique(); + auto* decode_ptr = decode.get(); + decode->image_id = image_id; + async_decodes.push_back(std::move(decode)); + + Common::ScratchBuffer local_unswizzle_data_buffer(image.unswizzled_size_bytes); + const size_t guest_size_bytes = image.guest_size_bytes; + swizzle_data_buffer.resize_destructive(guest_size_bytes); + gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, + local_unswizzle_data_buffer); + const size_t out_size = MapSizeBytes(image); + + auto func = [out_size, copies, info = image.info, + input = std::move(local_unswizzle_data_buffer), + async_decode = decode_ptr]() mutable { + async_decode->decoded_data.resize_destructive(out_size); + std::span copies_span{copies.data(), copies.size()}; + ConvertImage(input, info, async_decode->decoded_data, copies_span); + + // TODO: Do we need this lock? + std::unique_lock lock{async_decode->mutex}; + async_decode->copies = std::move(copies); + async_decode->complete = true; + }; + texture_decode_worker.QueueWork(std::move(func)); +} + +template +void TextureCache

::TickAsyncDecode() { + bool has_uploads{}; + auto i = async_decodes.begin(); + while (i != async_decodes.end()) { + auto* async_decode = i->get(); + std::unique_lock lock{async_decode->mutex}; + if (!async_decode->complete) { + ++i; + continue; + } + Image& image = slot_images[async_decode->image_id]; + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(), + async_decode->decoded_data.size()); + image.UploadMemory(staging, async_decode->copies); + image.flags &= ~ImageFlagBits::IsDecoding; + has_uploads = true; + i = async_decodes.erase(i); + } + if (has_uploads) { + runtime.InsertUploadMemoryBarrier(); + } +} + template bool TextureCache

::ScaleUp(Image& image) { const bool has_copy = image.HasScaled(); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 485eaabaa..013836933 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -18,6 +19,7 @@ #include "common/lru_cache.h" #include "common/polyfill_ranges.h" #include "common/scratch_buffer.h" +#include "common/thread_worker.h" #include "video_core/compatible_formats.h" #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" @@ -54,6 +56,14 @@ struct ImageViewInOut { ImageViewId id{}; }; +struct AsyncDecodeContext { + ImageId image_id; + Common::ScratchBuffer decoded_data; + std::vector copies; + std::mutex mutex; + std::atomic_bool complete; +}; + using TextureCacheGPUMap = std::unordered_map, Common::IdentityHash>; class TextureCacheChannelInfo : public ChannelInfo { @@ -377,6 +387,9 @@ private: bool ScaleDown(Image& image); u64 GetScaledImageSizeBytes(const ImageBase& image); + void QueueAsyncDecode(Image& image, ImageId image_id); + void TickAsyncDecode(); + Runtime& runtime; VideoCore::RasterizerInterface& rasterizer; @@ -430,6 +443,9 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; + + Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; + std::vector> async_decodes; }; } // namespace VideoCommon