From bab21e8cb3df9c06e3c0a37a8fc68fed676f5d6e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 11 Apr 2019 17:14:55 -0300 Subject: [PATCH 001/113] gl_texture_cache: Initial implementation --- src/video_core/CMakeLists.txt | 4 +- .../renderer_opengl/gl_rasterizer.cpp | 66 +-- .../renderer_opengl/gl_rasterizer.h | 4 +- .../renderer_opengl/gl_texture_cache.cpp | 514 ++++++++++++++++++ .../renderer_opengl/gl_texture_cache.h | 131 +++++ .../renderer_opengl/renderer_opengl.cpp | 1 + src/video_core/texture_cache.cpp | 37 +- src/video_core/texture_cache.h | 96 ++++ src/video_core/textures/decoders.cpp | 3 +- 9 files changed, 809 insertions(+), 47 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_texture_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_texture_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f8b67cbe1..64cff27a4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -45,8 +45,6 @@ add_library(video_core STATIC renderer_opengl/gl_global_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h - renderer_opengl/gl_rasterizer_cache.cpp - renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_resource_manager.h renderer_opengl/gl_sampler_cache.cpp @@ -67,6 +65,8 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_texture_cache.cpp + renderer_opengl/gl_texture_cache.h renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d77426067..cea268f1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -100,7 +100,7 @@ struct FramebufferCacheKey { 
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system, emu_window, device}, + : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); @@ -478,9 +478,9 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - Surface depth_surface; + CachedSurfaceView* depth_surface{}; if (using_depth_fb) { - depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); + depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); } UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -493,42 +493,43 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. for clearing a color buffer - Surface color_surface = - res_cache.GetColorBufferSurface(*single_color_target, preserve_contents); + CachedSurfaceView* color_surface{ + texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. - color_surface->MarkAsModified(true, res_cache); + color_surface->MarkAsModified(true); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; + // state.framebuffer_srgb.enabled |= + // color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; fbkey.color_attachments[0] = GL_COLOR_ATTACHMENT0 + static_cast(*single_color_target); - fbkey.colors[0] = color_surface != nullptr ? 
color_surface->Texture().handle : 0; + fbkey.colors[0] = color_surface != nullptr ? color_surface->GetTexture() : 0; } else { // Multiple color attachments are enabled for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); + CachedSurfaceView* color_surface{ + texture_cache.GetColorBufferSurface(index, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. - color_surface->MarkAsModified(true, res_cache); + color_surface->MarkAsModified(true); // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().srgb_conversion; + // state.framebuffer_srgb.enabled |= + // color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); - fbkey.colors[index] = - color_surface != nullptr ? color_surface->Texture().handle : 0; + fbkey.colors[index] = color_surface != nullptr ? color_surface->GetTexture() : 0; } fbkey.is_single_buffer = false; fbkey.colors_count = regs.rt_control.count; @@ -541,11 +542,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
- depth_surface->MarkAsModified(true, res_cache); + depth_surface->MarkAsModified(true); - fbkey.zeta = depth_surface->Texture().handle; - fbkey.stencil_enable = regs.stencil_enable && - depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; + fbkey.zeta = depth_surface->GetTexture(); + fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == + SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); @@ -704,9 +705,7 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); - res_cache.SignalPreDrawCall(); params.DispatchDraw(); - res_cache.SignalPostDrawCall(); accelerate_draw = AccelDraw::Disabled; } @@ -718,7 +717,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { if (!addr || !size) { return; } - res_cache.FlushRegion(addr, size); + // texture_cache.FlushRegion(addr, size); global_cache.FlushRegion(addr, size); } @@ -727,7 +726,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { if (!addr || !size) { return; } - res_cache.InvalidateRegion(addr, size); + texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); global_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); @@ -743,7 +742,8 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); - res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); + UNIMPLEMENTED(); + // texture_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; } @@ -755,7 +755,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; + const auto surface{ + 
texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; if (!surface) { return {}; } @@ -764,14 +765,14 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, const auto& params{surface->GetSurfaceParams()}; const auto& pixel_format{ VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.GetWidth() == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.GetHeight() == config.height, "Framebuffer height is different"); - if (params.pixel_format != pixel_format) { + if (params.GetPixelFormat() != pixel_format) { LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); } - screen_info.display_texture = surface->Texture().handle; + screen_info.display_texture = surface->GetTexture(); return true; } @@ -862,11 +863,10 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); - if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { - state.texture_units[current_bindpoint].texture = - surface->Texture(entry.IsArray()).handle; - surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); + if (const auto surface{texture_cache.GetTextureSurface(texture)}; surface) { + state.texture_units[current_bindpoint].texture = surface->GetTexture( + entry.GetType(), entry.IsArray(), texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); } else { // Can occur when texture addr is null or its memory is unmapped/invalid state.texture_units[current_bindpoint].texture = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 
f7671ff5d..921e9fc31 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,13 +24,13 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" namespace Core { @@ -181,7 +181,7 @@ private: const Device device; OpenGLState state; - RasterizerCacheOpenGL res_cache; + TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; SamplerCacheOpenGL sampler_cache; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp new file mode 100644 index 000000000..3a456995e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -0,0 +1,514 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "common/common_types.h" +#include "common/scope_exit.h" +#include "video_core/morton.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/texture_cache.h" +#include "video_core/textures/convert.h" +#include "video_core/textures/texture.h" + +namespace OpenGL { + +using Tegra::Texture::ConvertFromGuestToHost; +using Tegra::Texture::SwizzleSource; +using VideoCore::MortonSwizzleMode; + +namespace { + +struct FormatTuple { + GLint internal_format; + GLenum format; + GLenum type; + ComponentType component_type; + bool compressed; +}; + +constexpr std::array tex_format_tuples = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U + {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, + false}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, + false}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT1 + 
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // BC7U + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, + true}, // BC6H_UF16 + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, + true}, // BC6H_SF16 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 + {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F + {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F + {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F + {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F + {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U + {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI + {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I + {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // 
RG16S + {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, + false}, // RGBA8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 + // Compressed sRGB formats + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT1_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT23_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT45_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // BC7U_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB + + // Depth formats + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, + false}, // Z16 + + // DepthStencil formats + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, + false}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, + false}, // S8Z24 + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, + ComponentType::Float, false}, // Z32FS8 +}}; + +const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { + ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); + const auto& format{tex_format_tuples[static_cast(pixel_format)]}; + ASSERT(component_type == format.component_type); + return format; +} + +GLenum GetTextureTarget(const SurfaceParams& params) { + switch (params.GetTarget()) { + case SurfaceTarget::Texture1D: + return GL_TEXTURE_1D; + case SurfaceTarget::Texture2D: + return GL_TEXTURE_2D; + case SurfaceTarget::Texture3D: + return GL_TEXTURE_3D; + case SurfaceTarget::Texture1DArray: + return GL_TEXTURE_1D_ARRAY; + case SurfaceTarget::Texture2DArray: + return GL_TEXTURE_2D_ARRAY; + case SurfaceTarget::TextureCubemap: + return GL_TEXTURE_CUBE_MAP; + case SurfaceTarget::TextureCubeArray: + return GL_TEXTURE_CUBE_MAP_ARRAY; + } + UNREACHABLE(); + return {}; +} + +GLint GetSwizzleSource(SwizzleSource source) { + switch (source) { + case SwizzleSource::Zero: + return GL_ZERO; + case SwizzleSource::R: + return GL_RED; + case SwizzleSource::G: + return GL_GREEN; + case SwizzleSource::B: + return GL_BLUE; + case SwizzleSource::A: + return GL_ALPHA; + case SwizzleSource::OneInt: + case SwizzleSource::OneFloat: + return GL_ONE; + } + UNREACHABLE(); + return GL_NONE; +} + +void ApplyTextureDefaults(const 
SurfaceParams& params, GLuint texture) { + glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.GetNumLevels() - 1); + if (params.GetNumLevels() == 1) { + glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); + } +} + +OGLTexture CreateTexture(const SurfaceParams& params, GLenum internal_format) { + OGLTexture texture; + texture.Create(GetTextureTarget(params)); + + switch (params.GetTarget()) { + case SurfaceTarget::Texture1D: + glTextureStorage1D(texture.handle, params.GetNumLevels(), internal_format, + params.GetWidth()); + break; + case SurfaceTarget::Texture2D: + case SurfaceTarget::TextureCubemap: + glTextureStorage2D(texture.handle, params.GetNumLevels(), internal_format, + params.GetWidth(), params.GetHeight()); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTextureStorage3D(texture.handle, params.GetNumLevels(), internal_format, + params.GetWidth(), params.GetHeight(), params.GetDepth()); + break; + default: + UNREACHABLE(); + } + + ApplyTextureDefaults(params, texture.handle); + + return texture; +} + +void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level) { + const u32 width{params.GetMipWidth(level)}; + const u32 height{params.GetMipHeight(level)}; + const u32 block_height{params.GetMipBlockHeight(level)}; + const u32 block_depth{params.GetMipBlockDepth(level)}; + + std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; + if (params.IsLayered()) { + std::size_t host_offset{0}; + const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t host_stride = params.GetHostLayerSize(level); + for (u32 layer = 0; layer < 
params.GetNumLayers(); layer++) { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + 1, params.GetTileWidthSpacing(), buffer + host_offset, + memory + guest_offset); + guest_offset += guest_stride; + host_offset += host_stride; + } + } else { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + memory + guest_offset); + } +} + +} // Anonymous namespace + +CachedSurface::CachedSurface(const SurfaceParams& params) + : VideoCommon::SurfaceBaseContextless{params} { + const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; + internal_format = tuple.internal_format; + format = tuple.format; + type = tuple.type; + is_compressed = tuple.compressed; + texture = CreateTexture(params, internal_format); + staging_buffer.resize(params.GetHostSizeInBytes()); +} + +CachedSurface::~CachedSurface() = default; + +void CachedSurface::LoadBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", + params.GetBlockWidth(), static_cast(params.GetTarget())); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer{staging_buffer.data() + params.GetHostMipmapLevelOffset(level)}; + SwizzleFunc(MortonSwizzleMode::MortonToLinear, GetHostPtr(), params, buffer, level); + } + } else { + ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + const u32 block_width{VideoCore::Surface::GetDefaultBlockWidth(params.GetPixelFormat())}; + const u32 block_height{VideoCore::Surface::GetDefaultBlockHeight(params.GetPixelFormat())}; + const u32 width{(params.GetWidth() + block_width - 1) / block_width}; + const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 copy_size{width * bpp}; + if (params.GetPitch() == 
copy_size) { + std::memcpy(staging_buffer.data(), GetHostPtr(), params.GetHostSizeInBytes()); + } else { + const u8* start{GetHostPtr()}; + u8* write_to{staging_buffer.data()}; + for (u32 h = height; h > 0; --h) { + std::memcpy(write_to, start, copy_size); + start += params.GetPitch(); + write_to += copy_size; + } + } + } + + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + ConvertFromGuestToHost(staging_buffer.data() + params.GetHostMipmapLevelOffset(level), + params.GetPixelFormat(), params.GetMipWidth(level), + params.GetMipHeight(level), params.GetMipDepth(level), true, true); + } +} + +void CachedSurface::FlushBufferImpl() { + if (!IsModified()) { + return; + } + + // TODO(Rodrigo): Optimize alignment + glPixelStorei(GL_PACK_ALIGNMENT, 1); + SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); + + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); + if (is_compressed) { + glGetCompressedTextureImage( + texture.handle, level, static_cast(params.GetHostMipmapSize(level)), + staging_buffer.data() + params.GetHostMipmapLevelOffset(level)); + } else { + glGetTextureImage(texture.handle, level, format, type, + static_cast(params.GetHostMipmapSize(level)), + staging_buffer.data() + params.GetHostMipmapLevelOffset(level)); + } + } + + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", + params.GetBlockWidth()); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer = staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + } + } else { + UNIMPLEMENTED(); + /* + ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); + ASSERT(params.GetNumLevels() == 1); + + const u32 bpp{params.GetFormatBpp() / 8}; + const u32 copy_size{params.GetWidth() * bpp}; + if (params.GetPitch() == copy_size) { + 
std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + } else { + u8* start{host_ptr}; + const u8* read_to{staging_buffer.data()}; + for (u32 h = params.GetHeight(); h > 0; --h) { + std::memcpy(start, read_to, copy_size); + start += params.GetPitch(); + read_to += copy_size; + } + } + */ + } +} + +void CachedSurface::UploadTextureImpl() { + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + UploadTextureMipmap(level); + } +} + +// Uploads the staged data of a single mipmap level to the OpenGL texture, choosing the upload +// call from the surface target and from whether the format is compressed. +void CachedSurface::UploadTextureMipmap(u32 level) { + u8* buffer{staging_buffer.data() + params.GetHostMipmapLevelOffset(level)}; + + // TODO(Rodrigo): Optimize alignment + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); + SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); + + if (is_compressed) { + const auto image_size{static_cast(params.GetHostMipmapSize(level))}; + switch (params.GetTarget()) { + case SurfaceTarget::Texture2D: + glCompressedTextureSubImage2D(texture.handle, level, 0, 0, + static_cast(params.GetMipWidth(level)), + static_cast(params.GetMipHeight(level)), + internal_format, image_size, buffer); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, + static_cast(params.GetMipWidth(level)), + static_cast(params.GetMipHeight(level)), + static_cast(params.GetMipDepth(level)), + internal_format, image_size, buffer); + break; + case SurfaceTarget::TextureCubemap: { + const std::size_t layer_size{params.GetHostLayerSize(level)}; + for (std::size_t face = 0; face < params.GetDepth(); ++face) { + glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), + static_cast(params.GetMipWidth(level)), + static_cast(params.GetMipHeight(level)), 1, + internal_format, 
static_cast(layer_size), + buffer); + buffer += layer_size; + } + break; + } + default: + UNREACHABLE(); + } + } else { + switch (params.GetTarget()) { + case SurfaceTarget::Texture1D: + glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, + buffer); + break; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2D: + glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), + params.GetMipHeight(level), format, type, buffer); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTextureSubImage3D( + texture.handle, level, 0, 0, 0, static_cast(params.GetMipWidth(level)), + static_cast(params.GetMipHeight(level)), + static_cast(params.GetMipDepth(level)), format, type, buffer); + break; + case SurfaceTarget::TextureCubemap: + for (std::size_t face = 0; face < params.GetDepth(); ++face) { + glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), + params.GetMipWidth(level), params.GetMipHeight(level), 1, + format, type, buffer); + buffer += params.GetHostLayerSize(level); + } + break; + default: + UNREACHABLE(); + } + } +} + +std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { + return std::make_unique(*this, view_key); +} + +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) + : surface{surface}, key{key}, params{surface.GetSurfaceParams()} {} + +CachedSurfaceView::~CachedSurfaceView() = default; + +GLuint CachedSurfaceView::GetTexture() { + // TODO(Rodrigo): Remove this entry and attach the super texture to the framebuffer through + // legacy API (also dropping Intel driver issues). 
+ if (texture_view_2d.texture.handle == 0) { + texture_view_2d = CreateTextureView(GL_TEXTURE_2D); + } + return texture_view_2d.texture.handle; +} + +GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, + SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { + const auto [texture_view, target] = GetTextureView(texture_type, is_array); + if (texture_view.get().texture.handle == 0) { + texture_view.get() = CreateTextureView(target); + } + ApplySwizzle(texture_view, x_source, y_source, z_source, w_source); + return texture_view.get().texture.handle; +} + +void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_source, + SwizzleSource y_source, SwizzleSource z_source, + SwizzleSource w_source) { + const std::array swizzle = {x_source, y_source, z_source, w_source}; + if (swizzle == texture_view.swizzle) { + return; + } + const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), + GetSwizzleSource(z_source), + GetSwizzleSource(w_source)}; + glTextureParameteriv(texture_view.texture.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + texture_view.swizzle = swizzle; +} + +CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView(GLenum target) const { + TextureView texture_view; + glGenTextures(1, &texture_view.texture.handle); + + const GLuint handle{texture_view.texture.handle}; + const FormatTuple& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; + + glTextureView(handle, target, surface.texture.handle, tuple.internal_format, key.base_level, + key.num_levels, key.base_layer, key.num_layers); + ApplyTextureDefaults(params, handle); + + return texture_view; +} + +// Picks the cached TextureView member and the OpenGL target that correspond to the requested +// shader texture type and array-ness. +std::pair, GLenum> +CachedSurfaceView::GetTextureView(Tegra::Shader::TextureType texture_type, bool is_array) { + using Pair = std::pair, GLenum>; + switch (texture_type) { + case Tegra::Shader::TextureType::Texture1D: + 
return is_array ? Pair{texture_view_1d_array, GL_TEXTURE_1D_ARRAY} + : Pair{texture_view_1d, GL_TEXTURE_1D}; + case Tegra::Shader::TextureType::Texture2D: + return is_array ? Pair{texture_view_2d_array, GL_TEXTURE_2D_ARRAY} + : Pair{texture_view_2d, GL_TEXTURE_2D}; + case Tegra::Shader::TextureType::Texture3D: + ASSERT(!is_array); + return {texture_view_3d, GL_TEXTURE_3D}; + case Tegra::Shader::TextureType::TextureCube: + return is_array ? Pair{texture_view_cube_array, GL_TEXTURE_CUBE_MAP_ARRAY} + : Pair{texture_view_cube, GL_TEXTURE_CUBE_MAP}; + } + UNREACHABLE(); + // Keep a return after UNREACHABLE(), as GetTextureTarget and GetSwizzleSource do, so control + // never flows off the end of this value-returning function. + return {texture_view_2d, GL_TEXTURE_2D}; +} + +TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, + VideoCore::RasterizerInterface& rasterizer) + : TextureCacheBase{system, rasterizer} {} + +TextureCacheOpenGL::~TextureCacheOpenGL() = default; + +CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( + VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, bool preserve_contents, + const std::vector& overlaps) { + return nullptr; +} + +std::unique_ptr TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { + return std::make_unique(params); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h new file mode 100644 index 000000000..f0a524882 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -0,0 +1,131 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/texture_cache.h" + +namespace OpenGL { + +using VideoCommon::SurfaceParams; +using VideoCommon::ViewKey; +using VideoCore::Surface::ComponentType; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceType; + +class CachedSurfaceView; +class CachedSurface; + +using TextureCacheBase = VideoCommon::TextureCacheContextless; + +class CachedSurface final : public VideoCommon::SurfaceBaseContextless { + friend CachedSurfaceView; + +public: + explicit CachedSurface(const SurfaceParams& params); + ~CachedSurface(); + + void LoadBuffer(); + + GLuint GetTexture() const { + return texture.handle; + } + +protected: + std::unique_ptr CreateView(const ViewKey& view_key); + + void FlushBufferImpl(); + + void UploadTextureImpl(); + +private: + void UploadTextureMipmap(u32 level); + + GLenum internal_format{}; + GLenum format{}; + GLenum type{}; + bool is_compressed{}; + + OGLTexture texture; + + std::vector staging_buffer; + u8* host_ptr{}; +}; + +class CachedSurfaceView final { +public: + explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); + ~CachedSurfaceView(); + + GLuint GetTexture(); + + GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, + Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + void MarkAsModified(bool is_modified) { + surface.MarkAsModified(is_modified); + } + + const SurfaceParams& GetSurfaceParams() const { + return params; + } + +private: + struct TextureView { + OGLTexture texture; + std::array swizzle{ + Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, + Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A}; + }; + + void 
ApplySwizzle(TextureView& texture_view, Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + TextureView CreateTextureView(GLenum target) const; + + std::pair, GLenum> GetTextureView( + Tegra::Shader::TextureType texture_type, bool is_array); + + CachedSurface& surface; + const ViewKey key; + const SurfaceParams params; + + TextureView texture_view_1d; + TextureView texture_view_1d_array; + TextureView texture_view_2d; + TextureView texture_view_2d_array; + TextureView texture_view_3d; + TextureView texture_view_cube; + TextureView texture_view_cube_array; +}; + +class TextureCacheOpenGL final : public TextureCacheBase { +public: + explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + ~TextureCacheOpenGL(); + +protected: + CachedSurfaceView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector& overlaps); + + std::unique_ptr CreateSurface(const SurfaceParams& params); +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index aafd6f31b..710bf8303 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -460,6 +460,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum switch (severity) { case GL_DEBUG_SEVERITY_HIGH: LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message); + __debugbreak(); break; case GL_DEBUG_SEVERITY_MEDIUM: LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index e96eba7cc..c42365a82 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -163,7 +163,7 @@ u32 
SurfaceParams::GetMipBlockHeight(u32 level) const { return block_height; } const u32 height{GetMipHeight(level)}; - const u32 default_block_height{GetDefaultBlockHeight(pixel_format)}; + const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; u32 block_height = 16; while (block_height > 1 && blocks_in_y <= block_height * 4) { @@ -205,6 +205,10 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, true, false) * GetNumLayers(); +} + std::size_t SurfaceParams::GetGuestLayerSize() const { return GetInnerMemorySize(false, true, false); } @@ -213,6 +217,22 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, IsLayered(), false); } +u32 SurfaceParams::GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); +} + +u32 SurfaceParams::GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); +} + +u32 SurfaceParams::GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); +} + +u32 SurfaceParams::GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); +} + bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, @@ -257,7 +277,7 @@ void SurfaceParams::CalculateCachedValues() { // ASTC is uncompressed in software, in emulated as RGBA8 if (IsPixelFormatASTC(pixel_format)) { - host_size_in_bytes = width * height * depth * 4; + host_size_in_bytes = static_cast(width * height * depth) * 4ULL; } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } @@ -282,13 
+302,11 @@ void SurfaceParams::CalculateCachedValues() { std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; - const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; - const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; - const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)}; - const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)}; + const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; + const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; const u32 depth{layer_only ? 1U : GetMipDepth(level)}; - return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height, - depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); + return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, + GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, @@ -297,7 +315,7 @@ std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only for (u32 level = 0; level < num_levels; ++level) { size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); } - if (!as_host_size && is_tiled) { + if (is_tiled && !as_host_size) { size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; @@ -309,6 +327,7 @@ std::map> SurfaceParams::CreateViewOffsetMap() const { case SurfaceTarget::Texture1D: case SurfaceTarget::Texture2D: case SurfaceTarget::Texture3D: { + // TODO(Rodrigo): Add layer iterations for 3D textures constexpr u32 layer = 0; for (u32 level = 0; level < num_levels; ++level) { const std::size_t offset{GetGuestMipmapLevelOffset(level)}; diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h index 041551691..9fd5f074e 
100644 --- a/src/video_core/texture_cache.h +++ b/src/video_core/texture_cache.h @@ -20,6 +20,7 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" @@ -43,6 +44,10 @@ public: bool operator==(const HasheableSurfaceParams& rhs) const; + bool operator!=(const HasheableSurfaceParams& rhs) const { + return !operator==(rhs); + } + protected: // Avoid creation outside of a managed environment. HasheableSurfaceParams() = default; @@ -167,12 +172,27 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the size in bytes in host memory (linear) of a given mipmap level. + std::size_t GetHostMipmapSize(u32 level) const; + /// Returns the size of a layer in bytes in guest memory. std::size_t GetGuestLayerSize() const; /// Returns the size of a layer in bytes in host memory for a given mipmap level. std::size_t GetHostLayerSize(u32 level) const; + /// Returns the default block width. + u32 GetDefaultBlockWidth() const; + + /// Returns the default block height. + u32 GetDefaultBlockHeight() const; + + /// Returns the bits per pixel. + u32 GetBitsPerPixel() const; + + /// Returns the bytes per pixel. + u32 GetBytesPerPixel() const; + /// Returns true if another surface can be familiar with this. This is a loosely defined term /// that reflects the possibility of these two surface parameters potentially being part of a /// bigger superset. 
@@ -370,6 +390,7 @@ private: template class TextureCache { static_assert(std::is_trivially_copyable_v); + using ResultType = std::tuple; using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; @@ -583,4 +604,79 @@ private: std::unordered_map>> surface_reserve; }; +struct DummyExecutionContext {}; + +template +class TextureCacheContextless : protected TextureCache { + using Base = TextureCache; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + Base::InvalidateRegion(addr, size); + } + + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + return RemoveContext(Base::GetTextureSurface({}, config)); + } + + TView* GetDepthBufferSurface(bool preserve_contents) { + return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); + } + + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); + } + + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return RemoveContext(Base::GetFermiSurface({}, config)); + } + + TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + return Base::TryFindFramebufferSurface(host_ptr); + } + +protected: + explicit TextureCacheContextless(Core::System& system, + VideoCore::RasterizerInterface& rasterizer) + : TextureCache{system, rasterizer} {} + + virtual TView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, + const std::vector& overlaps) = 0; + +private: + std::tuple TryFastGetSurfaceView( + DummyExecutionContext, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector& overlaps) { + return {TryFastGetSurfaceView(cpu_addr, host_ptr, params, preserve_contents, overlaps), {}}; + } + + TView* RemoveContext(std::tuple return_value) { + const auto [view, exctx] = return_value; + return view; + } 
+}; + +template +class SurfaceBaseContextless : public SurfaceBase { +public: + DummyExecutionContext FlushBuffer(DummyExecutionContext) { + FlushBufferImpl(); + return {}; + } + + DummyExecutionContext UploadTexture(DummyExecutionContext) { + UploadTextureImpl(); + return {}; + } + +protected: + explicit SurfaceBaseContextless(const SurfaceParams& params) + : SurfaceBase{params} {} + + virtual void FlushBufferImpl() = 0; + + virtual void UploadTextureImpl() = 0; +}; + } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..664ed4b56 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -359,7 +359,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); - return aligned_width * aligned_height * aligned_depth; + const u32 size = aligned_width * aligned_height * aligned_depth; + return size; } else { return width * height * depth * bytes_per_pixel; } From fb94871791f78703737125cd2e5a13db8b7d1059 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Apr 2019 01:44:16 -0300 Subject: [PATCH 002/113] gl_texture_cache: Add fast copy path --- .../renderer_opengl/gl_texture_cache.cpp | 51 +++++++++++++++++-- .../renderer_opengl/gl_texture_cache.h | 9 ++++ src/video_core/texture_cache.cpp | 4 +- src/video_core/textures/decoders.cpp | 3 +- 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3a456995e..00f9ab92f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -177,9 +177,9 @@ void 
ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { } } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum internal_format) { +OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format) { OGLTexture texture; - texture.Create(GetTextureTarget(params)); + texture.Create(target); switch (params.GetTarget()) { case SurfaceTarget::Texture1D: @@ -241,7 +241,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) format = tuple.format; type = tuple.type; is_compressed = tuple.compressed; - texture = CreateTexture(params, internal_format); + target = GetTextureTarget(params); + texture = CreateTexture(params, target, internal_format); staging_buffer.resize(params.GetHostSizeInBytes()); } @@ -504,9 +505,53 @@ TextureCacheOpenGL::~TextureCacheOpenGL() = default; CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, bool preserve_contents, const std::vector& overlaps) { + if (overlaps.size() > 1) { + return nullptr; + } + + const auto& old_surface{overlaps[0]}; + const auto& old_params{old_surface->GetSurfaceParams()}; + const auto& new_params{params}; + + if (old_params.GetTarget() == new_params.GetTarget() && + old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && + old_params.GetNumLevels() == new_params.GetNumLevels() && + old_params.GetPixelFormat() == new_params.GetPixelFormat()) { + return SurfaceCopy(cpu_addr, host_ptr, new_params, old_surface, old_params); + } + return nullptr; } +CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, + CachedSurface* old_surface, + const SurfaceParams& old_params) { + CachedSurface* const new_surface{GetUncachedSurface(new_params)}; + Register(new_surface, cpu_addr, host_ptr); + + const u32 min_width{ + std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; + const u32 min_height{ + 
std::max(old_params.GetDefaultBlockHeight(), new_params.GetDefaultBlockHeight())}; + for (u32 level = 0; level < old_params.GetNumLevels(); ++level) { + const u32 width{std::min(old_params.GetMipWidth(level), new_params.GetMipWidth(level))}; + const u32 height{std::min(old_params.GetMipHeight(level), new_params.GetMipHeight(level))}; + if (width < min_width || height < min_height) { + // Avoid copies that are too small to be handled in OpenGL + break; + } + glCopyImageSubData(old_surface->GetTexture(), old_surface->GetTarget(), level, 0, 0, 0, + new_surface->GetTexture(), new_surface->GetTarget(), level, 0, 0, 0, + width, height, 1); + } + + new_surface->MarkAsModified(true); + + // TODO(Rodrigo): Add an entry to directly get the superview + return new_surface->GetView(cpu_addr, new_params); +} + std::unique_ptr TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { return std::make_unique(params); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f0a524882..b18b32d99 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -38,6 +38,10 @@ public: void LoadBuffer(); + GLenum GetTarget() const { + return target; + } + GLuint GetTexture() const { return texture.handle; } @@ -56,6 +60,7 @@ private: GLenum format{}; GLenum type{}; bool is_compressed{}; + GLenum target{}; OGLTexture texture; @@ -126,6 +131,10 @@ protected: const std::vector& overlaps); std::unique_ptr CreateSurface(const SurfaceParams& params); + +private: + CachedSurfaceView* SurfaceCopy(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, + CachedSurface* old_surface, const SurfaceParams& old_params); }; } // namespace OpenGL diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index c42365a82..1cfb9962f 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -160,7 +160,7 @@ u32 
SurfaceParams::GetMipBlockHeight(u32 level) const { // Auto block resizing algorithm from: // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c if (level == 0) { - return block_height; + return this->block_height; } const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; @@ -316,7 +316,7 @@ std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); } if (is_tiled && !as_host_size) { - size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + //size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 664ed4b56..217805386 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -359,8 +359,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); - const u32 size = aligned_width * aligned_height * aligned_depth; - return size; + return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; } From 84139586c9e6c95a7f3faaa09d04eb11b2bcd70c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 14 Apr 2019 18:16:27 -0300 Subject: [PATCH 003/113] gl_texture_cache: Attach surface textures instead of views --- .../renderer_opengl/gl_rasterizer.cpp | 22 +++++++-------- .../renderer_opengl/gl_texture_cache.cpp | 27 ++++++++++++++----- .../renderer_opengl/gl_texture_cache.h | 3 ++- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cea268f1e..07c28357e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,10 +83,10 @@ struct FramebufferCacheKey { bool stencil_enable = false; std::array color_attachments{}; - std::array colors{}; + std::array colors{}; u32 colors_count = 0; - GLuint zeta = 0; + CachedSurfaceView* zeta = nullptr; auto Tie() const { return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, @@ -367,25 +367,21 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, if (fbkey.is_single_buffer) { if (fbkey.color_attachments[0] != GL_NONE) { - glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0], - 0); + fbkey.colors[0]->Attach(fbkey.color_attachments[0]); } glDrawBuffer(fbkey.color_attachments[0]); } else { for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { if (fbkey.colors[index]) { - glFramebufferTexture(GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + static_cast(index), - fbkey.colors[index], 0); + fbkey.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); } } glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); } if (fbkey.zeta) { - GLenum zeta_attachment = - fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; - glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0); + fbkey.zeta->Attach(fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT + : GL_DEPTH_ATTACHMENT); } } @@ -509,7 +505,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( fbkey.is_single_buffer = true; fbkey.color_attachments[0] = GL_COLOR_ATTACHMENT0 + static_cast(*single_color_target); - fbkey.colors[0] = color_surface != nullptr ? 
color_surface->GetTexture() : 0; + fbkey.colors[0] = color_surface; } else { // Multiple color attachments are enabled for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { @@ -529,7 +525,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); - fbkey.colors[index] = color_surface != nullptr ? color_surface->GetTexture() : 0; + fbkey.colors[index] = color_surface; } fbkey.is_single_buffer = false; fbkey.colors_count = regs.rt_control.count; @@ -544,7 +540,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( // the shader doesn't actually write to it. depth_surface->MarkAsModified(true); - fbkey.zeta = depth_surface->GetTexture(); + fbkey.zeta = depth_surface; fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == SurfaceType::DepthStencil; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 00f9ab92f..ba6d3af4b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -428,13 +428,28 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) CachedSurfaceView::~CachedSurfaceView() = default; -GLuint CachedSurfaceView::GetTexture() { - // TODO(Rodrigo): Remove this entry and attach the super texture to the framebuffer through - // legacy API (also dropping Intel driver issues). 
- if (texture_view_2d.texture.handle == 0) { - texture_view_2d = CreateTextureView(GL_TEXTURE_2D); +void CachedSurfaceView::Attach(GLenum attachment) const { + ASSERT(key.num_layers == 1 && key.num_levels == 1); + + switch (params.GetTarget()) { + case SurfaceTarget::Texture1D: + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), key.base_level); + break; + case SurfaceTarget::Texture2D: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), key.base_level); + break; + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), + key.base_level, key.base_layer); + break; + default: + UNIMPLEMENTED(); } - return texture_view_2d.texture.handle; } GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index b18b32d99..80733ac36 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -73,7 +73,8 @@ public: explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); ~CachedSurfaceView(); - GLuint GetTexture(); + /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER + void Attach(GLenum attachment) const; GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, Tegra::Texture::SwizzleSource x_source, From 0cefb7bcb481dc32d6362bba1976cadf25f9c95a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Apr 2019 16:17:27 -0300 Subject: [PATCH 004/113] gl_texture_cache: Add copy from multiple overlaps into a single surface --- .../renderer_opengl/gl_texture_cache.cpp | 54 +++++++++++++++++-- .../renderer_opengl/gl_texture_cache.h | 34 +++++++++++- 
src/video_core/texture_cache.cpp | 2 +- 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ba6d3af4b..6a6fe7cc4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -518,16 +518,14 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( - VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, bool preserve_contents, + VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, bool preserve_contents, const std::vector& overlaps) { if (overlaps.size() > 1) { - return nullptr; + return TryCopyAsViews(cpu_addr, host_ptr, new_params, overlaps); } const auto& old_surface{overlaps[0]}; const auto& old_params{old_surface->GetSurfaceParams()}; - const auto& new_params{params}; - if (old_params.GetTarget() == new_params.GetTarget() && old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && old_params.GetNumLevels() == new_params.GetNumLevels() && @@ -567,6 +565,54 @@ CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, return new_surface->GetView(cpu_addr, new_params); } +CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, + const std::vector& overlaps) { + if (new_params.GetTarget() == SurfaceTarget::Texture1D || + new_params.GetTarget() == SurfaceTarget::Texture1DArray || + new_params.GetTarget() == SurfaceTarget::Texture3D) { + // Non-2D textures are not handled at the moment in this fast path. + return nullptr; + } + + CachedSurface* const new_surface{GetUncachedSurface(new_params)}; + // TODO(Rodrigo): Move this down + Register(new_surface, cpu_addr, host_ptr); + + // TODO(Rodrigo): Find a way to avoid heap allocations here. 
+ std::vector views; + views.reserve(overlaps.size()); + for (const auto& overlap : overlaps) { + const auto view{ + new_surface->TryGetView(overlap->GetCpuAddr(), overlap->GetSurfaceParams())}; + if (!view) { + // TODO(Rodrigo): Remove this + Unregister(new_surface); + return nullptr; + } + views.push_back(view); + } + + // TODO(Rodrigo): It's possible that these method leaves some unloaded textures if the data has + // been uploaded to guest memory but not used as a surface previously. + for (std::size_t i = 0; i < overlaps.size(); ++i) { + const auto& overlap{overlaps[i]}; + const auto& view{views[i]}; + for (u32 overlap_level = 0; overlap_level < view->GetNumLevels(); ++overlap_level) { + const u32 super_level{view->GetBaseLevel() + overlap_level}; + glCopyImageSubData(overlap->GetTexture(), overlap->GetTarget(), overlap_level, 0, 0, 0, + new_surface->GetTexture(), new_surface->GetTarget(), super_level, 0, + 0, view->GetBaseLayer(), view->GetWidth(), view->GetHeight(), + view->GetNumLayers()); + } + } + + new_surface->MarkAsModified(true); + + // TODO(Rodrigo): Add an entry to directly get the superview + return new_surface->GetView(cpu_addr, new_params); +} + std::unique_ptr TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { return std::make_unique(params); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 80733ac36..86ad91dab 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -90,6 +90,34 @@ public: return params; } + u32 GetWidth() const { + return params.GetMipWidth(GetBaseLevel()); + } + + u32 GetHeight() const { + return params.GetMipHeight(GetBaseLevel()); + } + + u32 GetDepth() const { + return params.GetMipDepth(GetBaseLevel()); + } + + u32 GetBaseLayer() const { + return key.base_layer; + } + + u32 GetNumLayers() const { + return key.num_layers; + } + + u32 GetBaseLevel() const { + return 
key.base_level; + } + + u32 GetNumLevels() const { + return key.num_levels; + } + private: struct TextureView { OGLTexture texture; @@ -128,7 +156,8 @@ public: protected: CachedSurfaceView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, + const SurfaceParams& new_params, + bool preserve_contents, const std::vector& overlaps); std::unique_ptr CreateSurface(const SurfaceParams& params); @@ -136,6 +165,9 @@ protected: private: CachedSurfaceView* SurfaceCopy(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, CachedSurface* old_surface, const SurfaceParams& old_params); + + CachedSurfaceView* TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, + const std::vector& overlaps); }; } // namespace OpenGL diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index 1cfb9962f..2994312f4 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -316,7 +316,7 @@ std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); } if (is_tiled && !as_host_size) { - //size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; } From 4b396f375c0d32b60595f224d06b1b63d6df6b0a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Apr 2019 20:01:07 -0300 Subject: [PATCH 005/113] gl_texture_cache: Minor changes --- .../renderer_opengl/gl_rasterizer.cpp | 8 +- .../renderer_opengl/gl_texture_cache.cpp | 50 ++--- .../renderer_opengl/gl_texture_cache.h | 20 +- src/video_core/texture_cache.cpp | 72 ++++--- src/video_core/texture_cache.h | 175 ++++++++++-------- 5 files changed, 185 insertions(+), 140 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 07c28357e..af63365a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -498,8 +498,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( color_surface->MarkAsModified(true); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - // state.framebuffer_srgb.enabled |= - // color_surface->GetSurfaceParams().srgb_conversion; + state.framebuffer_srgb.enabled |= + color_surface->GetSurfaceParams().GetSrgbConversion(); } fbkey.is_single_buffer = true; @@ -519,8 +519,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - // state.framebuffer_srgb.enabled |= - // color_surface->GetSurfaceParams().srgb_conversion; + state.framebuffer_srgb.enabled |= + color_surface->GetSurfaceParams().GetSrgbConversion(); } fbkey.color_attachments[index] = diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 6a6fe7cc4..da2d1e63a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -8,6 +8,7 @@ #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" @@ -285,6 +286,8 @@ void CachedSurface::LoadBuffer() { } void CachedSurface::FlushBufferImpl() { + LOG_CRITICAL(Render_OpenGL, "Flushing"); + if (!IsModified()) { return; } @@ -352,9 +355,6 @@ void CachedSurface::UploadTextureMipmap(u32 level) { 
if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - GLint expected_size; - glGetTextureLevelParameteriv(texture.handle, level, GL_TEXTURE_COMPRESSED_IMAGE_SIZE, - &expected_size); switch (params.GetTarget()) { case SurfaceTarget::Texture2D: glCompressedTextureSubImage2D(texture.handle, level, 0, 0, @@ -419,6 +419,10 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } } +void CachedSurface::DecorateSurfaceName() { + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr()); +} + std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { return std::make_unique(*this, view_key); } @@ -517,11 +521,13 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( - VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, bool preserve_contents, - const std::vector& overlaps) { +CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, + const SurfaceParams& new_params, + bool preserve_contents, + const std::vector& overlaps) { if (overlaps.size() > 1) { - return TryCopyAsViews(cpu_addr, host_ptr, new_params, overlaps); + return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); } const auto& old_surface{overlaps[0]}; @@ -530,18 +536,18 @@ CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView( old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && old_params.GetNumLevels() == new_params.GetNumLevels() && old_params.GetPixelFormat() == new_params.GetPixelFormat()) { - return SurfaceCopy(cpu_addr, host_ptr, new_params, old_surface, old_params); + return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); } return nullptr; } -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, +CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr 
gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - CachedSurface* old_surface, + const Surface& old_surface, const SurfaceParams& old_params) { - CachedSurface* const new_surface{GetUncachedSurface(new_params)}; - Register(new_surface, cpu_addr, host_ptr); + const auto new_surface{GetUncachedSurface(new_params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); const u32 min_width{ std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; @@ -562,12 +568,12 @@ CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(VAddr cpu_addr, u8* host_ptr, new_surface->MarkAsModified(true); // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(cpu_addr, new_params); + return new_surface->GetView(gpu_addr, new_params); } -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const std::vector& overlaps) { +CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& new_params, + const std::vector& overlaps) { if (new_params.GetTarget() == SurfaceTarget::Texture1D || new_params.GetTarget() == SurfaceTarget::Texture1DArray || new_params.GetTarget() == SurfaceTarget::Texture3D) { @@ -575,16 +581,16 @@ CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_p return nullptr; } - CachedSurface* const new_surface{GetUncachedSurface(new_params)}; + const auto new_surface{GetUncachedSurface(new_params)}; // TODO(Rodrigo): Move this down - Register(new_surface, cpu_addr, host_ptr); + Register(new_surface, gpu_addr, cpu_addr, host_ptr); // TODO(Rodrigo): Find a way to avoid heap allocations here. 
std::vector views; views.reserve(overlaps.size()); for (const auto& overlap : overlaps) { const auto view{ - new_surface->TryGetView(overlap->GetCpuAddr(), overlap->GetSurfaceParams())}; + new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; if (!view) { // TODO(Rodrigo): Remove this Unregister(new_surface); @@ -610,11 +616,11 @@ CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(VAddr cpu_addr, u8* host_p new_surface->MarkAsModified(true); // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(cpu_addr, new_params); + return new_surface->GetView(gpu_addr, new_params); } -std::unique_ptr TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { - return std::make_unique(params); +Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { + return std::make_unique(*this, params); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 86ad91dab..8705db74c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -27,6 +27,7 @@ using VideoCore::Surface::SurfaceType; class CachedSurfaceView; class CachedSurface; +using Surface = std::shared_ptr; using TextureCacheBase = VideoCommon::TextureCacheContextless; class CachedSurface final : public VideoCommon::SurfaceBaseContextless { @@ -47,6 +48,8 @@ public: } protected: + void DecorateSurfaceName(); + std::unique_ptr CreateView(const ViewKey& view_key); void FlushBufferImpl(); @@ -65,7 +68,6 @@ private: OGLTexture texture; std::vector staging_buffer; - u8* host_ptr{}; }; class CachedSurfaceView final { @@ -155,19 +157,21 @@ public: ~TextureCacheOpenGL(); protected: - CachedSurfaceView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, + CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, bool preserve_contents, - 
const std::vector& overlaps); + const std::vector& overlaps); - std::unique_ptr CreateSurface(const SurfaceParams& params); + Surface CreateSurface(const SurfaceParams& params); private: - CachedSurfaceView* SurfaceCopy(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - CachedSurface* old_surface, const SurfaceParams& old_params); + CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, const Surface& old_surface, + const SurfaceParams& old_params); - CachedSurfaceView* TryCopyAsViews(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& new_params, - const std::vector& overlaps); + CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& new_params, + const std::vector& overlaps); }; } // namespace OpenGL diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index 2994312f4..b47ce6b98 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -32,12 +32,13 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, const Tegra::Texture::FullTextureInfo& config) { SurfaceParams params; params.is_tiled = config.tic.IsTiled(); + params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, params.tile_width_spacing = params.is_tiled ? 
(1 << config.tic.tile_width_spacing.Value()) : 1; - params.pixel_format = - PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false); + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), + params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); params.target = SurfaceTargetFromTextureType(config.tic.texture_type); @@ -62,6 +63,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { SurfaceParams params; params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = false; params.block_width = 1 << std::min(block_width, 5U); params.block_height = 1 << std::min(block_height, 5U); params.block_depth = 1 << std::min(block_depth, 5U); @@ -85,6 +87,8 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.block_width = 1 << config.memory_layout.block_width; params.block_height = 1 << config.memory_layout.block_height; params.block_depth = 1 << config.memory_layout.block_depth; @@ -113,6 +117,8 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params{}; params.is_tiled = !config.linear; + params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || + config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, params.block_height = params.is_tiled ? 
std::min(config.BlockHeight(), 32U) : 0, params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, @@ -162,6 +168,7 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { if (level == 0) { return this->block_height; } + const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; @@ -173,10 +180,12 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { - if (level == 0) - return block_depth; - if (target != SurfaceTarget::Texture3D) + if (level == 0) { + return this->block_depth; + } + if (IsLayered()) { return 1; + } const u32 depth{GetMipDepth(level)}; u32 block_depth = 32; @@ -192,7 +201,7 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { std::size_t offset = 0; for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false); + offset += GetInnerMipmapMemorySize(i, false, false); } return offset; } @@ -200,21 +209,33 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { std::size_t offset = 0; for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, true, false, false); + offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); } return offset; } std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, true, false) * GetNumLayers(); + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } std::size_t SurfaceParams::GetGuestLayerSize() const { - return GetInnerMemorySize(false, true, false); + return GetLayerSize(false, false); +} + +std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { + std::size_t size = 0; + for (u32 level = 
0; level < num_levels; ++level) { + size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); + } + if (is_tiled && IsLayered()) { + return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + return size; } std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, IsLayered(), false); + ASSERT(target != SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); } u32 SurfaceParams::GetDefaultBlockWidth() const { @@ -273,15 +294,6 @@ bool SurfaceParams::IsPixelFormatZeta() const { } void SurfaceParams::CalculateCachedValues() { - guest_size_in_bytes = GetInnerMemorySize(false, false, false); - - // ASTC is uncompressed in software, in emulated as RGBA8 - if (IsPixelFormatASTC(pixel_format)) { - host_size_in_bytes = static_cast(width * height * depth) * 4ULL; - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); - } - switch (target) { case SurfaceTarget::Texture1D: case SurfaceTarget::Texture2D: @@ -297,28 +309,30 @@ void SurfaceParams::CalculateCachedValues() { default: UNREACHABLE(); } + + guest_size_in_bytes = GetInnerMemorySize(false, false, false); + + // ASTC is uncompressed in software, in emulated as RGBA8 + if (IsPixelFormatASTC(pixel_format)) { + host_size_in_bytes = static_cast(width * height * depth * 4U); + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } } -std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, +std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{layer_only ? 
1U : GetMipDepth(level)}; + const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - std::size_t size = 0; - for (u32 level = 0; level < num_levels; ++level) { - size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); - } - if (is_tiled && !as_host_size) { - size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); - } - return size; + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers); } std::map> SurfaceParams::CreateViewOffsetMap() const { diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h index 9fd5f074e..0e289d378 100644 --- a/src/video_core/texture_cache.h +++ b/src/video_core/texture_cache.h @@ -53,6 +53,7 @@ protected: HasheableSurfaceParams() = default; bool is_tiled; + bool srgb_conversion; u32 block_width; u32 block_height; u32 block_depth; @@ -92,6 +93,10 @@ public: return is_tiled; } + bool GetSrgbConversion() const { + return srgb_conversion; + } + u32 GetBlockWidth() const { return block_width; } @@ -211,13 +216,15 @@ private: /// Calculates values that can be deduced from HasheableSurfaceParams. void CalculateCachedValues(); - /// Returns the size of a given mipmap level. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, - bool uncompressed) const; + /// Returns the size of a given mipmap level inside a layer. + std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; /// Returns the size of all mipmap levels and aligns as needed. 
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + /// Returns the size of a layer + std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; + /// Returns true if the passed view width and height match the size of this params in a given /// mipmap level. bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; @@ -277,13 +284,13 @@ public: virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; - TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) { + TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + if (view_addr < gpu_addr || !params.IsFamiliar(view_params)) { // It can't be a view if it's in a prior address. return {}; } - const auto relative_offset{static_cast(view_addr - cpu_addr)}; + const auto relative_offset{static_cast(view_addr - gpu_addr)}; const auto it{view_offset_map.find(relative_offset)}; if (it == view_offset_map.end()) { // Couldn't find an aligned view. 
@@ -298,6 +305,11 @@ public: return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); } + GPUVAddr GetGpuAddr() const { + ASSERT(is_registered); + return gpu_addr; + } + VAddr GetCpuAddr() const { ASSERT(is_registered); return cpu_addr; @@ -325,22 +337,20 @@ public: return params; } - TView* GetView(VAddr view_addr, const SurfaceParams& view_params) { + TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { TView* view{TryGetView(view_addr, view_params)}; ASSERT(view != nullptr); return view; } - void Register(VAddr cpu_addr_, u8* host_ptr_) { + void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { ASSERT(!is_registered); is_registered = true; + gpu_addr = gpu_addr_; cpu_addr = cpu_addr_; host_ptr = host_ptr_; cache_addr = ToCacheAddr(host_ptr_); - } - - void Register(VAddr cpu_addr_) { - Register(cpu_addr_, Memory::GetPointer(cpu_addr_)); + DecorateSurfaceName(); } void Unregister() { @@ -358,6 +368,8 @@ protected: ~SurfaceBase() = default; + virtual void DecorateSurfaceName() = 0; + virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; bool IsModified() const { @@ -379,6 +391,7 @@ private: const std::map> view_offset_map; + GPUVAddr gpu_addr{}; VAddr cpu_addr{}; u8* host_ptr{}; CacheAddr cache_addr{}; @@ -392,12 +405,12 @@ class TextureCache { static_assert(std::is_trivially_copyable_v); using ResultType = std::tuple; - using IntervalMap = boost::icl::interval_map>; + using IntervalMap = boost::icl::interval_map>>; using IntervalType = typename IntervalMap::interval_type; public: void InvalidateRegion(CacheAddr addr, std::size_t size) { - for (TSurface* surface : GetSurfacesInRegion(addr, size)) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { if (!surface->IsRegistered()) { // Skip duplicates continue; @@ -408,32 +421,25 @@ public: ResultType GetTextureSurface(TExecutionContext exctx, const Tegra::Texture::FullTextureInfo& config) { - auto& 
memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())}; - if (!cpu_addr) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { return {{}, exctx}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, *cpu_addr, params, true); + return GetSurfaceView(exctx, gpu_addr, params, true); } ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; - if (!regs.zeta.Address() || !regs.zeta_enable) { + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { return {{}, exctx}; } - - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())}; - if (!cpu_addr) { - return {{}, exctx}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents); + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); } ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, @@ -448,25 +454,23 @@ public: auto& memory_manager{system.GPU().MemoryManager()}; const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto cpu_addr{memory_manager.GpuToCpuAddress( - config.Address() + config.base_layer * config.layer_stride * sizeof(u32))}; - if (!cpu_addr) { + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { return {{}, exctx}; } - return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), 
preserve_contents); } ResultType GetFermiSurface(TExecutionContext exctx, const Tegra::Engines::Fermi2D::Regs::Surface& config) { - const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())}; - ASSERT(cpu_addr); - return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config), - true); + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); } - TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -477,56 +481,68 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector& overlaps) = 0; + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) = 0; - virtual std::unique_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; - void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) { - surface->Register(cpu_addr, host_ptr); + void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); } - void Unregister(TSurface* surface) { + void Unregister(std::shared_ptr surface) { registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); 
surface->Unregister(); } - TSurface* GetUncachedSurface(const SurfaceParams& params) { - if (TSurface* surface = TryGetReservedSurface(params); surface) + std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) return surface; // No reserved surface available, create a new one and reserve it auto new_surface{CreateSurface(params)}; - TSurface* surface{new_surface.get()}; - ReserveSurface(params, std::move(new_surface)); - return surface; + ReserveSurface(params, new_surface); + return new_surface; } Core::System& system; private: - ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params, - bool preserve_contents) { - const auto host_ptr{Memory::GetPointer(cpu_addr)}; + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { return {view, exctx}; + } } TView* fast_view; - std::tie(fast_view, exctx) = - TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps); + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); - for (TSurface* surface : overlaps) { + if (!fast_view) { + 
std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } + + for (const auto& surface : overlaps) { if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not written // by the new surface. @@ -539,57 +555,59 @@ private: return {fast_view, exctx}; } - return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents) { - TSurface* new_surface{GetUncachedSurface(params)}; - Register(new_surface, cpu_addr, host_ptr); + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { exctx = LoadSurface(exctx, new_surface); } - return {new_surface->GetView(cpu_addr, params), exctx}; + return {new_surface->GetView(gpu_addr, params), exctx}; } - TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) { + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { surface->LoadBuffer(); exctx = surface->UploadTexture(exctx); surface->MarkAsModified(false); return exctx; } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { + std::vector> GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector surfaces; + std::vector> surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { surfaces.push_back(*pair.second.begin()); } return surfaces; } - 
void ReserveSurface(const SurfaceParams& params, std::unique_ptr surface) { + void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { surface_reserve[params].push_back(std::move(surface)); } - TSurface* TryGetReservedSurface(const SurfaceParams& params) { + std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; } for (auto& surface : search->second) { if (!surface->IsRegistered()) { - return surface.get(); + return surface; } } return {}; } - IntervalType GetSurfaceInterval(TSurface* surface) const { + IntervalType GetSurfaceInterval(std::shared_ptr surface) const { return IntervalType::right_open(surface->GetCacheAddr(), surface->GetCacheAddr() + surface->GetSizeInBytes()); } @@ -601,7 +619,7 @@ private: /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. 
- std::unordered_map>> surface_reserve; + std::unordered_map>> surface_reserve; }; struct DummyExecutionContext {}; @@ -631,7 +649,7 @@ public: return RemoveContext(Base::GetFermiSurface({}, config)); } - TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { return Base::TryFindFramebufferSurface(host_ptr); } @@ -640,15 +658,18 @@ protected: VideoCore::RasterizerInterface& rasterizer) : TextureCache{system, rasterizer} {} - virtual TView* TryFastGetSurfaceView(VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, - const std::vector& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; private: std::tuple TryFastGetSurfaceView( - DummyExecutionContext, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector& overlaps) { - return {TryFastGetSurfaceView(cpu_addr, host_ptr, params, preserve_contents, overlaps), {}}; + DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) { + return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, + overlaps), + {}}; } TView* RemoveContext(std::tuple return_value) { From 2787a0c2875c6c686a50a03e446099124824b17f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Apr 2019 20:01:26 -0300 Subject: [PATCH 006/113] texture_cache: Flush 3D textures in the order they are drawn --- .../renderer_opengl/gl_texture_cache.cpp | 5 ++- .../renderer_opengl/gl_texture_cache.h | 6 ++- .../renderer_opengl/renderer_opengl.cpp | 1 - src/video_core/texture_cache.cpp | 11 +++-- src/video_core/texture_cache.h | 40 ++++++++++++++----- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git 
a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index da2d1e63a..362f4019c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -235,8 +235,9 @@ void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params } // Anonymous namespace -CachedSurface::CachedSurface(const SurfaceParams& params) - : VideoCommon::SurfaceBaseContextless{params} { +CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) + : VideoCommon::SurfaceBaseContextless{texture_cache, + params} { const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; internal_format = tuple.internal_format; format = tuple.format; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8705db74c..e6448c6f8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -26,15 +26,17 @@ using VideoCore::Surface::SurfaceType; class CachedSurfaceView; class CachedSurface; +class TextureCacheOpenGL; using Surface = std::shared_ptr; using TextureCacheBase = VideoCommon::TextureCacheContextless; -class CachedSurface final : public VideoCommon::SurfaceBaseContextless { +class CachedSurface final + : public VideoCommon::SurfaceBaseContextless { friend CachedSurfaceView; public: - explicit CachedSurface(const SurfaceParams& params); + explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); ~CachedSurface(); void LoadBuffer(); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 710bf8303..aafd6f31b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -460,7 +460,6 @@ static void APIENTRY DebugHandler(GLenum source, 
GLenum type, GLuint id, GLenum switch (severity) { case GL_DEBUG_SEVERITY_HIGH: LOG_CRITICAL(Render_OpenGL, format, str_source, str_type, id, message); - __debugbreak(); break; case GL_DEBUG_SEVERITY_MEDIUM: LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index b47ce6b98..b78a7d951 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -154,8 +154,8 @@ bool SurfaceParams::IsLayered() const { switch (target) { case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: return true; default: return false; @@ -192,9 +192,11 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { while (block_depth > 1 && depth * 2 <= block_depth) { block_depth >>= 1; } + if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { return 16; } + return block_depth; } @@ -227,7 +229,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co for (u32 level = 0; level < num_levels; ++level) { size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } - if (is_tiled && IsLayered()) { + if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) { return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; @@ -312,9 +314,10 @@ void SurfaceParams::CalculateCachedValues() { guest_size_in_bytes = GetInnerMemorySize(false, false, false); - // ASTC is uncompressed in software, in emulated as RGBA8 if (IsPixelFormatASTC(pixel_format)) { - host_size_in_bytes = static_cast(width * height * depth * 4U); + // ASTC is uncompressed in software, in emulated as RGBA8 + host_size_in_bytes = static_cast(width) * static_cast(height) * + static_cast(depth) * 4ULL; } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } diff --git 
a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h index 0e289d378..f22e8e776 100644 --- a/src/video_core/texture_cache.h +++ b/src/video_core/texture_cache.h @@ -273,7 +273,7 @@ struct hash { namespace VideoCommon { -template +template class SurfaceBase { static_assert(std::is_trivially_copyable_v); @@ -331,6 +331,9 @@ public: void MarkAsModified(bool is_modified_) { is_modified = is_modified_; + if (is_modified_) { + modification_tick = texture_cache.Tick(); + } } const SurfaceParams& GetSurfaceParams() const { @@ -358,13 +361,18 @@ public: is_registered = false; } + u64 GetModificationTick() const { + return modification_tick; + } + bool IsRegistered() const { return is_registered; } protected: - explicit SurfaceBase(const SurfaceParams& params) - : params{params}, view_offset_map{params.CreateViewOffsetMap()} {} + explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) + : params{params}, texture_cache{texture_cache}, view_offset_map{ + params.CreateViewOffsetMap()} {} ~SurfaceBase() = default; @@ -389,12 +397,14 @@ private: return view.get(); } + TTextureCache& texture_cache; const std::map> view_offset_map; GPUVAddr gpu_addr{}; VAddr cpu_addr{}; u8* host_ptr{}; CacheAddr cache_addr{}; + u64 modification_tick{}; bool is_modified{}; bool is_registered{}; std::unordered_map> views; @@ -475,6 +485,10 @@ public: return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; } + u64 Tick() { + return ++ticks; + } + protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} {} @@ -521,7 +535,7 @@ private: const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } @@ -544,8 +558,8 @@ private: for (const auto& surface : overlaps) { if (!fast_view) { - // Flush even when we don't care about the contents, to preserve memory not written - // by the new surface. + // Flush even when we don't care about the contents, to preserve memory not + // written by the new surface. exctx = surface->FlushBuffer(exctx); } Unregister(surface); @@ -614,6 +628,8 @@ private: VideoCore::RasterizerInterface& rasterizer; + u64 ticks{}; + IntervalMap registered_surfaces; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have @@ -653,6 +669,10 @@ public: return Base::TryFindFramebufferSurface(host_ptr); } + u64 Tick() { + return Base::Tick(); + } + protected: explicit TextureCacheContextless(Core::System& system, VideoCore::RasterizerInterface& rasterizer) @@ -678,8 +698,8 @@ private: } }; -template -class SurfaceBaseContextless : public SurfaceBase { +template +class SurfaceBaseContextless : public SurfaceBase { public: DummyExecutionContext FlushBuffer(DummyExecutionContext) { FlushBufferImpl(); @@ -692,8 +712,8 @@ public: } protected: - explicit SurfaceBaseContextless(const SurfaceParams& params) - : SurfaceBase{params} {} + explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBase{texture_cache, params} {} virtual void FlushBufferImpl() = 0; From 
5f3aacdc3760f0e9e0daeda3ee4c55e42fc9397e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 02:45:03 -0300 Subject: [PATCH 007/113] texture_cache: Move staging buffer into a generic implementation --- .../renderer_opengl/gl_texture_cache.cpp | 114 +------------ .../renderer_opengl/gl_texture_cache.h | 7 +- src/video_core/texture_cache.cpp | 110 +++++++++++- src/video_core/texture_cache.h | 161 ++++++++++-------- 4 files changed, 211 insertions(+), 181 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 362f4019c..3e2a1f53c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -15,7 +15,6 @@ namespace OpenGL { -using Tegra::Texture::ConvertFromGuestToHost; using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; @@ -207,32 +206,6 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte return texture; } -void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level) { - const u32 width{params.GetMipWidth(level)}; - const u32 height{params.GetMipHeight(level)}; - const u32 block_height{params.GetMipBlockHeight(level)}; - const u32 block_depth{params.GetMipBlockDepth(level)}; - - std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; - if (params.IsLayered()) { - std::size_t host_offset{0}; - const std::size_t guest_stride = params.GetGuestLayerSize(); - const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - 1, params.GetTileWidthSpacing(), buffer + host_offset, - memory + guest_offset); - guest_offset += guest_stride; - host_offset += host_stride; - } - } else { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, 
block_depth, - params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, - memory + guest_offset); - } -} - } // Anonymous namespace CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) @@ -245,54 +218,11 @@ CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfacePar is_compressed = tuple.compressed; target = GetTextureTarget(params); texture = CreateTexture(params, target, internal_format); - staging_buffer.resize(params.GetHostSizeInBytes()); } CachedSurface::~CachedSurface() = default; -void CachedSurface::LoadBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", - params.GetBlockWidth(), static_cast(params.GetTarget())); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer{staging_buffer.data() + params.GetHostMipmapLevelOffset(level)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, GetHostPtr(), params, buffer, level); - } - } else { - ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; - const u32 block_width{VideoCore::Surface::GetDefaultBlockWidth(params.GetPixelFormat())}; - const u32 block_height{VideoCore::Surface::GetDefaultBlockHeight(params.GetPixelFormat())}; - const u32 width{(params.GetWidth() + block_width - 1) / block_width}; - const u32 height{(params.GetHeight() + block_height - 1) / block_height}; - const u32 copy_size{width * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(staging_buffer.data(), GetHostPtr(), params.GetHostSizeInBytes()); - } else { - const u8* start{GetHostPtr()}; - u8* write_to{staging_buffer.data()}; - for (u32 h = height; h > 0; --h) { - std::memcpy(write_to, start, copy_size); - start += params.GetPitch(); - write_to += copy_size; - } - } - } - - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - 
ConvertFromGuestToHost(staging_buffer.data() + params.GetHostMipmapLevelOffset(level), - params.GetPixelFormat(), params.GetMipWidth(level), - params.GetMipHeight(level), params.GetMipDepth(level), true, true); - } -} - -void CachedSurface::FlushBufferImpl() { - LOG_CRITICAL(Render_OpenGL, "Flushing"); - - if (!IsModified()) { - return; - } - +void CachedSurface::DownloadTextureImpl() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); @@ -300,60 +230,30 @@ void CachedSurface::FlushBufferImpl() { for (u32 level = 0; level < params.GetNumLevels(); ++level) { glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); if (is_compressed) { - glGetCompressedTextureImage( - texture.handle, level, static_cast(params.GetHostMipmapSize(level)), - staging_buffer.data() + params.GetHostMipmapLevelOffset(level)); + glGetCompressedTextureImage(texture.handle, level, + static_cast(params.GetHostMipmapSize(level)), + GetStagingBufferLevelData(level)); } else { glGetTextureImage(texture.handle, level, format, type, static_cast(params.GetHostMipmapSize(level)), - staging_buffer.data() + params.GetHostMipmapLevelOffset(level)); + GetStagingBufferLevelData(level)); } } - - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", - params.GetBlockWidth()); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer = staging_buffer.data() + params.GetHostMipmapLevelOffset(level); - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); - } - } else { - UNIMPLEMENTED(); - /* - ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); - ASSERT(params.GetNumLevels() == 1); - - const u32 bpp{params.GetFormatBpp() / 8}; - const u32 copy_size{params.GetWidth() * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); - } else { - u8* start{host_ptr}; - 
const u8* read_to{staging_buffer.data()}; - for (u32 h = params.GetHeight(); h > 0; --h) { - std::memcpy(start, read_to, copy_size); - start += params.GetPitch(); - read_to += copy_size; - } - } - */ - } } void CachedSurface::UploadTextureImpl() { + SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.GetNumLevels(); ++level) { UploadTextureMipmap(level); } } void CachedSurface::UploadTextureMipmap(u32 level) { - u8* buffer{staging_buffer.data() + params.GetHostMipmapLevelOffset(level)}; - // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); + u8* buffer{GetStagingBufferLevelData(level)}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; switch (params.GetTarget()) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e6448c6f8..0a69be233 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -39,8 +39,6 @@ public: explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); ~CachedSurface(); - void LoadBuffer(); - GLenum GetTarget() const { return target; } @@ -54,9 +52,8 @@ protected: std::unique_ptr CreateView(const ViewKey& view_key); - void FlushBufferImpl(); - void UploadTextureImpl(); + void DownloadTextureImpl(); private: void UploadTextureMipmap(u32 level); @@ -68,8 +65,6 @@ private: GLenum target{}; OGLTexture texture; - - std::vector staging_buffer; }; class CachedSurfaceView final { diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp index b78a7d951..146e8ed9b 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache.cpp @@ -7,14 +7,16 @@ #include "common/cityhash.h" #include "common/common_types.h" #include 
"core/core.h" +#include "video_core/morton.h" #include "video_core/surface.h" #include "video_core/texture_cache.h" +#include "video_core/textures/convert.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" namespace VideoCommon { -using VideoCore::Surface::SurfaceTarget; +using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentTypeFromDepthFormat; using VideoCore::Surface::ComponentTypeFromRenderTarget; @@ -22,12 +24,118 @@ using VideoCore::Surface::ComponentTypeFromTexture; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::PixelFormatFromTextureFormat; +using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +using Tegra::Texture::ConvertFromGuestToHost; + +namespace { + constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); } +void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level) { + const u32 width{params.GetMipWidth(level)}; + const u32 height{params.GetMipHeight(level)}; + const u32 block_height{params.GetMipBlockHeight(level)}; + const u32 block_depth{params.GetMipBlockDepth(level)}; + + std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; + if (params.IsLayered()) { + std::size_t host_offset{0}; + const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t host_stride = params.GetHostLayerSize(level); + for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + 1, params.GetTileWidthSpacing(), buffer + host_offset, + memory + guest_offset); + guest_offset += guest_stride; + host_offset += host_stride; + } + } else { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + 
params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + memory + guest_offset); + } +} + +} // Anonymous namespace + +SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { + staging_buffer.resize(params.GetHostSizeInBytes()); +} + +SurfaceBaseImpl::~SurfaceBaseImpl() = default; + +void SurfaceBaseImpl::LoadBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", + params.GetBlockWidth(), static_cast(params.GetTarget())); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer{GetStagingBufferLevelData(level)}; + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + } + } else { + ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + const u32 block_width{params.GetDefaultBlockWidth()}; + const u32 block_height{params.GetDefaultBlockHeight()}; + const u32 width{(params.GetWidth() + block_width - 1) / block_width}; + const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 copy_size{width * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); + } else { + const u8* start{host_ptr}; + u8* write_to{staging_buffer.data()}; + for (u32 h = height; h > 0; --h) { + std::memcpy(write_to, start, copy_size); + start += params.GetPitch(); + write_to += copy_size; + } + } + } + + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + params.GetMipWidth(level), params.GetMipHeight(level), + params.GetMipDepth(level), true, true); + } +} + +void SurfaceBaseImpl::FlushBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", + params.GetBlockWidth()); + for (u32 level = 0; level 
< params.GetNumLevels(); ++level) { + u8* const buffer = GetStagingBufferLevelData(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + } + } else { + UNIMPLEMENTED(); + /* + ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); + ASSERT(params.GetNumLevels() == 1); + + const u32 bpp{params.GetFormatBpp() / 8}; + const u32 copy_size{params.GetWidth() * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + } else { + u8* start{host_ptr}; + const u8* read_to{staging_buffer.data()}; + for (u32 h = params.GetHeight(); h > 0; --h) { + std::memcpy(start, read_to, copy_size); + start += params.GetPitch(); + read_to += copy_size; + } + } + */ + } +} + SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, const Tegra::Texture::FullTextureInfo& config) { SurfaceParams params; diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h index f22e8e776..90c72cb15 100644 --- a/src/video_core/texture_cache.h +++ b/src/video_core/texture_cache.h @@ -273,37 +273,11 @@ struct hash { namespace VideoCommon { -template -class SurfaceBase { - static_assert(std::is_trivially_copyable_v); - +class SurfaceBaseImpl { public: - virtual void LoadBuffer() = 0; + void LoadBuffer(); - virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0; - - virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; - - TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < gpu_addr || !params.IsFamiliar(view_params)) { - // It can't be a view if it's in a prior address. - return {}; - } - - const auto relative_offset{static_cast(view_addr - gpu_addr)}; - const auto it{view_offset_map.find(relative_offset)}; - if (it == view_offset_map.end()) { - // Couldn't find an aligned view. 
- return {}; - } - const auto [layer, level] = it->second; - - if (!params.IsViewValid(view_params, layer, level)) { - return {}; - } - - return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); - } + void FlushBuffer(); GPUVAddr GetGpuAddr() const { ASSERT(is_registered); @@ -325,27 +299,10 @@ public: return cache_addr; } - std::size_t GetSizeInBytes() const { - return params.GetGuestSizeInBytes(); - } - - void MarkAsModified(bool is_modified_) { - is_modified = is_modified_; - if (is_modified_) { - modification_tick = texture_cache.Tick(); - } - } - const SurfaceParams& GetSurfaceParams() const { return params; } - TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - TView* view{TryGetView(view_addr, view_params)}; - ASSERT(view != nullptr); - return view; - } - void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { ASSERT(!is_registered); is_registered = true; @@ -361,30 +318,95 @@ public: is_registered = false; } - u64 GetModificationTick() const { - return modification_tick; - } - bool IsRegistered() const { return is_registered; } -protected: - explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) - : params{params}, texture_cache{texture_cache}, view_offset_map{ - params.CreateViewOffsetMap()} {} + std::size_t GetSizeInBytes() const { + return params.GetGuestSizeInBytes(); + } - ~SurfaceBase() = default; + u8* GetStagingBufferLevelData(u32 level) { + return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + } + +protected: + explicit SurfaceBaseImpl(const SurfaceParams& params); + ~SurfaceBaseImpl(); // non-virtual is intended virtual void DecorateSurfaceName() = 0; - virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + const SurfaceParams params; + +private: + GPUVAddr gpu_addr{}; + VAddr cpu_addr{}; + u8* host_ptr{}; + CacheAddr cache_addr{}; + bool is_registered{}; + + std::vector staging_buffer; +}; + +template +class SurfaceBase : public 
SurfaceBaseImpl { + static_assert(std::is_trivially_copyable_v); + +public: + virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; + + virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + + TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { + // It can't be a view if it's in a prior address. + return {}; + } + + const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; + const auto it{view_offset_map.find(relative_offset)}; + if (it == view_offset_map.end()) { + // Couldn't find an aligned view. + return {}; + } + const auto [layer, level] = it->second; + + if (!params.IsViewValid(view_params, layer, level)) { + return {}; + } + + return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + } + + void MarkAsModified(bool is_modified_) { + is_modified = is_modified_; + if (is_modified_) { + modification_tick = texture_cache.Tick(); + } + } + + TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + TView* view{TryGetView(view_addr, view_params)}; + ASSERT(view != nullptr); + return view; + } bool IsModified() const { return is_modified; } - const SurfaceParams params; + u64 GetModificationTick() const { + return modification_tick; + } + +protected: + explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBaseImpl{params}, texture_cache{texture_cache}, + view_offset_map{params.CreateViewOffsetMap()} {} + + ~SurfaceBase() = default; + + virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; private: TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { @@ -400,13 +422,8 @@ private: TTextureCache& texture_cache; const std::map> view_offset_map; - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - u8* host_ptr{}; - CacheAddr cache_addr{}; - u64 modification_tick{}; bool is_modified{}; - bool is_registered{}; + u64 
modification_tick{}; std::unordered_map> views; }; @@ -560,7 +577,7 @@ private: if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not // written by the new surface. - exctx = surface->FlushBuffer(exctx); + exctx = FlushSurface(exctx, surface); } Unregister(surface); } @@ -590,6 +607,16 @@ private: return exctx; } + TExecutionContext FlushSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + if (!surface->IsModified()) { + return exctx; + } + exctx = surface->DownloadTexture(exctx); + surface->FlushBuffer(); + return exctx; + } + std::vector> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { @@ -701,8 +728,8 @@ private: template class SurfaceBaseContextless : public SurfaceBase { public: - DummyExecutionContext FlushBuffer(DummyExecutionContext) { - FlushBufferImpl(); + DummyExecutionContext DownloadTexture(DummyExecutionContext) { + DownloadTextureImpl(); return {}; } @@ -715,7 +742,7 @@ protected: explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) : SurfaceBase{texture_cache, params} {} - virtual void FlushBufferImpl() = 0; + virtual void DownloadTextureImpl() = 0; virtual void UploadTextureImpl() = 0; }; From 1b4503c571d3b961efe74fa7e35d5fa14941ec09 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 16:35:54 -0300 Subject: [PATCH 008/113] texture_cache: Split texture cache into different files --- src/video_core/CMakeLists.txt | 9 +- .../renderer_opengl/gl_texture_cache.cpp | 2 +- .../renderer_opengl/gl_texture_cache.h | 2 +- src/video_core/texture_cache.h | 750 ------------------ src/video_core/texture_cache/surface_base.cpp | 118 +++ src/video_core/texture_cache/surface_base.h | 172 ++++ .../surface_params.cpp} | 126 +-- src/video_core/texture_cache/surface_params.h | 229 ++++++ src/video_core/texture_cache/surface_view.cpp | 23 + src/video_core/texture_cache/surface_view.h | 35 + 
src/video_core/texture_cache/texture_cache.h | 282 +++++++ .../texture_cache/texture_cache_contextless.h | 93 +++ 12 files changed, 965 insertions(+), 876 deletions(-) delete mode 100644 src/video_core/texture_cache.h create mode 100644 src/video_core/texture_cache/surface_base.cpp create mode 100644 src/video_core/texture_cache/surface_base.h rename src/video_core/{texture_cache.cpp => texture_cache/surface_params.cpp} (76%) create mode 100644 src/video_core/texture_cache/surface_params.h create mode 100644 src/video_core/texture_cache/surface_view.cpp create mode 100644 src/video_core/texture_cache/surface_view.h create mode 100644 src/video_core/texture_cache/texture_cache.h create mode 100644 src/video_core/texture_cache/texture_cache_contextless.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 64cff27a4..470fbceda 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -109,6 +109,13 @@ add_library(video_core STATIC shader/track.cpp surface.cpp surface.h + texture_cache/surface_base.cpp + texture_cache/surface_base.h + texture_cache/surface_params.cpp + texture_cache/surface_params.h + texture_cache/surface_view.cpp + texture_cache/surface_view.h + texture_cache/texture_cache.h textures/astc.cpp textures/astc.h textures/convert.cpp @@ -116,8 +123,6 @@ add_library(video_core STATIC textures/decoders.cpp textures/decoders.h textures/texture.h - texture_cache.cpp - texture_cache.h video_core.cpp video_core.h ) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3e2a1f53c..ca007b797 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,7 +9,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache.h" +#include 
"video_core/texture_cache/texture_cache_contextless.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0a69be233..3c15b37bd 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -13,7 +13,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" -#include "video_core/texture_cache.h" +#include "video_core/texture_cache/texture_cache_contextless.h" namespace OpenGL { diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h deleted file mode 100644 index 90c72cb15..000000000 --- a/src/video_core/texture_cache.h +++ /dev/null @@ -1,750 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/memory.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/surface.h" - -namespace Core { -class System; -} - -namespace Tegra::Texture { -struct FullTextureInfo; -} - -namespace VideoCore { -class RasterizerInterface; -} - -namespace VideoCommon { - -class HasheableSurfaceParams { -public: - std::size_t Hash() const; - - bool operator==(const HasheableSurfaceParams& rhs) const; - - bool operator!=(const HasheableSurfaceParams& rhs) const { - return !operator==(rhs); - } - -protected: - // Avoid creation outside of a managed environment. 
- HasheableSurfaceParams() = default; - - bool is_tiled; - bool srgb_conversion; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 unaligned_height; - u32 num_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::ComponentType component_type; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; -}; - -class SurfaceParams final : public HasheableSurfaceParams { -public: - /// Creates SurfaceCachedParams from a texture configuration. - static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config); - - /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer( - Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); - - /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); - - /// Creates SurfaceCachedParams from a Fermi2D surface configuration. 
- static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - bool IsTiled() const { - return is_tiled; - } - - bool GetSrgbConversion() const { - return srgb_conversion; - } - - u32 GetBlockWidth() const { - return block_width; - } - - u32 GetTileWidthSpacing() const { - return tile_width_spacing; - } - - u32 GetWidth() const { - return width; - } - - u32 GetHeight() const { - return height; - } - - u32 GetDepth() const { - return depth; - } - - u32 GetPitch() const { - return pitch; - } - - u32 GetNumLevels() const { - return num_levels; - } - - VideoCore::Surface::PixelFormat GetPixelFormat() const { - return pixel_format; - } - - VideoCore::Surface::ComponentType GetComponentType() const { - return component_type; - } - - VideoCore::Surface::SurfaceTarget GetTarget() const { - return target; - } - - VideoCore::Surface::SurfaceType GetType() const { - return type; - } - - std::size_t GetGuestSizeInBytes() const { - return guest_size_in_bytes; - } - - std::size_t GetHostSizeInBytes() const { - return host_size_in_bytes; - } - - u32 GetNumLayers() const { - return num_layers; - } - - /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const; - - /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const; - - /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const; - - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; - - /// Returns the block height of a given mipmap level. - u32 GetMipBlockHeight(u32 level) const; - - /// Returns the block depth of a given mipmap level. - u32 GetMipBlockDepth(u32 level) const; - - /// Returns the offset in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level. 
- std::size_t GetHostMipmapLevelOffset(u32 level) const; - - /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const; - - /// Returns the size of a layer in bytes in guest memory. - std::size_t GetGuestLayerSize() const; - - /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const; - - /// Returns the default block width. - u32 GetDefaultBlockWidth() const; - - /// Returns the default block height. - u32 GetDefaultBlockHeight() const; - - /// Returns the bits per pixel. - u32 GetBitsPerPixel() const; - - /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const; - - /// Returns true if another surface can be familiar with this. This is a loosely defined term - /// that reflects the possibility of these two surface parameters potentially being part of a - /// bigger superset. - bool IsFamiliar(const SurfaceParams& view_params) const; - - /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const; - - /// Creates a map that redirects an address difference to a layer and mipmap level. - std::map> CreateViewOffsetMap() const; - - /// Returns true if the passed surface view parameters is equal or a valid subset of this. - bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; - -private: - /// Calculates values that can be deduced from HasheableSurfaceParams. - void CalculateCachedValues(); - - /// Returns the size of a given mipmap level inside a layer. - std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; - - /// Returns the size of all mipmap levels and aligns as needed. 
- std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; - - /// Returns the size of a layer - std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - - /// Returns true if the passed view width and height match the size of this params in a given - /// mipmap level. - bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view depth match the size of this params in a given mipmap level. - bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view layers and mipmap levels are in bounds. - bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; - - std::size_t guest_size_in_bytes; - std::size_t host_size_in_bytes; - u32 num_layers; -}; - -struct ViewKey { - std::size_t Hash() const; - - bool operator==(const ViewKey& rhs) const; - - u32 base_layer{}; - u32 num_layers{}; - u32 base_level{}; - u32 num_levels{}; -}; - -} // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { - return k.Hash(); - } -}; - -template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { - return k.Hash(); - } -}; - -} // namespace std - -namespace VideoCommon { - -class SurfaceBaseImpl { -public: - void LoadBuffer(); - - void FlushBuffer(); - - GPUVAddr GetGpuAddr() const { - ASSERT(is_registered); - return gpu_addr; - } - - VAddr GetCpuAddr() const { - ASSERT(is_registered); - return cpu_addr; - } - - u8* GetHostPtr() const { - ASSERT(is_registered); - return host_ptr; - } - - CacheAddr GetCacheAddr() const { - ASSERT(is_registered); - return cache_addr; - } - - const SurfaceParams& GetSurfaceParams() const { - return params; - } - - void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { - ASSERT(!is_registered); - is_registered = true; - gpu_addr = gpu_addr_; - 
cpu_addr = cpu_addr_; - host_ptr = host_ptr_; - cache_addr = ToCacheAddr(host_ptr_); - DecorateSurfaceName(); - } - - void Unregister() { - ASSERT(is_registered); - is_registered = false; - } - - bool IsRegistered() const { - return is_registered; - } - - std::size_t GetSizeInBytes() const { - return params.GetGuestSizeInBytes(); - } - - u8* GetStagingBufferLevelData(u32 level) { - return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); - } - -protected: - explicit SurfaceBaseImpl(const SurfaceParams& params); - ~SurfaceBaseImpl(); // non-virtual is intended - - virtual void DecorateSurfaceName() = 0; - - const SurfaceParams params; - -private: - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - u8* host_ptr{}; - CacheAddr cache_addr{}; - bool is_registered{}; - - std::vector staging_buffer; -}; - -template -class SurfaceBase : public SurfaceBaseImpl { - static_assert(std::is_trivially_copyable_v); - -public: - virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; - - virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; - - TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { - // It can't be a view if it's in a prior address. - return {}; - } - - const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; - const auto it{view_offset_map.find(relative_offset)}; - if (it == view_offset_map.end()) { - // Couldn't find an aligned view. 
- return {}; - } - const auto [layer, level] = it->second; - - if (!params.IsViewValid(view_params, layer, level)) { - return {}; - } - - return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); - } - - void MarkAsModified(bool is_modified_) { - is_modified = is_modified_; - if (is_modified_) { - modification_tick = texture_cache.Tick(); - } - } - - TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - TView* view{TryGetView(view_addr, view_params)}; - ASSERT(view != nullptr); - return view; - } - - bool IsModified() const { - return is_modified; - } - - u64 GetModificationTick() const { - return modification_tick; - } - -protected: - explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBaseImpl{params}, texture_cache{texture_cache}, - view_offset_map{params.CreateViewOffsetMap()} {} - - ~SurfaceBase() = default; - - virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; - -private: - TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { - const ViewKey key{base_layer, num_layers, base_level, num_levels}; - const auto [entry, is_cache_miss] = views.try_emplace(key); - auto& view{entry->second}; - if (is_cache_miss) { - view = CreateView(key); - } - return view.get(); - } - - TTextureCache& texture_cache; - const std::map> view_offset_map; - - bool is_modified{}; - u64 modification_tick{}; - std::unordered_map> views; -}; - -template -class TextureCache { - static_assert(std::is_trivially_copyable_v); - - using ResultType = std::tuple; - using IntervalMap = boost::icl::interval_map>>; - using IntervalType = typename IntervalMap::interval_type; - -public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } - Unregister(surface); - } - } - - ResultType GetTextureSurface(TExecutionContext exctx, - const 
Tegra::Texture::FullTextureInfo& config) { - const auto gpu_addr{config.tic.Address()}; - if (!gpu_addr) { - return {{}, exctx}; - } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, gpu_addr, params, true); - } - - ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { - const auto& regs{system.GPU().Maxwell3D().regs}; - const auto gpu_addr{regs.zeta.Address()}; - if (!gpu_addr || !regs.zeta_enable) { - return {{}, exctx}; - } - const auto depth_params{SurfaceParams::CreateForDepthBuffer( - system, regs.zeta_width, regs.zeta_height, regs.zeta.format, - regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, - regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); - } - - ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, - bool preserve_contents) { - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - - const auto& regs{system.GPU().Maxwell3D().regs}; - if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || - regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return {{}, exctx}; - } - - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; - if (!gpu_addr) { - return {{}, exctx}; - } - - return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); - } - - ResultType GetFermiSurface(TExecutionContext exctx, - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(exctx, config.Address(), - SurfaceParams::CreateForFermiCopySurface(config), true); - } - - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { - const auto 
it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? *it->second.begin() : nullptr; - } - - u64 Tick() { - return ++ticks; - } - -protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} - - ~TextureCache() = default; - - virtual ResultType TryFastGetSurfaceView( - TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) = 0; - - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; - - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); - } - - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); - } - - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) - return surface; - // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); - return new_surface; - } - - Core::System& system; - -private: - ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, - const SurfaceParams& params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); - - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, 
params.GetGuestSizeInBytes())}; - if (overlaps.empty()) { - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); - } - - if (overlaps.size() == 1) { - if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return {view, exctx}; - } - } - - TView* fast_view; - std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, - params, preserve_contents, overlaps); - - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); - } - - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even when we don't care about the contents, to preserve memory not - // written by the new surface. - exctx = FlushSurface(exctx, surface); - } - Unregister(surface); - } - - if (fast_view) { - return {fast_view, exctx}; - } - - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); - } - - ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - if (preserve_contents) { - exctx = LoadSurface(exctx, new_surface); - } - return {new_surface->GetView(gpu_addr, params), exctx}; - } - - TExecutionContext LoadSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { - surface->LoadBuffer(); - exctx = surface->UploadTexture(exctx); - surface->MarkAsModified(false); - return exctx; - } - - TExecutionContext FlushSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { - if (!surface->IsModified()) { - return exctx; - } - exctx = surface->DownloadTexture(exctx); - surface->FlushBuffer(); - return exctx; - } - - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { - if (size == 0) { - return {}; - 
} - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector> surfaces; - for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); - } - return surfaces; - } - - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { - surface_reserve[params].push_back(std::move(surface)); - } - - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { - auto search{surface_reserve.find(params)}; - if (search == surface_reserve.end()) { - return {}; - } - for (auto& surface : search->second) { - if (!surface->IsRegistered()) { - return surface; - } - } - return {}; - } - - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); - } - - VideoCore::RasterizerInterface& rasterizer; - - u64 ticks{}; - - IntervalMap registered_surfaces; - - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. 
- std::unordered_map>> surface_reserve; -}; - -struct DummyExecutionContext {}; - -template -class TextureCacheContextless : protected TextureCache { - using Base = TextureCache; - -public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { - Base::InvalidateRegion(addr, size); - } - - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return RemoveContext(Base::GetTextureSurface({}, config)); - } - - TView* GetDepthBufferSurface(bool preserve_contents) { - return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); - } - - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { - return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); - } - - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return RemoveContext(Base::GetFermiSurface({}, config)); - } - - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { - return Base::TryFindFramebufferSurface(host_ptr); - } - - u64 Tick() { - return Base::Tick(); - } - -protected: - explicit TextureCacheContextless(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) - : TextureCache{system, rasterizer} {} - - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; - -private: - std::tuple TryFastGetSurfaceView( - DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) { - return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, - overlaps), - {}}; - } - - TView* RemoveContext(std::tuple return_value) { - const auto [view, exctx] = return_value; - return view; - } -}; - -template -class SurfaceBaseContextless : public SurfaceBase { -public: - DummyExecutionContext DownloadTexture(DummyExecutionContext) { - 
DownloadTextureImpl(); - return {}; - } - - DummyExecutionContext UploadTexture(DummyExecutionContext) { - UploadTextureImpl(); - return {}; - } - -protected: - explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBase{texture_cache, params} {} - - virtual void DownloadTextureImpl() = 0; - - virtual void UploadTextureImpl() = 0; -}; - -} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp new file mode 100644 index 000000000..8680485b4 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.cpp @@ -0,0 +1,118 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/morton.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/textures/convert.h" + +namespace VideoCommon { + +using Tegra::Texture::ConvertFromGuestToHost; +using VideoCore::MortonSwizzleMode; + +namespace { +void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level) { + const u32 width{params.GetMipWidth(level)}; + const u32 height{params.GetMipHeight(level)}; + const u32 block_height{params.GetMipBlockHeight(level)}; + const u32 block_depth{params.GetMipBlockDepth(level)}; + + std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; + if (params.IsLayered()) { + std::size_t host_offset{0}; + const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t host_stride = params.GetHostLayerSize(level); + for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + 1, params.GetTileWidthSpacing(), buffer + host_offset, + memory + guest_offset); + guest_offset += 
guest_stride; + host_offset += host_stride; + } + } else { + MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, + params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + memory + guest_offset); + } +} +} // Anonymous namespace + +SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { + staging_buffer.resize(params.GetHostSizeInBytes()); +} + +SurfaceBaseImpl::~SurfaceBaseImpl() = default; + +void SurfaceBaseImpl::LoadBuffer() { + if (params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", + params.GetBlockWidth(), static_cast(params.GetTarget())); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer{GetStagingBufferLevelData(level)}; + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + } + } else { + ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + const u32 block_width{params.GetDefaultBlockWidth()}; + const u32 block_height{params.GetDefaultBlockHeight()}; + const u32 width{(params.GetWidth() + block_width - 1) / block_width}; + const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 copy_size{width * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); + } else { + const u8* start{host_ptr}; + u8* write_to{staging_buffer.data()}; + for (u32 h = height; h > 0; --h) { + std::memcpy(write_to, start, copy_size); + start += params.GetPitch(); + write_to += copy_size; + } + } + } + + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + params.GetMipWidth(level), params.GetMipHeight(level), + params.GetMipDepth(level), true, true); + } +} + +void SurfaceBaseImpl::FlushBuffer() { + if 
(params.IsTiled()) { + ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", + params.GetBlockWidth()); + for (u32 level = 0; level < params.GetNumLevels(); ++level) { + u8* const buffer = GetStagingBufferLevelData(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + } + } else { + UNIMPLEMENTED(); + /* + ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); + ASSERT(params.GetNumLevels() == 1); + + const u32 bpp{params.GetFormatBpp() / 8}; + const u32 copy_size{params.GetWidth() * bpp}; + if (params.GetPitch() == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + } else { + u8* start{host_ptr}; + const u8* read_to{staging_buffer.data()}; + for (u32 h = params.GetHeight(); h > 0; --h) { + std::memcpy(start, read_to, copy_size); + start += params.GetPitch(); + read_to += copy_size; + } + } + */ + } +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h new file mode 100644 index 000000000..d0142a9e6 --- /dev/null +++ b/src/video_core/texture_cache/surface_base.h @@ -0,0 +1,172 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace VideoCommon { + +class SurfaceBaseImpl { +public: + void LoadBuffer(); + + void FlushBuffer(); + + GPUVAddr GetGpuAddr() const { + ASSERT(is_registered); + return gpu_addr; + } + + VAddr GetCpuAddr() const { + ASSERT(is_registered); + return cpu_addr; + } + + u8* GetHostPtr() const { + ASSERT(is_registered); + return host_ptr; + } + + CacheAddr GetCacheAddr() const { + ASSERT(is_registered); + return cache_addr; + } + + const SurfaceParams& GetSurfaceParams() const { + return params; + } + + void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { + ASSERT(!is_registered); + is_registered = true; + gpu_addr = gpu_addr_; + cpu_addr = cpu_addr_; + host_ptr = host_ptr_; + cache_addr = ToCacheAddr(host_ptr_); + DecorateSurfaceName(); + } + + void Unregister() { + ASSERT(is_registered); + is_registered = false; + } + + bool IsRegistered() const { + return is_registered; + } + + std::size_t GetSizeInBytes() const { + return params.GetGuestSizeInBytes(); + } + + u8* GetStagingBufferLevelData(u32 level) { + return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + } + +protected: + explicit SurfaceBaseImpl(const SurfaceParams& params); + ~SurfaceBaseImpl(); // non-virtual is intended + + virtual void DecorateSurfaceName() = 0; + + const SurfaceParams params; + +private: + GPUVAddr gpu_addr{}; + VAddr cpu_addr{}; + u8* host_ptr{}; + CacheAddr cache_addr{}; + bool is_registered{}; + + std::vector staging_buffer; +}; + +template +class SurfaceBase : public SurfaceBaseImpl { + static_assert(std::is_trivially_copyable_v); + +public: + virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0; + + virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + + TView* TryGetView(GPUVAddr 
view_addr, const SurfaceParams& view_params) { + if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { + // It can't be a view if it's in a prior address. + return {}; + } + + const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; + const auto it{view_offset_map.find(relative_offset)}; + if (it == view_offset_map.end()) { + // Couldn't find an aligned view. + return {}; + } + const auto [layer, level] = it->second; + + if (!params.IsViewValid(view_params, layer, level)) { + return {}; + } + + return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + } + + void MarkAsModified(bool is_modified_) { + is_modified = is_modified_; + if (is_modified_) { + modification_tick = texture_cache.Tick(); + } + } + + TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { + TView* view{TryGetView(view_addr, view_params)}; + ASSERT(view != nullptr); + return view; + } + + bool IsModified() const { + return is_modified; + } + + u64 GetModificationTick() const { + return modification_tick; + } + +protected: + explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBaseImpl{params}, texture_cache{texture_cache}, + view_offset_map{params.CreateViewOffsetMap()} {} + + ~SurfaceBase() = default; + + virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + +private: + TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { + const ViewKey key{base_layer, num_layers, base_level, num_levels}; + const auto [entry, is_cache_miss] = views.try_emplace(key); + auto& view{entry->second}; + if (is_cache_miss) { + view = CreateView(key); + } + return view.get(); + } + + TTextureCache& texture_cache; + const std::map> view_offset_map; + + std::unordered_map> views; + + bool is_modified{}; + u64 modification_tick{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache/surface_params.cpp similarity index 76% 
rename from src/video_core/texture_cache.cpp rename to src/video_core/texture_cache/surface_params.cpp index 146e8ed9b..d1f8c53d5 100644 --- a/src/video_core/texture_cache.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -2,22 +2,17 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/alignment.h" -#include "common/assert.h" +#include + #include "common/cityhash.h" -#include "common/common_types.h" +#include "common/alignment.h" #include "core/core.h" -#include "video_core/morton.h" #include "video_core/surface.h" -#include "video_core/texture_cache.h" -#include "video_core/textures/convert.h" +#include "video_core/texture_cache/surface_params.h" #include "video_core/textures/decoders.h" -#include "video_core/textures/texture.h" namespace VideoCommon { -using VideoCore::MortonSwizzleMode; - using VideoCore::Surface::ComponentTypeFromDepthFormat; using VideoCore::Surface::ComponentTypeFromRenderTarget; using VideoCore::Surface::ComponentTypeFromTexture; @@ -27,115 +22,12 @@ using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; -using Tegra::Texture::ConvertFromGuestToHost; - namespace { - constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return uncompressed ? 
mip_size : std::max(1U, (mip_size + tile - 1) / tile); } - -void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level) { - const u32 width{params.GetMipWidth(level)}; - const u32 height{params.GetMipHeight(level)}; - const u32 block_height{params.GetMipBlockHeight(level)}; - const u32 block_depth{params.GetMipBlockDepth(level)}; - - std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; - if (params.IsLayered()) { - std::size_t host_offset{0}; - const std::size_t guest_stride = params.GetGuestLayerSize(); - const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - 1, params.GetTileWidthSpacing(), buffer + host_offset, - memory + guest_offset); - guest_offset += guest_stride; - host_offset += host_stride; - } - } else { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, - memory + guest_offset); - } -} - } // Anonymous namespace -SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { - staging_buffer.resize(params.GetHostSizeInBytes()); -} - -SurfaceBaseImpl::~SurfaceBaseImpl() = default; - -void SurfaceBaseImpl::LoadBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", - params.GetBlockWidth(), static_cast(params.GetTarget())); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer{GetStagingBufferLevelData(level)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); - } - } else { - ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; - const u32 
block_width{params.GetDefaultBlockWidth()}; - const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.GetWidth() + block_width - 1) / block_width}; - const u32 height{(params.GetHeight() + block_height - 1) / block_height}; - const u32 copy_size{width * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); - } else { - const u8* start{host_ptr}; - u8* write_to{staging_buffer.data()}; - for (u32 h = height; h > 0; --h) { - std::memcpy(write_to, start, copy_size); - start += params.GetPitch(); - write_to += copy_size; - } - } - } - - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), - params.GetMipWidth(level), params.GetMipHeight(level), - params.GetMipDepth(level), true, true); - } -} - -void SurfaceBaseImpl::FlushBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", - params.GetBlockWidth()); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer = GetStagingBufferLevelData(level); - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); - } - } else { - UNIMPLEMENTED(); - /* - ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); - ASSERT(params.GetNumLevels() == 1); - - const u32 bpp{params.GetFormatBpp() / 8}; - const u32 copy_size{params.GetWidth() * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); - } else { - u8* start{host_ptr}; - const u8* read_to{staging_buffer.data()}; - for (u32 h = params.GetHeight(); h > 0; --h) { - std::memcpy(start, read_to, copy_size); - start += params.GetPitch(); - read_to += copy_size; - } - } - */ - } -} - SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, const Tegra::Texture::FullTextureInfo& config) { SurfaceParams params; @@ -517,14 +409,4 @@ 
bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const rhs.type, rhs.target); } -std::size_t ViewKey::Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); -} - -bool ViewKey::operator==(const ViewKey& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); -} - } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h new file mode 100644 index 000000000..77dc0ba66 --- /dev/null +++ b/src/video_core/texture_cache/surface_params.h @@ -0,0 +1,229 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" + +namespace VideoCommon { + +class HasheableSurfaceParams { +public: + std::size_t Hash() const; + + bool operator==(const HasheableSurfaceParams& rhs) const; + + bool operator!=(const HasheableSurfaceParams& rhs) const { + return !operator==(rhs); + } + +protected: + // Avoid creation outside of a managed environment. + HasheableSurfaceParams() = default; + + bool is_tiled; + bool srgb_conversion; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; +}; + +class SurfaceParams final : public HasheableSurfaceParams { +public: + /// Creates SurfaceCachedParams from a texture configuration. 
+ static SurfaceParams CreateForTexture(Core::System& system, + const Tegra::Texture::FullTextureInfo& config); + + /// Creates SurfaceCachedParams for a depth buffer configuration. + static SurfaceParams CreateForDepthBuffer( + Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); + + /// Creates SurfaceCachedParams from a framebuffer configuration. + static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); + + /// Creates SurfaceCachedParams from a Fermi2D surface configuration. + static SurfaceParams CreateForFermiCopySurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config); + + bool IsTiled() const { + return is_tiled; + } + + bool GetSrgbConversion() const { + return srgb_conversion; + } + + u32 GetBlockWidth() const { + return block_width; + } + + u32 GetTileWidthSpacing() const { + return tile_width_spacing; + } + + u32 GetWidth() const { + return width; + } + + u32 GetHeight() const { + return height; + } + + u32 GetDepth() const { + return depth; + } + + u32 GetPitch() const { + return pitch; + } + + u32 GetNumLevels() const { + return num_levels; + } + + VideoCore::Surface::PixelFormat GetPixelFormat() const { + return pixel_format; + } + + VideoCore::Surface::ComponentType GetComponentType() const { + return component_type; + } + + VideoCore::Surface::SurfaceTarget GetTarget() const { + return target; + } + + VideoCore::Surface::SurfaceType GetType() const { + return type; + } + + std::size_t GetGuestSizeInBytes() const { + return guest_size_in_bytes; + } + + std::size_t GetHostSizeInBytes() const { + return host_size_in_bytes; + } + + u32 GetNumLayers() const { + return num_layers; + } + + /// Returns the width of a given mipmap level. + u32 GetMipWidth(u32 level) const; + + /// Returns the height of a given mipmap level. 
+ u32 GetMipHeight(u32 level) const; + + /// Returns the depth of a given mipmap level. + u32 GetMipDepth(u32 level) const; + + /// Returns true if these parameters are from a layered surface. + bool IsLayered() const; + + /// Returns the block height of a given mipmap level. + u32 GetMipBlockHeight(u32 level) const; + + /// Returns the block depth of a given mipmap level. + u32 GetMipBlockDepth(u32 level) const; + + /// Returns the offset in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapLevelOffset(u32 level) const; + + /// Returns the offset in bytes in host memory (linear) of a given mipmap level. + std::size_t GetHostMipmapLevelOffset(u32 level) const; + + /// Returns the size in bytes in host memory (linear) of a given mipmap level. + std::size_t GetHostMipmapSize(u32 level) const; + + /// Returns the size of a layer in bytes in guest memory. + std::size_t GetGuestLayerSize() const; + + /// Returns the size of a layer in bytes in host memory for a given mipmap level. + std::size_t GetHostLayerSize(u32 level) const; + + /// Returns the default block width. + u32 GetDefaultBlockWidth() const; + + /// Returns the default block height. + u32 GetDefaultBlockHeight() const; + + /// Returns the bits per pixel. + u32 GetBitsPerPixel() const; + + /// Returns the bytes per pixel. + u32 GetBytesPerPixel() const; + + /// Returns true if another surface can be familiar with this. This is a loosely defined term + /// that reflects the possibility of these two surface parameters potentially being part of a + /// bigger superset. + bool IsFamiliar(const SurfaceParams& view_params) const; + + /// Returns true if the pixel format is a depth and/or stencil format. + bool IsPixelFormatZeta() const; + + /// Creates a map that redirects an address difference to a layer and mipmap level. + std::map> CreateViewOffsetMap() const; + + /// Returns true if the passed surface view parameters is equal or a valid subset of this. 
+ bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + +private: + /// Calculates values that can be deduced from HasheableSurfaceParams. + void CalculateCachedValues(); + + /// Returns the size of a given mipmap level inside a layer. + std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; + + /// Returns the size of all mipmap levels and aligns as needed. + std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + + /// Returns the size of a layer + std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; + + /// Returns true if the passed view width and height match the size of this params in a given + /// mipmap level. + bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view depth match the size of this params in a given mipmap level. + bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; + + /// Returns true if the passed view layers and mipmap levels are in bounds. + bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; + + std::size_t guest_size_in_bytes; + std::size_t host_size_in_bytes; + u32 num_layers; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp new file mode 100644 index 000000000..5f4cdbb1c --- /dev/null +++ b/src/video_core/texture_cache/surface_view.cpp @@ -0,0 +1,23 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/common_types.h" +#include "video_core/texture_cache/surface_view.h" + +namespace VideoCommon { + +std::size_t ViewKey::Hash() const { + return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ + (static_cast(base_level) << 32) ^ + (static_cast(num_levels) << 48); +} + +bool ViewKey::operator==(const ViewKey& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +} + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h new file mode 100644 index 000000000..e73d8f6ae --- /dev/null +++ b/src/video_core/texture_cache/surface_view.h @@ -0,0 +1,35 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace VideoCommon { + +struct ViewKey { + std::size_t Hash() const; + + bool operator==(const ViewKey& rhs) const; + + u32 base_layer{}; + u32 num_layers{}; + u32 base_level{}; + u32 num_levels{}; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..fb43fa65e --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h @@ -0,0 +1,282 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "core/memory.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/surface_params.h" +#include "video_core/texture_cache/surface_view.h" + +namespace Core { +class System; +} + +namespace Tegra::Texture { +struct FullTextureInfo; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class TextureCache { + static_assert(std::is_trivially_copyable_v); + + using ResultType = std::tuple; + using IntervalMap = boost::icl::interval_map>>; + using IntervalType = typename IntervalMap::interval_type; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegion(addr, size)) { + if (!surface->IsRegistered()) { + // Skip duplicates + continue; + } + Unregister(surface); + } + } + + ResultType GetTextureSurface(TExecutionContext exctx, + const Tegra::Texture::FullTextureInfo& config) { + const auto gpu_addr{config.tic.Address()}; + if (!gpu_addr) { + return {{}, exctx}; + } + const auto params{SurfaceParams::CreateForTexture(system, config)}; + return GetSurfaceView(exctx, gpu_addr, params, true); + } + + ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + const auto& regs{system.GPU().Maxwell3D().regs}; + const auto gpu_addr{regs.zeta.Address()}; + if (!gpu_addr || !regs.zeta_enable) { + return {{}, exctx}; + } + const auto depth_params{SurfaceParams::CreateForDepthBuffer( + system, regs.zeta_width, regs.zeta_height, regs.zeta.format, + regs.zeta.memory_layout.block_width, 
regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + return GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + } + + ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, + bool preserve_contents) { + ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + + const auto& regs{system.GPU().Maxwell3D().regs}; + if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || + regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + return {{}, exctx}; + } + + auto& memory_manager{system.GPU().MemoryManager()}; + const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; + const auto gpu_addr{config.Address() + + config.base_layer * config.layer_stride * sizeof(u32)}; + if (!gpu_addr) { + return {{}, exctx}; + } + + return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + } + + ResultType GetFermiSurface(TExecutionContext exctx, + const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(exctx, config.Address(), + SurfaceParams::CreateForFermiCopySurface(config), true); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; + return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + } + + u64 Tick() { + return ++ticks; + } + +protected: + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : system{system}, rasterizer{rasterizer} {} + + ~TextureCache() = default; + + virtual ResultType TryFastGetSurfaceView( + TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) = 0; + + virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + + void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr) { + surface->Register(gpu_addr, cpu_addr, host_ptr); + registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + } + + void Unregister(std::shared_ptr surface) { + registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); + rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); + surface->Unregister(); + } + + std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) + return surface; + // No reserved surface available, create a new one and reserve it + auto new_surface{CreateSurface(params)}; + ReserveSurface(params, new_surface); + return new_surface; + } + + Core::System& system; + +private: + ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, + const SurfaceParams& params, bool preserve_contents) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; + DEBUG_ASSERT(cpu_addr); + + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + if (overlaps.empty()) { + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, 
host_ptr, params, preserve_contents); + } + + if (overlaps.size() == 1) { + if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { + return {view, exctx}; + } + } + + TView* fast_view; + std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, + params, preserve_contents, overlaps); + + if (!fast_view) { + std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { + return lhs->GetModificationTick() < rhs->GetModificationTick(); + }); + } + + for (const auto& surface : overlaps) { + if (!fast_view) { + // Flush even when we don't care about the contents, to preserve memory not + // written by the new surface. + exctx = FlushSurface(exctx, surface); + } + Unregister(surface); + } + + if (fast_view) { + return {fast_view, exctx}; + } + + return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + } + + ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, + u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + const auto new_surface{GetUncachedSurface(params)}; + Register(new_surface, gpu_addr, cpu_addr, host_ptr); + if (preserve_contents) { + exctx = LoadSurface(exctx, new_surface); + } + return {new_surface->GetView(gpu_addr, params), exctx}; + } + + TExecutionContext LoadSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + surface->LoadBuffer(); + exctx = surface->UploadTexture(exctx); + surface->MarkAsModified(false); + return exctx; + } + + TExecutionContext FlushSurface(TExecutionContext exctx, + const std::shared_ptr& surface) { + if (!surface->IsModified()) { + return exctx; + } + exctx = surface->DownloadTexture(exctx); + surface->FlushBuffer(); + return exctx; + } + + std::vector> GetSurfacesInRegion(CacheAddr cache_addr, + std::size_t size) const { + if (size == 0) { + return {}; + } + const IntervalType interval{cache_addr, cache_addr + size}; + + std::vector> surfaces; + for (auto& pair : 
boost::make_iterator_range(registered_surfaces.equal_range(interval))) { + surfaces.push_back(*pair.second.begin()); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + surface_reserve[params].push_back(std::move(surface)); + } + + std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + auto search{surface_reserve.find(params)}; + if (search == surface_reserve.end()) { + return {}; + } + for (auto& surface : search->second) { + if (!surface->IsRegistered()) { + return surface; + } + } + return {}; + } + + IntervalType GetSurfaceInterval(std::shared_ptr surface) const { + return IntervalType::right_open(surface->GetCacheAddr(), + surface->GetCacheAddr() + surface->GetSizeInBytes()); + } + + VideoCore::RasterizerInterface& rasterizer; + + u64 ticks{}; + + IntervalMap registered_surfaces; + + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have + /// previously been used. This is to prevent surfaces from being constantly created and + /// destroyed when used with different surface parameters. + std::unordered_map>> surface_reserve; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h new file mode 100644 index 000000000..cd35a9fd4 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_contextless.h @@ -0,0 +1,93 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/texture_cache/texture_cache.h" + +namespace VideoCommon { + +struct DummyExecutionContext {}; + +template +class TextureCacheContextless : protected TextureCache { + using Base = TextureCache; + +public: + void InvalidateRegion(CacheAddr addr, std::size_t size) { + Base::InvalidateRegion(addr, size); + } + + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + return RemoveContext(Base::GetTextureSurface({}, config)); + } + + TView* GetDepthBufferSurface(bool preserve_contents) { + return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); + } + + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); + } + + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return RemoveContext(Base::GetFermiSurface({}, config)); + } + + std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + return Base::TryFindFramebufferSurface(host_ptr); + } + + u64 Tick() { + return Base::Tick(); + } + +protected: + explicit TextureCacheContextless(Core::System& system, + VideoCore::RasterizerInterface& rasterizer) + : TextureCache{system, rasterizer} {} + + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; + +private: + std::tuple TryFastGetSurfaceView( + DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents, + const std::vector>& overlaps) { + return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, + overlaps), + {}}; + } + + TView* RemoveContext(std::tuple return_value) { + const auto [view, exctx] = return_value; + return view; + } +}; + +template +class SurfaceBaseContextless : public SurfaceBase { +public: + DummyExecutionContext 
DownloadTexture(DummyExecutionContext) { + DownloadTextureImpl(); + return {}; + } + + DummyExecutionContext UploadTexture(DummyExecutionContext) { + UploadTextureImpl(); + return {}; + } + +protected: + explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) + : SurfaceBase{texture_cache, params} {} + + virtual void DownloadTextureImpl() = 0; + + virtual void UploadTextureImpl() = 0; +}; + +} // namespace VideoCommon From fa59a7b4d8403c0d277b189c880469cf8113e386 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Apr 2019 18:47:59 -0300 Subject: [PATCH 009/113] gl_texture_cache: Implement fermi copies --- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_opengl/gl_rasterizer.h | 1 + .../renderer_opengl/gl_texture_cache.h | 1 + src/video_core/renderer_opengl/utils.cpp | 83 +++++++++++++++++++ src/video_core/renderer_opengl/utils.h | 17 ++++ 5 files changed, 105 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index af63365a4..027e9d293 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -738,8 +738,9 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); - UNIMPLEMENTED(); - // texture_cache.FermiCopySurface(src, dst, src_rect, dst_rect); + const auto src_surface{texture_cache.GetFermiSurface(src)}; + const auto dst_surface{texture_cache.GetFermiSurface(dst)}; + blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 921e9fc31..8e5009eeb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -202,6 +202,7 @@ private: static constexpr 
std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; + SurfaceBlitter blitter; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3c15b37bd..b165187d9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/texture_cache/texture_cache_contextless.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index f23fc9f9d..d9be61604 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -5,12 +5,21 @@ #include #include #include + #include "common/assert.h" #include "common/common_types.h" +#include "common/scope_exit.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/surface.h" namespace OpenGL { +using Tegra::Shader::TextureType; +using Tegra::Texture::SwizzleSource; +using VideoCore::Surface::SurfaceTarget; + BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; @@ -38,6 +47,80 @@ void BindBuffersRangePushBuffer::Bind() const { sizes.data()); } +SurfaceBlitter::SurfaceBlitter() { + src_framebuffer.Create(); + dst_framebuffer.Create(); +} + +SurfaceBlitter::~SurfaceBlitter() = default; + +void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) const { + const auto& 
src_params{src->GetSurfaceParams()}; + const auto& dst_params{dst->GetSurfaceParams()}; + + OpenGLState prev_state{OpenGLState::GetCurState()}; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = src_framebuffer.handle; + state.draw.draw_framebuffer = dst_framebuffer.handle; + state.ApplyFramebufferState(); + + u32 buffers{}; + + UNIMPLEMENTED_IF(src_params.GetTarget() != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.GetTarget() != SurfaceTarget::Texture2D); + + const auto GetTexture = [](CachedSurfaceView* view) { + return view->GetTexture(TextureType::Texture2D, false, SwizzleSource::R, SwizzleSource::G, + SwizzleSource::B, SwizzleSource::A); + }; + const GLuint src_texture{GetTexture(src)}; + const GLuint dst_texture{GetTexture(dst)}; + + if (src_params.GetType() == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + src_texture, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_texture, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (src_params.GetType() == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (src_params.GetType() == 
SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_texture, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_texture, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, + dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); +} + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index b3e9fc499..e7726d14e 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -8,9 +8,13 @@ #include #include #include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { +class CachedSurfaceView; + class BindBuffersRangePushBuffer { public: BindBuffersRangePushBuffer(GLenum target); @@ -30,6 +34,19 @@ private: std::vector sizes; }; +class SurfaceBlitter { +public: + explicit SurfaceBlitter(); + ~SurfaceBlitter(); + + void Blit(CachedSurfaceView* src, CachedSurfaceView* dst, + const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const; + +private: + OGLFramebuffer src_framebuffer; + OGLFramebuffer dst_framebuffer; +}; + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL \ No newline at end of file From 6c410104f4f6953ac37095aa5e65804bf115c026 Mon Sep 17 00:00:00 2001 From: 
ReinUsesLisp Date: Thu, 25 Apr 2019 13:41:57 -0300 Subject: [PATCH 010/113] texture_cache: Remove execution context copies from the texture cache This is done to simplify the OpenGL implementation, it is needed for Vulkan. --- .../renderer_opengl/gl_texture_cache.cpp | 16 ++-- .../renderer_opengl/gl_texture_cache.h | 17 ++-- src/video_core/renderer_opengl/utils.cpp | 2 + src/video_core/texture_cache/surface_base.cpp | 8 +- src/video_core/texture_cache/surface_base.h | 8 +- src/video_core/texture_cache/texture_cache.h | 83 +++++++---------- .../texture_cache/texture_cache_contextless.h | 93 ------------------- 7 files changed, 59 insertions(+), 168 deletions(-) delete mode 100644 src/video_core/texture_cache/texture_cache_contextless.h diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ca007b797..f7c2f46aa 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,7 +9,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" -#include "video_core/texture_cache/texture_cache_contextless.h" +#include "video_core/texture_cache/texture_cache.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" @@ -18,6 +18,10 @@ namespace OpenGL { using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::ComponentType; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceTarget; + namespace { struct FormatTuple { @@ -209,8 +213,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) - : VideoCommon::SurfaceBaseContextless{texture_cache, - params} { + : VideoCommon::SurfaceBase{texture_cache, 
params} { const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; internal_format = tuple.internal_format; format = tuple.format; @@ -222,7 +225,7 @@ CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfacePar CachedSurface::~CachedSurface() = default; -void CachedSurface::DownloadTextureImpl() { +void CachedSurface::DownloadTexture() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); @@ -241,7 +244,7 @@ void CachedSurface::DownloadTextureImpl() { } } -void CachedSurface::UploadTextureImpl() { +void CachedSurface::UploadTexture() { SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.GetNumLevels(); ++level) { UploadTextureMipmap(level); @@ -321,7 +324,8 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr()); + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), + params.GetTarget() == SurfaceTarget::Texture3D ? 
"3D" : ""); } std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index b165187d9..c65e37153 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -14,32 +14,30 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/texture_cache/texture_cache_contextless.h" +#include "video_core/texture_cache/texture_cache.h" namespace OpenGL { using VideoCommon::SurfaceParams; using VideoCommon::ViewKey; -using VideoCore::Surface::ComponentType; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceType; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; using Surface = std::shared_ptr; -using TextureCacheBase = VideoCommon::TextureCacheContextless; +using TextureCacheBase = VideoCommon::TextureCache; -class CachedSurface final - : public VideoCommon::SurfaceBaseContextless { +class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); ~CachedSurface(); + void UploadTexture(); + void DownloadTexture(); + GLenum GetTarget() const { return target; } @@ -53,9 +51,6 @@ protected: std::unique_ptr CreateView(const ViewKey& view_key); - void UploadTextureImpl(); - void DownloadTextureImpl(); - private: void UploadTextureMipmap(u32 level); diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index d9be61604..5994c0c61 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -18,7 +18,9 @@ namespace OpenGL { using Tegra::Shader::TextureType; using Tegra::Texture::SwizzleSource; + using 
VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceType; BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8680485b4..d0779b502 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -53,8 +53,8 @@ void SurfaceBaseImpl::LoadBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", params.GetBlockWidth(), static_cast(params.GetTarget())); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer{GetStagingBufferLevelData(level)}; - SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, buffer, level); + SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, + GetStagingBufferLevelData(level), level); } } else { ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); @@ -89,8 +89,8 @@ void SurfaceBaseImpl::FlushBuffer() { ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", params.GetBlockWidth()); for (u32 level = 0; level < params.GetNumLevels(); ++level) { - u8* const buffer = GetStagingBufferLevelData(level); - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, buffer, level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, + GetStagingBufferLevelData(level), level); } } else { UNIMPLEMENTED(); diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d0142a9e6..eed8dc59d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -89,14 +89,12 @@ private: std::vector staging_buffer; }; -template +template class SurfaceBase : public SurfaceBaseImpl { - static_assert(std::is_trivially_copyable_v); - public: - virtual TExecutionContext UploadTexture(TExecutionContext 
exctx) = 0; + virtual void UploadTexture() = 0; - virtual TExecutionContext DownloadTexture(TExecutionContext exctx) = 0; + virtual void DownloadTexture() = 0; TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb43fa65e..c5c01957a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -41,11 +40,8 @@ class RasterizerInterface; namespace VideoCommon { -template +template class TextureCache { - static_assert(std::is_trivially_copyable_v); - - using ResultType = std::tuple; using IntervalMap = boost::icl::interval_map>>; using IntervalType = typename IntervalMap::interval_type; @@ -60,37 +56,35 @@ public: } } - ResultType GetTextureSurface(TExecutionContext exctx, - const Tegra::Texture::FullTextureInfo& config) { + TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(exctx, gpu_addr, params, true); + return GetSurfaceView(gpu_addr, params, true); } - ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { + TView* GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { - return {{}, exctx}; + return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return 
GetSurfaceView(exctx, gpu_addr, depth_params, preserve_contents); + return GetSurfaceView(gpu_addr, depth_params, preserve_contents); } - ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, - bool preserve_contents) { + TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return {{}, exctx}; + return {}; } auto& memory_manager{system.GPU().MemoryManager()}; @@ -98,17 +92,16 @@ public: const auto gpu_addr{config.Address() + config.base_layer * config.layer_stride * sizeof(u32)}; if (!gpu_addr) { - return {{}, exctx}; + return {}; } - return GetSurfaceView(exctx, gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); } - ResultType GetFermiSurface(TExecutionContext exctx, - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(exctx, config.Address(), - SurfaceParams::CreateForFermiCopySurface(config), true); + TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), + true); } std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { @@ -126,10 +119,9 @@ protected: ~TextureCache() = default; - virtual ResultType TryFastGetSurfaceView( - TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) = 0; + virtual TView* TryFastGetSurfaceView( + GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, + bool preserve_contents, const std::vector>& overlaps) = 0; virtual std::shared_ptr 
CreateSurface(const SurfaceParams& params) = 0; @@ -158,8 +150,7 @@ protected: Core::System& system; private: - ResultType GetSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, - const SurfaceParams& params, bool preserve_contents) { + TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto& memory_manager{system.GPU().MemoryManager()}; const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; DEBUG_ASSERT(cpu_addr); @@ -168,18 +159,17 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; if (overlaps.empty()) { - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } if (overlaps.size() == 1) { if (TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return {view, exctx}; + return view; } } - TView* fast_view; - std::tie(fast_view, exctx) = TryFastGetSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, - params, preserve_contents, overlaps); + const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, + preserve_contents, overlaps)}; if (!fast_view) { std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { @@ -191,44 +181,39 @@ private: if (!fast_view) { // Flush even when we don't care about the contents, to preserve memory not // written by the new surface. 
- exctx = FlushSurface(exctx, surface); + FlushSurface(surface); } Unregister(surface); } - if (fast_view) { - return {fast_view, exctx}; + return fast_view; } - return LoadSurfaceView(exctx, gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - ResultType LoadSurfaceView(TExecutionContext exctx, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& params, bool preserve_contents) { + TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, + const SurfaceParams& params, bool preserve_contents) { const auto new_surface{GetUncachedSurface(params)}; Register(new_surface, gpu_addr, cpu_addr, host_ptr); if (preserve_contents) { - exctx = LoadSurface(exctx, new_surface); + LoadSurface(new_surface); } - return {new_surface->GetView(gpu_addr, params), exctx}; + return new_surface->GetView(gpu_addr, params); } - TExecutionContext LoadSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void LoadSurface(const std::shared_ptr& surface) { surface->LoadBuffer(); - exctx = surface->UploadTexture(exctx); + surface->UploadTexture(); surface->MarkAsModified(false); - return exctx; } - TExecutionContext FlushSurface(TExecutionContext exctx, - const std::shared_ptr& surface) { + void FlushSurface(const std::shared_ptr& surface) { if (!surface->IsModified()) { - return exctx; + return; } - exctx = surface->DownloadTexture(exctx); + surface->DownloadTexture(); surface->FlushBuffer(); - return exctx; } std::vector> GetSurfacesInRegion(CacheAddr cache_addr, diff --git a/src/video_core/texture_cache/texture_cache_contextless.h b/src/video_core/texture_cache/texture_cache_contextless.h deleted file mode 100644 index cd35a9fd4..000000000 --- a/src/video_core/texture_cache/texture_cache_contextless.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file 
included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -struct DummyExecutionContext {}; - -template -class TextureCacheContextless : protected TextureCache { - using Base = TextureCache; - -public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { - Base::InvalidateRegion(addr, size); - } - - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - return RemoveContext(Base::GetTextureSurface({}, config)); - } - - TView* GetDepthBufferSurface(bool preserve_contents) { - return RemoveContext(Base::GetDepthBufferSurface({}, preserve_contents)); - } - - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { - return RemoveContext(Base::GetColorBufferSurface({}, index, preserve_contents)); - } - - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return RemoveContext(Base::GetFermiSurface({}, config)); - } - - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { - return Base::TryFindFramebufferSurface(host_ptr); - } - - u64 Tick() { - return Base::Tick(); - } - -protected: - explicit TextureCacheContextless(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) - : TextureCache{system, rasterizer} {} - - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; - -private: - std::tuple TryFastGetSurfaceView( - DummyExecutionContext, GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool preserve_contents, - const std::vector>& overlaps) { - return {TryFastGetSurfaceView(gpu_addr, cpu_addr, host_ptr, params, preserve_contents, - overlaps), - {}}; - } - - TView* RemoveContext(std::tuple return_value) { - const auto [view, exctx] = return_value; - return view; - } -}; - -template -class SurfaceBaseContextless : public SurfaceBase { -public: - 
DummyExecutionContext DownloadTexture(DummyExecutionContext) { - DownloadTextureImpl(); - return {}; - } - - DummyExecutionContext UploadTexture(DummyExecutionContext) { - UploadTextureImpl(); - return {}; - } - -protected: - explicit SurfaceBaseContextless(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBase{texture_cache, params} {} - - virtual void DownloadTextureImpl() = 0; - - virtual void UploadTextureImpl() = 0; -}; - -} // namespace VideoCommon From 6b0695b3cdd930b0157df8fd8f3c9d2dce328595 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:55:18 -0400 Subject: [PATCH 011/113] Deglobalize Memory Manager on texture cache and Implement Invalidation and Flushing using GPUVAddr --- src/video_core/gpu.cpp | 1 + src/video_core/memory_manager.cpp | 2 +- src/video_core/rasterizer_interface.h | 10 ++++++++++ src/video_core/renderer_opengl/gl_rasterizer.h | 8 ++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b..619e06a0e 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -32,6 +32,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique(rasterizer); + rasterizer.InitMemoryMananger(*memory_manager); dma_pusher = std::make_unique(*this); maxwell_3d = std::make_unique(system, rasterizer, *memory_manager); fermi_2d = std::make_unique(rasterizer, *memory_manager); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 5d8d126c1..74a1441e3 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; - 
rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); + rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); return gpu_addr; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d7b86df38..d5505ef9c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -10,6 +10,10 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { enum class LoadCallbackStage { @@ -24,6 +28,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} + virtual void InitMemoryMananger(Tegra::MemoryManager& memory_manager) = 0; + /// Draw the current batch of vertex arrays virtual void DrawArrays() = 0; @@ -43,6 +49,10 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + /// and invalidated + virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0; + /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8e5009eeb..971a38ab7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -41,6 +41,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace Tegra { +class MemoryManager; +} + namespace OpenGL { struct ScreenInfo; @@ -53,12 +57,16 @@ public: ScreenInfo& info); ~RasterizerOpenGL() override; + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) override; + void DrawArrays() override; void Clear() override; void FlushAll() override; void 
FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; + void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size); void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, From 3b26206dbdd36e53152f97726da372da2e1a9a84 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:55:44 -0400 Subject: [PATCH 012/113] Add OGLTextureView --- .../renderer_opengl/gl_resource_manager.cpp | 18 +++++++++++++ .../renderer_opengl/gl_resource_manager.h | 25 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index bfe666a73..9f81c15cb 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -33,6 +33,24 @@ void OGLTexture::Release() { handle = 0; } +void OGLTextureView::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glGenTextures(1, &handle); +} + +void OGLTextureView::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteTextures(1, &handle); + OpenGLState::GetCurState().UnbindTexture(handle).Apply(); + handle = 0; +} + void OGLSampler::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index fbb93ee49..310ee2bf3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -36,6 +36,31 @@ public: GLuint handle = 0; }; +class OGLTextureView : private NonCopyable { +public: + OGLTextureView() 
= default; + + OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLTextureView() { + Release(); + } + + OGLTextureView& operator=(OGLTextureView&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + class OGLSampler : private NonCopyable { public: OGLSampler() = default; From 3d471e732d688c20aef73a506bdb6126002d3193 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:56:45 -0400 Subject: [PATCH 013/113] Correct Surface Base and Views for new Texture Cache --- src/video_core/texture_cache/copy_params.h | 25 ++ src/video_core/texture_cache/surface_base.cpp | 108 +++--- src/video_core/texture_cache/surface_base.h | 309 +++++++++++++----- .../texture_cache/surface_params.cpp | 210 +++++------- src/video_core/texture_cache/surface_params.h | 159 +++------ src/video_core/texture_cache/surface_view.cpp | 12 +- src/video_core/texture_cache/surface_view.h | 35 +- 7 files changed, 472 insertions(+), 386 deletions(-) create mode 100644 src/video_core/texture_cache/copy_params.h diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h new file mode 100644 index 000000000..75c2b1f05 --- /dev/null +++ b/src/video_core/texture_cache/copy_params.h @@ -0,0 +1,25 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +namespace VideoCommon { + +struct CopyParams { + u32 source_x; + u32 source_y; + u32 source_z; + u32 dest_x; + u32 dest_y; + u32 dest_z; + u32 source_level; + u32 dest_level; + u32 width; + u32 height; + u32 depth; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d0779b502..5273fcb44 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -4,104 +4,120 @@ #include "common/assert.h" #include "common/common_types.h" -#include "video_core/morton.h" +#include "common/microprofile.h" +#include "video_core/memory_manager.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/textures/convert.h" namespace VideoCommon { +MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); + using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -namespace { -void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, - u32 level) { +SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) + : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, + mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, + memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { + u32 offset = 0; + mipmap_offsets.resize(params.num_levels); + mipmap_sizes.resize(params.num_levels); + gpu_addr_end = gpu_addr + memory_size; + for (u32 i = 0; i < params.num_levels; i++) { + mipmap_offsets[i] = offset; + mipmap_sizes[i] = params.GetGuestMipmapSize(i); + offset += mipmap_sizes[i]; + } +} + +void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, 
const SurfaceParams& params, + u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; const u32 height{params.GetMipHeight(level)}; const u32 block_height{params.GetMipBlockHeight(level)}; const u32 block_depth{params.GetMipBlockDepth(level)}; - std::size_t guest_offset{params.GetGuestMipmapLevelOffset(level)}; - if (params.IsLayered()) { + std::size_t guest_offset{mipmap_offsets[level]}; + if (params.is_layered) { std::size_t host_offset{0}; - const std::size_t guest_stride = params.GetGuestLayerSize(); + const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.GetNumLayers(); layer++) { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - 1, params.GetTileWidthSpacing(), buffer + host_offset, - memory + guest_offset); + for (u32 layer = 0; layer < params.depth; layer++) { + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, + params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; host_offset += host_stride; } } else { - MortonSwizzle(mode, params.GetPixelFormat(), width, block_height, height, block_depth, - params.GetMipDepth(level), params.GetTileWidthSpacing(), buffer, + MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, + params.GetMipDepth(level), params.tile_width_spacing, buffer, memory + guest_offset); } } -} // Anonymous namespace -SurfaceBaseImpl::SurfaceBaseImpl(const SurfaceParams& params) : params{params} { - staging_buffer.resize(params.GetHostSizeInBytes()); -} - -SurfaceBaseImpl::~SurfaceBaseImpl() = default; - -void SurfaceBaseImpl::LoadBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {} on texture target {}", - params.GetBlockWidth(), static_cast(params.GetTarget())); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { +void 
SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Load_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + params.block_width, static_cast(params.target)); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, - GetStagingBufferLevelData(level), level); + staging_buffer.data() + host_offset, level); } } else { - ASSERT_MSG(params.GetNumLevels() == 1, "Linear mipmap loading is not implemented"); - const u32 bpp{GetFormatBpp(params.GetPixelFormat()) / CHAR_BIT}; + ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); + const u32 bpp{params.GetBytesPerPixel()}; const u32 block_width{params.GetDefaultBlockWidth()}; const u32 block_height{params.GetDefaultBlockHeight()}; - const u32 width{(params.GetWidth() + block_width - 1) / block_width}; - const u32 height{(params.GetHeight() + block_height - 1) / block_height}; + const u32 width{(params.width + block_width - 1) / block_width}; + const u32 height{(params.height + block_height - 1) / block_height}; const u32 copy_size{width * bpp}; - if (params.GetPitch() == copy_size) { + if (params.pitch == copy_size) { std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); } else { const u8* start{host_ptr}; u8* write_to{staging_buffer.data()}; for (u32 h = height; h > 0; --h) { std::memcpy(write_to, start, copy_size); - start += params.GetPitch(); + start += params.pitch; write_to += copy_size; } } } - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - ConvertFromGuestToHost(GetStagingBufferLevelData(level), params.GetPixelFormat(), + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = 
params.GetHostMipmapLevelOffset(level); + ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } } -void SurfaceBaseImpl::FlushBuffer() { - if (params.IsTiled()) { - ASSERT_MSG(params.GetBlockWidth() == 1, "Block width is defined as {}", - params.GetBlockWidth()); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - SwizzleFunc(MortonSwizzleMode::LinearToMorton, GetHostPtr(), params, - GetStagingBufferLevelData(level), level); +void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(GPU_Flush_Texture); + if (params.is_tiled) { + ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + for (u32 level = 0; level < params.num_levels; ++level) { + const u32 host_offset = params.GetHostMipmapLevelOffset(level); + SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, + staging_buffer.data() + host_offset, level); } } else { UNIMPLEMENTED(); /* - ASSERT(params.GetTarget() == SurfaceTarget::Texture2D); - ASSERT(params.GetNumLevels() == 1); + ASSERT(params.target == SurfaceTarget::Texture2D); + ASSERT(params.num_levels == 1); const u32 bpp{params.GetFormatBpp() / 8}; - const u32 copy_size{params.GetWidth() * bpp}; - if (params.GetPitch() == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), GetSizeInBytes()); + const u32 copy_size{params.width * bpp}; + if (params.pitch == copy_size) { + std::memcpy(host_ptr, staging_buffer.data(), memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eed8dc59d..5fd7add0a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -4,166 +4,309 @@ #pragma once +#include #include +#include #include "common/assert.h" #include 
"common/common_types.h" #include "video_core/gpu.h" +#include "video_core/morton.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" +template> +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) +{ + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using VideoCore::MortonSwizzleMode; + class SurfaceBaseImpl { public: - void LoadBuffer(); + void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(); + void FlushBuffer(std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { - ASSERT(is_registered); return gpu_addr; } + GPUVAddr GetGpuAddrEnd() const { + return gpu_addr_end; + } + + bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { + return (gpu_addr < end) && (gpu_addr_end > start); + } + + // Use only when recycling a surface + void SetGpuAddr(const GPUVAddr new_addr) { + gpu_addr = new_addr; + gpu_addr_end = new_addr + memory_size; + } + VAddr GetCpuAddr() const { - ASSERT(is_registered); - return cpu_addr; + return gpu_addr; + } + + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; } u8* GetHostPtr() const { - ASSERT(is_registered); return host_ptr; } - CacheAddr GetCacheAddr() const { - ASSERT(is_registered); - return cache_addr; + void SetHostPtr(u8* new_addr) { + host_ptr = new_addr; } const SurfaceParams& GetSurfaceParams() const { return params; } - void Register(GPUVAddr gpu_addr_, VAddr cpu_addr_, u8* host_ptr_) { - ASSERT(!is_registered); - 
is_registered = true; - gpu_addr = gpu_addr_; - cpu_addr = cpu_addr_; - host_ptr = host_ptr_; - cache_addr = ToCacheAddr(host_ptr_); - DecorateSurfaceName(); - } - - void Unregister() { - ASSERT(is_registered); - is_registered = false; - } - - bool IsRegistered() const { - return is_registered; - } - std::size_t GetSizeInBytes() const { - return params.GetGuestSizeInBytes(); + return memory_size; } - u8* GetStagingBufferLevelData(u32 level) { - return staging_buffer.data() + params.GetHostMipmapLevelOffset(level); + std::size_t GetHostSizeInBytes() const { + return host_memory_size; + } + + std::size_t GetMipmapSize(const u32 level) const { + return mipmap_sizes[level]; + } + + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { + return params.pixel_format == pixel_format; + } + + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { + return params.target == target; + } + + bool MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp = params.GetBytesPerPixel(); + const u32 dst_bpp = rhs.GetBytesPerPixel(); + return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + } + + bool MatchesStructure(const SurfaceParams& rhs) const { + if (params.is_tiled) { + const u32 a_width1 = params.GetBlockAlignedWidth(); + const u32 a_width2 = rhs.GetBlockAlignedWidth(); + return std::tie(a_width1, params.height, params.depth, params.block_width, + params.block_height, params.block_depth, params.tile_width_spacing) == + std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, + rhs.block_depth, rhs.tile_width_spacing); + } else { + return std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch); + } + } + + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) + return {}; + const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; + const u32 layer = relative_address / layer_size; + const 
GPUVAddr mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it != mipmap_offsets.end()) { + return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + } + return {}; + } + + std::vector BreakDown() const { + auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + const u32 level) { + cp.source_x = 0; + cp.source_y = 0; + cp.source_z = 0; + cp.dest_x = 0; + cp.dest_y = 0; + cp.dest_z = 0; + cp.source_level = level; + cp.dest_level = level; + cp.width = params.GetMipWidth(level); + cp.height = params.GetMipHeight(level); + cp.depth = depth; + }; + const u32 layers = params.depth; + const u32 mipmaps = params.num_levels; + if (params.is_layered) { + std::vector result{layers * mipmaps}; + for (std::size_t layer = 0; layer < layers; layer++) { + const u32 layer_offset = layer * mipmaps; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[layer_offset + level]; + set_up_copy(cp, params, layer, level); + } + } + return result; + } else { + std::vector result{mipmaps}; + for (std::size_t level = 0; level < mipmaps; level++) { + CopyParams& cp = result[level]; + set_up_copy(cp, params, params.GetMipDepth(level), level); + } + return result; + } } protected: - explicit SurfaceBaseImpl(const SurfaceParams& params); - ~SurfaceBaseImpl(); // non-virtual is intended + explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; + GPUVAddr gpu_addr{}; + GPUVAddr gpu_addr_end{}; + std::vector mipmap_sizes; + std::vector mipmap_offsets; + const std::size_t layer_size; + const std::size_t memory_size; + const std::size_t host_memory_size; + u8* host_ptr; + VAddr cpu_addr; private: - GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; - u8* host_ptr{}; - CacheAddr cache_addr{}; - bool 
is_registered{}; - - std::vector staging_buffer; + void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, + u32 level); }; -template +template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture() = 0; + virtual void UploadTexture(std::vector& staging_buffer) = 0; - virtual void DownloadTexture() = 0; + virtual void DownloadTexture(std::vector& staging_buffer) = 0; - TView* TryGetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - if (view_addr < GetGpuAddr() || !params.IsFamiliar(view_params)) { - // It can't be a view if it's in a prior address. - return {}; - } - - const auto relative_offset{static_cast(view_addr - GetGpuAddr())}; - const auto it{view_offset_map.find(relative_offset)}; - if (it == view_offset_map.end()) { - // Couldn't find an aligned view. - return {}; - } - const auto [layer, level] = it->second; - - if (!params.IsViewValid(view_params, layer, level)) { - return {}; - } - - return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels()); + void MarkAsModified(const bool is_modified_, const u64 tick) { + is_modified = is_modified_ || is_protected; + modification_tick = tick; } - void MarkAsModified(bool is_modified_) { - is_modified = is_modified_; - if (is_modified_) { - modification_tick = texture_cache.Tick(); - } + void MarkAsProtected(const bool is_protected) { + this->is_protected = is_protected; } - TView* GetView(GPUVAddr view_addr, const SurfaceParams& view_params) { - TView* view{TryGetView(view_addr, view_params)}; - ASSERT(view != nullptr); - return view; + void MarkAsPicked(const bool is_picked) { + this->is_picked = is_picked; } bool IsModified() const { return is_modified; } + bool IsProtected() const { + return is_protected; + } + + bool IsRegistered() const { + return is_registered; + } + + bool IsPicked() const { + return is_picked; + } + + void MarkAsRegistered(bool is_reg) { + is_registered = is_reg; + } + u64 
GetModificationTick() const { return modification_tick; } + TView EmplaceOverview(const SurfaceParams& overview_params) { + ViewParams vp{}; + vp.base_level = 0; + vp.num_levels = params.num_levels; + vp.target = overview_params.target; + if (params.is_layered && !overview_params.is_layered) { + vp.base_layer = 0; + vp.num_layers = 1; + } else { + vp.base_layer = 0; + vp.num_layers = params.depth; + } + return GetView(vp); + } + + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { + if (view_addr < gpu_addr) + return {}; + if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + return {}; + } + const std::size_t size = view_params.GetGuestSizeInBytes(); + const GPUVAddr relative_address = view_addr - gpu_addr; + auto layer_mipmap = GetLayerMipmap(relative_address); + if (!layer_mipmap) { + return {}; + } + const u32 layer = (*layer_mipmap).first; + const u32 mipmap = (*layer_mipmap).second; + if (GetMipmapSize(mipmap) != size) { + // TODO: the view may cover many mimaps, this case can still go on + return {}; + } + ViewParams vp{}; + vp.base_layer = layer; + vp.num_layers = 1; + vp.base_level = mipmap; + vp.num_levels = 1; + vp.target = params.target; + return {GetView(vp)}; + } + + TView GetMainView() const { + return main_view; + } + protected: - explicit SurfaceBase(TTextureCache& texture_cache, const SurfaceParams& params) - : SurfaceBaseImpl{params}, texture_cache{texture_cache}, - view_offset_map{params.CreateViewOffsetMap()} {} + explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) + : SurfaceBaseImpl(gpu_addr, params) {} ~SurfaceBase() = default; - virtual std::unique_ptr CreateView(const ViewKey& view_key) = 0; + virtual TView CreateView(const ViewParams& view_key) = 0; + + std::unordered_map views; + TView main_view; private: - TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) { - const ViewKey key{base_layer, num_layers, 
base_level, num_levels}; + TView GetView(const ViewParams& key) { const auto [entry, is_cache_miss] = views.try_emplace(key); auto& view{entry->second}; if (is_cache_miss) { view = CreateView(key); } - return view.get(); + return view; } - TTextureCache& texture_cache; - const std::map> view_offset_map; - - std::unordered_map> views; - bool is_modified{}; + bool is_protected{}; + bool is_registered{}; + bool is_picked{}; u64 modification_tick{}; }; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d1f8c53d5..d9052152c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "common/alignment.h" #include "core/core.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/textures/decoders.h" @@ -22,6 +23,37 @@ using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { + switch (type) { + case Tegra::Shader::TextureType::Texture1D: { + if (is_array) + return SurfaceTarget::Texture1DArray; + else + return SurfaceTarget::Texture1D; + } + case Tegra::Shader::TextureType::Texture2D: { + if (is_array) + return SurfaceTarget::Texture2DArray; + else + return SurfaceTarget::Texture2D; + } + case Tegra::Shader::TextureType::Texture3D: { + ASSERT(!is_array); + return SurfaceTarget::Texture3D; + } + case Tegra::Shader::TextureType::TextureCube: { + if (is_array) + return SurfaceTarget::TextureCubeArray; + else + return SurfaceTarget::TextureCubemap; + } + default: { + UNREACHABLE(); + return SurfaceTarget::Texture2D; + } + } +} + namespace { constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { return 
uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); @@ -29,7 +61,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { } // Anonymous namespace SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config) { + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { SurfaceParams params; params.is_tiled = config.tic.IsTiled(); params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); @@ -41,7 +74,8 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.srgb_conversion); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); - params.target = SurfaceTargetFromTextureType(config.tic.texture_type); + // TODO: on 1DBuffer we should use the tic info. + params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.depth = config.tic.Depth(); @@ -52,8 +86,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -77,8 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -108,8 +140,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = false; return params; } @@ -128,13 +159,13 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.type = GetFormatType(params.pixel_format); params.width = config.width; params.height = config.height; + params.pitch = config.pitch; params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; - - params.CalculateCachedValues(); + params.is_layered = params.IsLayered(); return params; } @@ -147,7 +178,7 @@ u32 SurfaceParams::GetMipHeight(u32 level) const { } u32 SurfaceParams::GetMipDepth(u32 level) const { - return IsLayered() ? depth : std::max(1U, depth >> level); + return is_layered ? 
depth : std::max(1U, depth >> level); } bool SurfaceParams::IsLayered() const { @@ -183,7 +214,7 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { if (level == 0) { return this->block_depth; } - if (IsLayered()) { + if (is_layered) { return 1; } @@ -216,6 +247,10 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); +} + std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } @@ -229,7 +264,7 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co for (u32 level = 0; level < num_levels; ++level) { size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } - if (is_tiled && (IsLayered() || target == SurfaceTarget::Texture3D)) { + if (is_tiled && is_layered) { return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); } return size; @@ -256,150 +291,32 @@ u32 SurfaceParams::GetBytesPerPixel() const { return VideoCore::Surface::GetBytesPerPixel(pixel_format); } -bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { - if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != - std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, - view_params.component_type, view_params.type)) { - return false; - } - - const SurfaceTarget view_target{view_params.target}; - if (view_target == target) { - return true; - } - - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - return false; - case SurfaceTarget::Texture1DArray: - return view_target == SurfaceTarget::Texture1D; - case SurfaceTarget::Texture2DArray: - return view_target == SurfaceTarget::Texture2D; - case SurfaceTarget::TextureCubemap: - return view_target == 
SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray; - case SurfaceTarget::TextureCubeArray: - return view_target == SurfaceTarget::Texture2D || - view_target == SurfaceTarget::Texture2DArray || - view_target == SurfaceTarget::TextureCubemap; - default: - UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast(target)); - return false; - } -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } -void SurfaceParams::CalculateCachedValues() { - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: - num_layers = 1; - break; - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: - num_layers = depth; - break; - default: - UNREACHABLE(); - } - - guest_size_in_bytes = GetInnerMemorySize(false, false, false); - - if (IsPixelFormatASTC(pixel_format)) { - // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(width) * static_cast(height) * - static_cast(depth) * 4ULL; - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); - } -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; - const u32 depth{target == SurfaceTarget::Texture3D ? GetMipDepth(level) : 1U}; + const u32 depth{is_layered ? 
1U : GetMipDepth(level)}; return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); } std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : num_layers); + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); } -std::map> SurfaceParams::CreateViewOffsetMap() const { - std::map> view_offset_map; - switch (target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::Texture2D: - case SurfaceTarget::Texture3D: { - // TODO(Rodrigo): Add layer iterations for 3D textures - constexpr u32 layer = 0; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t offset{GetGuestMipmapLevelOffset(level)}; - view_offset_map.insert({offset, {layer, level}}); - } - break; - } - case SurfaceTarget::Texture1DArray: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::TextureCubeArray: { - const std::size_t layer_size{GetGuestLayerSize()}; - for (u32 level = 0; level < num_levels; ++level) { - const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; - for (u32 layer = 0; layer < num_layers; ++layer) { - const auto layer_offset{static_cast(layer_size * layer)}; - const std::size_t offset{level_offset + layer_offset}; - view_offset_map.insert({offset, {layer, level}}); - } - } - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast(target)); - } - return view_offset_map; -} - -bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { - return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && - IsInBounds(view_params, layer, level); -} - -bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { - return view_params.width == GetMipWidth(level) && view_params.height 
== GetMipHeight(level); -} - -bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { - if (view_params.target != SurfaceTarget::Texture3D) { - return true; - } - return view_params.depth == GetMipDepth(level); -} - -bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { - return layer + view_params.num_layers <= num_layers && - level + view_params.num_levels <= num_levels; -} - -std::size_t HasheableSurfaceParams::Hash() const { +std::size_t SurfaceParams::Hash() const { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof(*this))); } -bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { +bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, unaligned_height, num_levels, pixel_format, component_type, type, target) == @@ -409,4 +326,27 @@ bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const rhs.type, rhs.target); } +std::string SurfaceParams::TargetName() const { + switch (target) { + case SurfaceTarget::Texture1D: + return "1D"; + case SurfaceTarget::Texture2D: + return "2D"; + case SurfaceTarget::Texture3D: + return "3D"; + case SurfaceTarget::Texture1DArray: + return "1DArray"; + case SurfaceTarget::Texture2DArray: + return "2DArray"; + case SurfaceTarget::TextureCubemap: + return "Cube"; + case SurfaceTarget::TextureCubeArray: + return "CubeArray"; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); + UNREACHABLE(); + return fmt::format("TUK({})", static_cast(target)); + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 77dc0ba66..ec8efa210 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -6,50 
+6,21 @@ #include +#include "common/alignment.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/shader/shader_ir.h" namespace VideoCommon { -class HasheableSurfaceParams { -public: - std::size_t Hash() const; - - bool operator==(const HasheableSurfaceParams& rhs) const; - - bool operator!=(const HasheableSurfaceParams& rhs) const { - return !operator==(rhs); - } - -protected: - // Avoid creation outside of a managed environment. - HasheableSurfaceParams() = default; - - bool is_tiled; - bool srgb_conversion; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - u32 width; - u32 height; - u32 depth; - u32 pitch; - u32 unaligned_height; - u32 num_levels; - VideoCore::Surface::PixelFormat pixel_format; - VideoCore::Surface::ComponentType component_type; - VideoCore::Surface::SurfaceType type; - VideoCore::Surface::SurfaceTarget target; -}; - -class SurfaceParams final : public HasheableSurfaceParams { +class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. static SurfaceParams CreateForTexture(Core::System& system, - const Tegra::Texture::FullTextureInfo& config); + const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. 
static SurfaceParams CreateForDepthBuffer( @@ -64,68 +35,33 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - bool IsTiled() const { - return is_tiled; - } + std::size_t Hash() const; - bool GetSrgbConversion() const { - return srgb_conversion; - } + bool operator==(const SurfaceParams& rhs) const; - u32 GetBlockWidth() const { - return block_width; - } - - u32 GetTileWidthSpacing() const { - return tile_width_spacing; - } - - u32 GetWidth() const { - return width; - } - - u32 GetHeight() const { - return height; - } - - u32 GetDepth() const { - return depth; - } - - u32 GetPitch() const { - return pitch; - } - - u32 GetNumLevels() const { - return num_levels; - } - - VideoCore::Surface::PixelFormat GetPixelFormat() const { - return pixel_format; - } - - VideoCore::Surface::ComponentType GetComponentType() const { - return component_type; - } - - VideoCore::Surface::SurfaceTarget GetTarget() const { - return target; - } - - VideoCore::Surface::SurfaceType GetType() const { - return type; + bool operator!=(const SurfaceParams& rhs) const { + return !operator==(rhs); } std::size_t GetGuestSizeInBytes() const { - return guest_size_in_bytes; + return GetInnerMemorySize(false, false, false); } std::size_t GetHostSizeInBytes() const { + std::size_t host_size_in_bytes; + if (IsPixelFormatASTC(pixel_format)) { + // ASTC is uncompressed in software, in emulated as RGBA8 + host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * + static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * + static_cast(depth) * 4ULL; + } else { + host_size_in_bytes = GetInnerMemorySize(true, false, false); + } return host_size_in_bytes; } - u32 GetNumLayers() const { - return num_layers; + u32 GetBlockAlignedWidth() const { + return Common::AlignUp(width, 64 / GetBytesPerPixel()); } /// Returns the width of a given mipmap level. 
@@ -137,9 +73,6 @@ public: /// Returns the depth of a given mipmap level. u32 GetMipDepth(u32 level) const; - /// Returns true if these parameters are from a layered surface. - bool IsLayered() const; - /// Returns the block height of a given mipmap level. u32 GetMipBlockHeight(u32 level) const; @@ -152,6 +85,9 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. + std::size_t GetGuestMipmapSize(u32 level) const; + /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; @@ -173,24 +109,30 @@ public: /// Returns the bytes per pixel. u32 GetBytesPerPixel() const; - /// Returns true if another surface can be familiar with this. This is a loosely defined term - /// that reflects the possibility of these two surface parameters potentially being part of a - /// bigger superset. - bool IsFamiliar(const SurfaceParams& view_params) const; - /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; - /// Creates a map that redirects an address difference to a layer and mipmap level. - std::map> CreateViewOffsetMap() const; + std::string TargetName() const; - /// Returns true if the passed surface view parameters is equal or a valid subset of this. 
- bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const; + bool is_tiled; + bool srgb_conversion; + bool is_layered; + u32 block_width; + u32 block_height; + u32 block_depth; + u32 tile_width_spacing; + u32 width; + u32 height; + u32 depth; + u32 pitch; + u32 unaligned_height; + u32 num_levels; + VideoCore::Surface::PixelFormat pixel_format; + VideoCore::Surface::ComponentType component_type; + VideoCore::Surface::SurfaceType type; + VideoCore::Surface::SurfaceTarget target; private: - /// Calculates values that can be deduced from HasheableSurfaceParams. - void CalculateCachedValues(); - /// Returns the size of a given mipmap level inside a layer. std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; @@ -200,19 +142,12 @@ private: /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - /// Returns true if the passed view width and height match the size of this params in a given - /// mipmap level. - bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const; + std::size_t GetNumLayers() const { + return is_layered ? depth : 1; + } - /// Returns true if the passed view depth match the size of this params in a given mipmap level. - bool IsDepthValid(const SurfaceParams& view_params, u32 level) const; - - /// Returns true if the passed view layers and mipmap levels are in bounds. - bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const; - - std::size_t guest_size_in_bytes; - std::size_t host_size_in_bytes; - u32 num_layers; + /// Returns true if these parameters are from a layered surface. 
+ bool IsLayered() const; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 5f4cdbb1c..467696a4c 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -9,15 +9,15 @@ namespace VideoCommon { -std::size_t ViewKey::Hash() const { +std::size_t ViewParams::Hash() const { return static_cast(base_layer) ^ static_cast(num_layers << 16) ^ - (static_cast(base_level) << 32) ^ - (static_cast(num_levels) << 48); + (static_cast(base_level) << 24) ^ + (static_cast(num_levels) << 32) ^ (static_cast(target) << 36); } -bool ViewKey::operator==(const ViewKey& rhs) const { - return std::tie(base_layer, num_layers, base_level, num_levels) == - std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); +bool ViewParams::operator==(const ViewParams& rhs) const { + return std::tie(base_layer, num_layers, base_level, num_levels, target) == + std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index e73d8f6ae..c122800a6 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -7,18 +7,45 @@ #include #include "common/common_types.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/surface_params.h" namespace VideoCommon { -struct ViewKey { +struct ViewParams { std::size_t Hash() const; - bool operator==(const ViewKey& rhs) const; + bool operator==(const ViewParams& rhs) const; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; + VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { + switch (target) { + case VideoCore::Surface::SurfaceTarget::Texture1DArray: + case VideoCore::Surface::SurfaceTarget::Texture2DArray: + case 
VideoCore::Surface::SurfaceTarget::TextureCubemap: + case VideoCore::Surface::SurfaceTarget::TextureCubeArray: + return true; + default: + return false; + } + } +}; + +class ViewBase { +public: + ViewBase(const ViewParams& params) : params{params} {} + ~ViewBase() = default; + + const ViewParams& GetViewParams() const { + return params; + } + +protected: + ViewParams params; }; } // namespace VideoCommon @@ -26,8 +53,8 @@ struct ViewKey { namespace std { template <> -struct hash { - std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept { +struct hash { + std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { return k.Hash(); } }; From bc930754cc9437ddd86e7d246b3eb4302540896a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 10:57:16 -0400 Subject: [PATCH 014/113] Implement Texture Cache V2 --- .../renderer_opengl/gl_rasterizer.cpp | 69 ++- .../renderer_opengl/gl_texture_cache.cpp | 286 ++++------- .../renderer_opengl/gl_texture_cache.h | 117 ++--- src/video_core/renderer_opengl/utils.cpp | 23 +- src/video_core/renderer_opengl/utils.h | 6 +- src/video_core/texture_cache/texture_cache.h | 462 ++++++++++++++---- 6 files changed, 575 insertions(+), 388 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 027e9d293..482d0428c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,10 +83,10 @@ struct FramebufferCacheKey { bool stencil_enable = false; std::array color_attachments{}; - std::array colors{}; + std::array colors{}; u32 colors_count = 0; - CachedSurfaceView* zeta = nullptr; + View zeta = nullptr; auto Tie() const { return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, @@ -115,6 +115,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind RasterizerOpenGL::~RasterizerOpenGL() {} +void 
RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + texture_cache.InitMemoryMananger(memory_manager); +} + void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { LOG_WARNING( @@ -474,9 +478,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - CachedSurfaceView* depth_surface{}; + View depth_surface{}; if (using_depth_fb) { depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); + } else { + texture_cache.SetEmptyDepthBuffer(); } UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); @@ -489,38 +495,41 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. for clearing a color buffer - CachedSurfaceView* color_surface{ + View color_surface{ texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
- color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(*single_color_target); // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; fbkey.color_attachments[0] = GL_COLOR_ATTACHMENT0 + static_cast(*single_color_target); fbkey.colors[0] = color_surface; + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (index != *single_color_target) { + texture_cache.SetEmptyColorBuffer(index); + } + } } else { // Multiple color attachments are enabled for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - CachedSurfaceView* color_surface{ - texture_cache.GetColorBufferSurface(index, preserve_contents)}; + View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)}; if (color_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. - color_surface->MarkAsModified(true); + texture_cache.MarkColorBufferInUse(index); // Enable sRGB only for supported formats // Workaround for and issue in nvidia drivers // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().GetSrgbConversion(); + color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = @@ -538,11 +547,11 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( if (depth_surface) { // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. 
- depth_surface->MarkAsModified(true); + texture_cache.MarkDepthBufferInUse(); fbkey.zeta = depth_surface; - fbkey.stencil_enable = regs.stencil_enable && depth_surface->GetSurfaceParams().GetType() == - SurfaceType::DepthStencil; + fbkey.stencil_enable = regs.stencil_enable && + depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); @@ -728,11 +737,27 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } +void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } + texture_cache.InvalidateRegionEx(gpu_addr, size); + shader_cache.InvalidateRegion(addr, size); + global_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } +void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { + FlushRegion(addr, size); + InvalidateRegionEx(gpu_addr, addr, size); +} + bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, @@ -740,7 +765,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs MICROPROFILE_SCOPE(OpenGL_Blits); const auto src_surface{texture_cache.GetFermiSurface(src)}; const auto dst_surface{texture_cache.GetFermiSurface(dst)}; - blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); + // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); return true; } @@ -762,10 +787,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, const auto& params{surface->GetSurfaceParams()}; const auto& pixel_format{ 
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; - ASSERT_MSG(params.GetWidth() == config.width, "Framebuffer width is different"); - ASSERT_MSG(params.GetHeight() == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - if (params.GetPixelFormat() != pixel_format) { + if (params.pixel_format != pixel_format) { LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); } @@ -860,10 +885,10 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); - if (const auto surface{texture_cache.GetTextureSurface(texture)}; surface) { - state.texture_units[current_bindpoint].texture = surface->GetTexture( - entry.GetType(), entry.IsArray(), texture.tic.x_source, texture.tic.y_source, - texture.tic.z_source, texture.tic.w_source); + if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid state.texture_units[current_bindpoint].texture = 0; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f7c2f46aa..871608f6d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -4,7 +4,9 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/microprofile.h" #include "common/scope_exit.h" +#include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include 
"video_core/renderer_opengl/gl_texture_cache.h" @@ -22,6 +24,9 @@ using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; +MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); + namespace { struct FormatTuple { @@ -129,8 +134,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon return format; } -GLenum GetTextureTarget(const SurfaceParams& params) { - switch (params.GetTarget()) { +GLenum GetTextureTarget(const SurfaceTarget& target) { + switch (target) { case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; case SurfaceTarget::Texture2D: @@ -175,8 +180,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.GetNumLevels() - 1); - if (params.GetNumLevels() == 1) { + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } } @@ -185,21 +190,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte OGLTexture texture; texture.Create(target); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth()); + glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight()); + 
glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.GetNumLevels(), internal_format, - params.GetWidth(), params.GetHeight(), params.GetDepth()); + glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + params.height, params.depth); break; default: UNREACHABLE(); @@ -212,54 +216,72 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace -CachedSurface::CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params) - : VideoCommon::SurfaceBase{texture_cache, params} { - const auto& tuple{GetFormatTuple(params.GetPixelFormat(), params.GetComponentType())}; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) + : VideoCommon::SurfaceBase(gpu_addr, params) { + const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)}; internal_format = tuple.internal_format; format = tuple.format; type = tuple.type; is_compressed = tuple.compressed; - target = GetTextureTarget(params); + target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); + DecorateSurfaceName(); + ViewParams main{}; + main.num_levels = params.num_levels; + main.base_level = 0; + main.base_layer = 0; + main.num_layers = params.is_layered ? 
params.depth : 1; + main.target = params.target; + main_view = CreateView(main); + main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); } -CachedSurface::~CachedSurface() = default; +CachedSurface::~CachedSurface() { + views.clear(); + main_view = nullptr; +} + +void CachedSurface::DownloadTexture(std::vector& staging_buffer) { + LOG_CRITICAL(Render_OpenGL, "Flushing"); + MICROPROFILE_SCOPE(OpenGL_Texture_Download); -void CachedSurface::DownloadTexture() { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { + for (u32 level = 0; level < params.num_levels; ++level) { glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { glGetCompressedTextureImage(texture.handle, level, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } else { glGetTextureImage(texture.handle, level, format, type, static_cast(params.GetHostMipmapSize(level)), - GetStagingBufferLevelData(level)); + staging_buffer.data() + mip_offset); } } } -void CachedSurface::UploadTexture() { +void CachedSurface::UploadTexture(std::vector& staging_buffer) { + MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.GetNumLevels(); ++level) { - UploadTextureMipmap(level); + for (u32 level = 0; level < params.num_levels; ++level) { + UploadTextureMipmap(level, staging_buffer); } } -void CachedSurface::UploadTextureMipmap(u32 level) { +void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_UNPACK_ALIGNMENT, 1); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - u8* 
buffer{GetStagingBufferLevelData(level)}; + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture2D: glCompressedTextureSubImage2D(texture.handle, level, 0, 0, static_cast(params.GetMipWidth(level)), @@ -277,7 +299,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { break; case SurfaceTarget::TextureCubemap: { const std::size_t layer_size{params.GetHostLayerSize(level)}; - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), static_cast(params.GetMipWidth(level)), static_cast(params.GetMipHeight(level)), 1, @@ -291,7 +313,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { UNREACHABLE(); } } else { - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, buffer); @@ -310,7 +332,7 @@ void CachedSurface::UploadTextureMipmap(u32 level) { static_cast(params.GetMipDepth(level)), format, type, buffer); break; case SurfaceTarget::TextureCubemap: - for (std::size_t face = 0; face < params.GetDepth(); ++face) { + for (std::size_t face = 0; face < params.depth; ++face) { glTextureSubImage3D(texture.handle, level, 0, 0, static_cast(face), params.GetMipWidth(level), params.GetMipHeight(level), 1, format, type, buffer); @@ -324,61 +346,57 @@ void CachedSurface::UploadTextureMipmap(u32 level) { } void CachedSurface::DecorateSurfaceName() { - LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), - params.GetTarget() == SurfaceTarget::Texture3D ? 
"3D" : ""); + LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); } -std::unique_ptr CachedSurface::CreateView(const ViewKey& view_key) { - return std::make_unique(*this, view_key); +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { + LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix); } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, ViewKey key) - : surface{surface}, key{key}, params{surface.GetSurfaceParams()} {} +View CachedSurface::CreateView(const ViewParams& view_key) { + auto view = std::make_shared(*this, view_key); + views[view_key] = view; + view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + return view; +} + +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params) + : VideoCommon::ViewBase(params), surface{surface} { + target = GetTextureTarget(params.target); + texture_view = CreateTextureView(); +} CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment) const { - ASSERT(key.num_layers == 1 && key.num_levels == 1); + ASSERT(params.num_layers == 1 && params.num_levels == 1); - switch (params.GetTarget()) { + switch (params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), key.base_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - 
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), - key.base_level, key.base_layer); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), + params.base_level, params.base_layer); break; default: UNIMPLEMENTED(); } } -GLuint CachedSurfaceView::GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - SwizzleSource x_source, SwizzleSource y_source, +void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - const auto [texture_view, target] = GetTextureView(texture_type, is_array); - if (texture_view.get().texture.handle == 0) { - texture_view.get() = std::move(CreateTextureView(target)); - } - ApplySwizzle(texture_view, x_source, y_source, z_source, w_source); - return texture_view.get().texture.handle; -} - -void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_source, - SwizzleSource y_source, SwizzleSource z_source, - SwizzleSource w_source) { - const std::array swizzle = {x_source, y_source, z_source, w_source}; - if (swizzle == texture_view.swizzle) { + u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (swizzle == texture_view.swizzle) return; - } const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; @@ -386,146 +404,48 @@ void CachedSurfaceView::ApplySwizzle(TextureView& texture_view, SwizzleSource x_ texture_view.swizzle = swizzle; } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView(GLenum target) const { +CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { + const auto& owner_params = surface.GetSurfaceParams(); TextureView texture_view; - glGenTextures(1, &texture_view.texture.handle); + texture_view.texture.Create(); const GLuint handle{texture_view.texture.handle}; - const FormatTuple& tuple{GetFormatTuple(params.GetPixelFormat(), 
params.GetComponentType())}; + const FormatTuple& tuple{ + GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; - glTextureView(handle, target, surface.texture.handle, tuple.internal_format, key.base_level, - key.num_levels, key.base_layer, key.num_layers); - ApplyTextureDefaults(params, handle); + glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, + params.num_levels, params.base_layer, params.num_layers); + + ApplyTextureDefaults(owner_params, handle); + + u32 swizzle = + EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); + texture_view.swizzle = swizzle; return texture_view; } -std::pair, GLenum> -CachedSurfaceView::GetTextureView(Tegra::Shader::TextureType texture_type, bool is_array) { - using Pair = std::pair, GLenum>; - switch (texture_type) { - case Tegra::Shader::TextureType::Texture1D: - return is_array ? Pair{texture_view_1d_array, GL_TEXTURE_1D_ARRAY} - : Pair{texture_view_1d, GL_TEXTURE_1D}; - case Tegra::Shader::TextureType::Texture2D: - return is_array ? Pair{texture_view_2d_array, GL_TEXTURE_2D_ARRAY} - : Pair{texture_view_2d, GL_TEXTURE_2D}; - case Tegra::Shader::TextureType::Texture3D: - ASSERT(!is_array); - return {texture_view_3d, GL_TEXTURE_3D}; - case Tegra::Shader::TextureType::TextureCube: - return is_array ? 
Pair{texture_view_cube_array, GL_TEXTURE_CUBE_MAP_ARRAY} - : Pair{texture_view_cube, GL_TEXTURE_CUBE_MAP}; - } - UNREACHABLE(); -} - TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : TextureCacheBase{system, rasterizer} {} TextureCacheOpenGL::~TextureCacheOpenGL() = default; -CachedSurfaceView* TextureCacheOpenGL::TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps) { - if (overlaps.size() > 1) { - return TryCopyAsViews(gpu_addr, cpu_addr, host_ptr, new_params, overlaps); - } - - const auto& old_surface{overlaps[0]}; - const auto& old_params{old_surface->GetSurfaceParams()}; - if (old_params.GetTarget() == new_params.GetTarget() && - old_params.GetDepth() == new_params.GetDepth() && old_params.GetDepth() == 1 && - old_params.GetNumLevels() == new_params.GetNumLevels() && - old_params.GetPixelFormat() == new_params.GetPixelFormat()) { - return SurfaceCopy(gpu_addr, cpu_addr, host_ptr, new_params, old_surface, old_params); - } - - return nullptr; +Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { + return std::make_shared(gpu_addr, params); } -CachedSurfaceView* TextureCacheOpenGL::SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const Surface& old_surface, - const SurfaceParams& old_params) { - const auto new_surface{GetUncachedSurface(new_params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - const u32 min_width{ - std::max(old_params.GetDefaultBlockWidth(), new_params.GetDefaultBlockWidth())}; - const u32 min_height{ - std::max(old_params.GetDefaultBlockHeight(), new_params.GetDefaultBlockHeight())}; - for (u32 level = 0; level < old_params.GetNumLevels(); ++level) { - const u32 width{std::min(old_params.GetMipWidth(level), new_params.GetMipWidth(level))}; - const u32 
height{std::min(old_params.GetMipHeight(level), new_params.GetMipHeight(level))}; - if (width < min_width || height < min_height) { - // Avoid copies that are too small to be handled in OpenGL - break; - } - glCopyImageSubData(old_surface->GetTexture(), old_surface->GetTarget(), level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), level, 0, 0, 0, - width, height, 1); - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); -} - -CachedSurfaceView* TextureCacheOpenGL::TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr, const SurfaceParams& new_params, - const std::vector& overlaps) { - if (new_params.GetTarget() == SurfaceTarget::Texture1D || - new_params.GetTarget() == SurfaceTarget::Texture1DArray || - new_params.GetTarget() == SurfaceTarget::Texture3D) { - // Non-2D textures are not handled at the moment in this fast path. - return nullptr; - } - - const auto new_surface{GetUncachedSurface(new_params)}; - // TODO(Rodrigo): Move this down - Register(new_surface, gpu_addr, cpu_addr, host_ptr); - - // TODO(Rodrigo): Find a way to avoid heap allocations here. - std::vector views; - views.reserve(overlaps.size()); - for (const auto& overlap : overlaps) { - const auto view{ - new_surface->TryGetView(overlap->GetGpuAddr(), overlap->GetSurfaceParams())}; - if (!view) { - // TODO(Rodrigo): Remove this - Unregister(new_surface); - return nullptr; - } - views.push_back(view); - } - - // TODO(Rodrigo): It's possible that these method leaves some unloaded textures if the data has - // been uploaded to guest memory but not used as a surface previously. 
- for (std::size_t i = 0; i < overlaps.size(); ++i) { - const auto& overlap{overlaps[i]}; - const auto& view{views[i]}; - for (u32 overlap_level = 0; overlap_level < view->GetNumLevels(); ++overlap_level) { - const u32 super_level{view->GetBaseLevel() + overlap_level}; - glCopyImageSubData(overlap->GetTexture(), overlap->GetTarget(), overlap_level, 0, 0, 0, - new_surface->GetTexture(), new_surface->GetTarget(), super_level, 0, - 0, view->GetBaseLayer(), view->GetWidth(), view->GetHeight(), - view->GetNumLayers()); - } - } - - new_surface->MarkAsModified(true); - - // TODO(Rodrigo): Add an entry to directly get the superview - return new_surface->GetView(gpu_addr, new_params); -} - -Surface TextureCacheOpenGL::CreateSurface(const SurfaceParams& params) { - return std::make_unique(*this, params); +void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) { + const auto src_handle = src_surface->GetTexture(); + const auto src_target = src_surface->GetTarget(); + const auto dst_handle = dst_surface->GetTexture(); + const auto dst_target = dst_surface->GetTarget(); + glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, + copy_params.source_y, copy_params.source_z, dst_handle, dst_target, + copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, + copy_params.dest_z, copy_params.width, copy_params.height, + copy_params.depth); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c65e37153..1722c1bbc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -19,24 +19,25 @@ namespace OpenGL { using VideoCommon::SurfaceParams; -using VideoCommon::ViewKey; +using VideoCommon::ViewParams; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; using Surface = std::shared_ptr; -using 
TextureCacheBase = VideoCommon::TextureCache; +using View = std::shared_ptr; +using TextureCacheBase = VideoCommon::TextureCache; -class CachedSurface final : public VideoCommon::SurfaceBase { +class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(TextureCacheOpenGL& texture_cache, const SurfaceParams& params); + explicit CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(); - void DownloadTexture(); + void UploadTexture(std::vector& staging_buffer) override; + void DownloadTexture(std::vector& staging_buffer) override; GLenum GetTarget() const { return target; @@ -49,99 +50,79 @@ public: protected: void DecorateSurfaceName(); - std::unique_ptr CreateView(const ViewKey& view_key); + View CreateView(const ViewParams& view_key) override; private: - void UploadTextureMipmap(u32 level); + void UploadTextureMipmap(u32 level, std::vector& staging_buffer); GLenum internal_format{}; GLenum format{}; GLenum type{}; bool is_compressed{}; GLenum target{}; + u32 view_count{}; OGLTexture texture; }; -class CachedSurfaceView final { +class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, ViewKey key); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment) const; - GLuint GetTexture(Tegra::Shader::TextureType texture_type, bool is_array, - Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void MarkAsModified(bool is_modified) { - surface.MarkAsModified(is_modified); + GLuint GetTexture() { + return texture_view.texture.handle; } const SurfaceParams& GetSurfaceParams() const { - return params; + return 
surface.GetSurfaceParams(); } u32 GetWidth() const { - return params.GetMipWidth(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipWidth(params.base_level); } u32 GetHeight() const { - return params.GetMipHeight(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipHeight(params.base_level); } u32 GetDepth() const { - return params.GetMipDepth(GetBaseLevel()); + const auto owner_params = GetSurfaceParams(); + return owner_params.GetMipDepth(params.base_level); } - u32 GetBaseLayer() const { - return key.base_layer; - } - - u32 GetNumLayers() const { - return key.num_layers; - } - - u32 GetBaseLevel() const { - return key.base_level; - } - - u32 GetNumLevels() const { - return key.num_levels; - } - -private: - struct TextureView { - OGLTexture texture; - std::array swizzle{ - Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, - Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A}; - }; - - void ApplySwizzle(TextureView& texture_view, Tegra::Texture::SwizzleSource x_source, + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); - TextureView CreateTextureView(GLenum target) const; + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); - std::pair, GLenum> GetTextureView( - Tegra::Shader::TextureType texture_type, bool is_array); +private: + struct TextureView { + OGLTextureView texture; + u32 swizzle; + }; + + u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source) const { + return (static_cast(x_source) << 24) | (static_cast(y_source) << 16) | + (static_cast(z_source) << 8) | static_cast(w_source); + } + + TextureView CreateTextureView() const; CachedSurface& surface; - const ViewKey key; - const 
SurfaceParams params; + GLenum target{}; - TextureView texture_view_1d; - TextureView texture_view_1d_array; - TextureView texture_view_2d; - TextureView texture_view_2d_array; - TextureView texture_view_3d; - TextureView texture_view_cube; - TextureView texture_view_cube_array; + TextureView texture_view; }; class TextureCacheOpenGL final : public TextureCacheBase { @@ -150,21 +131,9 @@ public: ~TextureCacheOpenGL(); protected: - CachedSurfaceView* TryFastGetSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - bool preserve_contents, - const std::vector& overlaps); - - Surface CreateSurface(const SurfaceParams& params); - -private: - CachedSurfaceView* SurfaceCopy(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, const Surface& old_surface, - const SurfaceParams& old_params); - - CachedSurfaceView* TryCopyAsViews(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& new_params, - const std::vector& overlaps); + Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + void ImageCopy(Surface src_surface, Surface dst_surface, + const VideoCommon::CopyParams& copy_params) override; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 5994c0c61..a9fa539a5 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -56,8 +56,7 @@ SurfaceBlitter::SurfaceBlitter() { SurfaceBlitter::~SurfaceBlitter() = default; -void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, +void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const { const auto& src_params{src->GetSurfaceParams()}; const auto& dst_params{dst->GetSurfaceParams()}; @@ -72,17 +71,13 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, u32 
buffers{}; - UNIMPLEMENTED_IF(src_params.GetTarget() != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.GetTarget() != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - const auto GetTexture = [](CachedSurfaceView* view) { - return view->GetTexture(TextureType::Texture2D, false, SwizzleSource::R, SwizzleSource::G, - SwizzleSource::B, SwizzleSource::A); - }; - const GLuint src_texture{GetTexture(src)}; - const GLuint dst_texture{GetTexture(dst)}; + const GLuint src_texture{src->GetTexture()}; + const GLuint dst_texture{dst->GetTexture()}; - if (src_params.GetType() == SurfaceType::ColorTexture) { + if (src_params.type == SurfaceType::ColorTexture) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_texture, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, @@ -94,7 +89,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, 0); buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::Depth) { + } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -106,7 +101,7 @@ void SurfaceBlitter::Blit(CachedSurfaceView* src, CachedSurfaceView* dst, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.GetType() == SurfaceType::DepthStencil) { + } else if (src_params.type == SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_texture, 0); @@ -148,4 +143,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, 
VAddr addr, std::string_vie glObjectLabel(identifier, handle, -1, static_cast(object_label.c_str())); } -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index e7726d14e..8977d2383 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -39,8 +39,8 @@ public: explicit SurfaceBlitter(); ~SurfaceBlitter(); - void Blit(CachedSurfaceView* src, CachedSurfaceView* dst, - const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) const; + void Blit(View src, View dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) const; private: OGLFramebuffer src_framebuffer; @@ -49,4 +49,4 @@ private: void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); -} // namespace OpenGL \ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c5c01957a..eb0d9bc10 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/surface.h" +#include "video_core/texture_cache/copy_params.h" #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" @@ -40,32 +41,42 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::SurfaceTarget; +using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; + template class TextureCache { - using IntervalMap = boost::icl::interval_map>>; + using IntervalMap = boost::icl::interval_map>; using IntervalType = typename IntervalMap::interval_type; public: + void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { + 
this->memory_manager = &memory_manager; + } + void InvalidateRegion(CacheAddr addr, std::size_t size) { for (const auto& surface : GetSurfacesInRegion(addr, size)) { - if (!surface->IsRegistered()) { - // Skip duplicates - continue; - } Unregister(surface); } } - TView* GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { + void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { + for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { + Unregister(surface); + } + } + + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, + const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; } - const auto params{SurfaceParams::CreateForTexture(system, config)}; - return GetSurfaceView(gpu_addr, params, true); + const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; + return GetSurface(gpu_addr, params, true).second; } - TView* GetDepthBufferSurface(bool preserve_contents) { + TView GetDepthBufferSurface(bool preserve_contents) { const auto& regs{system.GPU().Maxwell3D().regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { @@ -75,36 +86,76 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - return GetSurfaceView(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; + if (depth_buffer.target) + depth_buffer.target->MarkAsProtected(true); + return surface_view.second; } - TView* GetColorBufferSurface(std::size_t index, bool preserve_contents) { + TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); const auto& 
regs{system.GPU().Maxwell3D().regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { + SetEmptyColorBuffer(index); return {}; } - auto& memory_manager{system.GPU().MemoryManager()}; - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; - const auto gpu_addr{config.Address() + - config.base_layer * config.layer_stride * sizeof(u32)}; + const auto& config{regs.rt[index]}; + const auto gpu_addr{config.Address()}; if (!gpu_addr) { + SetEmptyColorBuffer(index); return {}; } - return GetSurfaceView(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + preserve_contents); + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(false); + render_targets[index].target = surface_view.first; + if (render_targets[index].target) + render_targets[index].target->MarkAsProtected(true); + return surface_view.second; } - TView* GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - return GetSurfaceView(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), - true); + void MarkColorBufferInUse(std::size_t index) { + if (render_targets[index].target) + render_targets[index].target->MarkAsModified(true, Tick()); } - std::shared_ptr TryFindFramebufferSurface(const u8* host_ptr) const { + void MarkDepthBufferInUse() { + if (depth_buffer.target) + depth_buffer.target->MarkAsModified(true, Tick()); + } + + void SetEmptyDepthBuffer() { + if (depth_buffer.target != nullptr) { + depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; + } + } + + void SetEmptyColorBuffer(std::size_t index) { + if (render_targets[index].target != nullptr) { + render_targets[index].target->MarkAsProtected(false); + std::memset(&render_targets[index].config, 0, 
sizeof(RenderTargetConfig)); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; + } + } + + TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).second; + } + + TSurface TryFindFramebufferSurface(const u8* host_ptr) const { const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; return it != registered_surfaces.end() ? *it->second.begin() : nullptr; } @@ -115,126 +165,334 @@ public: protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer} { + for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { + SetEmptyColorBuffer(i); + } + SetEmptyDepthBuffer(); + } ~TextureCache() = default; - virtual TView* TryFastGetSurfaceView( - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, const SurfaceParams& params, - bool preserve_contents, const std::vector>& overlaps) = 0; + virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual std::shared_ptr CreateSurface(const SurfaceParams& params) = 0; + virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + const CopyParams& copy_params) = 0; - void Register(std::shared_ptr surface, GPUVAddr gpu_addr, VAddr cpu_addr, - u8* host_ptr) { - surface->Register(gpu_addr, cpu_addr, host_ptr); - registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); + void Register(TSurface surface) { + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const std::size_t size = surface->GetSizeInBytes(); + const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + if (!host_ptr || !cpu_addr)
{ + LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + surface->SetHostPtr(host_ptr); + surface->SetCpuAddr(*cpu_addr); + registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + RegisterInnerCache(surface); + surface->MarkAsRegistered(true); } - void Unregister(std::shared_ptr surface) { - registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); - rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); - surface->Unregister(); + void Unregister(TSurface surface) { + if (surface->IsProtected()) + return; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); + const void* host_ptr = surface->GetHostPtr(); + const std::size_t size = surface->GetSizeInBytes(); + const VAddr cpu_addr = surface->GetCpuAddr(); + registered_surfaces.erase(GetInterval(host_ptr, size)); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + UnregisterInnerCache(surface); + surface->MarkAsRegistered(false); + ReserveSurface(surface->GetSurfaceParams(), surface); } - std::shared_ptr GetUncachedSurface(const SurfaceParams& params) { - if (const auto surface = TryGetReservedSurface(params); surface) + TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + if (const auto surface = TryGetReservedSurface(params); surface) { + surface->SetGpuAddr(gpu_addr); return surface; + } // No reserved surface available, create a new one and reserve it - auto new_surface{CreateSurface(params)}; - ReserveSurface(params, new_surface); + auto new_surface{CreateSurface(gpu_addr, params)}; return new_surface; } Core::System& system; private: - TView* GetSurfaceView(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - DEBUG_ASSERT(cpu_addr); + enum class 
RecycleStrategy : u32 { + Ignore = 0, + Flush = 1, + BufferCopy = 3, + }; - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, + const GPUVAddr gpu_addr, const bool untopological) { + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } + // 3D Textures decision + if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + for (auto s : overlaps) { + const auto& s_params = s->GetSurfaceParams(); + if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { + return RecycleStrategy::Flush; + } + } + return RecycleStrategy::Ignore; + } + + std::pair RecycleSurface(std::vector& overlaps, + const SurfaceParams& params, const GPUVAddr gpu_addr, + const u8* host_ptr, const bool preserve_contents, + const bool untopological) { + for (auto surface : overlaps) { + Unregister(surface); + } + RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation + ? 
PickStrategy(overlaps, params, gpu_addr, untopological) + : RecycleStrategy::Flush; + switch (strategy) { + case RecycleStrategy::Ignore: { + return InitializeSurface(gpu_addr, params, preserve_contents); + } + case RecycleStrategy::Flush: { + std::sort(overlaps.begin(), overlaps.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto surface : overlaps) { + FlushSurface(surface); + } + return InitializeSurface(gpu_addr, params, preserve_contents); + } + default: { + UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + } + } + + std::pair RebuildMirage(TSurface current_surface, + const SurfaceParams& params) { + const auto gpu_addr = current_surface->GetGpuAddr(); + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + std::vector bricks = current_surface->BreakDown(); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } + Unregister(current_surface); + Register(new_surface); + return {new_surface, new_surface->GetMainView()}; + } + + std::pair ManageStructuralMatch(TSurface current_surface, + const SurfaceParams& params) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + return RebuildMirage(current_surface, params); + } + const bool matches_target = current_surface->MatchTarget(params.target); + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + } + + std::optional> ReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr, + const u8* host_ptr) { + if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + return {}; + } + TSurface new_surface = GetUncachedSurface(gpu_addr, params); + for (auto surface : overlaps) { + const SurfaceParams& 
src_params = surface->GetSurfaceParams(); + if (src_params.is_layered || src_params.num_levels > 1) { + // We send this cases to recycle as they are more complex to handle + return {}; + } + const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + if (!mipmap_layer) { + return {}; + } + const u32 layer = (*mipmap_layer).first; + const u32 mipmap = (*mipmap_layer).second; + if (new_surface->GetMipmapSize(mipmap) != candidate_size) { + return {}; + } + // Now we got all the data set up + CopyParams copy_params{}; + const u32 dst_width = params.GetMipWidth(mipmap); + const u32 dst_height = params.GetMipHeight(mipmap); + copy_params.width = std::min(src_params.width, dst_width); + copy_params.height = std::min(src_params.height, dst_height); + copy_params.depth = 1; + copy_params.source_level = 0; + copy_params.dest_level = mipmap; + copy_params.source_z = 0; + copy_params.dest_z = layer; + ImageCopy(surface, new_surface, copy_params); + } + for (auto surface : overlaps) { + Unregister(surface); + } + Register(new_surface); + return {{new_surface, new_surface->GetMainView()}}; + } + + std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + + const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; if (overlaps.empty()) { - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, preserve_contents); + } + + for (auto surface : overlaps) { + if (!surface->MatchesTopology(params)) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + true); + } } if (overlaps.size() == 1) { - if 
(TView* view = overlaps[0]->TryGetView(gpu_addr, params); view) { - return view; + TSurface current_surface = overlaps[0]; + if (current_surface->MatchesStructure(params) && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + return ManageStructuralMatch(current_surface, params); } - } - - const auto fast_view{TryFastGetSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, - preserve_contents, overlaps)}; - - if (!fast_view) { - std::sort(overlaps.begin(), overlaps.end(), [](const auto& lhs, const auto& rhs) { - return lhs->GetModificationTick() < rhs->GetModificationTick(); - }); - } - - for (const auto& surface : overlaps) { - if (!fast_view) { - // Flush even when we don't care about the contents, to preserve memory not - // written by the new surface. - FlushSurface(surface); + if (current_surface->GetSizeInBytes() <= candidate_size) { + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); } - Unregister(surface); + std::optional view = current_surface->EmplaceView(params, gpu_addr); + if (view.has_value()) { + const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + if (is_mirage) { + LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, + false); + } + return {current_surface, *view}; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + } else { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } - if (fast_view) { - return fast_view; - } - - return LoadSurfaceView(gpu_addr, *cpu_addr, host_ptr, params, preserve_contents); } - TView* LoadSurfaceView(GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - const SurfaceParams& params, bool 
preserve_contents) { - const auto new_surface{GetUncachedSurface(params)}; - Register(new_surface, gpu_addr, cpu_addr, host_ptr); + std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, + bool preserve_contents) { + auto new_surface{GetUncachedSurface(gpu_addr, params)}; + Register(new_surface); if (preserve_contents) { LoadSurface(new_surface); } - return new_surface->GetView(gpu_addr, params); + return {new_surface, new_surface->GetMainView()}; } - void LoadSurface(const std::shared_ptr& surface) { - surface->LoadBuffer(); - surface->UploadTexture(); - surface->MarkAsModified(false); + void LoadSurface(const TSurface& surface) { + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_buffer); + surface->UploadTexture(staging_buffer); + surface->MarkAsModified(false, Tick()); } - void FlushSurface(const std::shared_ptr& surface) { + void FlushSurface(const TSurface& surface) { if (!surface->IsModified()) { return; } - surface->DownloadTexture(); - surface->FlushBuffer(); + staging_buffer.resize(surface->GetHostSizeInBytes()); + surface->DownloadTexture(staging_buffer); + surface->FlushBuffer(staging_buffer); + surface->MarkAsModified(false, Tick()); } - std::vector> GetSurfacesInRegion(CacheAddr cache_addr, - std::size_t size) const { + std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { if (size == 0) { return {}; } const IntervalType interval{cache_addr, cache_addr + size}; - std::vector> surfaces; + std::vector surfaces; for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - surfaces.push_back(*pair.second.begin()); + for (auto& s : pair.second) { + if (!s || !s->IsRegistered()) { + continue; + } + surfaces.push_back(s); + } } return surfaces; } - void ReserveSurface(const SurfaceParams& params, std::shared_ptr surface) { + void RegisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> 
inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].push_back(surface); + start++; + } + } + + void UnregisterInnerCache(TSurface& surface) { + GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; + const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + while (start <= end) { + inner_cache[start].remove(surface); + start++; + } + } + + std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + if (size == 0) { + return {}; + } + const GPUVAddr gpu_addr_end = gpu_addr + size; + GPUVAddr start = gpu_addr >> inner_cache_page_bits; + const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + std::vector surfaces; + while (start <= end) { + std::list& list = inner_cache[start]; + for (auto& s : list) { + if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + s->MarkAsPicked(true); + surfaces.push_back(s); + } + } + start++; + } + for (auto& s : surfaces) { + s->MarkAsPicked(false); + } + return surfaces; + } + + void ReserveSurface(const SurfaceParams& params, TSurface surface) { surface_reserve[params].push_back(std::move(surface)); } - std::shared_ptr TryGetReservedSurface(const SurfaceParams& params) { + TSurface TryGetReservedSurface(const SurfaceParams& params) { auto search{surface_reserve.find(params)}; if (search == surface_reserve.end()) { return {}; @@ -247,21 +505,41 @@ private: return {}; } - IntervalType GetSurfaceInterval(std::shared_ptr surface) const { - return IntervalType::right_open(surface->GetCacheAddr(), - surface->GetCacheAddr() + surface->GetSizeInBytes()); + IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { + const CacheAddr addr = ToCacheAddr(host_ptr); + return IntervalType::right_open(addr, addr + size); } + struct RenderInfo { + RenderTargetConfig config; + TSurface target; + TView view; + }; + + struct DepthBufferInfo { + TSurface 
target; + TView view; + }; + VideoCore::RasterizerInterface& rasterizer; + Tegra::MemoryManager* memory_manager; u64 ticks{}; IntervalMap registered_surfaces; + static constexpr u64 inner_cache_page_bits{20}; + static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; + std::unordered_map> inner_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map>> surface_reserve; + std::unordered_map> surface_reserve; + std::array render_targets; + DepthBufferInfo depth_buffer; + + std::vector staging_buffer; }; } // namespace VideoCommon From b711cdce782ee604edc3c52628eb76e6b9a08b72 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 13:58:37 -0400 Subject: [PATCH 015/113] Corrections to Structural Matching The texture will now be reconstructed if the width only matches on GoB alignment. --- src/video_core/texture_cache/surface_base.h | 66 +++++++++++++------- src/video_core/texture_cache/texture_cache.h | 11 +++- 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5fd7add0a..9c048eb88 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -16,9 +16,8 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template> -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp={}) -{ +template > +ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { // Note: BOTH type T and the type after ForwardIt is dereferenced // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. 
// This is stricter than lower_bound requirement (see above) @@ -33,8 +32,14 @@ class MemoryManager; namespace VideoCommon { -using VideoCore::Surface::SurfaceTarget; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceTarget; + +enum class MatchStructureResult : u32 { + FullMatch = 0, + SemiMatch = 1, + None = 2, +}; class SurfaceBaseImpl { public: @@ -106,17 +111,26 @@ public: return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); } - bool MatchesStructure(const SurfaceParams& rhs) const { + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { if (params.is_tiled) { - const u32 a_width1 = params.GetBlockAlignedWidth(); - const u32 a_width2 = rhs.GetBlockAlignedWidth(); - return std::tie(a_width1, params.height, params.depth, params.block_width, - params.block_height, params.block_depth, params.tile_width_spacing) == - std::tie(a_width2, rhs.height, rhs.depth, rhs.block_width, rhs.block_height, - rhs.block_depth, rhs.tile_width_spacing); + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } else { - return std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch); + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; } } @@ -126,15 +140,16 @@ public: const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; const u32 layer = relative_address / layer_size; const GPUVAddr mipmap_address = 
relative_address - layer_size * layer; - const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + const auto mipmap_it = + binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it != mipmap_offsets.end()) { return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; } return {}; } - std::vector BreakDown() const { - auto set_up_copy = [](CopyParams& cp, const SurfaceParams& params, const u32 depth, + std::vector BreakDown(const SurfaceParams& in_params) const { + auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, const u32 level) { cp.source_x = 0; cp.source_y = 0; @@ -144,8 +159,8 @@ public: cp.dest_z = 0; cp.source_level = level; cp.dest_level = level; - cp.width = params.GetMipWidth(level); - cp.height = params.GetMipHeight(level); + cp.width = width; + cp.height = height; cp.depth = depth; }; const u32 layers = params.depth; @@ -156,7 +171,11 @@ public: const u32 layer_offset = layer * mipmaps; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[layer_offset + level]; - set_up_copy(cp, params, layer, level); + const u32 width = + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + set_up_copy(cp, width, height, layer, level); } } return result; @@ -164,7 +183,11 @@ public: std::vector result{mipmaps}; for (std::size_t level = 0; level < mipmaps; level++) { CopyParams& cp = result[level]; - set_up_copy(cp, params, params.GetMipDepth(level), level); + const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); + const u32 height = + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); + const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); + set_up_copy(cp, width, height, depth, level); } return result; } @@ -254,7 +277,8 @@ public: 
std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr) return {}; - if (params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index eb0d9bc10..f3b28453a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -284,7 +284,7 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(); + std::vector bricks = current_surface->BreakDown(params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -370,11 +370,16 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - if (current_surface->MatchesStructure(params) && + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - return ManageStructuralMatch(current_surface, params); + if (s_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params); + } else { + return RebuildMirage(current_surface, params); + } } if (current_surface->GetSizeInBytes() <= candidate_size) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, From d86f9cd70910d4b96ec301e7d532b11d18a290a4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 17:30:36 -0400 Subject: [PATCH 016/113] Change texture_cache caching from GPUAddr to
CacheAddr This also reverses the changes to make invalidation and flushing through the GPU address. --- src/video_core/memory_manager.cpp | 2 +- src/video_core/rasterizer_interface.h | 4 - .../renderer_opengl/gl_rasterizer.cpp | 16 --- .../renderer_opengl/gl_rasterizer.h | 2 - src/video_core/texture_cache/surface_base.cpp | 5 +- src/video_core/texture_cache/surface_base.h | 30 +++--- src/video_core/texture_cache/texture_cache.h | 102 +++++++----------- 7 files changed, 60 insertions(+), 101 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 74a1441e3..5d8d126c1 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,7 +69,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; - rasterizer.FlushAndInvalidateRegionEx(gpu_addr, cache_addr, aligned_size); + rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); return gpu_addr; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d5505ef9c..3c18d3b1f 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,10 +49,6 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - /// and invalidated - virtual void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) = 0; - /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 482d0428c..77ac963b4 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -737,27 +737,11 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { - return; - } - texture_cache.InvalidateRegionEx(gpu_addr, size); - shader_cache.InvalidateRegion(addr, size); - global_cache.InvalidateRegion(addr, size); - buffer_cache.InvalidateRegion(addr, size); -} - void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) { - FlushRegion(addr, size); - InvalidateRegionEx(gpu_addr, addr, size); -} - bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 971a38ab7..5c37d3bfa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -64,9 +64,7 @@ public: void FlushAll() override; void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; - void InvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size); void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegionEx(GPUVAddr gpu_addr, CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle& src_rect, diff --git a/src/video_core/texture_cache/surface_base.cpp 
b/src/video_core/texture_cache/surface_base.cpp index 5273fcb44..0de0bc656 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -25,7 +25,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& u32 offset = 0; mipmap_offsets.resize(params.num_levels); mipmap_sizes.resize(params.num_levels); - gpu_addr_end = gpu_addr + memory_size; for (u32 i = 0; i < params.num_levels; i++) { mipmap_offsets[i] = offset; mipmap_sizes[i] = params.GetGuestMipmapSize(i); @@ -99,8 +98,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } -void SurfaceBaseImpl::FlushBuffer(std::vector& staging_buffer) { +void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, + std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); + auto host_ptr = memory_manager.GetPointer(gpu_addr); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9c048eb88..74be3237d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -45,40 +45,40 @@ class SurfaceBaseImpl { public: void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); - void FlushBuffer(std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); GPUVAddr GetGpuAddr() const { return gpu_addr; } - GPUVAddr GetGpuAddrEnd() const { - return gpu_addr_end; - } - - bool Overlaps(const GPUVAddr start, const GPUVAddr end) const { - return (gpu_addr < end) && (gpu_addr_end > start); + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); } // Use only when recycling a surface void 
SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; - gpu_addr_end = new_addr + memory_size; } VAddr GetCpuAddr() const { - return gpu_addr; + return cpu_addr; } void SetCpuAddr(const VAddr new_addr) { cpu_addr = new_addr; } - u8* GetHostPtr() const { - return host_ptr; + CacheAddr GetCacheAddr() const { + return cache_addr; } - void SetHostPtr(u8* new_addr) { - host_ptr = new_addr; + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; + } + + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -201,13 +201,13 @@ protected: const SurfaceParams params; GPUVAddr gpu_addr{}; - GPUVAddr gpu_addr_end{}; std::vector mipmap_sizes; std::vector mipmap_offsets; const std::size_t layer_size; const std::size_t memory_size; const std::size_t host_memory_size; - u8* host_ptr; + CacheAddr cache_addr; + CacheAddr cache_addr_end{}; VAddr cpu_addr; private: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f3b28453a..43aaec011 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,12 +60,6 @@ public: } } - void InvalidateRegionEx(GPUVAddr addr, std::size_t size) { - for (const auto& surface : GetSurfacesInRegionInner(addr, size)) { - Unregister(surface); - } - } - TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; @@ -154,9 +148,19 @@ public: return GetSurface(gpu_addr, params, true).second; } - TSurface TryFindFramebufferSurface(const u8* host_ptr) const { - const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; - return it != registered_surfaces.end() ? 
*it->second.begin() : nullptr; + TSurface TryFindFramebufferSurface(const u8* host_ptr) { + const CacheAddr cache_addr = ToCacheAddr(host_ptr); + if (!cache_addr) { + return nullptr; + } + const CacheAddr page = cache_addr >> registry_page_bits; + std::list& list = registry[page]; + for (auto& s : list) { + if (s->GetCacheAddr() == cache_addr) { + return s; + } + } + return nullptr; } u64 Tick() { @@ -181,30 +185,28 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - u8* host_ptr = memory_manager->GetPointer(gpu_addr); + const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); - if (!host_ptr || !cpu_addr) { + if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - surface->SetHostPtr(host_ptr); + surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); - registered_surfaces.add({GetInterval(host_ptr, size), {surface}}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); RegisterInnerCache(surface); surface->MarkAsRegistered(true); + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } void Unregister(TSurface surface) { if (surface->IsProtected()) return; const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const void* host_ptr = surface->GetHostPtr(); + const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); const VAddr cpu_addr = surface->GetCpuAddr(); - registered_surfaces.erase(GetInterval(host_ptr, size)); rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); UnregisterInnerCache(surface); surface->MarkAsRegistered(false); @@ -280,7 +282,7 @@ private: } } - std::pair RebuildMirage(TSurface current_surface, + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = 
current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); @@ -297,7 +299,7 @@ private: const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } const bool matches_target = current_surface->MatchTarget(params.target); if (matches_target) { @@ -356,7 +358,7 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegionInner(gpu_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -378,7 +380,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params); } else { - return RebuildMirage(current_surface, params); + return RebuildSurface(current_surface, params); } } if (current_surface->GetSizeInBytes() <= candidate_size) { @@ -429,58 +431,40 @@ private: } staging_buffer.resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(staging_buffer); + surface->FlushBuffer(*memory_manager, staging_buffer); surface->MarkAsModified(false, Tick()); } - std::vector GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { - if (size == 0) { - return {}; - } - const IntervalType interval{cache_addr, cache_addr + size}; - - std::vector surfaces; - for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { - for (auto& s : pair.second) { - if (!s || !s->IsRegistered()) { - continue; - } - surfaces.push_back(s); - } - } - return surfaces; - } - void RegisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const 
GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].push_back(surface); + registry[start].push_back(surface); start++; } } void UnregisterInnerCache(TSurface& surface) { - GPUVAddr start = surface->GetGpuAddr() >> inner_cache_page_bits; - const GPUVAddr end = (surface->GetGpuAddrEnd() - 1) >> inner_cache_page_bits; + CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - inner_cache[start].remove(surface); + registry[start].remove(surface); start++; } } - std::vector GetSurfacesInRegionInner(const GPUVAddr gpu_addr, const std::size_t size) { + std::vector GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { if (size == 0) { return {}; } - const GPUVAddr gpu_addr_end = gpu_addr + size; - GPUVAddr start = gpu_addr >> inner_cache_page_bits; - const GPUVAddr end = (gpu_addr_end - 1) >> inner_cache_page_bits; + const CacheAddr cache_addr_end = cache_addr + size; + CacheAddr start = cache_addr >> registry_page_bits; + const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = inner_cache[start]; + std::list& list = registry[start]; for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(gpu_addr, gpu_addr_end)) { + if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); surfaces.push_back(s); } @@ -510,11 +494,6 @@ private: return {}; } - IntervalType GetInterval(const void* host_ptr, const std::size_t size) const { - const CacheAddr addr = ToCacheAddr(host_ptr); - return IntervalType::right_open(addr, addr + size); - } - struct RenderInfo { RenderTargetConfig config; TSurface target; @@ -531,11 +510,12 @@ private: u64 
ticks{}; - IntervalMap registered_surfaces; - - static constexpr u64 inner_cache_page_bits{20}; - static constexpr u64 inner_cache_page_size{1 << inner_cache_page_bits}; - std::unordered_map> inner_cache; + // The internal Cache is different for the Texture Cache. It's based on buckets + // of 1MB. This fits better for the purpose of this cache as textures are normaly + // large in size. + static constexpr u64 registry_page_bits{20}; + static constexpr u64 registry_page_size{1 << registry_page_bits}; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and From 1af4414861fda5cad2549372e65ecda090caf2f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 19:09:34 -0400 Subject: [PATCH 017/113] Correct Mipmaps View method in Texture Cache --- .../renderer_opengl/gl_texture_cache.cpp | 44 ++++++++++--------- .../renderer_opengl/gl_texture_cache.h | 12 ++--- src/video_core/texture_cache/surface_base.h | 5 +-- 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 871608f6d..575608266 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -233,7 +233,8 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param main.num_layers = params.is_layered ? 
params.depth : 1; main.target = params.target; main_view = CreateView(main); - main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + main_view->DecorateViewName(gpu_addr, + params.TargetName() + "V:" + std::to_string(view_count++)); } CachedSurface::~CachedSurface() { @@ -350,7 +351,7 @@ void CachedSurface::DecorateSurfaceName() { } void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { - LabelGLObject(GL_TEXTURE, texture_view.texture.handle, gpu_addr, prefix); + LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); } View CachedSurface::CreateView(const ViewParams& view_key) { @@ -364,6 +365,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p : VideoCommon::ViewBase(params), surface{surface} { target = GetTextureTarget(params.target); texture_view = CreateTextureView(); + swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); } CachedSurfaceView::~CachedSurfaceView() = default; @@ -371,20 +373,24 @@ CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment) const { ASSERT(params.num_layers == 1 && params.num_levels == 1); - switch (params.target) { + const auto& owner_params = surface.GetSurfaceParams(); + + switch (owner_params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, target, - surface.GetTexture(), params.base_level); + glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, target, - surface.GetTexture(), params.base_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), + surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case 
SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, target, + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), params.base_level, params.base_layer); break; default: @@ -394,22 +400,22 @@ void CachedSurfaceView::Attach(GLenum attachment) const { void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, SwizzleSource w_source) { - u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); - if (swizzle == texture_view.swizzle) + u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (new_swizzle == swizzle) return; + swizzle = new_swizzle; const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; - glTextureParameteriv(texture_view.texture.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); - texture_view.swizzle = swizzle; + glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); } -CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { +OGLTextureView CachedSurfaceView::CreateTextureView() const { const auto& owner_params = surface.GetSurfaceParams(); - TextureView texture_view; - texture_view.texture.Create(); + OGLTextureView tv; + tv.Create(); - const GLuint handle{texture_view.texture.handle}; + const GLuint handle{tv.handle}; const FormatTuple& tuple{ GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; @@ -418,11 +424,7 @@ CachedSurfaceView::TextureView CachedSurfaceView::CreateTextureView() const { ApplyTextureDefaults(owner_params, handle); - u32 swizzle = - EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); - texture_view.swizzle = swizzle; - - return texture_view; + return tv; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, diff --git 
a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1722c1bbc..083b5406b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -74,7 +74,7 @@ public: void Attach(GLenum attachment) const; GLuint GetTexture() { - return texture_view.texture.handle; + return texture_view.handle; } const SurfaceParams& GetSurfaceParams() const { @@ -104,11 +104,6 @@ public: void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); private: - struct TextureView { - OGLTextureView texture; - u32 swizzle; - }; - u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, @@ -117,12 +112,13 @@ private: (static_cast(z_source) << 8) | static_cast(w_source); } - TextureView CreateTextureView() const; + OGLTextureView CreateTextureView() const; CachedSurface& surface; GLenum target{}; - TextureView texture_view; + OGLTextureView texture_view; + u32 swizzle; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 74be3237d..486585c9c 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -282,8 +282,7 @@ public: return {}; } const std::size_t size = view_params.GetGuestSizeInBytes(); - const GPUVAddr relative_address = view_addr - gpu_addr; - auto layer_mipmap = GetLayerMipmap(relative_address); + auto layer_mipmap = GetLayerMipmap(view_addr); if (!layer_mipmap) { return {}; } @@ -298,7 +297,7 @@ public: vp.num_layers = 1; vp.base_level = mipmap; vp.num_levels = 1; - vp.target = params.target; + vp.target = view_params.target; return {GetView(vp)}; } From 03d10ea3b420c923c14a11c86b47e2f00bc30e00 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:28:31 -0300 Subject: [PATCH 018/113] copy_params: Use 
constructor instead of C-like initialization --- src/video_core/texture_cache/copy_params.h | 10 ++++ src/video_core/texture_cache/surface_base.h | 53 ++++++++------------ src/video_core/texture_cache/texture_cache.h | 23 ++++----- 3 files changed, 39 insertions(+), 47 deletions(-) diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 75c2b1f05..8cf010142 100644 --- a/src/video_core/texture_cache/copy_params.h +++ b/src/video_core/texture_cache/copy_params.h @@ -9,6 +9,16 @@ namespace VideoCommon { struct CopyParams { + CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, + u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, + dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, + dest_level{dest_level}, width{width}, height{height}, depth{depth} {} + + CopyParams(u32 width, u32 height, u32 depth, u32 level) + : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, + dest_level{level}, width{width}, height{height}, depth{depth} {} + u32 source_x; u32 source_y; u32 source_z; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 486585c9c..029cfb055 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -149,45 +149,32 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - auto set_up_copy = [](CopyParams& cp, const u32 width, const u32 height, const u32 depth, - const u32 level) { - cp.source_x = 0; - cp.source_y = 0; - cp.source_z = 0; - cp.dest_x = 0; - cp.dest_y = 0; - cp.dest_z = 0; - cp.source_level = level; - cp.dest_level = level; - cp.width = width; - cp.height = height; - cp.depth = depth; - }; - const u32 layers = params.depth; - const u32 mipmaps = params.num_levels; + std::vector result; + const u32 
layers{params.depth}; + const u32 mipmaps{params.num_levels}; + if (params.is_layered) { - std::vector result{layers * mipmaps}; - for (std::size_t layer = 0; layer < layers; layer++) { - const u32 layer_offset = layer * mipmaps; - for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[layer_offset + level]; - const u32 width = - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - set_up_copy(cp, width, height, layer, level); + result.reserve(static_cast(layers) * static_cast(mipmaps)); + for (u32 layer = 0; layer < layers; layer++) { + const u32 layer_offset{layer * mipmaps}; + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{ + std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); } } return result; + } else { - std::vector result{mipmaps}; + result.reserve(mipmaps); for (std::size_t level = 0; level < mipmaps; level++) { - CopyParams& cp = result[level]; - const u32 width = std::min(params.GetMipWidth(level), in_params.GetMipWidth(level)); - const u32 height = - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level)); - const u32 depth = std::min(params.GetMipDepth(level), in_params.GetMipDepth(level)); - set_up_copy(cp, width, height, depth, level); + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); } return result; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 43aaec011..c9a648bbd 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -283,7 +283,7 @@ private: } std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); std::vector bricks = current_surface->BreakDown(params); @@ -323,26 +323,21 @@ private: return {}; } const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); - auto mipmap_layer = new_surface->GetLayerMipmap(surface->GetGpuAddr()); + auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; } - const u32 layer = (*mipmap_layer).first; - const u32 mipmap = (*mipmap_layer).second; + const u32 layer{mipmap_layer->first}; + const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } // Now we got all the data set up - CopyParams copy_params{}; - const u32 dst_width = params.GetMipWidth(mipmap); - const u32 dst_height = params.GetMipHeight(mipmap); - copy_params.width = std::min(src_params.width, dst_width); - copy_params.height = std::min(src_params.height, dst_height); - copy_params.depth = 1; - copy_params.source_level = 0; - copy_params.dest_level = mipmap; - copy_params.source_z = 0; - copy_params.dest_z = layer; + const u32 dst_width{params.GetMipWidth(mipmap)}; + const u32 dst_height{params.GetMipHeight(mipmap)}; + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, + std::min(src_params.width, dst_width), + std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } for (auto surface : overlaps) { From 2b30000a1ed1972e0701a8525182104b4544caa4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:48:02 -0300 Subject: [PATCH 019/113] surface_base: Silence truncation warnings and minor renames and reordering --- 
src/video_core/texture_cache/surface_base.cpp | 34 +++++++++--------- src/video_core/texture_cache/surface_base.h | 35 ++++++++++--------- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 0de0bc656..5e994cf08 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -18,17 +18,19 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -SurfaceBaseImpl::SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params) - : gpu_addr{gpu_vaddr}, params{params}, mipmap_sizes{params.num_levels}, - mipmap_offsets{params.num_levels}, layer_size{params.GetGuestLayerSize()}, - memory_size{params.GetGuestSizeInBytes()}, host_memory_size{params.GetHostSizeInBytes()} { - u32 offset = 0; - mipmap_offsets.resize(params.num_levels); - mipmap_sizes.resize(params.num_levels); - for (u32 i = 0; i < params.num_levels; i++) { - mipmap_offsets[i] = offset; - mipmap_sizes[i] = params.GetGuestMipmapSize(i); - offset += mipmap_sizes[i]; +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) + : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, + guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ + params.GetHostSizeInBytes()} { + mipmap_offsets.reserve(params.num_levels); + mipmap_sizes.reserve(params.num_levels); + + std::size_t offset = 0; + for (u32 level = 0; level < params.num_levels; ++level) { + const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; + mipmap_sizes.push_back(mipmap_size); + mipmap_offsets.push_back(offset); + offset += mipmap_size; } } @@ -44,7 +46,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf std::size_t host_offset{0}; const std::size_t guest_stride = layer_size; const 
std::size_t host_stride = params.GetHostLayerSize(level); - for (u32 layer = 0; layer < params.depth; layer++) { + for (u32 layer = 0; layer < params.depth; ++layer) { MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, params.tile_width_spacing, buffer + host_offset, memory + guest_offset); guest_offset += guest_stride; @@ -60,12 +62,12 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -91,7 +93,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); @@ -105,7 +107,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { - const u32 host_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t 
host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 029cfb055..7cc122158 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -78,7 +78,7 @@ public: void SetCacheAddr(const CacheAddr new_addr) { cache_addr = new_addr; - cache_addr_end = new_addr + memory_size; + cache_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -86,7 +86,7 @@ public: } std::size_t GetSizeInBytes() const { - return memory_size; + return guest_memory_size; } std::size_t GetHostSizeInBytes() const { @@ -135,17 +135,19 @@ public: } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) + if (candidate_gpu_addr < gpu_addr) { return {}; - const GPUVAddr relative_address = candidate_gpu_addr - gpu_addr; - const u32 layer = relative_address / layer_size; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it != mipmap_offsets.end()) { - return {{layer, std::distance(mipmap_offsets.begin(), mipmap_it)}}; + if (mipmap_it == mipmap_offsets.end()) { + return {}; } - return {}; + const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); } std::vector BreakDown(const SurfaceParams& in_params) const { @@ -169,7 +171,7 @@ public: } else { result.reserve(mipmaps); - for (std::size_t level = 0; level < mipmaps; level++) { + for (u32 level = 0; level < mipmaps; level++) { const u32 
width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; const u32 height{ std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; @@ -181,21 +183,22 @@ public: } protected: - explicit SurfaceBaseImpl(const GPUVAddr gpu_vaddr, const SurfaceParams& params); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - GPUVAddr gpu_addr{}; - std::vector mipmap_sizes; - std::vector mipmap_offsets; const std::size_t layer_size; - const std::size_t memory_size; + const std::size_t guest_memory_size; const std::size_t host_memory_size; - CacheAddr cache_addr; + GPUVAddr gpu_addr{}; + CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; - VAddr cpu_addr; + VAddr cpu_addr{}; + + std::vector mipmap_sizes; + std::vector mipmap_offsets; private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, From 16e8625a301b1f43ecebe459a40bf33f89322032 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 21:55:55 -0300 Subject: [PATCH 020/113] surface_base: Split BreakDown into layered and non-layered variants --- src/video_core/texture_cache/surface_base.h | 93 +++++++++++---------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 7cc122158..0cfb835d9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -106,32 +106,32 @@ public: } bool MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp = params.GetBytesPerPixel(); - const u32 dst_bpp = rhs.GetBytesPerPixel(); + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - if 
(params.is_tiled) { - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { - if (params.width == rhs.width) { - return MatchStructureResult::FullMatch; - } - if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } else { + if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { return MatchStructureResult::FullMatch; } return MatchStructureResult::None; } + // Tiled surface + if (std::tie(params.height, params.depth, params.block_width, params.block_height, + params.block_depth, params.tile_width_spacing) == + std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing)) { + if (params.width == rhs.width) { + return MatchStructureResult::FullMatch; + } + if (params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + return MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; } std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { @@ -151,35 +151,7 @@ public: } std::vector BreakDown(const SurfaceParams& in_params) const { - std::vector result; - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - - if (params.is_layered) { - result.reserve(static_cast(layers) * static_cast(mipmaps)); - for (u32 layer = 0; layer < layers; layer++) { - const u32 layer_offset{layer * mipmaps}; - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{ - std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; - result.emplace_back(width, height, layer, level); - } - } - return result; - - } 
else { - result.reserve(mipmaps); - for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); } protected: @@ -203,6 +175,37 @@ protected: private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); + + std::vector BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{ + std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + result.emplace_back(width, height, layer, level); + } + } + return result; + } + + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; + const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; + } }; template From 549fd18ac44c6bcefdf6584484d775f0129e3fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 7 May 2019 22:03:33 -0300 Subject: [PATCH 021/113] surface_view: 
Add constructor for ViewParams --- .../renderer_opengl/gl_texture_cache.cpp | 15 ++----- src/video_core/texture_cache/surface_base.h | 39 ++++++------------- src/video_core/texture_cache/surface_view.h | 8 +++- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 575608266..c6990ad21 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -226,13 +226,8 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); DecorateSurfaceName(); - ViewParams main{}; - main.num_levels = params.num_levels; - main.base_level = 0; - main.base_layer = 0; - main.num_layers = params.is_layered ? params.depth : 1; - main.target = params.target; - main_view = CreateView(main); + main_view = CreateView( + ViewParams(params.target, 0, params.is_layered ? 
params.depth : 1, 0, params.num_levels)); main_view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); } @@ -378,13 +373,11 @@ void CachedSurfaceView::Attach(GLenum attachment) const { switch (owner_params.target) { case SurfaceTarget::Texture1D: glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), - params.base_level); + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture2D: glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), - params.base_level); + surface.GetTexture(), params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 0cfb835d9..f469ab498 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -253,45 +253,30 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - ViewParams vp{}; - vp.base_level = 0; - vp.num_levels = params.num_levels; - vp.target = overview_params.target; - if (params.is_layered && !overview_params.is_layered) { - vp.base_layer = 0; - vp.num_layers = 1; - } else { - vp.base_layer = 0; - vp.num_layers = params.depth; - } - return GetView(vp); + const u32 num_layers{params.is_layered && !overview_params.is_layered ? 
1 : params.depth}; + const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); + return GetView(view_params); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr) - return {}; - if (params.target == SurfaceTarget::Texture3D || + if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size = view_params.GetGuestSizeInBytes(); - auto layer_mipmap = GetLayerMipmap(view_addr); + const std::size_t size{view_params.GetGuestSizeInBytes()}; + const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } - const u32 layer = (*layer_mipmap).first; - const u32 mipmap = (*layer_mipmap).second; + const u32 layer{layer_mipmap->first}; + const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != size) { - // TODO: the view may cover many mimaps, this case can still go on + // TODO: The view may cover many mimaps, this case can still go on. + // This edge-case can be safely be ignored since it will just result in worse + // performance. 
return {}; } - ViewParams vp{}; - vp.base_layer = layer; - vp.num_layers = 1; - vp.base_level = mipmap; - vp.num_levels = 1; - vp.target = view_params.target; - return {GetView(vp)}; + return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index c122800a6..1ef4509ce 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -13,15 +13,21 @@ namespace VideoCommon { struct ViewParams { + ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, + u32 base_level, u32 num_levels) + : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, + num_levels{num_levels} {} + std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + VideoCore::Surface::SurfaceTarget target{}; u32 base_layer{}; u32 num_layers{}; u32 base_level{}; u32 num_levels{}; - VideoCore::Surface::SurfaceTarget target; + bool IsLayered() const { switch (target) { case VideoCore::Surface::SurfaceTarget::Texture1DArray: From 324e470879e63423844a687f7d675a0536006f07 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 May 2019 23:13:05 -0400 Subject: [PATCH 022/113] Texture Cache: Implement Blitting and Fermi Copies --- .../renderer_opengl/gl_rasterizer.cpp | 4 +- .../renderer_opengl/gl_rasterizer.h | 1 - .../renderer_opengl/gl_texture_cache.cpp | 70 ++++++++++++++++- .../renderer_opengl/gl_texture_cache.h | 8 ++ src/video_core/renderer_opengl/utils.cpp | 78 ------------------- src/video_core/renderer_opengl/utils.h | 13 ---- src/video_core/texture_cache/texture_cache.h | 19 ++++- 7 files changed, 93 insertions(+), 100 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 77ac963b4..d0e7b61e7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -747,9 +747,7 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); - const auto src_surface{texture_cache.GetFermiSurface(src)}; - const auto dst_surface{texture_cache.GetFermiSurface(dst)}; - // blitter.Blit(src_surface, dst_surface, src_rect, dst_rect); + texture_cache.DoFermiCopy(src, dst, src_rect, dst_rect); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 5c37d3bfa..d872e5110 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -208,7 +208,6 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - SurfaceBlitter blitter; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c6990ad21..a58e3a816 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,7 @@ #include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache/texture_cache.h" @@ -23,6 +24,7 @@ using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; +using VideoCore::Surface::SurfaceType; MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); @@ -422,7 +424,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : TextureCacheBase{system, rasterizer} {} + : TextureCacheBase{system, rasterizer} { + src_framebuffer.Create(); + dst_framebuffer.Create(); +} TextureCacheOpenGL::~TextureCacheOpenGL() = default; @@ -443,4 +448,67 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } +void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + const auto& src_params{src_surface->GetSurfaceParams()}; + const auto& dst_params{dst_surface->GetSurfaceParams()}; + + OpenGLState prev_state{OpenGLState::GetCurState()}; + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.draw.read_framebuffer = src_framebuffer.handle; + state.draw.draw_framebuffer = dst_framebuffer.handle; + state.ApplyFramebufferState(); + + u32 buffers{}; + + UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); + UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); + + const GLuint src_texture{src_surface->GetTexture()}; + const GLuint dst_texture{dst_surface->GetTexture()}; + + if (src_params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + src_texture, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dst_texture, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (src_params.type == SurfaceType::Depth) { + 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (src_params.type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_texture, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_texture, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, + dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 083b5406b..1ad01137b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -128,8 +128,16 @@ public: protected: Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + void ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) override; + + void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) override; + +private: + OGLFramebuffer src_framebuffer; + OGLFramebuffer dst_framebuffer; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index a9fa539a5..68c36988d 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -9,19 +9,10 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/scope_exit.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" -#include "video_core/surface.h" namespace OpenGL { -using Tegra::Shader::TextureType; -using Tegra::Texture::SwizzleSource; - -using VideoCore::Surface::SurfaceTarget; -using VideoCore::Surface::SurfaceType; - BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; @@ -49,75 +40,6 @@ void BindBuffersRangePushBuffer::Bind() const { sizes.data()); } -SurfaceBlitter::SurfaceBlitter() { - src_framebuffer.Create(); - dst_framebuffer.Create(); -} - -SurfaceBlitter::~SurfaceBlitter() = default; - -void SurfaceBlitter::Blit(View src, View dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) 
const { - const auto& src_params{src->GetSurfaceParams()}; - const auto& dst_params{dst->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = src_framebuffer.handle; - state.draw.draw_framebuffer = dst_framebuffer.handle; - state.ApplyFramebufferState(); - - u32 buffers{}; - - UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - - const GLuint src_texture{src->GetTexture()}; - const GLuint dst_texture{dst->GetTexture()}; - - if (src_params.type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_texture, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_texture, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, - 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 
GL_TEXTURE_2D, - src_texture, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_texture, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); -} - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 8977d2383..77e8d53ba 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -34,19 +34,6 @@ private: std::vector sizes; }; -class SurfaceBlitter { -public: - explicit SurfaceBlitter(); - ~SurfaceBlitter(); - - void Blit(View src, View dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) const; - -private: - OGLFramebuffer src_framebuffer; - OGLFramebuffer dst_framebuffer; -}; - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9a648bbd..bb5a50ab9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -15,6 +15,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/math_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -142,10 +143,11 @@ public: } } - TView GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params = 
SurfaceParams::CreateForFermiCopySurface(config); - const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).second; + void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, + const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, + const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) { + ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -183,6 +185,9 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; + virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, + const Common::Rectangle& dst_rect) = 0; + void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); @@ -223,6 +228,12 @@ protected: return new_surface; } + TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); + const GPUVAddr gpu_addr = config.Address(); + return GetSurface(gpu_addr, params, true).first; + } + Core::System& system; private: From e0002599accc783be1bda5853df377c84ee6219a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 8 May 2019 03:51:54 -0300 Subject: [PATCH 023/113] surface_base: Add parenthesis to EmplaceOverview's predicate --- src/video_core/texture_cache/surface_base.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index f469ab498..c11998249 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -253,9 +253,8 @@ public: } TView EmplaceOverview(const SurfaceParams& overview_params) { - const u32 num_layers{params.is_layered && 
!overview_params.is_layered ? 1 : params.depth}; - const ViewParams view_params(overview_params.target, 0, num_layers, 0, params.num_levels); - return GetView(view_params); + const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; + return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { From de0b1cb2b2199bd8efff78938d385fa74652cdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 07:09:02 -0400 Subject: [PATCH 024/113] Fixes to mipmap's process and reconstruct process --- src/video_core/texture_cache/surface_base.h | 4 ++-- src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index c11998249..017ee999e 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -262,20 +262,20 @@ public: view_params.target == SurfaceTarget::Texture3D) { return {}; } - const std::size_t size{view_params.GetGuestSizeInBytes()}; const auto layer_mipmap{GetLayerMipmap(view_addr)}; if (!layer_mipmap) { return {}; } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; + const std::size_t size{view_params.GetGuestSizeInBytes()}; if (GetMipmapSize(mipmap) != size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. 
return {}; } - return GetView(ViewParams(params.target, layer, 1, mipmap, 1)); + return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } TView GetMainView() const { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index bb5a50ab9..554b9a228 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -323,7 +323,7 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const u8* host_ptr) { - if (!params.is_layered || params.target == SurfaceTarget::Texture3D) { + if (params.target == SurfaceTarget::Texture3D) { return {}; } TSurface new_surface = GetUncachedSurface(gpu_addr, params); From ba677ccb5a8ae0c889751fcdd40b0c9e818ad992 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 10:32:30 -0400 Subject: [PATCH 025/113] texture_cache: Implement guest flushing --- .../renderer_opengl/gl_rasterizer.cpp | 6 ++++-- src/video_core/texture_cache/surface_base.cpp | 19 +++++++++++-------- src/video_core/texture_cache/texture_cache.h | 14 ++++++++++++++ 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d0e7b61e7..63ee83391 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -722,7 +722,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { if (!addr || !size) { return; } - // texture_cache.FlushRegion(addr, size); + texture_cache.FlushRegion(addr, size); global_cache.FlushRegion(addr, size); } @@ -738,7 +738,9 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { - FlushRegion(addr, size); + if (Settings::values.use_accurate_gpu_emulation) { + FlushRegion(addr, size); + } InvalidateRegion(addr, size); } diff --git 
a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 5e994cf08..dc5013240 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -63,6 +63,9 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Load_Texture); const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -103,7 +106,10 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - auto host_ptr = memory_manager.GetPointer(gpu_addr); + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + return; + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -112,25 +118,22 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, staging_buffer.data() + host_offset, level); } } else { - UNIMPLEMENTED(); - /* ASSERT(params.target == SurfaceTarget::Texture2D); ASSERT(params.num_levels == 1); - const u32 bpp{params.GetFormatBpp() / 8}; + const u32 bpp{params.GetBytesPerPixel()}; const u32 copy_size{params.width * bpp}; if (params.pitch == copy_size) { - std::memcpy(host_ptr, staging_buffer.data(), memory_size); + std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); } else { u8* start{host_ptr}; const u8* read_to{staging_buffer.data()}; - for (u32 h = params.GetHeight(); h > 0; --h) { + for (u32 h = params.height; h > 0; --h) { std::memcpy(start, read_to, copy_size); - start += params.GetPitch(); + start += 
params.pitch; read_to += copy_size; } } - */ } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 554b9a228..422bf3e58 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,6 +61,20 @@ public: } } + void FlushRegion(CacheAddr addr, std::size_t size) { + auto surfaces = GetSurfacesInRegion(addr, size); + if (surfaces.empty()) { + return; + } + std::sort(surfaces.begin(), surfaces.end(), + [](const TSurface& a, const TSurface& b) -> bool { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (const auto& surface : surfaces) { + FlushSurface(surface); + } + } + TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { const auto gpu_addr{config.tic.Address()}; From 4e2071b6d9b414fa0152deb5e9d55674d636afe4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 17:45:59 -0400 Subject: [PATCH 026/113] texture_cache: Correct premature texceptions Due to our current infrastructure, it is possible for a mipmap to be set as a render target before a texception of that mipmap's superset is set afterwards. This is problematic as we rely on texture views to set up texceptions and to protect render targets for 3D texture rendering. One simple solution is to configure framebuffers after texture setup but this brings other problems. This solution forces a reconfiguration of the framebuffers after such an event happens.
--- .../renderer_opengl/gl_rasterizer.cpp | 15 +++++++---- .../renderer_opengl/gl_rasterizer.h | 7 +++-- src/video_core/texture_cache/surface_base.h | 17 +++++++++--- src/video_core/texture_cache/texture_cache.h | 26 ++++++++++++++++--- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 63ee83391..3baf1522d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -461,15 +461,15 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, } std::pair RasterizerOpenGL::ConfigureFramebuffers( - OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, - std::optional single_color_target) { + OpenGLState& current_state, bool must_reconfigure, bool using_color_fb, bool using_depth_fb, + bool preserve_contents, std::optional single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && + if (!must_reconfigure && fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). 
This is done because the guest registers may not change but the @@ -622,8 +622,9 @@ void RasterizerOpenGL::Clear() { return; } - const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( - clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = + ConfigureFramebuffers(clear_state, false, use_color, use_depth || use_stencil, false, + regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -705,6 +706,10 @@ void RasterizerOpenGL::DrawArrays() { DrawParameters params = SetupDraw(); SetupShaders(params.primitive_mode); + if (texture_cache.ConsumeReconfigurationFlag()) { + ConfigureFramebuffers(state, true); + } + buffer_cache.Unmap(); shader_program_manager->ApplyTo(state); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d872e5110..970637efa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -101,6 +101,8 @@ private: /** * Configures the color and depth framebuffer states. + * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure + * again. Used by the texture cache to solve texception conflicts * @param use_color_fb If true, configure color framebuffers. * @param using_depth_fb If true, configure the depth/stencil framebuffer. * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. 
@@ -109,8 +111,9 @@ private: * (requires using_depth_fb to be true) */ std::pair ConfigureFramebuffers( - OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, - bool preserve_contents = true, std::optional single_color_target = {}); + OpenGLState& current_state, bool must_reconfigure = false, bool use_color_fb = true, + bool using_depth_fb = true, bool preserve_contents = true, + std::optional single_color_target = {}); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 017ee999e..179e80ddb 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -55,6 +55,11 @@ public: return (cache_addr < end) && (cache_addr_end > start); } + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; + return (gpu_addr <= other_start && other_end <= gpu_addr_end); + } + // Use only when recycling a surface void SetGpuAddr(const GPUVAddr new_addr) { gpu_addr = new_addr; @@ -105,6 +110,12 @@ public: return params.target == target; } + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { + return std::tie(gpu_addr, params.target, params.num_levels) == + std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && + params.target == SurfaceTarget::Texture2D && params.num_levels == 1; + } + bool MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; @@ -121,9 +132,9 @@ public: } // Tiled surface if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing) == + params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.height, 
rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing)) { + rhs.tile_width_spacing, rhs.num_levels)) { if (params.width == rhs.width) { return MatchStructureResult::FullMatch; } @@ -259,7 +270,7 @@ public: std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - view_params.target == SurfaceTarget::Texture3D) { + params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 422bf3e58..96d108147 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,6 +120,10 @@ public: return {}; } + if (regs.color_mask[index].raw == 0) { + return {}; + } + auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -183,6 +187,12 @@ public: return ++ticks; } + bool ConsumeReconfigurationFlag() { + const bool result = force_reconfiguration; + force_reconfiguration = false; + return result; + } + protected: TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -219,9 +229,10 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface) { - if (surface->IsProtected()) + void Unregister(TSurface surface, const bool force_unregister = false) { + if (surface->IsProtected() && !force_unregister) { return; + } const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = surface->GetCacheAddr(); const std::size_t size = surface->GetSizeInBytes(); @@ -365,8 +376,10 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } + 
force_reconfiguration = false; for (auto surface : overlaps) { - Unregister(surface); + force_reconfiguration |= surface->IsProtected(); + Unregister(surface, true); } Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; @@ -379,6 +392,7 @@ private: const auto cache_addr{ToCacheAddr(host_ptr)}; const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -403,7 +417,7 @@ private: return RebuildSurface(current_surface, params); } } - if (current_surface->GetSizeInBytes() <= candidate_size) { + if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); } @@ -530,6 +544,10 @@ private: u64 ticks{}; + // Sometimes Setup Textures can hit a surface that's on the render target, when this happens + // we force a reconfiguration of the frame buffer after setup. + bool force_reconfiguration; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. From b347543e8341ae323ea232d47df2c144fe21c739 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 May 2019 18:27:29 -0400 Subject: [PATCH 027/113] Reduce amount of size calculations. 
--- src/common/common_funcs.h | 11 ++++ .../renderer_opengl/gl_texture_cache.cpp | 1 - .../renderer_opengl/gl_texture_cache.h | 2 +- src/video_core/texture_cache/surface_base.cpp | 22 +++++--- src/video_core/texture_cache/surface_base.h | 28 ++++------- .../texture_cache/surface_params.cpp | 31 +----------- src/video_core/texture_cache/surface_params.h | 50 +++++++++++++++---- src/video_core/texture_cache/texture_cache.h | 40 +++++++-------- 8 files changed, 97 insertions(+), 88 deletions(-) diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..00a5698f3 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,6 +4,7 @@ #pragma once +#include #include #if !defined(ARCHITECTURE_x86_64) @@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? 
first : last; +} + } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a58e3a816..32cb08963 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -240,7 +240,6 @@ CachedSurface::~CachedSurface() { } void CachedSurface::DownloadTexture(std::vector& staging_buffer) { - LOG_CRITICAL(Render_OpenGL, "Flushing"); MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1ad01137b..0a1b57014 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -133,7 +133,7 @@ protected: const VideoCommon::CopyParams& copy_params) override; void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + const Common::Rectangle& dst_rect) override; private: OGLFramebuffer src_framebuffer; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index dc5013240..36ca72b4a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, - guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ - params.GetHostSizeInBytes()} { - mipmap_offsets.reserve(params.num_levels); - mipmap_sizes.reserve(params.num_levels); + : params{params}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ + params.GetHostSizeInBytes()} { 
std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; - mipmap_sizes.push_back(mipmap_size); - mipmap_offsets.push_back(offset); + mipmap_sizes[level] = mipmap_size; + mipmap_offsets[level] = offset; offset += mipmap_size; } + layer_size = offset; + if (params.is_layered) { + if (params.is_tiled) { + layer_size = + SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); + } + guest_memory_size = layer_size * params.depth; + } else { + guest_memory_size = layer_size; + } } void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 179e80ddb..095deb602 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,6 +9,7 @@ #include #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -16,16 +17,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -template > -ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? 
first : last; -} - namespace Tegra { class MemoryManager; } @@ -153,7 +144,7 @@ public: const auto layer{static_cast(relative_address / layer_size)}; const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = - binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it == mipmap_offsets.end()) { return {}; } @@ -172,8 +163,8 @@ protected: virtual void DecorateSurfaceName() = 0; const SurfaceParams params; - const std::size_t layer_size; - const std::size_t guest_memory_size; + std::size_t layer_size; + std::size_t guest_memory_size; const std::size_t host_memory_size; GPUVAddr gpu_addr{}; CacheAddr cache_addr{}; @@ -268,9 +259,11 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } - std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { - if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || - params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, + const std::size_t candidate_size) { + if (params.target == SurfaceTarget::Texture3D || + (params.num_levels == 1 && !params.is_layered) || + view_params.target == SurfaceTarget::Texture3D) { return {}; } const auto layer_mipmap{GetLayerMipmap(view_addr)}; @@ -279,8 +272,7 @@ public: } const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; - const std::size_t size{view_params.GetGuestSizeInBytes()}; - if (GetMipmapSize(mipmap) != size) { + if (GetMipmapSize(mipmap) != candidate_size) { // TODO: The view may cover many mimaps, this case can still go on. // This edge-case can be safely be ignored since it will just result in worse // performance. 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index d9052152c..b537b26e2 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -4,13 +4,12 @@ #include -#include "common/cityhash.h" #include "common/alignment.h" +#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" #include "video_core/texture_cache/surface_params.h" -#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( return params; } -u32 SurfaceParams::GetMipWidth(u32 level) const { - return std::max(1U, width >> level); -} - -u32 SurfaceParams::GetMipHeight(u32 level) const { - return std::max(1U, height >> level); -} - -u32 SurfaceParams::GetMipDepth(u32 level) const { - return is_layered ? depth : std::max(1U, depth >> level); -} - bool SurfaceParams::IsLayered() const { switch (target) { case SurfaceTarget::Texture1DArray: @@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false); } -u32 SurfaceParams::GetDefaultBlockWidth() const { - return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); -} - -u32 SurfaceParams::GetDefaultBlockHeight() const { - return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); -} - -u32 SurfaceParams::GetBitsPerPixel() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); -} - -u32 SurfaceParams::GetBytesPerPixel() const { - return VideoCore::Surface::GetBytesPerPixel(pixel_format); -} - bool SurfaceParams::IsPixelFormatZeta() const { return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 
ec8efa210..e0ec1be0e 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -10,8 +10,9 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/surface.h" #include "video_core/shader/shader_ir.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" namespace VideoCommon { @@ -50,10 +51,17 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; if (IsPixelFormatASTC(pixel_format)) { + constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = static_cast(Common::AlignUp(width, GetDefaultBlockWidth())) * - static_cast(Common::AlignUp(height, GetDefaultBlockHeight())) * - static_cast(depth) * 4ULL; + host_size_in_bytes = 0; + for (std::size_t level = 0; level < num_levels; level++) { + const std::size_t width = + Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); + const std::size_t height = + Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); + const std::size_t depth = is_layered ? depth : GetMipDepth(level); + host_size_in_bytes += width * height * depth * rgb8_bpp; + } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); } @@ -65,13 +73,19 @@ public: } /// Returns the width of a given mipmap level. - u32 GetMipWidth(u32 level) const; + u32 GetMipWidth(u32 level) const { + return std::max(1U, width >> level); + } /// Returns the height of a given mipmap level. - u32 GetMipHeight(u32 level) const; + u32 GetMipHeight(u32 level) const { + return std::max(1U, height >> level); + } /// Returns the depth of a given mipmap level. - u32 GetMipDepth(u32 level) const; + u32 GetMipDepth(u32 level) const { + return is_layered ? depth : std::max(1U, depth >> level); + } /// Returns the block height of a given mipmap level. 
u32 GetMipBlockHeight(u32 level) const; @@ -79,6 +93,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + } + /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; @@ -98,16 +118,24 @@ public: std::size_t GetHostLayerSize(u32 level) const; /// Returns the default block width. - u32 GetDefaultBlockWidth() const; + u32 GetDefaultBlockWidth() const { + return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); + } /// Returns the default block height. - u32 GetDefaultBlockHeight() const; + u32 GetDefaultBlockHeight() const { + return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); + } /// Returns the bits per pixel. - u32 GetBitsPerPixel() const; + u32 GetBitsPerPixel() const { + return VideoCore::Surface::GetFormatBpp(pixel_format); + } /// Returns the bytes per pixel. - u32 GetBytesPerPixel() const; + u32 GetBytesPerPixel() const { + return VideoCore::Surface::GetBytesPerPixel(pixel_format); + } /// Returns true if the pixel format is a depth and/or stencil format. 
bool IsPixelFormatZeta() const; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96d108147..fbfd1ff0b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -120,10 +120,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - return {}; - } - auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) @@ -165,7 +161,9 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); + TSurface dst_surface = GetFermiSurface(dst_config); + ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); + dst_surface->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -270,10 +268,6 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { - // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; - } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -284,12 +278,16 @@ private: return RecycleStrategy::Flush; } } + // Untopological decision + if (untopological) { + return RecycleStrategy::Ignore; + } return RecycleStrategy::Ignore; } std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const u8* host_ptr, const bool preserve_contents, + const bool preserve_contents, const bool untopological) { for (auto surface : overlaps) { Unregister(surface); @@ -328,6 +326,7 @@ private: } Unregister(current_surface); Register(new_surface); + new_surface->MarkAsModified(current_surface->IsModified(), Tick()); 
return {new_surface, new_surface->GetMainView()}; } @@ -351,6 +350,7 @@ private: if (params.target == SurfaceTarget::Texture3D) { return {}; } + bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); @@ -358,7 +358,7 @@ private: // We send this cases to recycle as they are more complex to handle return {}; } - const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); + const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { return {}; @@ -368,6 +368,7 @@ private: if (new_surface->GetMipmapSize(mipmap) != candidate_size) { return {}; } + modified |= surface->IsModified(); // Now we got all the data set up const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)}; @@ -381,6 +382,7 @@ private: force_reconfiguration |= surface->IsProtected(); Unregister(surface, true); } + new_surface->MarkAsModified(modified, Tick()); Register(new_surface); return {{new_surface, new_surface->GetMainView()}}; } @@ -399,8 +401,7 @@ private: for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } @@ -418,27 +419,26 @@ private: } } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } - std::optional view = current_surface->EmplaceView(params, gpu_addr); + std::optional view = + current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if 
(is_mirage) { LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, - false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { std::optional> view = ReconstructSurface(overlaps, params, gpu_addr, host_ptr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } } From 28d7c2f5a5089051410d37a03d5a4a42e4230842 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 01:10:16 -0300 Subject: [PATCH 028/113] texture_cache: Change internal cache from lists to vectors --- src/video_core/texture_cache/texture_cache.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fbfd1ff0b..1c2b63dae 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include #include +#include #include #include @@ -172,7 +172,7 @@ public: return nullptr; } const CacheAddr page = cache_addr >> registry_page_bits; - std::list& list = registry[page]; + std::vector& list = registry[page]; for (auto& s : list) { if (s->GetCacheAddr() == cache_addr) { return s; @@ -482,7 +482,8 @@ private: CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; while (start <= end) { - registry[start].remove(surface); + auto& reg{registry[start]}; + reg.erase(std::find(reg.begin(), reg.end(), surface)); start++; } } @@ 
-496,7 +497,7 @@ private: const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; std::vector surfaces; while (start <= end) { - std::list& list = registry[start]; + std::vector& list = registry[start]; for (auto& s : list) { if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { s->MarkAsPicked(true); @@ -553,12 +554,12 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map> registry; + std::unordered_map> registry; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. - std::unordered_map> surface_reserve; + std::unordered_map> surface_reserve; std::array render_targets; DepthBufferInfo depth_buffer; From 345e73f2feb0701e3c3099d002a1c21fb524eae4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 04:17:48 -0300 Subject: [PATCH 029/113] video_core: Use un-shifted block sizes to avoid integer divisions Instead of storing all block width, height and depths in their shifted form: block_width = 1U << block_shift; Store them like they are provided by the emulated hardware (their block_shift form). This way we can avoid doing the costly Common::AlignUp operation to align texture sizes and drop CPU integer divisions with bitwise logic (defined in Common::AlignBits). 
--- src/common/alignment.h | 5 ++ src/video_core/engines/fermi_2d.h | 9 +-- src/video_core/engines/maxwell_dma.h | 4 +- src/video_core/texture_cache/surface_base.cpp | 3 +- .../texture_cache/surface_params.cpp | 39 ++++++------- src/video_core/texture_cache/surface_params.h | 7 ++- src/video_core/texture_cache/texture_cache.h | 3 + src/video_core/textures/decoders.cpp | 55 ++++++++++++------- src/video_core/textures/decoders.h | 4 +- src/video_core/textures/texture.h | 9 +-- 10 files changed, 78 insertions(+), 60 deletions(-) diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..3379a6967 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -19,6 +19,11 @@ constexpr T AlignDown(T value, std::size_t size) { return static_cast(value - value % size); } +template +constexpr T AlignBits(T value, T align) { + return (value + ((1 << align) - 1)) >> align << align; +} + template constexpr bool Is4KBAligned(T value) { static_assert(std::is_unsigned_v, "T must be an unsigned value."); diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..3d28afa91 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -63,18 +63,15 @@ public: } u32 BlockWidth() const { - // The block width is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - // The block depth is stored in log2 format. 
- return 1 << block_depth; + return block_depth; } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..522fa97dc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -59,11 +59,11 @@ public: }; u32 BlockHeight() const { - return 1 << block_height; + return block_height; } u32 BlockDepth() const { - return 1 << block_depth; + return block_depth; } }; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 36ca72b4a..510d1aef5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -22,7 +22,6 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ params.GetHostSizeInBytes()} { - std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -75,7 +74,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture target {}", + ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index b537b26e2..3a47f404d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -96,9 +96,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( SurfaceParams params; 
params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); + params.block_width = std::min(block_width, 5U); + params.block_height = std::min(block_height, 5U); + params.block_depth = std::min(block_depth, 5U); params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); @@ -120,9 +120,9 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; + params.block_width = config.memory_layout.block_width; + params.block_height = config.memory_layout.block_height; + params.block_depth = config.memory_layout.block_depth; params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -149,9 +149,9 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.is_tiled = !config.linear; params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, + params.block_width = params.is_tiled ? 
std::min(config.BlockWidth(), 5U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, params.tile_width_spacing = 1; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); @@ -190,9 +190,9 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 16; - while (block_height > 1 && blocks_in_y <= block_height * 4) { - block_height >>= 1; + u32 block_height = 4; + while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { + --block_height; } return block_height; } @@ -202,17 +202,17 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return this->block_depth; } if (is_layered) { - return 1; + return 0; } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 32; - while (block_depth > 1 && depth * 2 <= block_depth) { - block_depth >>= 1; + u32 block_depth = 5; + while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { + --block_depth; } - if (block_depth == 32 && GetMipBlockHeight(level) >= 4) { - return 16; + if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { + return 4; } return block_depth; @@ -252,7 +252,8 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); } if (is_tiled && is_layered) { - return Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } return size; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index e0ec1be0e..7c48782c7 100644 --- 
a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -54,12 +54,12 @@ public: constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; - for (std::size_t level = 0; level < num_levels; level++) { + for (u32 level = 0; level < num_levels; ++level) { const std::size_t width = Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); const std::size_t height = Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? depth : GetMipDepth(level); + const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); host_size_in_bytes += width * height * depth * rgb8_bpp; } } else { @@ -96,7 +96,8 @@ public: // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { - return Common::AlignUp(out_size, Tegra::Texture::GetGOBSize() * block_height * block_depth); + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); } /// Returns the offset in bytes in guest memory of a given mipmap level. 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1c2b63dae..f35d0c88f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -81,6 +81,9 @@ public: if (!gpu_addr) { return {}; } + if (gpu_addr == 0x1b7ec0000) { + // __debugbreak(); + } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..f45fd175a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { std::array, N> values{}; }; -constexpr u32 gob_size_x = 64; -constexpr u32 gob_size_y = 8; -constexpr u32 gob_size_z = 1; -constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; +constexpr u32 gob_size_x_shift = 6; +constexpr u32 gob_size_y_shift = 3; +constexpr u32 gob_size_z_shift = 0; +constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; + +constexpr u32 gob_size_x = 1U << gob_size_x_shift; +constexpr u32 gob_size_y = 1U << gob_size_y_shift; +constexpr u32 gob_size_z = 1U << gob_size_z_shift; +constexpr u32 gob_size = 1U << gob_size_shift; + constexpr u32 fast_swizzle_align = 16; constexpr auto legacy_swizzle_table = SwizzleTable(); @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { + const u32 block_height_size{1U << block_height}; + const u32 block_depth_size{1U << block_depth}; if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { SwizzledData(swizzled_data, unswizzled_data, 
unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } else { SwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, - bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, - width_spacing); + bytes_per_pixel, out_bytes_per_pixel, block_height_size, + block_depth_size, width_spacing); } } @@ -249,16 +257,18 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; for (u32 line = 0; line < subrect_height; ++line) { const u32 gob_address_y = - (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[line % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 gob_address = - gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; + gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* dest_addr = swizzled_data + swizzled_offset; @@ -271,14 +281,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, 
u32 swizzled_width, u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, u32 offset_x, u32 offset_y) { + const u32 block_height_size{1U << block_height}; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + - ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; + const u32 gob_address_y = + (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + + ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y2 % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; + const u32 gob_address = + gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* source_addr = swizzled_data + swizzled_offset; @@ -291,16 +304,18 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { + const u32 block_height_size{1U << block_height}; const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + - ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; + (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * + image_width_in_gobs + + ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; const auto& table = 
legacy_swizzle_table[y % gob_size_y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height; + gob_address_y + (x / gob_size_x) * gob_size * block_height_size; const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; @@ -356,9 +371,9 @@ std::vector DecodeTexture(const std::vector& texture_data, TextureFormat std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth) { if (tiled) { - const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); - const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); - const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); + const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); + const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); + const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); return aligned_width * aligned_height * aligned_depth; } else { return width * height * depth * bytes_per_pixel; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..eaec9b5a5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -12,8 +12,8 @@ namespace Tegra::Texture { // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents // an small rect of (64/bytes_per_pixel)X8. -inline std::size_t GetGOBSize() { - return 512; +inline std::size_t GetGOBSizeShift() { + return 9; } /// Unswizzles a swizzled texture without changing its format. 
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..f22b4e7c7 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -219,20 +219,17 @@ struct TICEntry { u32 BlockWidth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_width; + return block_width; } u32 BlockHeight() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_height; + return block_height; } u32 BlockDepth() const { ASSERT(IsTiled()); - // The block height is stored in log2 format. - return 1 << block_depth; + return block_depth; } bool IsTiled() const { From a4a58be2d46e95df4cead2916b6efbd658a0deaa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 17:59:18 -0400 Subject: [PATCH 030/113] texture_cache: Implement L1_Inner_cache --- src/video_core/texture_cache/texture_cache.h | 43 ++++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f35d0c88f..ad0fbd7ce 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -395,6 +395,26 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (l1_cache.count(cache_addr) > 0) { + TSurface current_surface = l1_cache[cache_addr]; + if (!current_surface->MatchesTopology(params)) { + std::vector overlaps{current_surface}; + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + } + MatchStructureResult s_result = current_surface->MatchesStructure(params); + if (s_result != MatchStructureResult::None && + current_surface->GetGpuAddr() == gpu_addr && + (params.target != SurfaceTarget::Texture3D || + current_surface->MatchTarget(params.target))) { + if (s_result == MatchStructureResult::FullMatch) { + return 
ManageStructuralMatch(current_surface, params); + } else { + return RebuildSurface(current_surface, params); + } + } + } + const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; @@ -410,17 +430,6 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); - } else { - return RebuildSurface(current_surface, params); - } - } if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } @@ -473,8 +482,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache[cache_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -482,8 +493,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - CacheAddr start = surface->GetCacheAddr() >> registry_page_bits; + const CacheAddr cache_addr = surface->GetCacheAddr(); + CacheAddr start = cache_addr >> registry_page_bits; const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cache_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -559,6 +572,10 @@ private: static constexpr u64 registry_page_size{1 << registry_page_bits}; std::unordered_map> registry; + // The L1 Cache is 
used for fast texture lookup before checking the overlaps + // This avoids calculating size and other stuffs. + std::unordered_map l1_cache; + /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. From 94f2be5473182789ec3f6388b43fcd708a505500 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:12:35 -0400 Subject: [PATCH 031/113] texture_cache: Optimize GetMipBlockHeight and GetMipBlockDepth --- src/common/bit_util.h | 44 +++++++++++++++++++ .../texture_cache/surface_params.cpp | 19 +++----- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { } #endif +#ifdef _MSC_VER + +inline u32 MostSignificantBit32(const u32 value) { + unsigned long result; + _BitScanReverse(&result, value); + return static_cast(result); +} + +inline u32 MostSignificantBit64(const u64 value) { + unsigned long result; + _BitScanReverse64(&result, value); + return static_cast(result); +} + +#else + +inline u32 MostSignificantBit32(const u32 value) { + return 31U - static_cast(__builtin_clz(value)); +} + +inline u32 MostSignificantBit64(const u64 value) { + return 63U - static_cast(__builtin_clzll(value)); +} + +#endif + +inline u32 Log2Floor32(const u32 value) { + return MostSignificantBit32(value); +} + +inline u32 Log2Ceil32(const u32 value) { + const u32 log2_f = Log2Floor32(value); + return log2_f + ((value ^ (1U << log2_f)) != 0U); +} + +inline u32 Log2Floor64(const u64 value) { + return MostSignificantBit64(value); +} + +inline u32 Log2Ceil64(const u64 value) { + const u64 log2_f = static_cast(Log2Floor64(value)); + return static_cast(log2_f + ((value ^ (1ULL << log2_f)) != 
0ULL)); +} + } // namespace Common diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 3a47f404d..e7e671d8c 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -5,6 +5,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" @@ -190,11 +191,8 @@ u32 SurfaceParams::GetMipBlockHeight(u32 level) const { const u32 height{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - u32 block_height = 4; - while (block_height > 0 && blocks_in_y <= (1U << block_height) * 4) { - --block_height; - } - return block_height; + const u32 block_height = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height, 3U, 8U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -206,15 +204,10 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { } const u32 depth{GetMipDepth(level)}; - u32 block_depth = 5; - while (block_depth > 0 && depth * 2 <= (1U << block_depth)) { - --block_depth; + const u32 block_depth = Common::Log2Ceil32(depth); + if (block_depth > 4) { + return 5 - (GetMipBlockHeight(level) >= 2); } - - if (block_depth == 5 && GetMipBlockHeight(level) >= 2) { - return 4; - } - return block_depth; } From 5192521dc3f752c385de356158706899f523e498 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 22:26:46 -0400 Subject: [PATCH 032/113] texture_cache: Implement GPU Dirty Flags --- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ad0fbd7ce..8aa0d6515 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ 
b/src/video_core/texture_cache/texture_cache.h @@ -81,17 +81,22 @@ public: if (!gpu_addr) { return {}; } - if (gpu_addr == 0x1b7ec0000) { - // __debugbreak(); - } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; return GetSurface(gpu_addr, params, true).second; } TView GetDepthBufferSurface(bool preserve_contents) { - const auto& regs{system.GPU().Maxwell3D().regs}; + auto& maxwell3d = system.GPU().Maxwell3D(); + + if (!maxwell3d.dirty_flags.zeta_buffer) { + return depth_buffer.view; + } + maxwell3d.dirty_flags.zeta_buffer = false; + + const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; if (!gpu_addr || !regs.zeta_enable) { + SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer( @@ -101,6 +106,8 @@ public: auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) depth_buffer.target->MarkAsProtected(false); + depth_buffer.target = surface_view.first; + depth_buffer.view = surface_view.second; if (depth_buffer.target) depth_buffer.target->MarkAsProtected(true); return surface_view.second; @@ -108,8 +115,13 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); + auto& maxwell3d = system.GPU().Maxwell3D(); + if (!maxwell3d.dirty_flags.color_buffer[index]) { + return render_targets[index].view; + } + maxwell3d.dirty_flags.color_buffer.reset(index); - const auto& regs{system.GPU().Maxwell3D().regs}; + const auto& regs{maxwell3d.regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { SetEmptyColorBuffer(index); @@ -128,6 +140,7 @@ public: if (render_targets[index].target) render_targets[index].target->MarkAsProtected(false); render_targets[index].target = surface_view.first; + render_targets[index].view = surface_view.second; if (render_targets[index].target) 
render_targets[index].target->MarkAsProtected(true); return surface_view.second; @@ -154,7 +167,6 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { render_targets[index].target->MarkAsProtected(false); - std::memset(&render_targets[index].config, sizeof(RenderTargetConfig), 0); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -545,13 +557,7 @@ private: return {}; } - struct RenderInfo { - RenderTargetConfig config; - TSurface target; - TView view; - }; - - struct DepthBufferInfo { + struct FramebufferTargetInfo { TSurface target; TView view; }; @@ -580,8 +586,9 @@ private: /// previously been used. This is to prevent surfaces from being constantly created and /// destroyed when used with different surface parameters. std::unordered_map> surface_reserve; - std::array render_targets; - DepthBufferInfo depth_buffer; + std::array + render_targets; + FramebufferTargetInfo depth_buffer; std::vector staging_buffer; }; From 1bbc9debfbcbd960874e2f877604506d174f613c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:42:08 -0400 Subject: [PATCH 033/113] Remove Framebuffer reconfiguration and restrict rendertarget protection --- .../renderer_opengl/gl_rasterizer.cpp | 16 ++++------ .../renderer_opengl/gl_rasterizer.h | 5 ++-- src/video_core/texture_cache/surface_base.h | 15 ++++++---- src/video_core/texture_cache/texture_cache.h | 30 ++++++------------- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3baf1522d..2d6fd154a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -461,15 +461,15 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, } std::pair RasterizerOpenGL::ConfigureFramebuffers( - OpenGLState& current_state, bool must_reconfigure, bool 
using_color_fb, bool using_depth_fb, - bool preserve_contents, std::optional single_color_target) { + OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, + std::optional single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (!must_reconfigure && fb_config_state == current_framebuffer_config_state && + if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). This is done because the guest registers may not change but the @@ -622,9 +622,8 @@ void RasterizerOpenGL::Clear() { return; } - const auto [clear_depth, clear_stencil] = - ConfigureFramebuffers(clear_state, false, use_color, use_depth || use_stencil, false, - regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( + clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -659,7 +658,6 @@ void RasterizerOpenGL::DrawArrays() { auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; - ConfigureFramebuffers(state); SyncColorMask(); SyncFragmentColorClampState(); SyncMultiSampleState(); @@ -706,9 +704,7 @@ void RasterizerOpenGL::DrawArrays() { DrawParameters params = SetupDraw(); SetupShaders(params.primitive_mode); - if (texture_cache.ConsumeReconfigurationFlag()) { - ConfigureFramebuffers(state, true); - } + ConfigureFramebuffers(state); buffer_cache.Unmap(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 970637efa..be5ac1b9f 100644 --- 
a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -111,9 +111,8 @@ private: * (requires using_depth_fb to be true) */ std::pair ConfigureFramebuffers( - OpenGLState& current_state, bool must_reconfigure = false, bool use_color_fb = true, - bool using_depth_fb = true, bool preserve_contents = true, - std::optional single_color_target = {}); + OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, + bool preserve_contents = true, std::optional single_color_target = {}); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 095deb602..78db2d665 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -218,12 +218,12 @@ public: virtual void DownloadTexture(std::vector& staging_buffer) = 0; void MarkAsModified(const bool is_modified_, const u64 tick) { - is_modified = is_modified_ || is_protected; + is_modified = is_modified_ || is_target; modification_tick = tick; } - void MarkAsProtected(const bool is_protected) { - this->is_protected = is_protected; + void MarkAsRenderTarget(const bool is_target) { + this->is_target = is_target; } void MarkAsPicked(const bool is_picked) { @@ -235,7 +235,12 @@ public: } bool IsProtected() const { - return is_protected; + // Only 3D Slices are to be protected + return is_target && params.block_depth > 0; + } + + bool IsRenderTarget() const { + return is_target; } bool IsRegistered() const { @@ -307,7 +312,7 @@ private: } bool is_modified{}; - bool is_protected{}; + bool is_target{}; bool is_registered{}; bool is_picked{}; u64 modification_tick{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8aa0d6515..4ac5668c8 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -105,11 +105,11 @@ public: regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; depth_buffer.view = surface_view.second; if (depth_buffer.target) - depth_buffer.target->MarkAsProtected(true); + depth_buffer.target->MarkAsRenderTarget(true); return surface_view.second; } @@ -138,11 +138,11 @@ public: auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents); if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; render_targets[index].view = surface_view.second; if (render_targets[index].target) - render_targets[index].target->MarkAsProtected(true); + render_targets[index].target->MarkAsRenderTarget(true); return surface_view.second; } @@ -158,7 +158,7 @@ public: void SetEmptyDepthBuffer() { if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsProtected(false); + depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = nullptr; depth_buffer.view = nullptr; } @@ -166,7 +166,7 @@ public: void SetEmptyColorBuffer(std::size_t index) { if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsProtected(false); + render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = nullptr; render_targets[index].view = nullptr; } @@ -200,12 +200,6 @@ public: return ++ticks; } - bool ConsumeReconfigurationFlag() { - const bool result = force_reconfiguration; - force_reconfiguration = false; - return result; - } - protected: TextureCache(Core::System& system, 
VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -242,8 +236,8 @@ protected: rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } - void Unregister(TSurface surface, const bool force_unregister = false) { - if (surface->IsProtected() && !force_unregister) { + void Unregister(TSurface surface) { + if (surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -392,10 +386,8 @@ private: std::min(src_params.height, dst_height), 1); ImageCopy(surface, new_surface, copy_params); } - force_reconfiguration = false; for (auto surface : overlaps) { - force_reconfiguration |= surface->IsProtected(); - Unregister(surface, true); + Unregister(surface); } new_surface->MarkAsModified(modified, Tick()); Register(new_surface); @@ -567,10 +559,6 @@ private: u64 ticks{}; - // Sometimes Setup Textures can hit a surface that's on the render target, when this happens - // we force a reconfiguration of the frame buffer after setup. - bool force_reconfiguration; - // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
From 07cc7e0c12143a84744abb8dc03eb46eb615b308 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 10 May 2019 23:50:01 -0400 Subject: [PATCH 034/113] texture_cache: Add ASync Protections --- src/video_core/texture_cache/texture_cache.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ac5668c8..1b8ada910 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -56,12 +57,16 @@ public: } void InvalidateRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + for (const auto& surface : GetSurfacesInRegion(addr, size)) { Unregister(surface); } } void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + auto surfaces = GetSurfacesInRegion(addr, size); if (surfaces.empty()) { return; @@ -220,6 +225,8 @@ protected: const Common::Rectangle& dst_rect) = 0; void Register(TSurface surface) { + std::lock_guard lock{mutex}; + const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -237,6 +244,8 @@ protected: } void Unregister(TSurface surface) { + std::lock_guard lock{mutex}; + if (surface->IsProtected()) { return; } @@ -579,6 +588,7 @@ private: FramebufferTargetInfo depth_buffer; std::vector staging_buffer; + std::recursive_mutex mutex; }; } // namespace VideoCommon From 41b4674458595d3d5cd938c06ce651b6d80d305c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 00:17:47 -0400 Subject: [PATCH 035/113] gl_texture_cache: Make main views be proxy textures instead of a full view. 
--- .../renderer_opengl/gl_texture_cache.cpp | 28 ++++++++++++------- .../renderer_opengl/gl_texture_cache.h | 8 +++++- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 32cb08963..a55097e5f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -228,10 +228,9 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format); DecorateSurfaceName(); - main_view = CreateView( - ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels)); - main_view->DecorateViewName(gpu_addr, - params.TargetName() + "V:" + std::to_string(view_count++)); + main_view = CreateViewInner( + ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), + true); } CachedSurface::~CachedSurface() { @@ -351,16 +350,24 @@ void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) } View CachedSurface::CreateView(const ViewParams& view_key) { - auto view = std::make_shared(*this, view_key); + return CreateViewInner(view_key, false); +} + +View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { + auto view = std::make_shared(*this, view_key, is_proxy); views[view_key] = view; - view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); + if (!is_proxy) + view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); return view; } -CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params) - : VideoCommon::ViewBase(params), surface{surface} { +CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, + const bool is_proxy) + : VideoCommon::ViewBase(params), 
surface{surface}, is_proxy{is_proxy} { target = GetTextureTarget(params.target); - texture_view = CreateTextureView(); + if (!is_proxy) { + texture_view = CreateTextureView(); + } swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); } @@ -401,7 +408,8 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; - glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + const GLuint handle = GetTexture(); + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); } OGLTextureView CachedSurfaceView::CreateTextureView() const { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0a1b57014..0ba42dbab 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -51,6 +51,7 @@ protected: void DecorateSurfaceName(); View CreateView(const ViewParams& view_key) override; + View CreateViewInner(const ViewParams& view_key, const bool is_proxy); private: void UploadTextureMipmap(u32 level, std::vector& staging_buffer); @@ -67,13 +68,17 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, + const bool is_proxy); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment) const; GLuint GetTexture() { + if (is_proxy) { + return surface.GetTexture(); + } return texture_view.handle; } @@ -119,6 +124,7 @@ private: OGLTextureView texture_view; u32 swizzle; + bool is_proxy; }; class TextureCacheOpenGL final : public TextureCacheBase { From 
2131f715730580dfeb692acdf3ae3e62ffd455c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 10 May 2019 23:02:14 -0300 Subject: [PATCH 036/113] surface_params: Optimize CreateForTexture Instead of using Common::AlignUp, use Common::AlignBits to align the texture compression factor. --- src/video_core/surface.h | 142 +++++++++--------- .../texture_cache/surface_params.cpp | 6 +- 2 files changed, 76 insertions(+), 72 deletions(-) diff --git a/src/video_core/surface.h b/src/video_core/surface.h index b783e4b27..8e98033f3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -122,71 +122,71 @@ enum class SurfaceTarget { TextureCubeArray, }; -constexpr std::array compression_factor_table = {{ - 1, // ABGR8U - 1, // ABGR8S - 1, // ABGR8UI - 1, // B5G6R5U - 1, // A2B10G10R10U - 1, // A1B5G5R5U - 1, // R8U - 1, // R8UI - 1, // RGBA16F - 1, // RGBA16U - 1, // RGBA16UI - 1, // R11FG11FB10F - 1, // RGBA32UI - 4, // DXT1 - 4, // DXT23 - 4, // DXT45 - 4, // DXN1 - 4, // DXN2UNORM - 4, // DXN2SNORM - 4, // BC7U - 4, // BC6H_UF16 - 4, // BC6H_SF16 - 4, // ASTC_2D_4X4 - 1, // BGRA8 - 1, // RGBA32F - 1, // RG32F - 1, // R32F - 1, // R16F - 1, // R16U - 1, // R16S - 1, // R16UI - 1, // R16I - 1, // RG16 - 1, // RG16F - 1, // RG16UI - 1, // RG16I - 1, // RG16S - 1, // RGB32F - 1, // RGBA8_SRGB - 1, // RG8U - 1, // RG8S - 1, // RG32UI - 1, // R32UI - 4, // ASTC_2D_8X8 - 4, // ASTC_2D_8X5 - 4, // ASTC_2D_5X4 - 1, // BGRA8_SRGB - 4, // DXT1_SRGB - 4, // DXT23_SRGB - 4, // DXT45_SRGB - 4, // BC7U_SRGB - 4, // ASTC_2D_4X4_SRGB - 4, // ASTC_2D_8X8_SRGB - 4, // ASTC_2D_8X5_SRGB - 4, // ASTC_2D_5X4_SRGB - 4, // ASTC_2D_5X5 - 4, // ASTC_2D_5X5_SRGB - 4, // ASTC_2D_10X8 - 4, // ASTC_2D_10X8_SRGB - 1, // Z32F - 1, // Z16 - 1, // Z24S8 - 1, // S8Z24 - 1, // Z32FS8 +inline constexpr std::array compression_factor_shift_table = {{ + 0, // ABGR8U + 0, // ABGR8S + 0, // ABGR8UI + 0, // B5G6R5U + 0, // A2B10G10R10U + 0, // A1B5G5R5U + 0, // R8U + 0, // R8UI + 0, // RGBA16F + 0, 
// RGBA16U + 0, // RGBA16UI + 0, // R11FG11FB10F + 0, // RGBA32UI + 2, // DXT1 + 2, // DXT23 + 2, // DXT45 + 2, // DXN1 + 2, // DXN2UNORM + 2, // DXN2SNORM + 2, // BC7U + 2, // BC6H_UF16 + 2, // BC6H_SF16 + 2, // ASTC_2D_4X4 + 0, // BGRA8 + 0, // RGBA32F + 0, // RG32F + 0, // R32F + 0, // R16F + 0, // R16U + 0, // R16S + 0, // R16UI + 0, // R16I + 0, // RG16 + 0, // RG16F + 0, // RG16UI + 0, // RG16I + 0, // RG16S + 0, // RGB32F + 0, // RGBA8_SRGB + 0, // RG8U + 0, // RG8S + 0, // RG32UI + 0, // R32UI + 2, // ASTC_2D_8X8 + 2, // ASTC_2D_8X5 + 2, // ASTC_2D_5X4 + 0, // BGRA8_SRGB + 2, // DXT1_SRGB + 2, // DXT23_SRGB + 2, // DXT45_SRGB + 2, // BC7U_SRGB + 2, // ASTC_2D_4X4_SRGB + 2, // ASTC_2D_8X8_SRGB + 2, // ASTC_2D_8X5_SRGB + 2, // ASTC_2D_5X4_SRGB + 2, // ASTC_2D_5X5 + 2, // ASTC_2D_5X5_SRGB + 2, // ASTC_2D_10X8 + 2, // ASTC_2D_10X8_SRGB + 0, // Z32F + 0, // Z16 + 0, // Z24S8 + 0, // S8Z24 + 0, // Z32FS8 }}; /** @@ -195,12 +195,14 @@ constexpr std::array compression_factor_table = {{ * compressed image. This is used for maintaining proper surface sizes for compressed * texture formats. 
*/ -static constexpr u32 GetCompressionFactor(PixelFormat format) { - if (format == PixelFormat::Invalid) - return 0; +inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { + DEBUG_ASSERT(format != PixelFormat::Invalid); + DEBUG_ASSERT(static_cast(format) < compression_factor_table.size()); + return compression_factor_shift_table[static_cast(format)]; +} - ASSERT(static_cast(format) < compression_factor_table.size()); - return compression_factor_table[static_cast(format)]; +inline constexpr u32 GetCompressionFactor(PixelFormat format) { + return 1U << GetCompressionFactorShift(format); } constexpr std::array block_width_table = {{ diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e7e671d8c..6f39f8468 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,8 +76,10 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. 
params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); - params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.width = + Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); + params.height = + Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { From 6bd034eae9ab48c00e1635e2d7059b3d99b764b0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 00:38:35 -0400 Subject: [PATCH 037/113] engine_upload: Addapt to new Texture Cache --- src/video_core/engines/engine_upload.cpp | 4 ++-- src/video_core/engines/engine_upload.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 082a40cd9..c776b9a56 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -36,8 +36,8 @@ void State::ProcessData(const u32 data, const bool is_last_call) { } else { UNIMPLEMENTED_IF(regs.dest.z != 0); UNIMPLEMENTED_IF(regs.dest.depth != 1); - UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); - UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); const std::size_t dst_size = Tegra::Texture::CalculateSize( true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); tmp_buffer.resize(dst_size); diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index ef4f5839a..cb294aec3 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -39,15 +39,15 @@ struct Registers { } u32 
BlockWidth() const { - return 1U << block_width.Value(); + return block_width; } u32 BlockHeight() const { - return 1U << block_height.Value(); + return block_height; } u32 BlockDepth() const { - return 1U << block_depth.Value(); + return block_depth; } } dest; }; From d65a4af89582f272efbbfd47d1ee78e616553312 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 11 May 2019 01:21:02 -0400 Subject: [PATCH 038/113] texture_cache return invalid buffer on deactivated color_mask --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 ++++-- src/video_core/texture_cache/texture_cache.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6fd154a..2872dbdeb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -370,10 +370,12 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, return; if (fbkey.is_single_buffer) { - if (fbkey.color_attachments[0] != GL_NONE) { + if (fbkey.color_attachments[0] != GL_NONE && fbkey.colors[0]) { fbkey.colors[0]->Attach(fbkey.color_attachments[0]); + glDrawBuffer(fbkey.color_attachments[0]); + } else { + glDrawBuffer(GL_NONE); } - glDrawBuffer(fbkey.color_attachments[0]); } else { for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { if (fbkey.colors[index]) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1b8ada910..7058399e2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,6 +133,11 @@ public: return {}; } + if (regs.color_mask[index].raw != 0) { + SetEmptyColorBuffer(index); + return {}; + } + const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { From 9098905dd13bb68f2fe49a9590688b76cc999fdd Mon Sep 17 00:00:00 2001 From: 
ReinUsesLisp Date: Sat, 11 May 2019 03:15:49 -0300 Subject: [PATCH 039/113] gl_framebuffer_cache: Use a hashed struct to cache framebuffers --- src/video_core/CMakeLists.txt | 2 + .../renderer_opengl/gl_framebuffer_cache.cpp | 73 +++++++++++++++++++ .../renderer_opengl/gl_framebuffer_cache.h | 68 +++++++++++++++++ .../renderer_opengl/gl_rasterizer.cpp | 59 +-------------- .../renderer_opengl/gl_rasterizer.h | 6 +- src/video_core/texture_cache/texture_cache.h | 2 +- 6 files changed, 148 insertions(+), 62 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.cpp create mode 100644 src/video_core/renderer_opengl/gl_framebuffer_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 470fbceda..9d43f03d2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(video_core STATIC renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h + renderer_opengl/gl_framebuffer_cache.cpp + renderer_opengl/gl_framebuffer_cache.h renderer_opengl/gl_global_cache.cpp renderer_opengl/gl_global_cache.h renderer_opengl/gl_rasterizer.cpp diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp new file mode 100644 index 000000000..bb9f9b81f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -0,0 +1,73 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/cityhash.h" +#include "common/scope_exit.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_framebuffer_cache.h" +#include "video_core/renderer_opengl/gl_state.h" + +namespace OpenGL { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; + +FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; + +GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { + const auto [entry, is_cache_miss] = cache.try_emplace(key); + auto& framebuffer{entry->second}; + if (is_cache_miss) { + framebuffer = CreateFramebuffer(key); + } + return framebuffer.handle; +} + +OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { + OGLFramebuffer framebuffer; + framebuffer.Create(); + + // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. + local_state.draw.draw_framebuffer = framebuffer.handle; + local_state.ApplyFramebufferState(); + + if (key.is_single_buffer) { + if (key.color_attachments[0] != GL_NONE && key.colors[0]) { + key.colors[0]->Attach(key.color_attachments[0]); + glDrawBuffer(key.color_attachments[0]); + } else { + glDrawBuffer(GL_NONE); + } + } else { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (key.colors[index]) { + key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); + } + } + glDrawBuffers(key.colors_count, key.color_attachments.data()); + } + + if (key.zeta) { + key.zeta->Attach(key.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + } + + return framebuffer; +} + +std::size_t FramebufferCacheKey::Hash() const { + static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct"); + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); +} + +bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const { + return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors, + zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count, + rhs.color_attachments, rhs.colors, rhs.zeta); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h new file mode 100644 index 000000000..a3a996353 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h @@ -0,0 +1,68 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +struct alignas(sizeof(u64)) FramebufferCacheKey { + bool is_single_buffer = false; + bool stencil_enable = false; + u16 colors_count = 0; + + std::array color_attachments{}; + std::array colors; + View zeta; + + std::size_t Hash() const; + + bool operator==(const FramebufferCacheKey& rhs) const; + + bool operator!=(const FramebufferCacheKey& rhs) const { + return !operator==(rhs); + } +}; + +} // namespace OpenGL + +namespace std { + +template <> +struct hash { + std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace OpenGL { + +class FramebufferCacheOpenGL { +public: + FramebufferCacheOpenGL(); + ~FramebufferCacheOpenGL(); + + GLuint GetFramebuffer(const FramebufferCacheKey& key); + +private: + OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); + + OpenGLState local_state; + std::unordered_map cache; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2872dbdeb..8218c5143 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -78,26 +78,6 @@ struct DrawParameters { } }; -struct FramebufferCacheKey { - bool is_single_buffer = false; - bool stencil_enable = false; - - std::array color_attachments{}; - std::array colors{}; - u32 colors_count = 0; - - View zeta = nullptr; - - auto Tie() const { - return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, - zeta); - } - - bool operator<(const FramebufferCacheKey& rhs) const { - return Tie() < rhs.Tie(); 
- } -}; - RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, @@ -355,42 +335,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { gpu.dirty_flags.shaders = false; } -void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, - OpenGLState& current_state) { - const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); - auto& framebuffer = entry->second; - - if (is_cache_miss) - framebuffer.Create(); - - current_state.draw.draw_framebuffer = framebuffer.handle; - current_state.ApplyFramebufferState(); - - if (!is_cache_miss) - return; - - if (fbkey.is_single_buffer) { - if (fbkey.color_attachments[0] != GL_NONE && fbkey.colors[0]) { - fbkey.colors[0]->Attach(fbkey.color_attachments[0]); - glDrawBuffer(fbkey.color_attachments[0]); - } else { - glDrawBuffer(GL_NONE); - } - } else { - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - if (fbkey.colors[index]) { - fbkey.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); - } - } - glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); - } - - if (fbkey.zeta) { - fbkey.zeta->Attach(fbkey.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT - : GL_DEPTH_ATTACHMENT); - } -} - std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { const auto& regs = system.GPU().Maxwell3D().regs; @@ -556,7 +500,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } - SetupCachedFramebuffer(fbkey, current_state); + current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); return current_depth_stencil_usage = {static_cast(depth_surface), fbkey.stencil_enable}; @@ -638,6 +582,7 @@ void RasterizerOpenGL::Clear() { clear_state.ApplyDepth(); clear_state.ApplyStencilTest(); clear_state.ApplyViewport(); + clear_state.ApplyFramebufferState(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index be5ac1b9f..2f13d9758 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_framebuffer_cache.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" @@ -49,7 +50,6 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; -struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: @@ -193,6 +193,7 @@ private: ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; SamplerCacheOpenGL sampler_cache; + FramebufferCacheOpenGL framebuffer_cache; Core::System& system; ScreenInfo& screen_info; @@ -203,7 +204,6 @@ private: OGLVertexArray> vertex_array_cache; - std::map framebuffer_cache; 
FramebufferConfigState current_framebuffer_config_state; std::pair current_depth_stencil_usage{}; @@ -226,8 +226,6 @@ private: void SetupShaders(GLenum primitive_mode); - void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); - enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7058399e2..419c0de5e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -133,7 +133,7 @@ public: return {}; } - if (regs.color_mask[index].raw != 0) { + if (regs.color_mask[index].raw == 0) { SetEmptyColorBuffer(index); return {}; } From c2ed348bddc1cd1bd97ce789d7855b1571e45ef4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 12 May 2019 18:31:03 -0300 Subject: [PATCH 040/113] surface_params: Ensure pitch is always written to avoid surface leaks --- src/video_core/texture_cache/surface_params.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f39f8468..8472b69dc 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -111,6 +111,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; + params.pitch = 0; params.num_levels = 1; params.is_layered = false; return params; @@ -131,6 +132,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); if (params.is_tiled) { + params.pitch = 0; params.width = config.width; } else { const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; From 
7731a0e2d15da04eea746b4b8dd5c6c4b29f9f29 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 12 May 2019 20:33:52 -0400 Subject: [PATCH 041/113] texture_cache: General Fixes Fixed ASTC mipmaps loading Fixed alignment on openGL upload/download Fixed Block Height Calculation Removed unalign_height --- .../renderer_opengl/gl_texture_cache.cpp | 11 ++- src/video_core/surface.h | 84 ++++++++++++++++++- src/video_core/texture_cache/surface_base.cpp | 18 +++- src/video_core/texture_cache/surface_base.h | 4 + .../texture_cache/surface_params.cpp | 52 +++++++----- src/video_core/texture_cache/surface_params.h | 27 ++++-- src/video_core/textures/convert.cpp | 14 ++-- src/video_core/textures/convert.h | 7 +- 8 files changed, 170 insertions(+), 47 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a55097e5f..197c9f02c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -23,6 +23,7 @@ using VideoCore::MortonSwizzleMode; using VideoCore::Surface::ComponentType; using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -242,10 +243,10 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_PACK_ALIGNMENT, 1); SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.num_levels; ++level) { + glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); if (is_compressed) { @@ -270,10 +271,14 @@ void CachedSurface::UploadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTextureMipmap(u32 
level, std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + auto compression_type = params.GetCompressionType(); + + const std::size_t mip_offset = compression_type == SurfaceCompression::Converted + ? params.GetConvertedMipmapOffset(level) + : params.GetHostMipmapLevelOffset(level); u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 8e98033f3..5d49214e5 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -197,7 +197,7 @@ inline constexpr std::array compression_factor_shift_table */ inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { DEBUG_ASSERT(format != PixelFormat::Invalid); - DEBUG_ASSERT(static_cast(format) < compression_factor_table.size()); + DEBUG_ASSERT(static_cast(format) < compression_factor_shift_table.size()); return compression_factor_shift_table[static_cast(format)]; } @@ -438,6 +438,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } +enum class SurfaceCompression : u8 { + None = 0, + Compressed = 1, + Converted = 2, + Rearranged = 3, +}; + +inline constexpr std::array compression_type_table = {{ + SurfaceCompression::None, // ABGR8U + SurfaceCompression::None, // ABGR8S + SurfaceCompression::None, // ABGR8UI + SurfaceCompression::None, // B5G6R5U + SurfaceCompression::None, // A2B10G10R10U + SurfaceCompression::None, // A1B5G5R5U + SurfaceCompression::None, // R8U + SurfaceCompression::None, // R8UI + SurfaceCompression::None, // RGBA16F + SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, 
// RGBA16UI + SurfaceCompression::None, // R11FG11FB10F + SurfaceCompression::None, // RGBA32UI + SurfaceCompression::Compressed, // DXT1 + SurfaceCompression::Compressed, // DXT23 + SurfaceCompression::Compressed, // DXT45 + SurfaceCompression::Compressed, // DXN1 + SurfaceCompression::Compressed, // DXN2UNORM + SurfaceCompression::Compressed, // DXN2SNORM + SurfaceCompression::Compressed, // BC7U + SurfaceCompression::Compressed, // BC6H_UF16 + SurfaceCompression::Compressed, // BC6H_SF16 + SurfaceCompression::Converted, // ASTC_2D_4X4 + SurfaceCompression::None, // BGRA8 + SurfaceCompression::None, // RGBA32F + SurfaceCompression::None, // RG32F + SurfaceCompression::None, // R32F + SurfaceCompression::None, // R16F + SurfaceCompression::None, // R16U + SurfaceCompression::None, // R16S + SurfaceCompression::None, // R16UI + SurfaceCompression::None, // R16I + SurfaceCompression::None, // RG16 + SurfaceCompression::None, // RG16F + SurfaceCompression::None, // RG16UI + SurfaceCompression::None, // RG16I + SurfaceCompression::None, // RG16S + SurfaceCompression::None, // RGB32F + SurfaceCompression::None, // RGBA8_SRGB + SurfaceCompression::None, // RG8U + SurfaceCompression::None, // RG8S + SurfaceCompression::None, // RG32UI + SurfaceCompression::None, // R32UI + SurfaceCompression::Converted, // ASTC_2D_8X8 + SurfaceCompression::Converted, // ASTC_2D_8X5 + SurfaceCompression::Converted, // ASTC_2D_5X4 + SurfaceCompression::None, // BGRA8_SRGB + SurfaceCompression::Compressed, // DXT1_SRGB + SurfaceCompression::Compressed, // DXT23_SRGB + SurfaceCompression::Compressed, // DXT45_SRGB + SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X5 + SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB + 
SurfaceCompression::Converted, // ASTC_2D_10X8 + SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::None, // Z32F + SurfaceCompression::None, // Z16 + SurfaceCompression::None, // Z24S8 + SurfaceCompression::Rearranged, // S8Z24 + SurfaceCompression::None, // Z32FS8 +}}; + +static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { + if (format == PixelFormat::Invalid) + return SurfaceCompression::None; + + ASSERT(static_cast(format) < compression_type_table.size()); + return compression_type_table[static_cast(format)]; +} + SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); bool SurfaceTargetIsLayered(SurfaceTarget target); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 510d1aef5..ceff51043 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -17,6 +17,7 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; +using VideoCore::Surface::SurfaceCompression; SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), @@ -102,9 +103,20 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; - ConvertFromGuestToHost(staging_buffer.data() + host_offset, params.pixel_format, + auto compression_type = params.GetCompressionType(); + if (compression_type == SurfaceCompression::None || + compression_type == SurfaceCompression::Compressed) + return; + + for (u32 level_up = params.num_levels; level_up > 0; --level_up) { + const u32 level = level_up - 1; + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; + const 
std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged + ? in_host_offset + : params.GetConvertedMipmapOffset(level); + u8* in_buffer = staging_buffer.data() + in_host_offset; + u8* out_buffer = staging_buffer.data() + out_host_offset; + ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 78db2d665..cb7f22706 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -93,6 +93,10 @@ public: return mipmap_sizes[level]; } + bool IsLinear() const { + return !params.is_tiled; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 8472b69dc..d9d157d02 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,17 +76,14 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = - Common::AlignBits(config.tic.Width(), GetCompressionFactorShift(params.pixel_format)); - params.height = - Common::AlignBits(config.tic.Height(), GetCompressionFactorShift(params.pixel_format)); + params.width = config.tic.Width(); + params.height = config.tic.Height(); params.depth = config.tic.Depth(); if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.unaligned_height = config.tic.Height(); params.num_levels = config.tic.max_mip_level + 1; params.is_layered = params.IsLayered(); return params; @@ -108,7 +105,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.type = GetFormatType(params.pixel_format); params.width = zeta_width; params.height = zeta_height; - params.unaligned_height = zeta_height; params.target = SurfaceTarget::Texture2D; params.depth = 1; params.pitch = 0; @@ -141,7 +137,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz } params.height = config.height; params.depth = 1; - params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.is_layered = false; @@ -164,7 +159,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.width = config.width; params.height = config.height; params.pitch = config.pitch; - params.unaligned_height = config.height; // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters params.target = SurfaceTarget::Texture2D; params.depth = 1; @@ -185,18 +179,18 @@ bool SurfaceParams::IsLayered() const { } } +// Auto block resizing algorithm from: +// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c u32 SurfaceParams::GetMipBlockHeight(u32 level) const { - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c if (level == 0) { return this->block_height; } - const u32 height{GetMipHeight(level)}; + const u32 height_new{GetMipHeight(level)}; const u32 default_block_height{GetDefaultBlockHeight()}; - const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height}; - const u32 block_height = Common::Log2Ceil32(blocks_in_y); - return std::clamp(block_height, 3U, 8U) - 3U; + const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; + const u32 
block_height_new = Common::Log2Ceil32(blocks_in_y); + return std::clamp(block_height_new, 3U, 7U) - 3U; } u32 SurfaceParams::GetMipBlockDepth(u32 level) const { @@ -207,12 +201,12 @@ u32 SurfaceParams::GetMipBlockDepth(u32 level) const { return 0; } - const u32 depth{GetMipDepth(level)}; - const u32 block_depth = Common::Log2Ceil32(depth); - if (block_depth > 4) { + const u32 depth_new{GetMipDepth(level)}; + const u32 block_depth_new = Common::Log2Ceil32(depth_new); + if (block_depth_new > 4) { return 5 - (GetMipBlockHeight(level) >= 2); } - return block_depth; + return block_depth_new; } std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { @@ -231,6 +225,14 @@ std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { return offset; } +std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { + std::size_t offset = 0; + for (u32 i = 0; i < level; i++) { + offset += GetConvertedMipmapSize(i); + } + return offset; +} + std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, false, false); } @@ -239,6 +241,14 @@ std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); } +std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { + constexpr std::size_t rgb8_bpp = 4ULL; + const std::size_t width_t = GetMipWidth(level); + const std::size_t height_t = GetMipHeight(level); + const std::size_t depth_t = is_layered ? 
depth : GetMipDepth(level); + return width_t * height_t * depth_t * rgb8_bpp; +} + std::size_t SurfaceParams::GetGuestLayerSize() const { return GetLayerSize(false, false); } @@ -287,12 +297,10 @@ std::size_t SurfaceParams::Hash() const { bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, - height, depth, pitch, unaligned_height, num_levels, pixel_format, - component_type, type, target) == + height, depth, pitch, num_levels, pixel_format, component_type, type, target) == std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, - rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, - rhs.type, rhs.target); + rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target); } std::string SurfaceParams::TargetName() const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 7c48782c7..b3082173f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -7,6 +7,7 @@ #include #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -16,6 +17,8 @@ namespace VideoCommon { +using VideoCore::Surface::SurfaceCompression; + class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. 
@@ -50,17 +53,12 @@ public: std::size_t GetHostSizeInBytes() const { std::size_t host_size_in_bytes; - if (IsPixelFormatASTC(pixel_format)) { + if (GetCompressionType() == SurfaceCompression::Converted) { constexpr std::size_t rgb8_bpp = 4ULL; // ASTC is uncompressed in software, in emulated as RGBA8 host_size_in_bytes = 0; for (u32 level = 0; level < num_levels; ++level) { - const std::size_t width = - Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth()); - const std::size_t height = - Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight()); - const std::size_t depth = is_layered ? this->depth : GetMipDepth(level); - host_size_in_bytes += width * height * depth * rgb8_bpp; + host_size_in_bytes += GetConvertedMipmapSize(level); } } else { host_size_in_bytes = GetInnerMemorySize(true, false, false); @@ -93,6 +91,12 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + u32 GetRowAlignment(u32 level) const { + const u32 bpp = + GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); + } + // Helper used for out of class size calculations static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, const u32 block_depth) { @@ -106,12 +110,16 @@ public: /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + std::size_t GetConvertedMipmapOffset(u32 level) const; + /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const; /// Returns the size in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetConvertedMipmapSize(u32 level) const; + /// Returns the size of a layer in bytes in guest memory. 
std::size_t GetGuestLayerSize() const; @@ -141,6 +149,10 @@ public: /// Returns true if the pixel format is a depth and/or stencil format. bool IsPixelFormatZeta() const; + SurfaceCompression GetCompressionType() const { + return VideoCore::Surface::GetFormatCompressionType(pixel_format); + } + std::string TargetName() const; bool is_tiled; @@ -154,7 +166,6 @@ public: u32 height; u32 depth; u32 pitch; - u32 unaligned_height; u32 num_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 82050bd51..f3efa7eb0 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp @@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { SwapS8Z24ToZ24S8(data, width, height); } -void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, - bool convert_astc, bool convert_s8z24) { +void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, + u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { if (convert_astc && IsPixelFormatASTC(pixel_format)) { // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. 
u32 block_width{}; u32 block_height{}; std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - const std::vector rgba8_data = - Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); - std::copy(rgba8_data.begin(), rgba8_data.end(), data); + const std::vector rgba8_data = Tegra::Texture::ASTC::Decompress( + in_data, width, height, depth, block_width, block_height); + std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { - Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); + Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); } } @@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h } } -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h index 12542e71c..d5d6c77bb 100644 --- a/src/video_core/textures/convert.h +++ b/src/video_core/textures/convert.h @@ -12,10 +12,11 @@ enum class PixelFormat; namespace Tegra::Texture { -void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, - u32 height, u32 depth, bool convert_astc, bool convert_s8z24); +void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, + u32 width, u32 height, u32 depth, bool convert_astc, + bool convert_s8z24); void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, u32 height, u32 depth, bool convert_astc, bool convert_s8z24); -} // namespace Tegra::Texture \ No newline at end of file +} // namespace Tegra::Texture From a79831d9d02f7c42d82ea36210cac7952a3ef16e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 19:14:02 -0400 Subject: [PATCH 042/113] texture_cache: Implement Guard mechanism --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 ++++ 
src/video_core/texture_cache/texture_cache.h | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8218c5143..afacc3fbd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -424,6 +424,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; + texture_cache.Guard(true); + View depth_surface{}; if (using_depth_fb) { depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); @@ -500,6 +502,8 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } + texture_cache.Guard(false); + current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 419c0de5e..2ad6210dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + void Guard(bool new_guard) { + guard_cache = new_guard; + } + void FlushRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -251,7 +255,7 @@ protected: void Unregister(TSurface surface) { std::lock_guard lock{mutex}; - if (surface->IsProtected()) { + if (guard_cache && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -573,6 +577,9 @@ private: u64 ticks{}; + // Guards the cache for protection conflicts. + bool guard_cache{}; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly // large in size. 
From 4530511ee4dfc92ddbfed7f91978f332be517c90 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 21:35:32 -0400 Subject: [PATCH 043/113] texture_cache: Try to Reconstruct Surface on bigger than overlap. This fixes clouds in SMO Cap Kingdom and lens on Cloud Kingdom. Also moved accurate_gpu setting check to Pick Strategy --- src/video_core/texture_cache/texture_cache.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2ad6210dd..38b56475f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -295,6 +295,9 @@ private: RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleStrategy::Flush; + } // 3D Textures decision if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { return RecycleStrategy::Flush; @@ -319,10 +322,7 @@ private: for (auto surface : overlaps) { Unregister(surface); } - RecycleStrategy strategy = !Settings::values.use_accurate_gpu_emulation - ? 
PickStrategy(overlaps, params, gpu_addr, untopological) - : RecycleStrategy::Flush; - switch (strategy) { + switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { return InitializeSurface(gpu_addr, params, preserve_contents); } @@ -453,6 +453,13 @@ private: if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { + if (current_surface->GetGpuAddr() == gpu_addr) { + std::optional> view = + ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + if (view.has_value()) { + return *view; + } + } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::optional view = From 6162cb922e67c6c529fb17a91da726fdf3444a50 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 13 May 2019 22:59:18 -0400 Subject: [PATCH 044/113] texture_cache: Document the most important methods. --- src/video_core/texture_cache/texture_cache.h | 95 ++++++++++++++++++-- 1 file changed, 87 insertions(+), 8 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 38b56475f..04e9528b8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -64,6 +64,10 @@ public: } } + /** + * `Guard` guarantees that rendertargets don't unregister themselves if they + * collide. Protection is currently only done on 3D slices. + **/ void Guard(bool new_guard) { guard_cache = new_guard; } @@ -293,6 +297,14 @@ private: BufferCopy = 3, }; + /** + * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. + * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons.
+ **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool untopological) { if (Settings::values.use_accurate_gpu_emulation) { @@ -315,6 +327,18 @@ private: return RecycleStrategy::Ignore; } + /** + * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in + *the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the + *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the + *new surface from that data. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank + * @param untopological, tells the recycler that the texture has no way to match the overlaps + * due to topological reasons. + **/ std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, @@ -343,6 +367,12 @@ private: } } + /** + * `RebuildSurface` this method takes a single surface and recreates into another that + * may differ in format, target or width alignment. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we'll use to recreate the surface. + **/ std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); @@ -357,6 +387,14 @@ private: return {new_surface, new_surface->GetMainView()}; } + /** + * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's + * params if it's an exact match, we return the main view of the registered surface. If its + * formats don't match, we rebuild the surface. We call this last method a `Mirage`.
If formats + * match but the targets don't, we create an overview View of the registered surface. + * @param current_surface, the registered surface in the cache which we want to convert. + * @param params, the new surface params which we want to check. + **/ std::pair ManageStructuralMatch(TSurface current_surface, const SurfaceParams& params) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); @@ -370,10 +408,18 @@ private: return {current_surface, current_surface->EmplaceOverview(params)}; } - std::optional> ReconstructSurface(std::vector& overlaps, - const SurfaceParams& params, - const GPUVAddr gpu_addr, - const u8* host_ptr) { + /** + * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface + * matches the candidate in some way, we got no guarantees here. We try to see if the overlaps + * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface + * for them, else we return nothing. + * @param overlaps, the overlapping surfaces registered in the cache. + * @param params, the parameters of the new surface. + * @param gpu_addr, the starting address of the new surface. + **/ + std::optional> TryReconstructSurface(std::vector& overlaps, + const SurfaceParams& params, + const GPUVAddr gpu_addr) { if (params.target == SurfaceTarget::Texture3D) { return {}; } @@ -412,12 +458,30 @@ private: return {{new_surface, new_surface->GetMainView()}}; } + /** + * `GetSurface` gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache. This is done in 3 big steps. The first is to + * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. + * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from + * memory else we move to step 3. Step 3 consists of figuring the relationship between the + * candidate texture and the overlaps.
We divide the scenarios depending if there's 1 or many + * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the + * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to + * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface + * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the parameters of the candidate surface. + * @param preserve_contents, tells if the new surface should be loaded from memory or left blank. + **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 1 + // Check Level 1 Cache for a fast structural match. If candidate surface + // matches at certain level we are pretty much done. if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; if (!current_surface->MatchesTopology(params)) { @@ -437,31 +501,43 @@ private: } } + // Step 2 + // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + // If none are found, we are done. We just load the surface and create it. if (overlaps.empty()) { return InitializeSurface(gpu_addr, params, preserve_contents); } + // Step 3 + // Now we need to figure the relationship between the texture and its overlaps + // we do a topological test to ensure we can find some relationship. If it fails + // immediately recycle the texture for (auto surface : overlaps) { if (!surface->MatchesTopology(params)) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); } } + // Split cases between 1 overlap or many.
if (overlaps.size() == 1) { TSurface current_surface = overlaps[0]; + // First check if the surface is within the overlap. If not, it means + // two things either the candidate surface is a supertexture of the overlap + // or they don't match in any known way. if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } } return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); if (view.has_value()) { @@ -472,15 +548,18 @@ private: } return {current_surface, *view}; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } else { + // If there are many overlaps, odds are they are subtextures of the candidate + // surface. We try to construct a new surface based on the candidate parameters, + // using the overlaps. If a single overlap fails, this will fail. std::optional> view = - ReconstructSurface(overlaps, params, gpu_addr, host_ptr); + TryReconstructSurface(overlaps, params, gpu_addr); if (view.has_value()) { return *view; } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } + // We failed all the tests, recycle the overlaps into a new texture. + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, From d267948a73d2364949660a24d07833ea05c9fcc8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 14 May 2019 00:55:32 -0400 Subject: [PATCH 045/113] texture_cache: loose TryReconstructSurface when accurate GPU is not on. Also corrects some asserts. 
--- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 20 +++++++++++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..afb9578d0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 1); + ASSERT(regs.dst_params.BlockDepth() == 0); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index ceff51043..d4aa2c54b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -130,7 +130,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, return; } if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {}", params.block_width); + ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 04e9528b8..85c9160e0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -425,6 +425,7 @@ private: } bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); + u32 passed_tests = 0; for (auto surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { @@ 
-434,12 +435,12 @@ private: const std::size_t candidate_size = surface->GetSizeInBytes(); auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; if (!mipmap_layer) { - return {}; + continue; } const u32 layer{mipmap_layer->first}; const u32 mipmap{mipmap_layer->second}; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { - return {}; + continue; } modified |= surface->IsModified(); // Now we got all the data set up @@ -448,8 +449,15 @@ private: const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, std::min(src_params.width, dst_width), std::min(src_params.height, dst_height), 1); + passed_tests++; ImageCopy(surface, new_surface, copy_params); } + if (passed_tests == 0) { + return {}; + // In Accurate GPU all test should pass, else we recycle + } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { + return {}; + } for (auto surface : overlaps) { Unregister(surface); } @@ -548,6 +556,14 @@ private: } return {current_surface, *view}; } + // The next case is unsafe, so if we r in accurate GPU, just skip it + if (Settings::values.use_accurate_gpu_emulation) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + } + // This is the case the texture is a part of the parent. + if (current_surface->MatchesSubTexture(params, gpu_addr)) { + return RebuildSurface(current_surface, params); + } } else { // If there are many overlaps, odds are they are subtextures of the candidate // surface. 
We try to construct a new surface based on the candidate parameters, From 4e81fc8296c6204645151bbaa23a7d80827a4293 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 20:50:35 -0300 Subject: [PATCH 046/113] shader: Implement texture buffers --- src/video_core/engines/shader_bytecode.h | 16 +++++++++ src/video_core/shader/decode/texture.cpp | 44 ++++++++++++++++++++++++ src/video_core/shader/shader_ir.h | 2 ++ 3 files changed, 62 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ffb3ec3e0..5b32e1249 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1231,6 +1231,20 @@ union Instruction { } } texs; + union { + BitField<28, 1, u64> is_array; + BitField<29, 2, TextureType> texture_type; + BitField<35, 1, u64> aoffi; + BitField<49, 1, u64> nodep_flag; + BitField<50, 1, u64> ms; // Multisample? + BitField<54, 1, u64> cl; + BitField<55, 1, u64> process_mode; + + TextureProcessMode GetTextureProcessMode() const { + return process_mode == 0 ? 
TextureProcessMode::LZ : TextureProcessMode::LL; + } + } tld; + union { BitField<49, 1, u64> nodep_flag; BitField<53, 4, u64> texture_info; @@ -1408,6 +1422,7 @@ public: TXQ, // Texture Query TXQ_B, // Texture Query Bindless TEXS, // Texture Fetch with scalar/non-vec4 source/destinations + TLD, // Texture Load TLDS, // Texture Load with scalar/non-vec4 source/destinations TLD4, // Texture Load 4 TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations @@ -1682,6 +1697,7 @@ private: INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), + INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4a356dbd4..b22831c64 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -245,6 +245,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TLD: { + UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); + + if (instr.tld.nodep_flag) { + LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); + } + + WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); + break; + } case OpCode::Id::TLDS: { const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; @@ -575,6 +587,38 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de return values; } +Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { + const auto 
texture_type{instr.tld.texture_type}; + const bool is_array{instr.tld.is_array}; + const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; + const std::size_t coord_count{GetCoordCount(texture_type)}; + + u64 gpr8_cursor{instr.gpr8.Value()}; + const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; + + std::vector coords; + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(gpr8_cursor++)); + } + + u64 gpr20_cursor{instr.gpr20.Value()}; + // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; + const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; + // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; + // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); + } + + return values; +} + Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index edcf2288e..1b84c0672 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -277,6 +277,8 @@ private: Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, bool is_aoffi); + Node4 GetTldCode(Tegra::Shader::Instruction instr); + Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 
bool is_array); From 6c81c8f5b7f80f0f40a69827adb3c1c99e4e5d29 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 21:45:59 -0300 Subject: [PATCH 047/113] gl_shader_decompiler: Allow 1D textures to be texture buffers --- .../renderer_opengl/gl_shader_decompiler.cpp | 42 +++++++++++++++++-- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7dc2e0560..ece386cdc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -453,9 +453,13 @@ private: void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { - std::string sampler_type = [&sampler] { + const std::string name{GetSampler(sampler)}; + const std::string description{"layout (binding = SAMPLER_BINDING_" + + std::to_string(sampler.GetIndex()) + ") uniform "}; + std::string sampler_type = [&]() { switch (sampler.GetType()) { case Tegra::Shader::TextureType::Texture1D: + // Special cased, read below. return "sampler1D"; case Tegra::Shader::TextureType::Texture2D: return "sampler2D"; @@ -475,8 +479,19 @@ private: sampler_type += "Shadow"; } - code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), - sampler_type, GetSampler(sampler)); + if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { + // 1D textures can be aliased to texture buffers, hide the declarations behind a + // preprocessor flag and use one or the other from the GPU state. This has to be + // done because shaders don't have enough information to determine the texture type. 
+ EmitIfdefIsBuffer(sampler); + code.AddLine(description + "samplerBuffer " + name + ';'); + code.AddLine("#else"); + code.AddLine(description + sampler_type + ' ' + name + ';'); + code.AddLine("#endif"); + } else { + // The other texture types (2D, 3D and cubes) don't have this issue. + code.AddLine(description + sampler_type + ' ' + name + ';'); + } } if (!samplers.empty()) { code.AddNewLine(); @@ -1439,13 +1454,28 @@ private: else if (next < count) expr += ", "; } + + // Store a copy of the expression without the lod to be used with texture buffers + std::string expr_buffer = expr; + if (meta->lod) { expr += ", "; expr += CastOperand(Visit(meta->lod), Type::Int); } expr += ')'; + expr += GetSwizzle(meta->element); - return expr + GetSwizzle(meta->element); + expr_buffer += ')'; + expr_buffer += GetSwizzle(meta->element); + + const std::string tmp{code.GenerateTemporary()}; + EmitIfdefIsBuffer(meta->sampler); + code.AddLine("float " + tmp + " = " + expr_buffer + ';'); + code.AddLine("#else"); + code.AddLine("float " + tmp + " = " + expr + ';'); + code.AddLine("#endif"); + + return tmp; } std::string Branch(Operation operation) { @@ -1756,6 +1786,10 @@ private: return GetDeclarationWithSuffix(static_cast(sampler.GetIndex()), "sampler"); } + void EmitIfdefIsBuffer(const Sampler& sampler) { + code.AddLine(fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex())); + } + std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { return fmt::format("{}_{}_{}", name, index, suffix); } From b8c75a845b1784045a10fa8b5f1f57f2ec53eeca Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Apr 2019 01:01:22 -0300 Subject: [PATCH 048/113] maxwell_3d: Partially implement texture buffers as 1D textures --- src/video_core/engines/maxwell_3d.cpp | 12 ++++-------- .../renderer_opengl/gl_rasterizer_cache.cpp | 2 +- src/video_core/surface.cpp | 2 ++ src/video_core/textures/texture.h | 18 +++++++++++++++++- 4 files changed, 24 insertions(+), 10 
deletions(-) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 08d553696..8755b8af4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -430,14 +430,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || - tic_entry.header_version == Texture::TICHeaderVersion::Pitch, - "TIC versions other than BlockLinear or Pitch are unimplemented"); - - const auto r_type = tic_entry.r_type.Value(); - const auto g_type = tic_entry.g_type.Value(); - const auto b_type = tic_entry.b_type.Value(); - const auto a_type = tic_entry.a_type.Value(); + const auto r_type{tic_entry.r_type.Value()}; + const auto g_type{tic_entry.g_type.Value()}; + const auto b_type{tic_entry.b_type.Value()}; + const auto a_type{tic_entry.a_type.Value()}; // TODO(Subv): Different data types for separate components are not supported DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a7681902e..543b36271 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -140,7 +140,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); - if (!params.is_tiled) { + if (config.tic.IsLineal()) { params.pitch = config.tic.Pitch(); } params.unaligned_height = config.tic.Height(); diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6384fa8d2..56c43af17 100644 --- 
a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t switch (texture_type) { case Tegra::Texture::TextureType::Texture1D: return SurfaceTarget::Texture1D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return SurfaceTarget::Texture1D; // Fixme case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2DNoMipmap: return SurfaceTarget::Texture2D; diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index f22b4e7c7..ddeed73d0 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -172,12 +172,16 @@ struct TICEntry { BitField<26, 1, u32> use_header_opt_control; BitField<27, 1, u32> depth_texture; BitField<28, 4, u32> max_mip_level; + + BitField<0, 16, u32> buffer_high_width_minus_one; }; union { BitField<0, 16, u32> width_minus_1; BitField<22, 1, u32> srgb_conversion; BitField<23, 4, TextureType> texture_type; BitField<29, 3, u32> border_size; + + BitField<0, 16, u32> buffer_low_width_minus_one; }; union { BitField<0, 16, u32> height_minus_1; @@ -206,7 +210,10 @@ struct TICEntry { } u32 Width() const { - return width_minus_1 + 1; + if (header_version != TICHeaderVersion::OneDBuffer) { + return width_minus_1 + 1; + } + return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one; } u32 Height() const { @@ -237,6 +244,15 @@ struct TICEntry { header_version == TICHeaderVersion::BlockLinearColorKey; } + bool IsLineal() const { + return header_version == TICHeaderVersion::Pitch || + header_version == TICHeaderVersion::PitchColorKey; + } + + bool IsBuffer() const { + return header_version == TICHeaderVersion::OneDBuffer; + } + bool IsSrgbConversionEnabled() const { return srgb_conversion != 0; } From 07f7ce1da2e86e862b1254a5f543af5ae76d1b43 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Apr 2019 18:03:41 -0300 Subject: [PATCH 049/113] 
gl_rasterizer_cache: Use texture buffers to emulate texture buffers --- .../renderer_opengl/gl_rasterizer_cache.cpp | 37 ++++++++++++++----- .../renderer_opengl/gl_rasterizer_cache.h | 3 ++ .../renderer_opengl/renderer_opengl.cpp | 1 - src/video_core/surface.cpp | 4 +- src/video_core/surface.h | 1 + 5 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 543b36271..e27da1fa7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -149,6 +149,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, switch (params.target) { case SurfaceTarget::Texture1D: + case SurfaceTarget::TextureBuffer: case SurfaceTarget::Texture2D: params.depth = 1; break; @@ -389,6 +390,8 @@ static GLenum SurfaceTargetToGL(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; + case SurfaceTarget::TextureBuffer: + return GL_TEXTURE_BUFFER; case SurfaceTarget::Texture2D: return GL_TEXTURE_2D; case SurfaceTarget::Texture3D: @@ -600,29 +603,35 @@ CachedSurface::CachedSurface(const SurfaceParams& params) switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format, - width); + glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width); + break; + case SurfaceTarget::TextureBuffer: + texture_buffer.Create(); + glNamedBufferStorage(texture_buffer.handle, + params.width * GetBytesPerPixel(params.pixel_format), nullptr, + GL_DYNAMIC_STORAGE_BIT); + glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format, - width, height); + 
glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format, - width, height, params.depth); + glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height, + params.depth); break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", static_cast(params.target)); UNREACHABLE(); - glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format, - width, height); + glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); } - ApplyTextureDefaults(texture.handle, params.max_mip_level); + if (params.target != SurfaceTarget::TextureBuffer) { + ApplyTextureDefaults(texture.handle, params.max_mip_level); + } OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); } @@ -785,6 +794,13 @@ void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_t glTextureSubImage1D(texture.handle, mip_map, x0, static_cast(rect.GetWidth()), tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); break; + case SurfaceTarget::TextureBuffer: + ASSERT(mip_map == 0); + glNamedBufferSubData(texture_buffer.handle, x0, + static_cast(rect.GetWidth()) * + GetBytesPerPixel(params.pixel_format), + &gl_buffer[mip_map][buffer_offset]); + break; case SurfaceTarget::Texture2D: glTextureSubImage2D(texture.handle, mip_map, x0, y0, static_cast(rect.GetWidth()), @@ -860,6 +876,9 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, Tegra::Texture::SwizzleSource swizzle_y, Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w) { + if (params.target == SurfaceTarget::TextureBuffer) { + return; + } const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); const 
GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y); const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 6263ef3e7..bbab79575 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -250,6 +250,8 @@ struct SurfaceParams { switch (target) { case SurfaceTarget::Texture1D: return "1D"; + case SurfaceTarget::TextureBuffer: + return "Buffer"; case SurfaceTarget::Texture2D: return "2D"; case SurfaceTarget::Texture3D: @@ -439,6 +441,7 @@ private: OGLTexture texture; OGLTexture discrepant_view; + OGLBuffer texture_buffer; SurfaceParams params{}; GLenum gl_target{}; GLenum gl_internal_format{}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index aafd6f31b..b142521ec 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -471,7 +471,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum } } -/// Initialize the renderer bool RendererOpenGL::Init() { Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 56c43af17..52a79e4a7 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -13,7 +13,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t case Tegra::Texture::TextureType::Texture1D: return SurfaceTarget::Texture1D; case Tegra::Texture::TextureType::Texture1DBuffer: - return SurfaceTarget::Texture1D; // Fixme + return SurfaceTarget::TextureBuffer; case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2DNoMipmap: return SurfaceTarget::Texture2D; @@ -37,6 +37,7 @@ SurfaceTarget 
SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t bool SurfaceTargetIsLayered(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: + case SurfaceTarget::TextureBuffer: case SurfaceTarget::Texture2D: case SurfaceTarget::Texture3D: return false; @@ -55,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { bool SurfaceTargetIsArray(SurfaceTarget target) { switch (target) { case SurfaceTarget::Texture1D: + case SurfaceTarget::TextureBuffer: case SurfaceTarget::Texture2D: case SurfaceTarget::Texture3D: case SurfaceTarget::TextureCubemap: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 5d49214e5..959504cd3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -114,6 +114,7 @@ enum class SurfaceType { enum class SurfaceTarget { Texture1D, + TextureBuffer, Texture2D, Texture3D, Texture1DArray, From 58c0d374227c9607e8ddcc9f2bff78a1fb86f440 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Apr 2019 18:12:28 -0300 Subject: [PATCH 050/113] video_core: Make ARB_buffer_storage a required extension --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 ----- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 5 +++-- src/video_core/renderer_opengl/gl_stream_buffer.h | 3 ++- src/yuzu/main.cpp | 3 +++ src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | 4 ++++ 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index afacc3fbd..56f2d2972 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -105,11 +105,6 @@ void RasterizerOpenGL::CheckExtensions() { Render_OpenGL, "Anisotropic filter is not supported! This can cause graphical issues in some games."); } - if (!GLAD_GL_ARB_buffer_storage) { - LOG_WARNING( - Render_OpenGL, - "Buffer storage control is not supported! 
This can cause performance degradation."); - } } GLuint RasterizerOpenGL::SetupVertexFormat() { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index d0b14b3f6..35ba334e4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) +OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, + bool use_persistent) : buffer_size(size) { gl_buffer.Create(); @@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - if (GLAD_GL_ARB_buffer_storage) { + if (use_persistent) { persistent = true; coherent = prefer_coherent; const GLbitfield flags = diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 3d18ecb4d..f8383cbd4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -13,7 +13,8 @@ namespace OpenGL { class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); + explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, + bool use_persistent = true); ~OGLStreamBuffer(); GLuint GetHandle() const; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 66a7080c9..443fec249 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -750,6 +750,9 @@ void GMainWindow::OnDisplayTitleBars(bool show) { QStringList GMainWindow::GetUnsupportedGLExtensions() { QStringList unsupported_ext; + if (!GLAD_GL_ARB_buffer_storage) { + 
unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); + } if (!GLAD_GL_ARB_direct_state_access) { unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); } diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index e2d3df180..f91b071bf 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp @@ -52,6 +52,10 @@ private: bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { std::vector unsupported_ext; + if (!GLAD_GL_ARB_buffer_storage) + unsupported_ext.push_back("ARB_buffer_storage"); + if (!GLAD_GL_ARB_direct_state_access) + unsupported_ext.push_back("ARB_direct_state_access"); if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) From 007ffbef1c3bb6ae5fb85d24754a60d4eea87e45 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Apr 2019 21:08:31 -0300 Subject: [PATCH 051/113] gl_rasterizer: Track texture buffer usage --- .../renderer_opengl/gl_rasterizer.cpp | 34 ++++---- .../renderer_opengl/gl_rasterizer.h | 7 +- .../renderer_opengl/gl_shader_cache.cpp | 78 +++++++++++-------- .../renderer_opengl/gl_shader_cache.h | 17 ++-- .../renderer_opengl/gl_shader_disk_cache.cpp | 4 +- .../renderer_opengl/gl_shader_disk_cache.h | 53 +++++++++---- 6 files changed, 119 insertions(+), 74 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 56f2d2972..4f7eeb22c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -29,8 +29,10 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using PixelFormat = VideoCore::Surface::PixelFormat; -using SurfaceType = VideoCore::Surface::SurfaceType; + +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceTarget; +using 
VideoCore::Surface::SurfaceType; MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); @@ -281,8 +283,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { static_cast(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; - const auto [program_handle, next_bindings] = - shader->GetProgramHandle(primitive_mode, base_bindings); + + const auto stage_enum{static_cast(stage)}; + SetupDrawConstBuffers(stage_enum, shader); + SetupGlobalRegions(stage_enum, shader); + const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; + + const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; + const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -300,11 +308,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_config.enable.Value(), shader_config.offset); } - const auto stage_enum = static_cast(stage); - SetupDrawConstBuffers(stage_enum, shader); - SetupGlobalRegions(stage_enum, shader); - SetupTextures(stage_enum, shader, base_bindings); - // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). 
To avoid this, enable the @@ -791,8 +794,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade } } -void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings) { +TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -801,6 +804,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), "Exceeded the number of active textures."); + TextureBufferUsage texture_buffer_usage{0}; + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; Tegra::Texture::FullTextureInfo texture; @@ -814,7 +819,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s } const u32 current_bindpoint = base_bindings.sampler + bindpoint; - state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); + auto& unit{state.texture_units[current_bindpoint]}; + unit.sampler = sampler_cache.GetSampler(texture.tsc); if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, @@ -822,9 +828,11 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid - state.texture_units[current_bindpoint].texture = 0; + unit.texture = 0; } } + + return texture_buffer_usage; } void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 
2f13d9758..64c27660f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -126,9 +126,10 @@ private: void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader); - /// Configures the current textures to use for the draw command. - void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings); + /// Configures the current textures to use for the draw command. Returns shaders texture buffer + /// usage. + TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader, BaseBindings base_bindings); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ac8a9e6b7..e859a900c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -168,8 +168,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr } CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, - Maxwell::ShaderProgram program_type, BaseBindings base_bindings, - GLenum primitive_mode, bool hint_retrievable = false) { + Maxwell::ShaderProgram program_type, const ProgramVariant& variant, + bool hint_retrievable = false) { + auto base_bindings{variant.base_bindings}; + const auto primitive_mode{variant.primitive_mode}; + const auto texture_buffer_usage{variant.texture_buffer_usage}; + std::string source = "#version 430 core\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); @@ -187,6 +191,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn base_bindings.sampler++); } + // 
Transform 1D textures to texture samplers by declaring its preprocessor macros. + for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { + if (!texture_buffer_usage.test(i)) { + continue; + } + source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); + } + if (program_type == Maxwell::ShaderProgram::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = GetPrimitiveDescription(primitive_mode); @@ -261,20 +273,18 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, shader_length = entries.shader_length; } -std::tuple CachedShader::GetProgramHandle(GLenum primitive_mode, - BaseBindings base_bindings) { +std::tuple CachedShader::GetProgramHandle(const ProgramVariant& variant) { GLuint handle{}; if (program_type == Maxwell::ShaderProgram::Geometry) { - handle = GetGeometryShader(primitive_mode, base_bindings); + handle = GetGeometryShader(variant); } else { - const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); + const auto [entry, is_cache_miss] = programs.try_emplace(variant); auto& program = entry->second; if (is_cache_miss) { - program = TryLoadProgram(primitive_mode, base_bindings); + program = TryLoadProgram(variant); if (!program) { - program = - SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); - disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); + program = SpecializeShader(code, entries, program_type, variant); + disk_cache.SaveUsage(GetUsage(variant)); } LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); @@ -283,6 +293,7 @@ std::tuple CachedShader::GetProgramHandle(GLenum primitive handle = program->handle; } + auto base_bindings{variant.base_bindings}; base_bindings.cbuf += static_cast(entries.const_buffers.size()) + RESERVED_UBOS; base_bindings.gmem += static_cast(entries.global_memory_entries.size()); base_bindings.sampler += static_cast(entries.samplers.size()); @@ -290,43 +301,42 @@ std::tuple CachedShader::GetProgramHandle(GLenum primitive return 
{handle, base_bindings}; } -GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { - const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); +GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) { + const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant); auto& programs = entry->second; - switch (primitive_mode) { + switch (variant.primitive_mode) { case GL_POINTS: - return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.points, variant); case GL_LINES: case GL_LINE_STRIP: - return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.lines, variant); case GL_LINES_ADJACENCY: case GL_LINE_STRIP_ADJACENCY: - return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.lines_adjacency, variant); case GL_TRIANGLES: case GL_TRIANGLE_STRIP: case GL_TRIANGLE_FAN: - return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.triangles, variant); case GL_TRIANGLES_ADJACENCY: case GL_TRIANGLE_STRIP_ADJACENCY: - return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.triangles_adjacency, variant); default: UNREACHABLE_MSG("Unknown primitive mode."); - return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); + return LazyGeometryProgram(programs.points, variant); } } -GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, - GLenum primitive_mode) { +GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, + const ProgramVariant& variant) { if (target_program) { return target_program->handle; } - const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode); - target_program = 
TryLoadProgram(primitive_mode, base_bindings); + const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode); + target_program = TryLoadProgram(variant); if (!target_program) { - target_program = - SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); - disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); + target_program = SpecializeShader(code, entries, program_type, variant); + disk_cache.SaveUsage(GetUsage(variant)); } LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); @@ -334,18 +344,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind return target_program->handle; }; -CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode, - BaseBindings base_bindings) const { - const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings)); +CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { + const auto found = precompiled_programs.find(GetUsage(variant)); if (found == precompiled_programs.end()) { return {}; } return found->second; } -ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, - BaseBindings base_bindings) const { - return {unique_identifier, base_bindings, primitive_mode}; +ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { + ShaderDiskCacheUsage usage; + usage.unique_identifier = unique_identifier; + usage.variant = variant; + return usage; } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -411,8 +422,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } if (!shader) { shader = SpecializeShader(unspecialized.code, unspecialized.entries, - unspecialized.program_type, usage.bindings, - usage.primitive, true); + unspecialized.program_type, usage.variant, true); } std::scoped_lock lock(mutex); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h 
b/src/video_core/renderer_opengl/gl_shader_cache.h index 09bd0761d..59bcb14e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -67,8 +68,7 @@ public: } /// Gets the GL program handle for the shader - std::tuple GetProgramHandle(GLenum primitive_mode, - BaseBindings base_bindings); + std::tuple GetProgramHandle(const ProgramVariant& variant); private: // Geometry programs. These are needed because GLSL needs an input topology but it's not @@ -82,15 +82,14 @@ private: CachedProgram triangles_adjacency; }; - GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); + GLuint GetGeometryShader(const ProgramVariant& variant); /// Generates a geometry shader or returns one that already exists. - GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, - GLenum primitive_mode); + GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant); - CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const; + CachedProgram TryLoadProgram(const ProgramVariant& variant) const; - ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; + ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; u8* host_ptr{}; VAddr cpu_addr{}; @@ -104,8 +103,8 @@ private: std::string code; - std::unordered_map programs; - std::unordered_map geometry_programs; + std::unordered_map programs; + std::unordered_map geometry_programs; std::unordered_map cbuf_resource_cache; std::unordered_map gmem_resource_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ee4a45ca2..d338ece8e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -34,11 +34,11 @@ enum class 
PrecompiledEntryKind : u32 { Dump, }; -constexpr u32 NativeVersion = 1; +constexpr u32 NativeVersion = 2; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 12); -static_assert(sizeof(ShaderDiskCacheUsage) == 24); +static_assert(sizeof(ShaderDiskCacheUsage) == 32); namespace { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ecd72ba58..7c9f0cc75 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -30,15 +30,17 @@ class IOFile; namespace OpenGL { -using ProgramCode = std::vector; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - struct ShaderDiskCacheUsage; struct ShaderDiskCacheDump; using ShaderDumpsMap = std::unordered_map; -/// Allocated bindings used by an OpenGL shader program +using ProgramCode = std::vector; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +using TextureBufferUsage = std::bitset<64>; + +/// Allocated bindings used by an OpenGL shader program. struct BaseBindings { u32 cbuf{}; u32 gmem{}; @@ -53,15 +55,29 @@ struct BaseBindings { } }; -/// Describes how a shader is used +/// Describes the different variants a single program can be compiled. +struct ProgramVariant { + BaseBindings base_bindings; + GLenum primitive_mode{}; + TextureBufferUsage texture_buffer_usage{}; + + bool operator==(const ProgramVariant& rhs) const { + return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == + std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); + } + + bool operator!=(const ProgramVariant& rhs) const { + return !operator==(rhs); + } +}; + +/// Describes how a shader is used. 
struct ShaderDiskCacheUsage { u64 unique_identifier{}; - BaseBindings bindings; - GLenum primitive{}; + ProgramVariant variant; bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, bindings, primitive) == - std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive); + return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); } bool operator!=(const ShaderDiskCacheUsage& rhs) const { @@ -80,11 +96,20 @@ struct hash { } }; +template <> +struct hash { + std::size_t operator()(const OpenGL::ProgramVariant& variant) const { + return std::hash()(variant.base_bindings) ^ + std::hash()(variant.texture_buffer_usage) ^ + (static_cast(variant.primitive_mode) << 6); + } +}; + template <> struct hash { std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast(usage.unique_identifier) ^ - std::hash()(usage.bindings) ^ usage.primitive << 16; + std::hash()(usage.variant); } }; @@ -288,13 +313,15 @@ private: // Core system Core::System& system; - // Stored transferable shaders - std::map> transferable; - // Stores whole precompiled cache which will be read from/saved to the precompiled cache file + // Stores whole precompiled cache which will be read from or saved to the precompiled chache + // file FileSys::VectorVfsFile precompiled_cache_virtual_file; // Stores the current offset of the precompiled cache file for IO purposes std::size_t precompiled_cache_virtual_file_offset = 0; + // Stored transferable shaders + std::unordered_map> transferable; + // The cache has been loaded at boot bool tried_to_load{}; }; From 06c4ce86458310870abec90ada68ac393256b9b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 02:07:18 -0300 Subject: [PATCH 052/113] shader: Decode SUST and implement backing image functionality --- CMakeModules/GenerateSCMRev.cmake | 1 + src/common/CMakeLists.txt | 1 + src/video_core/CMakeLists.txt | 1 + 
src/video_core/engines/shader_bytecode.h | 66 +++++++++++++- .../renderer_opengl/gl_shader_decompiler.cpp | 70 +++++++++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 7 ++ src/video_core/shader/decode.cpp | 1 + src/video_core/shader/decode/image.cpp | 89 +++++++++++++++++++ src/video_core/shader/node.h | 42 ++++++++- src/video_core/shader/shader_ir.h | 9 ++ 10 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 src/video_core/shader/decode/image.cpp diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 31edeb63d..dd65cfe42 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -70,6 +70,7 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..8ae05137b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/half_set.cpp" "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/hfma2.cpp" + "${VIDEO_CORE}/shader/decode/image.cpp" "${VIDEO_CORE}/shader/decode/integer_set.cpp" "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" "${VIDEO_CORE}/shader/decode/memory.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9d43f03d2..6839abe71 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -90,6 +90,7 @@ add_library(video_core STATIC shader/decode/conversion.cpp shader/decode/memory.cpp shader/decode/texture.cpp + shader/decode/image.cpp shader/decode/float_set_predicate.cpp shader/decode/integer_set_predicate.cpp 
shader/decode/half_set_predicate.cpp diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5b32e1249..54a1a04f9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -126,6 +126,15 @@ union Sampler { u64 value{}; }; +union Image { + Image() = default; + + constexpr explicit Image(u64 value) : value{value} {} + + BitField<36, 13, u64> index; + u64 value; +}; + } // namespace Tegra::Shader namespace std { @@ -344,6 +353,26 @@ enum class TextureMiscMode : u64 { PTP, }; +enum class SurfaceDataMode : u64 { + P = 0, + D_BA = 1, +}; + +enum class OutOfBoundsStore : u64 { + Ignore = 0, + Clamp = 1, + Trap = 2, +}; + +enum class ImageType : u64 { + Texture1D = 0, + TextureBuffer = 1, + Texture1DArray = 2, + Texture2D = 3, + Texture2DArray = 4, + Texture3D = 5, +}; + enum class IsberdMode : u64 { None = 0, Patch = 1, @@ -398,7 +427,7 @@ enum class LmemLoadCacheManagement : u64 { CV = 3, }; -enum class LmemStoreCacheManagement : u64 { +enum class StoreCacheManagement : u64 { Default = 0, CG = 1, CS = 2, @@ -811,7 +840,7 @@ union Instruction { } ld_l; union { - BitField<44, 2, LmemStoreCacheManagement> cache_management; + BitField<44, 2, StoreCacheManagement> cache_management; } st_l; union { @@ -1294,6 +1323,35 @@ union Instruction { } } tlds; + union { + BitField<24, 2, StoreCacheManagement> cache_management; + BitField<33, 3, ImageType> image_type; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + BitField<51, 1, u64> is_immediate; + BitField<52, 1, SurfaceDataMode> mode; + + BitField<20, 3, StoreType> store_data_layout; + BitField<20, 4, u64> component_mask_selector; + + bool IsComponentEnabled(std::size_t component) const { + ASSERT(mode == SurfaceDataMode::P); + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array mask = { + 0, (R), (G), (R | G), (B), (R | B), + (G | B), (R | G | B), (A), 
(R | A), (G | A), (R | G | A), + (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask_selector)}.test(component); + } + + StoreType GetStoreDataLayout() const { + ASSERT(mode == SurfaceDataMode::D_BA); + return store_data_layout; + } + } sust; + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1385,6 +1443,7 @@ union Instruction { Attribute attribute; Sampler sampler; + Image image; u64 value; }; @@ -1428,6 +1487,7 @@ public: TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level + SUST, // Surface Store EXIT, IPA, OUT_R, // Emit vertex/primitive @@ -1558,6 +1618,7 @@ public: Synch, Memory, Texture, + Image, FloatSet, FloatSetPredicate, IntegerSet, @@ -1703,6 +1764,7 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), + INST("11101011001-----", Id::SUST, Type::Image, "SUST"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ece386cdc..2ae2f1db2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -180,6 +180,7 @@ public: DeclareGlobalMemory(); DeclareSamplers(); DeclarePhysicalAttributeReader(); + DeclareImages(); code.AddLine("void execute_{}() {{", suffix); ++code.scope; @@ -531,6 +532,36 @@ private: code.AddNewLine(); } + void DeclareImages() { + const auto& images{ir.GetImages()}; + for (const auto& image : images) { + const std::string image_type = [&]() { + switch (image.GetType()) { + case 
Tegra::Shader::ImageType::Texture1D: + return "image1D"; + case Tegra::Shader::ImageType::TextureBuffer: + return "bufferImage"; + case Tegra::Shader::ImageType::Texture1DArray: + return "image1DArray"; + case Tegra::Shader::ImageType::Texture2D: + return "image2D"; + case Tegra::Shader::ImageType::Texture2DArray: + return "image2DArray"; + case Tegra::Shader::ImageType::Texture3D: + return "image3D"; + default: + UNREACHABLE(); + return "image1D"; + } + }(); + code.AddLine("layout (binding = IMAGE_BINDING_" + std::to_string(image.GetIndex()) + + ") coherent volatile writeonly uniform " + image_type + ' ' + + GetImage(image) + ';'); + } + if (!images.empty()) + code.AddNewLine(); + } + void VisitBlock(const NodeBlock& bb) { for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { @@ -1478,6 +1509,39 @@ private: return tmp; } + std::string ImageStore(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get(operation.GetMeta())}; + + std::string expr = "imageStore("; + expr += GetImage(meta.image); + expr += ", "; + + const std::size_t coords_count{operation.GetOperandsCount()}; + expr += constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i, Type::Int); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += "), "; + + const std::size_t values_count{meta.values.size()}; + UNIMPLEMENTED_IF(values_count != 4); + expr += "vec4("; + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += "));"; + + code.AddLine(expr); + return {}; + } + std::string Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1718,6 +1782,8 @@ private: &GLSLDecompiler::TextureQueryLod, &GLSLDecompiler::TexelFetch, + &GLSLDecompiler::ImageStore, + &GLSLDecompiler::Branch, 
&GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PopFlowStack, @@ -1786,6 +1852,10 @@ private: return GetDeclarationWithSuffix(static_cast(sampler.GetIndex()), "sampler"); } + std::string GetImage(const Image& image) const { + return GetDeclarationWithSuffix(static_cast(image.GetIndex()), "image"); + } + void EmitIfdefIsBuffer(const Sampler& sampler) { code.AddLine(fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex())); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 33ad9764a..97ce214b1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -935,6 +935,11 @@ private: return {}; } + Id ImageStore(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1326,6 +1331,8 @@ private: &SPIRVDecompiler::TextureQueryLod, &SPIRVDecompiler::TexelFetch, + &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::PushFlowStack, &SPIRVDecompiler::PopFlowStack, diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a0554c97e..2c9ff28f2 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -169,6 +169,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, + {OpCode::Type::Image, &ShaderIR::DecodeImage}, {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..66fdf5714 --- 
/dev/null +++ b/src/video_core/shader/decode/image.cpp @@ -0,0 +1,89 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +namespace { +std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { + switch (image_type) { + case Tegra::Shader::ImageType::Texture1D: + case Tegra::Shader::ImageType::TextureBuffer: + return 1; + case Tegra::Shader::ImageType::Texture1DArray: + case Tegra::Shader::ImageType::Texture2D: + return 2; + case Tegra::Shader::ImageType::Texture2DArray: + case Tegra::Shader::ImageType::Texture3D: + return 3; + } + UNREACHABLE(); + return 1; +} +} // Anonymous namespace + +u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::SUST: { + UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); + UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); + UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store + + std::vector values; + constexpr std::size_t hardcoded_size{4}; + for (std::size_t i = 0; i < hardcoded_size; ++i) { + values.push_back(GetRegister(instr.gpr0.Value() + i)); + } + + std::vector coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + ASSERT(instr.sust.is_immediate); + const auto& 
image{GetImage(instr.image, instr.sust.image_type)}; + MetaImage meta{image, values}; + const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; + bb.push_back(store); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { + const auto offset{static_cast(image.index.Value())}; + + // If this image has already been used, return the existing mapping. + const auto itr{std::find_if(used_images.begin(), used_images.end(), + [=](const Image& entry) { return entry.GetOffset() == offset; })}; + if (itr != used_images.end()) { + ASSERT(itr->GetType() == type); + return *itr; + } + + // Otherwise create a new mapping for this image. + const std::size_t next_index{used_images.size()}; + const Image entry{offset, next_index, type}; + return *used_images.emplace(entry).first; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3cfb911bb..8b8d83ae7 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -146,6 +146,8 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 + ImageStore, /// (MetaImage, float[N] coords) -> void + Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void PopFlowStack, /// () -> void @@ -263,6 +265,39 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 
}; +class Image { +public: + explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) + : offset{offset}, index{index}, type{type}, is_bindless{false} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::ImageType GetType() const { + return type; + } + + bool IsBindless() const { + return is_bindless; + } + + bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); + } + +private: + std::size_t offset{}; + std::size_t index{}; + Tegra::Shader::ImageType type{}; + bool is_bindless{}; +}; + struct GlobalMemoryBase { u32 cbuf_index{}; u32 cbuf_offset{}; @@ -289,8 +324,13 @@ struct MetaTexture { u32 element{}; }; +struct MetaImage { + const Image& image; + std::vector values; +}; + /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant; +using Meta = std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 1b84c0672..c7f264371 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -104,6 +104,10 @@ public: return used_samplers; } + const std::set& GetImages() const { + return used_images; + } + const std::array& GetClipDistances() const { return used_clip_distances; @@ -154,6 +158,7 @@ private: u32 DecodeConversion(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); u32 DecodeTexture(NodeBlock& bb, u32 pc); + u32 DecodeImage(NodeBlock& bb, u32 pc); u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); @@ -254,6 +259,9 @@ private: Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + /// Accesses an image. 
+ const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -329,6 +337,7 @@ private: std::set used_output_attributes; std::map used_cbufs; std::set used_samplers; + std::set used_images; std::array used_clip_distances{}; std::map used_global_memory; bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes From 9097301d924ac9d873f04acdc247e8023edf1811 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 03:04:13 -0300 Subject: [PATCH 053/113] shader: Implement bindless images --- src/video_core/shader/decode/image.cpp | 30 ++++++++++++++++++++++++-- src/video_core/shader/node.h | 9 ++++++++ src/video_core/shader/shader_ir.h | 3 +++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 66fdf5714..199b6b793 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -55,8 +55,9 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + i)); } - ASSERT(instr.sust.is_immediate); - const auto& image{GetImage(instr.image, instr.sust.image_type)}; + const auto type{instr.sust.image_type}; + const auto& image{instr.sust.is_immediate ? 
GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; MetaImage meta{image, values}; const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; bb.push_back(store); @@ -86,4 +87,29 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image return *used_images.emplace(entry).first; } +const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, + Tegra::Shader::ImageType type) { + const Node image_register{GetRegister(reg)}; + const Node base_image{ + TrackCbuf(image_register, global_code, static_cast(global_code.size()))}; + const auto cbuf{std::get_if(base_image)}; + const auto cbuf_offset_imm{std::get_if(cbuf->GetOffset())}; + const auto cbuf_offset{cbuf_offset_imm->GetValue()}; + const auto cbuf_index{cbuf->GetIndex()}; + const auto cbuf_key{(static_cast(cbuf_index) << 32) | static_cast(cbuf_offset)}; + + // If this image has already been used, return the existing mapping. + const auto itr{std::find_if(used_images.begin(), used_images.end(), + [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })}; + if (itr != used_images.end()) { + ASSERT(itr->GetType() == type); + return *itr; + } + + // Otherwise create a new mapping for this image. 
+ const std::size_t next_index{used_images.size()}; + const Image entry{cbuf_index, cbuf_offset, next_index, type}; + return *used_images.emplace(entry).first; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 8b8d83ae7..2bf535928 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -270,6 +270,15 @@ public: explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) : offset{offset}, index{index}, type{type}, is_bindless{false} {} + explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::ImageType type) + : offset{(static_cast(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_bindless{true} {} + + explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + bool is_bindless) + : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {} + std::size_t GetOffset() const { return offset; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index c7f264371..e22548208 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -262,6 +262,9 @@ private: /// Accesses an image. const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + /// Access a bindless image sampler. 
+ const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); From 1bf4154e7d0589dab6922321bf39cf80f22c07d0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Apr 2019 02:37:15 -0300 Subject: [PATCH 054/113] gl_shader_decompiler: Implement image binding settings --- .../renderer_opengl/gl_shader_cache.cpp | 4 ++ .../renderer_opengl/gl_shader_decompiler.cpp | 3 ++ .../renderer_opengl/gl_shader_decompiler.h | 2 + .../renderer_opengl/gl_shader_disk_cache.cpp | 43 +++++++++++++++---- .../renderer_opengl/gl_shader_disk_cache.h | 24 ++++------- 5 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e859a900c..67789db73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -190,6 +190,10 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), base_bindings.sampler++); } + for (const auto& image : entries.images) { + source += + fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); + } // Transform 1D textures to texture samplers by declaring its preprocessor macros. 
for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2ae2f1db2..ca04d8618 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -235,6 +235,9 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } + for (const auto& image : ir.GetImages()) { + entries.images.emplace_back(image); + } for (const auto& gmem_pair : ir.GetGlobalMemory()) { const auto& [base, usage] = gmem_pair; entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c1569e737..14d11c7fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -27,6 +27,7 @@ struct ShaderEntries; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using ProgramResult = std::pair; using SamplerEntry = VideoCommon::Shader::Sampler; +using ImageEntry = VideoCommon::Shader::Image; class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { public: @@ -74,6 +75,7 @@ struct ShaderEntries { std::vector const_buffers; std::vector samplers; std::vector bindless_samplers; + std::vector images; std::vector global_memory_entries; std::array clip_distances{}; std::size_t shader_length{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d338ece8e..51d9aae94 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 { Dump, }; -constexpr u32 NativeVersion = 2; +constexpr u32 NativeVersion = 3; // Making sure sizes doesn't change by 
accident -static_assert(sizeof(BaseBindings) == 12); -static_assert(sizeof(ShaderDiskCacheUsage) == 32); +static_assert(sizeof(BaseBindings) == 16); +static_assert(sizeof(ShaderDiskCacheUsage) == 40); namespace { @@ -285,8 +285,7 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(code_size)) { return {}; } - - std::string code(code_size, '\0'); + std::vector code(code_size); if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } @@ -298,7 +297,6 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(const_buffers_count)) { return {}; } - for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; @@ -314,7 +312,6 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(samplers_count)) { return {}; } - for (u32 i = 0; i < samplers_count; ++i) { u64 offset{}; u64 index{}; @@ -332,11 +329,28 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn static_cast(type), is_array, is_shadow, is_bindless); } + u32 images_count{}; + if (!LoadObjectFromPrecompiled(images_count)) { + return {}; + } + for (u32 i = 0; i < images_count; ++i) { + u64 offset{}; + u64 index{}; + u32 type{}; + u8 is_bindless{}; + if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || + !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) { + return {}; + } + entry.entries.images.emplace_back( + static_cast(offset), static_cast(index), + static_cast(type), is_bindless != 0); + } + u32 global_memory_count{}; if (!LoadObjectFromPrecompiled(global_memory_count)) { return {}; } - for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; @@ -360,7 +374,6 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(shader_length)) { return {}; } - entry.entries.shader_length = static_cast(shader_length); return entry; @@ -400,6 +413,18 @@ bool 
ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: } } + if (!SaveObjectToPrecompiled(static_cast(entries.images.size()))) { + return false; + } + for (const auto& image : entries.images) { + if (!SaveObjectToPrecompiled(static_cast(image.GetOffset())) || + !SaveObjectToPrecompiled(static_cast(image.GetIndex())) || + !SaveObjectToPrecompiled(static_cast(image.GetType())) || + !SaveObjectToPrecompiled(static_cast(image.IsBindless() ? 1 : 0))) { + return false; + } + } + if (!SaveObjectToPrecompiled(static_cast(entries.global_memory_entries.size()))) { return false; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 7c9f0cc75..aa12ffc71 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -45,9 +45,11 @@ struct BaseBindings { u32 cbuf{}; u32 gmem{}; u32 sampler{}; + u32 image{}; bool operator==(const BaseBindings& rhs) const { - return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); + return std::tie(cbuf, gmem, sampler, image) == + std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); } bool operator!=(const BaseBindings& rhs) const { @@ -91,8 +93,11 @@ namespace std { template <> struct hash { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { - return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; + std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + return static_cast(bindings.cbuf) ^ + (static_cast(bindings.gmem) << 8) ^ + (static_cast(bindings.sampler) << 16) ^ + (static_cast(bindings.image) << 24); } }; @@ -300,19 +305,8 @@ private: return LoadArrayFromPrecompiled(&object, 1); } - bool LoadObjectFromPrecompiled(bool& object) { - u8 value; - const bool read_ok = LoadArrayFromPrecompiled(&value, 1); - if (!read_ok) { - return false; - } - - object = value != 0; - return true; - } - - // Core 
system Core::System& system; + // Stores whole precompiled cache which will be read from or saved to the precompiled chache // file FileSys::VectorVfsFile precompiled_cache_virtual_file; From 175aa343ff1c9f931b266caf2d19b8df943dab0d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 18 May 2019 04:57:49 -0400 Subject: [PATCH 055/113] texture_cache: Fermi2D reform and implement View Mirage This also does some fixes on compressed textures reinterpret and on the Fermi2D engine in general. --- src/video_core/engines/fermi_2d.cpp | 10 ++-- src/video_core/engines/fermi_2d.h | 44 +++++++++++---- src/video_core/rasterizer_interface.h | 3 +- .../renderer_opengl/gl_framebuffer_cache.cpp | 8 +-- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_opengl/gl_texture_cache.cpp | 54 +++++++++---------- .../renderer_opengl/gl_texture_cache.h | 6 +-- src/video_core/texture_cache/surface_base.h | 15 ++++-- src/video_core/texture_cache/surface_params.h | 14 +++++ src/video_core/texture_cache/texture_cache.h | 40 ++++++++------ 11 files changed, 125 insertions(+), 77 deletions(-) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 55966eef1..d63b82838 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -4,7 +4,6 @@ #include "common/assert.h" #include "common/logging/log.h" -#include "common/math_util.h" #include "video_core/engines/fermi_2d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -35,7 +34,7 @@ void Fermi2D::HandleSurfaceCopy() { static_cast(regs.operation)); // TODO(Subv): Only raw copies are implemented. 
- ASSERT(regs.operation == Regs::Operation::SrcCopy); + ASSERT(regs.operation == Operation::SrcCopy); const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; @@ -48,8 +47,13 @@ void Fermi2D::HandleSurfaceCopy() { const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, regs.blit_dst_x + regs.blit_dst_width, regs.blit_dst_y + regs.blit_dst_height}; + Config copy_config; + copy_config.operation = regs.operation; + copy_config.filter = regs.blit_control.filter; + copy_config.src_rect = src_rect; + copy_config.dst_rect = dst_rect; - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { + if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 3d28afa91..0a4c7c5ad 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -9,6 +9,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" #include "video_core/gpu.h" namespace Tegra { @@ -38,6 +39,26 @@ public: /// Write the value to the register identified by method. 
void CallMethod(const GPU::MethodCall& method_call); + enum class Origin : u32 { + Center = 0, + Corner = 1, + }; + + enum class Filter : u32 { + PointSample = 0, // Nearest + Linear = 1, + }; + + enum class Operation : u32 { + SrcCopyAnd = 0, + ROPAnd = 1, + Blend = 2, + SrcCopy = 3, + ROP = 4, + SrcCopyPremult = 5, + BlendPremult = 6, + }; + struct Regs { static constexpr std::size_t NUM_REGS = 0x258; @@ -76,16 +97,6 @@ public: }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); - enum class Operation : u32 { - SrcCopyAnd = 0, - ROPAnd = 1, - Blend = 2, - SrcCopy = 3, - ROP = 4, - SrcCopyPremult = 5, - BlendPremult = 6, - }; - union { struct { INSERT_PADDING_WORDS(0x80); @@ -102,7 +113,11 @@ public: INSERT_PADDING_WORDS(0x177); - u32 blit_control; + union { + u32 raw; + BitField<0, 1, Origin> origin; + BitField<4, 1, Filter> filter; + } blit_control; INSERT_PADDING_WORDS(0x8); @@ -121,6 +136,13 @@ public: }; } regs{}; + struct Config { + Operation operation; + Filter filter; + Common::Rectangle src_rect; + Common::Rectangle dst_rect; + }; + private: VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3c18d3b1f..6007e8c2e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -52,8 +52,7 @@ public: /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { + const Tegra::Engines::Fermi2D::Config& copy_config) { return false; } diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp index bb9f9b81f..7c926bd48 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -37,7 +37,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK if (key.is_single_buffer) { if (key.color_attachments[0] != GL_NONE && key.colors[0]) { - key.colors[0]->Attach(key.color_attachments[0]); + key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER); glDrawBuffer(key.color_attachments[0]); } else { glDrawBuffer(GL_NONE); @@ -45,14 +45,16 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK } else { for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { if (key.colors[index]) { - key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index)); + key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast(index), + GL_DRAW_FRAMEBUFFER); } } glDrawBuffers(key.colors_count, key.color_attachments.data()); } if (key.zeta) { - key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT); + key.zeta->Attach(key.stencil_enable ? 
GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT, + GL_DRAW_FRAMEBUFFER); } return framebuffer; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4f7eeb22c..d613cb1dc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -696,10 +696,9 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { + const Tegra::Engines::Fermi2D::Config& copy_config) { MICROPROFILE_SCOPE(OpenGL_Blits); - texture_cache.DoFermiCopy(src, dst, src_rect, dst_rect); + texture_cache.DoFermiCopy(src, dst, copy_config); return true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 64c27660f..33582ac42 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -67,8 +67,7 @@ public: void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 197c9f02c..9e9734f9e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -378,26 +378,26 @@ 
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; -void CachedSurfaceView::Attach(GLenum attachment) const { +void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { ASSERT(params.num_layers == 1 && params.num_levels == 1); const auto& owner_params = surface.GetSurfaceParams(); switch (owner_params.target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), params.base_level); + glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTarget(), - surface.GetTexture(), params.base_level); + glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), + params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, attachment, surface.GetTexture(), - params.base_level, params.base_layer); + glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, + params.base_layer); break; default: UNIMPLEMENTED(); @@ -460,11 +460,10 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; +void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) { + const auto& src_params{src_view->GetSurfaceParams()}; + const auto& dst_params{dst_view->GetSurfaceParams()}; 
OpenGLState prev_state{OpenGLState::GetCurState()}; SCOPE_EXIT({ prev_state.Apply(); }); @@ -476,51 +475,46 @@ void TextureCacheOpenGL::ImageBlit(Surface src_surface, Surface dst_surface, u32 buffers{}; - UNIMPLEMENTED_IF(src_params.target != SurfaceTarget::Texture2D); - UNIMPLEMENTED_IF(dst_params.target != SurfaceTarget::Texture2D); - - const GLuint src_texture{src_surface->GetTexture()}; - const GLuint dst_texture{dst_surface->GetTexture()}; + UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); if (src_params.type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - src_texture, 0); + src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_texture, 0); + dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_COLOR_BUFFER_BIT; } else if (src_params.type == SurfaceType::Depth) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_texture, - 0); + src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_texture, - 0); + dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); buffers = GL_DEPTH_BUFFER_BIT; } else if (src_params.type == 
SurfaceType::DepthStencil) { glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_texture, 0); + src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_texture, 0); + dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; } + const Common::Rectangle& src_rect = copy_config.src_rect; + const Common::Rectangle& dst_rect = copy_config.dst_rect; + const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; + glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + is_linear ? 
GL_LINEAR : GL_NEAREST); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0ba42dbab..0b333e9e3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -73,7 +73,7 @@ public: ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER - void Attach(GLenum attachment) const; + void Attach(GLenum attachment, GLenum target) const; GLuint GetTexture() { if (is_proxy) { @@ -138,8 +138,8 @@ protected: void ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) override; - void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) override; + void ImageBlit(View src_view, View dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) override; private: OGLFramebuffer src_framebuffer; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index cb7f22706..a3dd1c607 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -126,14 +126,19 @@ public: return MatchStructureResult::None; } // Tiled surface - if (std::tie(params.height, params.depth, params.block_width, params.block_height, - params.block_depth, params.tile_width_spacing, params.num_levels) == - std::tie(rhs.height, rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, rhs.tile_width_spacing, rhs.num_levels)) { - if (params.width == rhs.width) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { return MatchStructureResult::FullMatch; } - if 
(params.GetBlockAlignedWidth() == rhs.GetBlockAlignedWidth()) { + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), + params.pixel_format, rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if (std::tie(w1, params.height) == std::tie(ws, hs)) { return MatchStructureResult::SemiMatch; } } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index b3082173f..13a08a60f 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -126,6 +126,20 @@ public: /// Returns the size of a layer in bytes in host memory for a given mipmap level. std::size_t GetHostLayerSize(u32 level) const; + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + /// Returns the default block width. 
u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85c9160e0..593ceeaf6 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -141,11 +141,6 @@ public: return {}; } - if (regs.color_mask[index].raw == 0) { - SetEmptyColorBuffer(index); - return {}; - } - const auto& config{regs.rt[index]}; const auto gpu_addr{config.Address()}; if (!gpu_addr) { @@ -192,11 +187,11 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) { - TSurface dst_surface = GetFermiSurface(dst_config); - ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect); - dst_surface->MarkAsModified(true, Tick()); + const Tegra::Engines::Fermi2D::Config& copy_config) { + std::pair dst_surface = GetFermiSurface(dst_config); + std::pair src_surface = GetFermiSurface(src_config); + ImageBlit(src_surface.second, dst_surface.second, copy_config); + dst_surface.first->MarkAsModified(true, Tick()); } TSurface TryFindFramebufferSurface(const u8* host_ptr) { @@ -234,8 +229,8 @@ protected: virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TSurface src, TSurface dst, const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect) = 0; + virtual void ImageBlit(TView src_view, TView dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -282,10 +277,11 @@ protected: return new_surface; } - TSurface GetFermiSurface(const Tegra::Engines::Fermi2D::Regs::Surface& config) { + std::pair GetFermiSurface( + const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams 
params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true).first; + return GetSurface(gpu_addr, params, true); } Core::System& system; @@ -551,7 +547,21 @@ private: if (view.has_value()) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { - LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); + // On a mirage view, we need to recreate the surface under this new view + // and then obtain a view again. + SurfaceParams new_params = current_surface->GetSurfaceParams(); + const u32 wh = SurfaceParams::ConvertWidth( + new_params.width, new_params.pixel_format, params.pixel_format); + const u32 hh = SurfaceParams::ConvertHeight( + new_params.height, new_params.pixel_format, params.pixel_format); + new_params.width = wh; + new_params.height = hh; + new_params.pixel_format = params.pixel_format; + std::pair pair = RebuildSurface(current_surface, new_params); + std::optional mirage_view = + pair.first->EmplaceView(params, gpu_addr, candidate_size); + if (mirage_view) + return {pair.first, *mirage_view}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); } return {current_surface, *view}; From fcac55d5bff025fee822c2e7b0e06cdc178143dc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 07:56:53 -0400 Subject: [PATCH 056/113] texture_cache: Add checks for texture buffers. 
--- src/video_core/texture_cache/surface_base.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index a3dd1c607..210f27907 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -114,10 +114,23 @@ public: bool MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; - return std::tie(src_bpp, params.is_tiled) == std::tie(dst_bpp, rhs.is_tiled); + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width*params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + if (wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check if (!params.is_tiled) { if (std::tie(params.width, params.height, params.pitch) == std::tie(rhs.width, rhs.height, rhs.pitch)) { @@ -125,7 +138,8 @@ public: } return MatchStructureResult::None; } - // Tiled surface + + // Tiled Surface check if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, params.tile_width_spacing, params.num_levels) == std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, From e60ed2bb3e7e4ce63cc263019cce72a080c536ed Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 08:36:00 -0400 Subject: [PATCH 057/113] texture_cache: return null surface on invalid address --- src/video_core/texture_cache/texture_cache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/video_core/texture_cache/texture_cache.h 
b/src/video_core/texture_cache/texture_cache.h index 593ceeaf6..24c87127d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -483,6 +483,18 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + // Step 0: guarantee a valid surface + if (!cache_addr) { + // Return a null surface if it's invalid + SurfaceParams new_params = params; + new_params.width = 1; + new_params.height = 1; + new_params.depth = 1; + new_params.block_height = 0; + new_params.block_depth = 0; + return InitializeSurface(gpu_addr, new_params, false); + } + // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. From bdf9faab331cd79ca5c5e51c2369fc801e8cecea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 11:24:20 -0400 Subject: [PATCH 058/113] texture_cache: Handle uncontinuous surfaces. --- src/video_core/memory_manager.cpp | 5 +- src/video_core/texture_cache/surface_base.cpp | 47 +++++++++++++++---- src/video_core/texture_cache/surface_base.h | 34 ++++++++++++-- src/video_core/texture_cache/texture_cache.h | 17 ++++--- 4 files changed, 82 insertions(+), 21 deletions(-) diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 5d8d126c1..322453116 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -202,11 +202,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { } bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { - const GPUVAddr end = start + size; + const std::size_t inner_size = size - 1; + const GPUVAddr end = start + inner_size; const auto host_ptr_start = reinterpret_cast(GetPointer(start)); const auto host_ptr_end = reinterpret_cast(GetPointer(end)); const auto range = static_cast(host_ptr_end - host_ptr_start); - return range == size; + return 
range == inner_size; } void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index d4aa2c54b..7e90960f7 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -68,12 +68,27 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf } void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Load_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); + + // Handle continuouty + if (is_continuous) { + // Use physical memory directly + host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast(params.target)); @@ -123,12 +138,25 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, - std::vector& staging_buffer) { + StagingCache& staging_cache) { MICROPROFILE_SCOPE(GPU_Flush_Texture); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (!host_ptr) { - return; + auto& staging_buffer = staging_cache.GetBuffer(0); + u8* host_ptr; + + // Handle continuouty + if (is_continuous) { + // Use physical memory directly + host_ptr = 
memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return; + } + } else { + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); } + if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { @@ -154,6 +182,9 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } + if (!is_continuous) { + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); + } } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 210f27907..dacbc97c7 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,11 +32,28 @@ enum class MatchStructureResult : u32 { None = 2, }; +class StagingCache { +public: + StagingCache() {} + ~StagingCache() = default; + + std::vector& GetBuffer(std::size_t index) { + return staging_buffer[index]; + } + + void SetSize(std::size_t size) { + staging_buffer.resize(size); + } + +private: + std::vector> staging_buffer; +}; + class SurfaceBaseImpl { public: - void LoadBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); - void FlushBuffer(Tegra::MemoryManager& memory_manager, std::vector& staging_buffer); + void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); GPUVAddr GetGpuAddr() const { return gpu_addr; @@ -93,6 +110,14 @@ public: return mipmap_sizes[level]; } + void MarkAsContinuous(const bool is_continuous) { + this->is_continuous = is_continuous; + } + + bool IsContinuous() const { + return is_continuous; + } + bool IsLinear() const { return !params.is_tiled; } @@ -122,8 +147,8 @@ public: MatchStructureResult MatchesStructure(const 
SurfaceParams& rhs) const { // Buffer surface Check if (params.IsBuffer()) { - const std::size_t wd1 = params.width*params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width*rhs.GetBytesPerPixel(); + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); if (wd1 == wd2) { return MatchStructureResult::FullMatch; } @@ -193,6 +218,7 @@ protected: CacheAddr cache_addr{}; CacheAddr cache_addr_end{}; VAddr cpu_addr{}; + bool is_continuous{}; std::vector mipmap_sizes; std::vector mipmap_offsets; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24c87127d..ab4e094ea 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,6 +220,7 @@ protected: SetEmptyColorBuffer(i); } SetEmptyDepthBuffer(); + staging_cache.SetSize(2); } ~TextureCache() = default; @@ -244,6 +245,8 @@ protected: gpu_addr); return; } + bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -611,9 +614,9 @@ private: } void LoadSurface(const TSurface& surface) { - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_buffer); - surface->UploadTexture(staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + surface->LoadBuffer(*memory_manager, staging_cache); + surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -621,9 +624,9 @@ private: if (!surface->IsModified()) { return; } - staging_buffer.resize(surface->GetHostSizeInBytes()); - surface->DownloadTexture(staging_buffer); - surface->FlushBuffer(*memory_manager, staging_buffer); + staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); + 
surface->DownloadTexture(staging_cache.GetBuffer(0)); + surface->FlushBuffer(*memory_manager, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -723,7 +726,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector staging_buffer; + StagingCache staging_cache; std::recursive_mutex mutex; }; From ea1525dab1bf7e9e56471b6d5fd50014bfeb4f96 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 21 May 2019 12:48:28 -0400 Subject: [PATCH 059/113] Fix rebase errors --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 ++++++++-- .../renderer_opengl/gl_shader_disk_cache.cpp | 2 +- src/video_core/texture_cache/surface_params.h | 4 ++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d613cb1dc..8fe115aec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -822,8 +822,14 @@ TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, c unit.sampler = sampler_cache.GetSampler(texture.tsc); if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); + if (view->GetSurfaceParams().IsBuffer()) { + // Record that this texture is a texture buffer. + texture_buffer_usage.set(bindpoint); + } else { + // Apply swizzle to textures that are not buffers. 
+ view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + } state.texture_units[current_bindpoint].texture = view->GetTexture(); } else { // Can occur when texture addr is null or its memory is unmapped/invalid diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 51d9aae94..5ec911adc 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -291,7 +291,7 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn } ShaderDiskCacheDecompiled entry; - entry.code = std::move(code); + entry.code = std::string(reinterpret_cast(code.data()), code_size); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 13a08a60f..d9aa0b521 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -167,6 +167,10 @@ public: return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + bool IsBuffer() const { + return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; + } + std::string TargetName() const; bool is_tiled; From 0966665fc225eee29b3ed87baefd74f79c19d307 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 22 May 2019 12:30:53 -0400 Subject: [PATCH 060/113] texture_cache: Only load on recycle with accurate GPU. Testing so far has proven this to be quite safe as texture memory read added a 2-5ms load to the current cache. 
--- src/video_core/texture_cache/texture_cache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ab4e094ea..685bd28f4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -342,12 +342,13 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const bool untopological) { + const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { case RecycleStrategy::Ignore: { - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } case RecycleStrategy::Flush: { std::sort(overlaps.begin(), overlaps.end(), @@ -361,7 +362,7 @@ private: } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); - return InitializeSurface(gpu_addr, params, preserve_contents); + return InitializeSurface(gpu_addr, params, do_load); } } } From 92513541529e90f4f79a1f2c3f8ccf5a199e4c20 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 11:59:23 -0400 Subject: [PATCH 061/113] texture_cache: Correct copying between compressed and uncompressed formats --- src/video_core/texture_cache/surface_base.h | 9 ++++----- src/video_core/texture_cache/surface_params.h | 20 +++++++++++++++++++ src/video_core/texture_cache/texture_cache.h | 8 +++----- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index dacbc97c7..77c2d6758 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -235,9 +235,8 @@ private: for (u32 layer = 0; layer < layers; layer++) { for (u32 level = 0; level < mipmaps; 
level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{ - std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); result.emplace_back(width, height, layer, level); } } @@ -250,8 +249,8 @@ private: result.reserve(mipmaps); for (u32 level = 0; level < mipmaps; level++) { - const u32 width{std::min(params.GetMipWidth(level), in_params.GetMipWidth(level))}; - const u32 height{std::min(params.GetMipHeight(level), in_params.GetMipHeight(level))}; + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; result.emplace_back(width, height, depth, level); } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index d9aa0b521..c3affd621 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -140,6 +140,26 @@ public: return (height * bh2 + bh1 - 1) / bh1; } + // this finds the maximun possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // this finds the maximun possible height between 2 2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& 
src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + /// Returns the default block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 685bd28f4..d2093e581 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -444,11 +444,9 @@ private: } modified |= surface->IsModified(); // Now we got all the data set up - const u32 dst_width{params.GetMipWidth(mipmap)}; - const u32 dst_height{params.GetMipHeight(mipmap)}; - const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, - std::min(src_params.width, dst_width), - std::min(src_params.height, dst_height), 1); + const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); + const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); + const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); passed_tests++; ImageCopy(surface, new_surface, copy_params); } From 228f516bb4426a41a4d1c1756751557f7a0eecda Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 May 2019 15:34:31 -0400 Subject: [PATCH 062/113] texture_cache uncompress-compress is untopological. This makes conflicts between non compress and compress textures to be auto recycled. It also limits the amount of mipmaps a texture can have if it goes above it's limit. 
--- .../renderer_opengl/gl_texture_cache.cpp | 10 +++---- src/video_core/texture_cache/surface_base.h | 18 +++++++++++-- .../texture_cache/surface_params.cpp | 4 +++ src/video_core/texture_cache/surface_params.h | 14 ++++++++++ src/video_core/texture_cache/texture_cache.h | 26 ++++++++++--------- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9e9734f9e..e6f08a764 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -195,17 +195,17 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte switch (params.target) { case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.num_levels, internal_format, params.width); + glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); break; case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, params.height); break; case SurfaceTarget::Texture3D: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.num_levels, internal_format, params.width, + glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, params.height, params.depth); break; default: @@ -245,7 +245,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { // TODO(Rodrigo): Optimize alignment SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, 
static_cast(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); @@ -264,7 +264,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { void CachedSurface::UploadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); - for (u32 level = 0; level < params.num_levels; ++level) { + for (u32 level = 0; level < params.emulated_levels; ++level) { UploadTextureMipmap(level, staging_buffer); } } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 77c2d6758..70b5258c9 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -32,6 +32,12 @@ enum class MatchStructureResult : u32 { None = 2, }; +enum class MatchTopologyResult : u32 { + FullMatch = 0, + CompressUnmatch = 1, + None = 2, +}; + class StagingCache { public: StagingCache() {} @@ -136,12 +142,20 @@ public: params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - bool MatchesTopology(const SurfaceParams& rhs) const { + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { const u32 src_bpp{params.GetBytesPerPixel()}; const u32 dst_bpp{rhs.GetBytesPerPixel()}; const bool ib1 = params.IsBuffer(); const bool ib2 = rhs.IsBuffer(); - return std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; } MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 
d9d157d02..77c09264a 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -85,6 +85,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, } params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); params.is_layered = params.IsLayered(); return params; } @@ -109,6 +110,7 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer( params.depth = 1; params.pitch = 0; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -139,6 +141,7 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.depth = 1; params.target = SurfaceTarget::Texture2D; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = false; return params; } @@ -163,6 +166,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( params.target = SurfaceTarget::Texture2D; params.depth = 1; params.num_levels = 1; + params.emulated_levels = 1; params.is_layered = params.IsLayered(); return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c3affd621..5fde695b6 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -160,6 +160,19 @@ public: return std::min(t_src_height, t_dst_height); } + u32 MaxPossibleMipmap() const { + const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; + const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; + const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); + if (target != VideoCore::Surface::SurfaceTarget::Texture3D) + return max_mipmap; + return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); + } + + bool IsCompressed() const { + return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; + } + /// Returns the default 
block width. u32 GetDefaultBlockWidth() const { return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); @@ -205,6 +218,7 @@ public: u32 depth; u32 pitch; u32 num_levels; + u32 emulated_levels; VideoCore::Surface::PixelFormat pixel_format; VideoCore::Surface::ComponentType component_type; VideoCore::Surface::SurfaceType type; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2093e581..69ef7a2bd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -305,7 +305,7 @@ private: * due to topological reasons. **/ RecycleStrategy PickStrategy(std::vector& overlaps, const SurfaceParams& params, - const GPUVAddr gpu_addr, const bool untopological) { + const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { if (Settings::values.use_accurate_gpu_emulation) { return RecycleStrategy::Flush; } @@ -320,8 +320,8 @@ private: } } // Untopological decision - if (untopological) { - return RecycleStrategy::Ignore; + if (untopological == MatchTopologyResult::CompressUnmatch) { + return RecycleStrategy::Flush; } return RecycleStrategy::Ignore; } @@ -341,7 +341,7 @@ private: std::pair RecycleSurface(std::vector& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, - const bool untopological) { + const MatchTopologyResult untopological) { const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; for (auto surface : overlaps) { Unregister(surface); @@ -502,9 +502,10 @@ private: // matches at certain level we are pretty much done. 
if (l1_cache.count(cache_addr) > 0) { TSurface current_surface = l1_cache[cache_addr]; - if (!current_surface->MatchesTopology(params)) { + auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -534,8 +535,9 @@ private: // we do a topological test to ensure we can find some relationship. If it fails // inmediatly recycle the texture for (auto surface : overlaps) { - if (!surface->MatchesTopology(params)) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true); + auto topological_result = surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } } @@ -553,7 +555,7 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -576,13 +578,13 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); 
+ return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -599,7 +601,7 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, From 60bf761afbb125abd324e4b798d18a1611b5777b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 19:12:00 -0400 Subject: [PATCH 063/113] texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 + .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_resource_manager.cpp | 8 ++ .../renderer_opengl/gl_resource_manager.h | 3 + .../renderer_opengl/gl_texture_cache.cpp | 92 ++++++++++++++++++- .../renderer_opengl/gl_texture_cache.h | 9 +- src/video_core/texture_cache/texture_cache.h | 40 ++++++-- 8 files changed, 148 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 65a88b06c..ad15ea54e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -28,6 +28,7 @@ Device::Device() { max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); + is_turing_plus = GLAD_GL_NV_mesh_shader; } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..1afe16779 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ 
b/src/video_core/renderer_opengl/gl_device.h @@ -34,6 +34,10 @@ public: return has_component_indexing_bug; } + bool IsTuringGPU() const { + return is_turing_plus; + } + private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -43,6 +47,7 @@ private: u32 max_varyings{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; + bool is_turing_plus{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8fe115aec..97c55f2ec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -82,7 +82,7 @@ struct DrawParameters { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, + : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, global_cache{*this}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 9f81c15cb..a1f91d677 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -148,6 +148,14 @@ void OGLBuffer::Release() { handle = 0; } +void OGLBuffer::MakePersistant(std::size_t buffer_size) { + if (handle == 0 || buffer_size == 0) + return; + + const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT; + glNamedBufferStorage(handle, static_cast(buffer_size), nullptr, flags); +} + void OGLSync::Create() { if (handle != 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 310ee2bf3..f2873ef96 100644 --- 
a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -186,6 +186,9 @@ public: /// Deletes the internal OpenGL resource void Release(); + // Converts the buffer into a persistant storage buffer + void MakePersistant(std::size_t buffer_size); + GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e6f08a764..bddb15cb1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/microprofile.h" #include "common/scope_exit.h" @@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer) + VideoCore::RasterizerInterface& rasterizer, + const Device& device) : TextureCacheBase{system, rasterizer} { + support_info.depth_color_image_copies = !device.IsTuringGPU(); src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, const VideoCommon::CopyParams& copy_params) { + if (!support_info.depth_color_image_copies) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + if (src_params.type != dst_params.type) { + // A fallback is needed + return; + } + } const auto src_handle = src_surface->GetTexture(); const auto src_target = src_surface->GetTarget(); const auto dst_handle = dst_surface->GetTexture(); @@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, is_linear ? 
GL_LINEAR : GL_NEAREST); } +void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + + const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); + const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); + + const std::size_t source_size = src_surface->GetHostSizeInBytes(); + const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); + + const std::size_t buffer_size = std::max(source_size, dest_size); + + GLuint copy_pbo_handle = FetchPBO(buffer_size); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); + + if (source_format.compressed) { + glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast(source_size), + nullptr); + } else { + glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, + static_cast(source_size), nullptr); + } + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); + + const GLsizei width = static_cast(dst_params.width); + const GLsizei height = static_cast(dst_params.height); + const GLsizei depth = static_cast(dst_params.depth); + if (dest_format.compressed) { + LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); + UNREACHABLE(); + } else { + switch (dst_params.target) { + case SurfaceTarget::Texture1D: + glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, + dest_format.type, nullptr); + break; + case SurfaceTarget::Texture2D: + glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, + dest_format.format, dest_format.type, nullptr); + break; + case SurfaceTarget::Texture3D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubeArray: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, 
nullptr); + break; + case SurfaceTarget::TextureCubemap: + glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, + dest_format.format, dest_format.type, nullptr); + break; + default: + LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", + static_cast(dst_params.target)); + UNREACHABLE(); + } + } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glTextureBarrier(); +} + +GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { + if (buffer_size < 0) { + UNREACHABLE(); + return 0; + } + const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); + OGLBuffer& cp = copy_pbo_cache[l2]; + if (cp.handle == 0) { + const std::size_t ceil_size = 1ULL << l2; + cp.Create(); + cp.MakePersistant(ceil_size); + } + return cp.handle; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 0b333e9e3..f514f137c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/texture_cache/texture_cache.h" @@ -129,7 +130,8 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const Device& device); ~TextureCacheOpenGL(); protected: @@ -141,9 +143,14 @@ protected: void ImageBlit(View src_view, View dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) override; + void BufferCopy(Surface src_surface, Surface dst_surface) override; + private: + GLuint FetchPBO(std::size_t buffer_size); + OGLFramebuffer src_framebuffer; OGLFramebuffer dst_framebuffer; + 
std::unordered_map copy_pbo_cache; }; } // namespace OpenGL diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ef7a2bd..e0d0e1f70 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -214,6 +214,13 @@ public: } protected: + // This structure is used for communicating with the backend, on which behaviors + // it supports and what not, to avoid assuming certain things about hardware. + // The backend is RESPONSIBLE for filling this settings on creation. + struct Support { + bool depth_color_image_copies; + } support_info; + TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +240,10 @@ protected: virtual void ImageBlit(TView src_view, TView dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; + // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture + // and reading it from a sepparate buffer. 
+ virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -377,9 +388,14 @@ private: const SurfaceParams& params) { const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); - std::vector bricks = current_surface->BreakDown(params); - for (auto& brick : bricks) { - ImageCopy(current_surface, new_surface, brick); + const auto& cr_params = current_surface->GetSurfaceParams(); + if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + BufferCopy(current_surface, new_surface); + } else { + std::vector bricks = current_surface->BreakDown(params); + for (auto& brick : bricks) { + ImageCopy(current_surface, new_surface, brick); + } } Unregister(current_surface); Register(new_surface); @@ -505,7 +521,8 @@ private: auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && @@ -537,7 +554,8 @@ private: for (auto surface : overlaps) { auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + topological_result); } } @@ -555,7 +573,8 @@ private: return *view; } } - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // Now we 
check if the candidate is a mipmap/layer of the overlap std::optional view = @@ -578,13 +597,15 @@ private: pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) return {pair.first, *mirage_view}; - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } return {current_surface, *view}; } // The next case is unsafe, so if we r in accurate GPU, just skip it if (Settings::values.use_accurate_gpu_emulation) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { @@ -601,7 +622,8 @@ private: } } // We failed all the tests, recycle the overlaps into a new texture. - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); } std::pair InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, From 3809041c24a6ebea009923c14fb36aa1031bf188 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 22:15:55 -0400 Subject: [PATCH 064/113] texture_cache: Optimize GetSurface and use references on functions that don't change a surface. 
--- src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 +++--- src/video_core/renderer_opengl/gl_texture_cache.h | 6 +++--- src/video_core/texture_cache/texture_cache.h | 12 ++++++------ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index bddb15cb1..71f6888c6 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -450,7 +450,7 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams return std::make_shared(gpu_addr, params); } -void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, +void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) { if (!support_info.depth_color_image_copies) { const auto& src_params = src_surface->GetSurfaceParams(); @@ -471,7 +471,7 @@ void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, copy_params.depth); } -void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, +void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; @@ -528,7 +528,7 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, is_linear ? 
GL_LINEAR : GL_NEAREST); } -void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { +void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { const auto& src_params = src_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index f514f137c..dda3bf715 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -137,13 +137,13 @@ public: protected: Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; - void ImageCopy(Surface src_surface, Surface dst_surface, + void ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) override; - void ImageBlit(View src_view, View dst_view, + void ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) override; - void BufferCopy(Surface src_surface, Surface dst_surface) override; + void BufferCopy(Surface& src_surface, Surface& dst_surface) override; private: GLuint FetchPBO(std::size_t buffer_size); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e0d0e1f70..951168357 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -234,15 +234,15 @@ protected: virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; - virtual void ImageCopy(TSurface src_surface, TSurface dst_surface, + virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, const CopyParams& copy_params) = 0; - virtual void ImageBlit(TView src_view, TView dst_view, + virtual void ImageBlit(TView& src_view, TView& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) = 0; // Depending on the backend, a buffer copy can be slow as it means 
deoptimizing the texture // and reading it from a sepparate buffer. - virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; + virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { std::lock_guard lock{mutex}; @@ -516,8 +516,9 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. - if (l1_cache.count(cache_addr) > 0) { - TSurface current_surface = l1_cache[cache_addr]; + auto iter = l1_cache.find(cache_addr); + if (iter != l1_cache.end()) { + TSurface& current_surface = iter->second; auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; @@ -526,7 +527,6 @@ private: } MatchStructureResult s_result = current_surface->MatchesStructure(params); if (s_result != MatchStructureResult::None && - current_surface->GetGpuAddr() == gpu_addr && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { From 9f755218a1359cbd004e6c287f5fead0897c1d11 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jun 2019 23:03:22 -0400 Subject: [PATCH 065/113] texture_cache: move some large methods to cpp files --- src/video_core/texture_cache/surface_base.cpp | 103 +++++++++++++++++ src/video_core/texture_cache/surface_base.h | 106 ++---------------- .../texture_cache/surface_params.cpp | 33 ------ src/video_core/texture_cache/surface_params.h | 32 ++++-- 4 files changed, 135 insertions(+), 139 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7e90960f7..8c6edb04f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -42,6 +42,109 @@ SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, 
const SurfaceParams& params) } } +MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { + const u32 src_bpp{params.GetBytesPerPixel()}; + const u32 dst_bpp{rhs.GetBytesPerPixel()}; + const bool ib1 = params.IsBuffer(); + const bool ib2 = rhs.IsBuffer(); + if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { + const bool cb1 = params.IsCompressed(); + const bool cb2 = rhs.IsCompressed(); + if (cb1 == cb2) { + return MatchTopologyResult::FullMatch; + } + return MatchTopologyResult::CompressUnmatch; + } + return MatchTopologyResult::None; +} + +MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { + // Buffer surface Check + if (params.IsBuffer()) { + const std::size_t wd1 = params.width * params.GetBytesPerPixel(); + const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); + if (wd1 == wd2) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Linear Surface check + if (!params.is_tiled) { + if (std::tie(params.width, params.height, params.pitch) == + std::tie(rhs.width, rhs.height, rhs.pitch)) { + return MatchStructureResult::FullMatch; + } + return MatchStructureResult::None; + } + + // Tiled Surface check + if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, + params.tile_width_spacing, params.num_levels) == + std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, + rhs.tile_width_spacing, rhs.num_levels)) { + if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { + return MatchStructureResult::FullMatch; + } + const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, + rhs.pixel_format); + const u32 hs = + SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); + const u32 w1 = params.GetBlockAlignedWidth(); + if (std::tie(w1, params.height) == std::tie(ws, hs)) { + return 
MatchStructureResult::SemiMatch; + } + } + return MatchStructureResult::None; +} + +std::optional> SurfaceBaseImpl::GetLayerMipmap( + const GPUVAddr candidate_gpu_addr) const { + if (candidate_gpu_addr < gpu_addr) { + return {}; + } + const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; + const auto layer{static_cast(relative_address / layer_size)}; + const GPUVAddr mipmap_address = relative_address - layer_size * layer; + const auto mipmap_it = + Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); + if (mipmap_it == mipmap_offsets.end()) { + return {}; + } + const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; + return std::make_pair(layer, level); +} + +std::vector SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { + const u32 layers{params.depth}; + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(static_cast(layers) * static_cast(mipmaps)); + + for (u32 layer = 0; layer < layers; layer++) { + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + result.emplace_back(width, height, layer, level); + } + } + return result; +} + +std::vector SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { + const u32 mipmaps{params.num_levels}; + std::vector result; + result.reserve(mipmaps); + + for (u32 level = 0; level < mipmaps; level++) { + const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); + const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); + const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; + result.emplace_back(width, height, depth, level); + } + return result; +} + void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& 
params, u8* buffer, u32 level) { const u32 width{params.GetMipWidth(level)}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 70b5258c9..9d19ecd5f 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -136,83 +136,17 @@ public: return params.target == target; } + MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; + + MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; + bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { return std::tie(gpu_addr, params.target, params.num_levels) == std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && params.target == SurfaceTarget::Texture2D && params.num_levels == 1; } - MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const { - const u32 src_bpp{params.GetBytesPerPixel()}; - const u32 dst_bpp{rhs.GetBytesPerPixel()}; - const bool ib1 = params.IsBuffer(); - const bool ib2 = rhs.IsBuffer(); - if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { - const bool cb1 = params.IsCompressed(); - const bool cb2 = rhs.IsCompressed(); - if (cb1 == cb2) { - return MatchTopologyResult::FullMatch; - } - return MatchTopologyResult::CompressUnmatch; - } - return MatchTopologyResult::None; - } - - MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const { - // Buffer surface Check - if (params.IsBuffer()) { - const std::size_t wd1 = params.width * params.GetBytesPerPixel(); - const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); - if (wd1 == wd2) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Linear Surface check - if (!params.is_tiled) { - if (std::tie(params.width, params.height, params.pitch) == - std::tie(rhs.width, rhs.height, rhs.pitch)) { - return MatchStructureResult::FullMatch; - } - return MatchStructureResult::None; - } - - // Tiled 
Surface check - if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, - params.tile_width_spacing, params.num_levels) == - std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, - rhs.tile_width_spacing, rhs.num_levels)) { - if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { - return MatchStructureResult::FullMatch; - } - const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), - params.pixel_format, rhs.pixel_format); - const u32 hs = - SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); - const u32 w1 = params.GetBlockAlignedWidth(); - if (std::tie(w1, params.height) == std::tie(ws, hs)) { - return MatchStructureResult::SemiMatch; - } - } - return MatchStructureResult::None; - } - - std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const { - if (candidate_gpu_addr < gpu_addr) { - return {}; - } - const auto relative_address{static_cast(candidate_gpu_addr - gpu_addr)}; - const auto layer{static_cast(relative_address / layer_size)}; - const GPUVAddr mipmap_address = relative_address - layer_size * layer; - const auto mipmap_it = - Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); - if (mipmap_it == mipmap_offsets.end()) { - return {}; - } - const auto level{static_cast(std::distance(mipmap_offsets.begin(), mipmap_it))}; - return std::make_pair(layer, level); - } + std::optional> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; std::vector BreakDown(const SurfaceParams& in_params) const { return params.is_layered ? 
BreakDownLayered(in_params) : BreakDownNonLayered(in_params); @@ -241,35 +175,9 @@ private: void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, u32 level); - std::vector BreakDownLayered(const SurfaceParams& in_params) const { - const u32 layers{params.depth}; - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(static_cast(layers) * static_cast(mipmaps)); + std::vector BreakDownLayered(const SurfaceParams& in_params) const; - for (u32 layer = 0; layer < layers; layer++) { - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); - } - } - return result; - } - - std::vector BreakDownNonLayered(const SurfaceParams& in_params) const { - const u32 mipmaps{params.num_levels}; - std::vector result; - result.reserve(mipmaps); - - for (u32 level = 0; level < mipmaps; level++) { - const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); - const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; - result.emplace_back(width, height, depth, level); - } - return result; - } + std::vector BreakDownNonLayered(const SurfaceParams& in_params) const; }; template diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 77c09264a..60a7356bb 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -6,7 +6,6 @@ #include "common/alignment.h" #include "common/bit_util.h" -#include "common/cityhash.h" #include "core/core.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/surface.h" @@ -237,14 +236,6 @@ std::size_t 
SurfaceParams::GetConvertedMipmapOffset(u32 level) const { return offset; } -std::size_t SurfaceParams::GetGuestMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, false, false); -} - -std::size_t SurfaceParams::GetHostMipmapSize(u32 level) const { - return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); -} - std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { constexpr std::size_t rgb8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); @@ -253,10 +244,6 @@ std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { return width_t * height_t * depth_t * rgb8_bpp; } -std::size_t SurfaceParams::GetGuestLayerSize() const { - return GetLayerSize(false, false); -} - std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { std::size_t size = 0; for (u32 level = 0; level < num_levels; ++level) { @@ -269,16 +256,6 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co return size; } -std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { - ASSERT(target != SurfaceTarget::Texture3D); - return GetInnerMipmapMemorySize(level, true, false); -} - -bool SurfaceParams::IsPixelFormatZeta() const { - return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && - pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; -} - std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const { const bool tiled{as_host_size ? false : is_tiled}; @@ -289,16 +266,6 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size GetMipBlockHeight(level), GetMipBlockDepth(level)); } -std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, - bool uncompressed) const { - return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); -} - -std::size_t SurfaceParams::Hash() const { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof(*this))); -} - bool SurfaceParams::operator==(const SurfaceParams& rhs) const { return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, height, depth, pitch, num_levels, pixel_format, component_type, type, target) == diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 5fde695b6..c51e174cd 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -8,6 +8,7 @@ #include "common/alignment.h" #include "common/bit_util.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" @@ -39,7 +40,10 @@ public: static SurfaceParams CreateForFermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& config); - std::size_t Hash() const; + std::size_t Hash() const { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof(*this))); + } bool operator==(const SurfaceParams& rhs) const; @@ -113,18 +117,27 @@ public: std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. - std::size_t GetGuestMipmapSize(u32 level) const; + std::size_t GetGuestMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, false, false); + } /// Returns the size in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapSize(u32 level) const; + std::size_t GetHostMipmapSize(u32 level) const { + return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); + } std::size_t GetConvertedMipmapSize(u32 level) const; /// Returns the size of a layer in bytes in guest memory. 
- std::size_t GetGuestLayerSize() const; + std::size_t GetGuestLayerSize() const { + return GetLayerSize(false, false); + } /// Returns the size of a layer in bytes in host memory for a given mipmap level. - std::size_t GetHostLayerSize(u32 level) const; + std::size_t GetHostLayerSize(u32 level) const { + ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); + return GetInnerMipmapMemorySize(level, true, false); + } static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, VideoCore::Surface::PixelFormat pixel_format_to) { @@ -194,7 +207,10 @@ public: } /// Returns true if the pixel format is a depth and/or stencil format. - bool IsPixelFormatZeta() const; + bool IsPixelFormatZeta() const { + return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && + pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; + } SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); @@ -229,7 +245,9 @@ private: std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; /// Returns the size of all mipmap levels and aligns as needed. - std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const; + std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { + return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 
1U : depth); + } /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; From 6f69f06873f666174d3c0306055bc5f097d64afc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 4 Jun 2019 12:12:40 -0400 Subject: [PATCH 066/113] texture_cache: Don't Image Copy if component types differ --- src/video_core/texture_cache/texture_cache.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 951168357..d2c27bcef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -389,7 +389,8 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (!support_info.depth_color_image_copies && cr_params.type != params.type) { + if (cr_params.type != params.type && (!support_info.depth_color_image_copies || + cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); From b7de31ac97da9ac80be9f93180a934874b547b0e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 8 Jun 2019 11:25:11 -0400 Subject: [PATCH 067/113] shader_ir: Fix image copy rebase issues --- src/video_core/shader/decode/image.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 199b6b793..24f022cc0 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -3,10 +3,15 @@ // Refer to the license.txt file included. 
#include +#include +#include #include "common/assert.h" +#include "common/bit_field.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -92,8 +97,8 @@ const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, const Node image_register{GetRegister(reg)}; const Node base_image{ TrackCbuf(image_register, global_code, static_cast(global_code.size()))}; - const auto cbuf{std::get_if(base_image)}; - const auto cbuf_offset_imm{std::get_if(cbuf->GetOffset())}; + const auto cbuf{std::get_if(&*base_image)}; + const auto cbuf_offset_imm{std::get_if(&*cbuf->GetOffset())}; const auto cbuf_offset{cbuf_offset_imm->GetValue()}; const auto cbuf_index{cbuf->GetIndex()}; const auto cbuf_key{(static_cast(cbuf_index) << 32) | static_cast(cbuf_offset)}; From 561ce29c98bf822941061023e1f71a62175318ae Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 10 Jun 2019 10:39:59 -0400 Subject: [PATCH 068/113] texture_cache: correct mutex locks --- src/video_core/texture_cache/texture_cache.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d2c27bcef..503bd2b43 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -90,6 +90,7 @@ public: TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, const VideoCommon::Shader::Sampler& entry) { + std::lock_guard lock{mutex}; const auto gpu_addr{config.tic.Address()}; if (!gpu_addr) { return {}; @@ -99,6 +100,7 @@ public: } TView GetDepthBufferSurface(bool preserve_contents) { + std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.zeta_buffer) { @@ -127,6 +129,7 @@ public: } TView GetColorBufferSurface(std::size_t 
index, bool preserve_contents) { + std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty_flags.color_buffer[index]) { @@ -188,6 +191,7 @@ public: void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { + std::lock_guard lock{mutex}; std::pair dst_surface = GetFermiSurface(dst_config); std::pair src_surface = GetFermiSurface(src_config); ImageBlit(src_surface.second, dst_surface.second, copy_config); @@ -245,8 +249,6 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void Register(TSurface surface) { - std::lock_guard lock{mutex}; - const GPUVAddr gpu_addr = surface->GetGpuAddr(); const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); @@ -266,8 +268,6 @@ protected: } void Unregister(TSurface surface) { - std::lock_guard lock{mutex}; - if (guard_cache && surface->IsProtected()) { return; } From b01f9c8a7090fa056ca564593eabcebab946ef41 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 11 Jun 2019 07:20:27 -0400 Subject: [PATCH 069/113] texture_cache: eliminate accelerated depth->color/color->depth copies due to driver instability. 
--- src/video_core/renderer_opengl/gl_device.cpp | 1 - src/video_core/renderer_opengl/gl_device.h | 5 ----- src/video_core/renderer_opengl/gl_texture_cache.cpp | 13 +++++-------- src/video_core/texture_cache/texture_cache.h | 9 +-------- 4 files changed, 6 insertions(+), 22 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index ad15ea54e..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -28,7 +28,6 @@ Device::Device() { max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); - is_turing_plus = GLAD_GL_NV_mesh_shader; } Device::Device(std::nullptr_t) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1afe16779..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -34,10 +34,6 @@ public: return has_component_indexing_bug; } - bool IsTuringGPU() const { - return is_turing_plus; - } - private: static bool TestVariableAoffi(); static bool TestComponentIndexingBug(); @@ -47,7 +43,6 @@ private: u32 max_varyings{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; - bool is_turing_plus{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 71f6888c6..7c1d14138 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,7 +439,6 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const Device& device) : TextureCacheBase{system, rasterizer} { - support_info.depth_color_image_copies = !device.IsTuringGPU(); src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -452,13 +451,11 @@ Surface 
TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, const VideoCommon::CopyParams& copy_params) { - if (!support_info.depth_color_image_copies) { - const auto& src_params = src_surface->GetSurfaceParams(); - const auto& dst_params = dst_surface->GetSurfaceParams(); - if (src_params.type != dst_params.type) { - // A fallback is needed - return; - } + const auto& src_params = src_surface->GetSurfaceParams(); + const auto& dst_params = dst_surface->GetSurfaceParams(); + if (src_params.type != dst_params.type) { + // A fallback is needed + return; } const auto src_handle = src_surface->GetTexture(); const auto src_target = src_surface->GetTarget(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 503bd2b43..c95b1b976 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -218,12 +218,6 @@ public: } protected: - // This structure is used for communicating with the backend, on which behaviors - // it supports and what not, to avoid assuming certain things about hardware. - // The backend is RESPONSIBLE for filling this settings on creation. 
- struct Support { - bool depth_color_image_copies; - } support_info; TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { @@ -389,8 +383,7 @@ private: const auto gpu_addr = current_surface->GetGpuAddr(); TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type && (!support_info.depth_color_image_copies || - cr_params.component_type != params.component_type)) { + if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { BufferCopy(current_surface, new_surface); } else { std::vector bricks = current_surface->BreakDown(params); From a56f687793a0a24a368f0dafd5333daf8cbacecf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 12 Jun 2019 09:32:26 -0400 Subject: [PATCH 070/113] texture_cache: correct texture buffer on surface params --- src/video_core/texture_cache/surface_params.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 60a7356bb..f789da2c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -74,10 +74,17 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. 
- params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); - params.width = config.tic.Width(); - params.height = config.tic.Height(); - params.depth = config.tic.Depth(); + if (!config.tic.IsBuffer()) { + params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); + params.width = config.tic.Width(); + params.height = config.tic.Height(); + params.depth = config.tic.Depth(); + } else { + params.target = SurfaceTarget::TextureBuffer; + params.width = config.tic.Width(); + params.height = 0; + params.depth = 0; + } if (params.target == SurfaceTarget::TextureCubemap || params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; From cb728797b0cec79d756ba9395d24924882222103 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 12 Jun 2019 16:20:20 -0400 Subject: [PATCH 071/113] fermi2d: Correct Origin Mode --- src/video_core/engines/fermi_2d.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index d63b82838..0ee228e28 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -38,11 +38,16 @@ void Fermi2D::HandleSurfaceCopy() { const u32 src_blit_x1{static_cast(regs.blit_src_x >> 32)}; const u32 src_blit_y1{static_cast(regs.blit_src_y >> 32)}; - const u32 src_blit_x2{ - static_cast((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; - const u32 src_blit_y2{ - static_cast((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; - + u32 src_blit_x2, src_blit_y2; + if (regs.blit_control.origin == Origin::Corner) { + src_blit_x2 = + static_cast((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); + src_blit_y2 = + static_cast((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); + } else { + src_blit_x2 = static_cast((regs.blit_src_x >> 32) + regs.blit_dst_width); + src_blit_y2 = static_cast((regs.blit_src_y >> 32) + 
regs.blit_dst_height); + } const Common::Rectangle src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; const Common::Rectangle dst_rect{regs.blit_dst_x, regs.blit_dst_y, regs.blit_dst_x + regs.blit_dst_width, From 2d83553ea7ab2629e7e1a83cc3345c0115d69453 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 09:46:36 -0400 Subject: [PATCH 072/113] texture_cache: Implement siblings texture formats. --- src/video_core/texture_cache/surface_base.h | 4 ++ src/video_core/texture_cache/texture_cache.h | 39 ++++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 9d19ecd5f..58265e9d3 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -132,6 +132,10 @@ public: return params.pixel_format == pixel_format; } + VideoCore::Surface::PixelFormat GetFormat() const { + return params.pixel_format; + } + bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { return params.target == target; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c95b1b976..022416706 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,6 +43,8 @@ class RasterizerInterface; namespace VideoCommon { +using VideoCore::Surface::PixelFormat; + using VideoCore::Surface::SurfaceTarget; using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; @@ -96,7 +98,7 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true).second; + return GetSurface(gpu_addr, params, true, false).second; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -118,7 +120,7 @@ public: system, regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.block_width, 
regs.zeta.memory_layout.block_height, regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents); + auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false); depth_buffer.target = surface_view.first; @@ -152,7 +154,7 @@ public: } auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), - preserve_contents); + preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false); render_targets[index].target = surface_view.first; @@ -226,6 +228,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); + siblings_table[PixelFormat::Z16] = PixelFormat::R16F; + siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; + siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; + siblings_table[PixelFormat::R16F] = PixelFormat::Z16; + siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; } ~TextureCache() = default; @@ -289,7 +296,7 @@ protected: const Tegra::Engines::Fermi2D::Regs::Surface& config) { SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); const GPUVAddr gpu_addr = config.Address(); - return GetSurface(gpu_addr, params, true); + return GetSurface(gpu_addr, params, true, false); } Core::System& system; @@ -406,16 +413,22 @@ private: * @param params, the new surface params which we want to check. 
**/ std::pair ManageStructuralMatch(TSurface current_surface, - const SurfaceParams& params) { + const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); + const bool matches_target = current_surface->MatchTarget(params.target); + auto match_check = ([&]() -> std::pair { + if (matches_target) { + return {current_surface, current_surface->GetMainView()}; + } + return {current_surface, current_surface->EmplaceOverview(params)}; + }); if (is_mirage) { + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); + } return RebuildSurface(current_surface, params); } - const bool matches_target = current_surface->MatchTarget(params.target); - if (matches_target) { - return {current_surface, current_surface->GetMainView()}; - } - return {current_surface, current_surface->EmplaceOverview(params)}; + return match_check(); } /** @@ -490,7 +503,7 @@ private: * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, - bool preserve_contents) { + bool preserve_contents, bool is_render) { const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -524,7 +537,7 @@ private: (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { if (s_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params); + return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params); } @@ -724,6 +737,8 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + std::unordered_map siblings_table; + // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. 
This fits better for the purpose of this cache as textures are normaly // large in size. From 4db28f72f617b1500581e621719928fa0807d9ac Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 10:26:17 -0400 Subject: [PATCH 073/113] texture_cache: Remove old rasterizer cache --- .../renderer_opengl/gl_rasterizer_cache.cpp | 1381 ----------------- .../renderer_opengl/gl_rasterizer_cache.h | 575 ------- 2 files changed, 1956 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_rasterizer_cache.h diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp deleted file mode 100644 index e27da1fa7..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ /dev/null @@ -1,1381 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "core/hle/kernel/process.h" -#include "core/settings.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/surface.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/decoders.h" - -namespace OpenGL { - -using VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::ComponentTypeFromDepthFormat; -using VideoCore::Surface::ComponentTypeFromRenderTarget; -using VideoCore::Surface::ComponentTypeFromTexture; -using VideoCore::Surface::PixelFormatFromDepthFormat; 
-using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::PixelFormatFromTextureFormat; -using VideoCore::Surface::SurfaceTargetFromTextureType; - -struct FormatTuple { - GLint internal_format; - GLenum format; - GLenum type; - ComponentType component_type; - bool compressed; -}; - -static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); - if (max_mip_level == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0); - } -} - -void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) { - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - - gpu_addr = gpu_addr_; - host_ptr = memory_manager.GetPointer(gpu_addr_); - size_in_bytes = SizeInBytesRaw(); - - if (IsPixelFormatASTC(pixel_format)) { - // ASTC is uncompressed in software, in emulated as RGBA8 - size_in_bytes_gl = width * height * depth * 4; - } else { - size_in_bytes_gl = SizeInBytesGL(); - } -} - -std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only, - bool uncompressed) const { - const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; - const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; - const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; - u32 m_depth = (layer_only ? 1U : depth); - u32 m_width = MipWidth(mip_level); - u32 m_height = MipHeight(mip_level); - m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x); - m_height = uncompressed ? 
m_height : std::max(1U, (m_height + tile_y - 1) / tile_y); - m_depth = std::max(1U, m_depth >> mip_level); - u32 m_block_height = MipBlockHeight(mip_level); - u32 m_block_depth = MipBlockDepth(mip_level); - return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width, - m_height, m_depth, m_block_height, m_block_depth); -} - -std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, - bool uncompressed) const { - std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth; - std::size_t size = 0; - for (u32 i = 0; i < max_mip_level; i++) { - size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed); - } - if (!force_gl && is_tiled) { - size = Common::AlignUp(size, block_size_bytes); - } - return size; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForTexture( - const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { - SurfaceParams params{}; - params.is_tiled = config.tic.IsTiled(); - params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, - params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; - params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); - params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), - params.srgb_conversion); - - if (config.tsc.depth_compare_enabled) { - // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, - // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also - // causes GetFormatType to properly return 'Depth' below). 
- if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { - switch (params.pixel_format) { - case PixelFormat::R16S: - case PixelFormat::R16U: - case PixelFormat::R16F: - params.pixel_format = PixelFormat::Z16; - break; - case PixelFormat::R32F: - params.pixel_format = PixelFormat::Z32F; - break; - default: - LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", - static_cast(params.pixel_format)); - break; - } - } - } - - params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); - params.type = GetFormatType(params.pixel_format); - UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled); - - params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); - params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); - if (config.tic.IsLineal()) { - params.pitch = config.tic.Pitch(); - } - params.unaligned_height = config.tic.Height(); - params.target = SurfaceTargetFromTextureType(config.tic.texture_type); - params.identity = SurfaceClass::Uploaded; - - switch (params.target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::TextureBuffer: - case SurfaceTarget::Texture2D: - params.depth = 1; - break; - case SurfaceTarget::TextureCubemap: - params.depth = config.tic.Depth() * 6; - break; - case SurfaceTarget::Texture3D: - params.depth = config.tic.Depth(); - break; - case SurfaceTarget::Texture2DArray: - params.depth = config.tic.Depth(); - if (!entry.IsArray()) { - // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of - // one, but sample the texture in the shader as if it were not an array texture. This - // probably is valid on hardware, but we still need to write a test to confirm this. In - // emulation, the workaround here is to continue to treat this as a Texture2D. An - // example game that does this is Super Mario Odyssey (in Cloud Kingdom). 
- ASSERT(params.depth == 1); - params.target = SurfaceTarget::Texture2D; - } - break; - case SurfaceTarget::TextureCubeArray: - params.depth = config.tic.Depth() * 6; - if (!entry.IsArray()) { - ASSERT(params.depth == 6); - params.target = SurfaceTarget::TextureCubemap; - } - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast(params.target)); - UNREACHABLE(); - params.depth = 1; - break; - } - - params.is_layered = SurfaceTargetIsLayered(params.target); - params.is_array = SurfaceTargetIsArray(params.target); - params.max_mip_level = config.tic.max_mip_level + 1; - params.rt = {}; - - params.InitCacheParameters(config.tic.Address()); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { - const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; - SurfaceParams params{}; - - params.is_tiled = - config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.component_type = ComponentTypeFromRenderTarget(config.format); - params.type = GetFormatType(params.pixel_format); - if (params.is_tiled) { - params.width = config.width; - } else { - params.pitch = config.width; - const u32 bpp = params.GetFormatBpp() / 8; - params.width = params.pitch / bpp; - } - params.height = config.height; - params.unaligned_height = config.height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::RenderTarget; - params.depth = 1; - params.max_mip_level = 1; - params.is_layered = false; 
- - // Render target specific parameters, not used for caching - params.rt.index = static_cast(index); - params.rt.array_mode = config.array_mode; - params.rt.layer_stride = config.layer_stride; - params.rt.volume = config.volume; - params.rt.base_layer = config.base_layer; - - params.InitCacheParameters(config.Address()); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { - SurfaceParams params{}; - - params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromDepthFormat(format); - params.component_type = ComponentTypeFromDepthFormat(format); - params.type = GetFormatType(params.pixel_format); - params.srgb_conversion = false; - params.width = zeta_width; - params.height = zeta_height; - params.unaligned_height = zeta_height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::DepthBuffer; - params.depth = 1; - params.max_mip_level = 1; - params.is_layered = false; - params.rt = {}; - - params.InitCacheParameters(zeta_address); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params{}; - - params.is_tiled = !config.linear; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? 
std::min(config.BlockDepth(), 32U) : 0, - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.component_type = ComponentTypeFromRenderTarget(config.format); - params.type = GetFormatType(params.pixel_format); - params.width = config.width; - params.pitch = config.pitch; - params.height = config.height; - params.unaligned_height = config.height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::Copy; - params.depth = 1; - params.max_mip_level = 1; - params.rt = {}; - - params.InitCacheParameters(config.Address()); - - return params; -} - -static constexpr std::array tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI - {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, - false}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI - 
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_SF16 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT, 
ComponentType::SInt, false}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, - false}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, 
// ASTC_2D_5X5_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, - false}, // Z16 - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, - ComponentType::Float, false}, // Z32FS8 -}}; - -static GLenum SurfaceTargetToGL(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D; - case SurfaceTarget::TextureBuffer: - return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: - return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: - return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: - return GL_TEXTURE_CUBE_MAP_ARRAY; - } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast(target)); - UNREACHABLE(); - return {}; -} - -static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { - ASSERT(static_cast(pixel_format) < tex_format_tuples.size()); - auto& format = tex_format_tuples[static_cast(pixel_format)]; - ASSERT(component_type == format.component_type); - - return format; -} - -/// Returns the discrepant array target -constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D_ARRAY; 
- case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::Texture3D: - return GL_NONE; - case SurfaceTarget::Texture1DArray: - return GL_TEXTURE_1D; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP_ARRAY; - case SurfaceTarget::TextureCubeArray: - return GL_TEXTURE_CUBE_MAP; - } - return GL_NONE; -} - -Common::Rectangle SurfaceParams::GetRect(u32 mip_level) const { - u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; - if (IsPixelFormatASTC(pixel_format)) { - // ASTC formats must stop at the ATSC block size boundary - actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); - } - return {0, actual_height, MipWidth(mip_level), 0}; -} - -void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, - std::vector& gl_buffer, u32 mip_level) { - u32 depth = params.MipDepth(mip_level); - if (params.target == SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. 
- depth = 1U; - } - if (params.is_layered) { - u64 offset = params.GetMipmapLevelOffset(mip_level); - u64 offset_gl = 0; - const u64 layer_size = params.LayerMemorySize(); - const u64 gl_size = params.LayerSizeGL(mip_level); - for (u32 i = 0; i < params.depth; i++) { - MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), - params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, - gl_buffer.data() + offset_gl, params.host_ptr + offset); - offset += layer_size; - offset_gl += gl_size; - } - } else { - const u64 offset = params.GetMipmapLevelOffset(mip_level); - MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), - params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, - gl_buffer.data(), params.host_ptr + offset); - } -} - -void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - const u32 width{std::min(src_params.width, dst_params.width)}; - const u32 height{std::min(src_params.height, dst_params.height)}; - - glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, - 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, - 0, 0, width, height, 1); - - dst_surface->MarkAsModified(true, *this); -} - -MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - const GLuint copy_pbo_handle, const GLenum src_attachment, - const GLenum dst_attachment, - const std::size_t cubemap_face) { - MICROPROFILE_SCOPE(OpenGL_CopySurface); - ASSERT_MSG(dst_attachment == 0, "Unimplemented"); - - const auto& src_params{src_surface->GetSurfaceParams()}; 
- const auto& dst_params{dst_surface->GetSurfaceParams()}; - - const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); - const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); - - const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY); - if (source_format.compressed) { - glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, - static_cast(src_params.size_in_bytes), nullptr); - } else { - glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format, - source_format.type, static_cast(src_params.size_in_bytes), - nullptr); - } - // If the new texture is bigger than the previous one, we need to fill in the rest with data - // from the CPU. - if (src_params.size_in_bytes < dst_params.size_in_bytes) { - // Upload the rest of the memory. - if (dst_params.is_tiled) { - // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest - // of the data in this case. Games like Super Mario Odyssey seem to hit this case - // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer - // but it doesn't clear it beforehand, the texture is already full of zeros. 
- LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " - "reinterpretation but the texture is tiled."); - } - const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, - memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes)); - } - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - const GLsizei width{static_cast( - std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))}; - const GLsizei height{static_cast( - std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))}; - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); - if (dest_format.compressed) { - LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); - UNREACHABLE(); - } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height, - static_cast(dst_params.depth), dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, - static_cast(cubemap_face), width, height, 1, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(dst_params.target)); - UNREACHABLE(); - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - - dst_surface->MarkAsModified(true, *this); -} - 
-CachedSurface::CachedSurface(const SurfaceParams& params) - : RasterizerCacheObject{params.host_ptr}, params{params}, - gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} { - - const auto optional_cpu_addr{ - Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)}; - ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid"); - cpu_addr = *optional_cpu_addr; - - texture.Create(gl_target); - - // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) - // alternatives. This signals a bug on those functions. - const auto width = static_cast(params.MipWidth(0)); - const auto height = static_cast(params.MipHeight(0)); - memory_size = params.MemorySize(); - reinterpreted = false; - - const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); - gl_internal_format = format_tuple.internal_format; - - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, - params.width * GetBytesPerPixel(params.pixel_format), nullptr, - GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle); - break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height, - params.depth); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(params.target)); - UNREACHABLE(); - glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); 
- } - - if (params.target != SurfaceTarget::TextureBuffer) { - ApplyTextureDefaults(texture.handle, params.max_mip_level); - } - - OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - if (gl_buffer.size() < params.max_mip_level) - gl_buffer.resize(params.max_mip_level); - for (u32 i = 0; i < params.max_mip_level; i++) - gl_buffer[i].resize(params.GetMipmapSizeGL(i)); - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", - params.block_width, static_cast(params.target)); - for (u32 i = 0; i < params.max_mip_level; i++) - SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); - } else { - const u32 bpp = params.GetFormatBpp() / 8; - const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) / - GetDefaultBlockWidth(params.pixel_format); - if (params.pitch == copy_size) { - std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); - } else { - const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) / - GetDefaultBlockHeight(params.pixel_format); - const u8* start{params.host_ptr}; - u8* write_to = gl_buffer[0].data(); - for (u32 h = height; h > 0; h--) { - std::memcpy(write_to, start, copy_size); - start += params.pitch; - write_to += copy_size; - } - } - } - for (u32 i = 0; i < params.max_mip_level; i++) { - const u32 width = params.MipWidth(i); - const u32 height = params.MipHeight(i); - const u32 depth = params.MipDepth(i); - if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { - // Reserve size for RGBA8 conversion - constexpr std::size_t rgba_bpp = 4; - 
gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); - } - Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, - height, depth, true, true); - } -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { - MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - - ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented"); - - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - // OpenGL temporary buffer needs to be big enough to store raw texture size - gl_buffer[0].resize(GetSizeInBytes()); - - const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - const u32 align = std::clamp(params.RowAlign(0), 1U, 8U); - glPixelStorei(GL_PACK_ALIGNMENT, align); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(params.width)); - ASSERT(!tuple.compressed); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, - static_cast(gl_buffer[0].size()), gl_buffer[0].data()); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, - params.height, params.depth, true, true); - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", - params.block_width, static_cast(params.target)); - - SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); - } else { - const u32 bpp = params.GetFormatBpp() / 8; - const u32 copy_size = params.width * bpp; - if (params.pitch == copy_size) { - std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); - } else { - u8* start{params.host_ptr}; - const u8* read_to = gl_buffer[0].data(); - for (u32 h = params.height; h > 0; h--) { - std::memcpy(start, read_to, copy_size); - start += params.pitch; - read_to += copy_size; - } - } - } -} - 
-void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, - GLuint read_fb_handle, GLuint draw_fb_handle) { - const auto& rect{params.GetRect(mip_map)}; - - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - - // Load data from memory to the surface - const auto x0 = static_cast(rect.left); - const auto y0 = static_cast(rect.bottom); - auto buffer_offset = - static_cast(static_cast(y0) * params.MipWidth(mip_map) + - static_cast(x0)) * - GetBytesPerPixel(params.pixel_format); - - const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - - const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U); - glPixelStorei(GL_UNPACK_ALIGNMENT, align); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.MipWidth(mip_map))); - - const auto image_size = static_cast(params.GetMipmapSizeGL(mip_map, false)); - if (tuple.compressed) { - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D( - texture.handle, mip_map, 0, 0, static_cast(params.MipWidth(mip_map)), - static_cast(params.MipHeight(mip_map)), tuple.internal_format, image_size, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture3D: - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, 0, static_cast(params.MipWidth(mip_map)), - static_cast(params.MipHeight(mip_map)), - static_cast(params.MipDepth(mip_map)), tuple.internal_format, image_size, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, 0, static_cast(params.MipWidth(mip_map)), - static_cast(params.MipHeight(mip_map)), static_cast(params.depth), - tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureCubemap: { - const auto layer_size = static_cast(params.LayerSizeGL(mip_map)); - for (std::size_t face = 0; face < 
params.depth; ++face) { - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, static_cast(face), - static_cast(params.MipWidth(mip_map)), - static_cast(params.MipHeight(mip_map)), 1, tuple.internal_format, - layer_size, &gl_buffer[mip_map][buffer_offset]); - buffer_offset += layer_size; - } - break; - } - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(params.target)); - UNREACHABLE(); - glCompressedTextureSubImage2D( - texture.handle, mip_map, 0, 0, static_cast(params.MipWidth(mip_map)), - static_cast(params.MipHeight(mip_map)), tuple.internal_format, - static_cast(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]); - } - } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(texture.handle, mip_map, x0, static_cast(rect.GetWidth()), - tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(mip_map == 0); - glNamedBufferSubData(texture_buffer.handle, x0, - static_cast(rect.GetWidth()) * - GetBytesPerPixel(params.pixel_format), - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, mip_map, x0, y0, - static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture3D: - glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, - static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), params.MipDepth(mip_map), - tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, - static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), params.depth, tuple.format, - tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureCubemap: { - for (std::size_t face = 0; face < 
params.depth; ++face) { - glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast(face), - static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), 1, tuple.format, - tuple.type, &gl_buffer[mip_map][buffer_offset]); - buffer_offset += params.LayerSizeGL(mip_map); - } - break; - } - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(params.target)); - UNREACHABLE(); - glTextureSubImage2D(texture.handle, mip_map, x0, y0, - static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[mip_map][buffer_offset]); - } - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -} - -void CachedSurface::EnsureTextureDiscrepantView() { - if (discrepant_view.handle != 0) - return; - - const GLenum target{GetArrayDiscrepantTarget(params.target)}; - ASSERT(target != GL_NONE); - - const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; - constexpr GLuint min_layer = 0; - constexpr GLuint min_level = 0; - - glGenTextures(1, &discrepant_view.handle); - glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, - params.max_mip_level, min_layer, num_layers); - ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); - glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, - reinterpret_cast(swizzle.data())); -} - -MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, - GLuint read_fb_handle, GLuint draw_fb_handle) { - MICROPROFILE_SCOPE(OpenGL_TextureUL); - - for (u32 i = 0; i < params.max_mip_level; i++) - UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); -} - -void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, - Tegra::Texture::SwizzleSource swizzle_y, - Tegra::Texture::SwizzleSource swizzle_z, - Tegra::Texture::SwizzleSource swizzle_w) { - if (params.target 
== SurfaceTarget::TextureBuffer) { - return; - } - const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); - const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y); - const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z); - const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w); - if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) { - return; - } - swizzle = {new_x, new_y, new_z, new_w}; - const auto swizzle_data = reinterpret_cast(swizzle.data()); - glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - if (discrepant_view.handle != 0) { - glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - } -} - -RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} { - read_framebuffer.Create(); - draw_framebuffer.Create(); - copy_pbo.Create(); -} - -Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry) { - return GetSurface(SurfaceParams::CreateForTexture(config, entry)); -} - -Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { - auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; - const auto& regs{gpu.regs}; - - if (!gpu.dirty_flags.zeta_buffer) { - return last_depth_buffer; - } - gpu.dirty_flags.zeta_buffer = false; - - if (!regs.zeta.Address() || !regs.zeta_enable) { - return last_depth_buffer = {}; - } - - SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( - regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format, - regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, - regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - - return last_depth_buffer = GetSurface(depth_params, preserve_contents); -} - -Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { - auto& 
gpu{Core::System::GetInstance().GPU().Maxwell3D()}; - const auto& regs{gpu.regs}; - - if (!gpu.dirty_flags.color_buffer[index]) { - return current_color_buffers[index]; - } - gpu.dirty_flags.color_buffer.reset(index); - - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - - if (index >= regs.rt_control.count) { - return current_color_buffers[index] = {}; - } - - if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return current_color_buffers[index] = {}; - } - - const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - - return current_color_buffers[index] = GetSurface(color_params, preserve_contents); -} - -void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { - surface->LoadGLBuffer(temporal_memory); - surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle); - surface->MarkAsModified(false, *this); - surface->MarkForReload(false); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { - if (!params.IsValid()) { - return {}; - } - - // Look up surface in the cache based on address - Surface surface{TryGet(params.host_ptr)}; - if (surface) { - if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is unless it's not synced with memory - if (surface->MustReload()) - LoadSurface(surface); - return surface; - } else if (preserve_contents) { - // If surface parameters changed and we care about keeping the previous data, recreate - // the surface from the old one - Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); - Register(new_surface); - if (new_surface->IsUploaded()) { - RegisterReinterpretSurface(new_surface); - } - return new_surface; - } else { - // Delete the old surface before creating a new one to prevent collisions. 
- Unregister(surface); - } - } - - // No cached surface found - get a new one - surface = GetUncachedSurface(params); - Register(surface); - - // Only load surface from memory if we care about the contents - if (preserve_contents) { - LoadSurface(surface); - } - - return surface; -} - -Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { - Surface surface{TryGetReservedSurface(params)}; - if (!surface) { - // No reserved surface available, create a new one and reserve it - surface = std::make_shared(params); - ReserveSurface(surface); - } - return surface; -} - -void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& init_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - GPUVAddr address{init_params.gpu_addr}; - const std::size_t layer_size{dst_params.LayerMemorySize()}; - for (u32 layer = 0; layer < dst_params.depth; layer++) { - for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { - const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; - const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; - if (!copy) { - continue; - } - const auto& src_params{copy->GetSurfaceParams()}; - const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; - const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; - - glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, - 0, 0, dst_surface->Texture().handle, - SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width, - height, 1); - } - address += layer_size; - } - - dst_surface->MarkAsModified(true, *this); -} - -static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect, GLuint read_fb_handle, 
- GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, - std::size_t cubemap_face = 0) { - - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_fb_handle; - state.draw.draw_framebuffer = draw_fb_handle; - state.Apply(); - - u32 buffers{}; - - if (src_params.type == SurfaceType::ColorTexture) { - switch (src_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - src_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - src_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - SurfaceTargetToGL(src_params.target), - src_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(src_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - switch (dst_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - dst_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - dst_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - SurfaceTargetToGL(dst_params.target), - dst_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(dst_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST); - - return true; -} - -void RasterizerCacheOpenGL::FermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, const Common::Rectangle& dst_rect) { - - const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); - const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - - ASSERT(src_params.pixel_format == dst_params.pixel_format); - ASSERT(src_params.block_height == dst_params.block_height); - ASSERT(src_params.is_tiled == dst_params.is_tiled); - ASSERT(src_params.depth == dst_params.depth); - ASSERT(src_params.target == dst_params.target); - ASSERT(src_params.rt.index == dst_params.rt.index); - - auto src_surface = GetSurface(src_params, true); - auto dst_surface = GetSurface(dst_params, true); - - BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, - draw_framebuffer.handle); - - dst_surface->MarkAsModified(true, *this); -} - -void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - // Flush enough memory for both the source and destination surface - FlushRegion(ToCacheAddr(src_params.host_ptr), - std::max(src_params.MemorySize(), dst_params.MemorySize())); - - LoadSurface(dst_surface); -} - -Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, - const SurfaceParams& new_params) { - // Verify surface is compatible for blitting - auto old_params{old_surface->GetSurfaceParams()}; - - // Get a new surface with the new parameters, and blit the previous surface to it - Surface new_surface{GetUncachedSurface(new_params)}; - - // With use_accurate_gpu_emulation enabled, do an accurate surface copy - if (Settings::values.use_accurate_gpu_emulation) { - 
AccurateCopySurface(old_surface, new_surface); - return new_surface; - } - - const bool old_compressed = - GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; - const bool new_compressed = - GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; - const bool compatible_formats = - GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && - !(old_compressed || new_compressed); - // For compatible surfaces, we can just do fast glCopyImageSubData based copy - if (old_params.target == new_params.target && old_params.depth == new_params.depth && - old_params.depth == 1 && compatible_formats) { - FastCopySurface(old_surface, new_surface); - return new_surface; - } - - switch (new_params.target) { - case SurfaceTarget::Texture2D: - CopySurface(old_surface, new_surface, copy_pbo.handle); - break; - case SurfaceTarget::Texture3D: - AccurateCopySurface(old_surface, new_surface); - break; - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - if (compatible_formats) - FastLayeredCopySurface(old_surface, new_surface); - else { - AccurateCopySurface(old_surface, new_surface); - } - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast(new_params.target)); - UNREACHABLE(); - } - - return new_surface; -} - -Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { - return TryGet(host_ptr); -} - -void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { - const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; - surface_reserve[surface_reserve_key] = surface; -} - -Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) { - const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; - auto search{surface_reserve.find(surface_reserve_key)}; - if (search != surface_reserve.end()) { - return 
search->second; - } - return {}; -} - -static std::optional TryFindBestMipMap(std::size_t memory, const SurfaceParams params, - u32 height) { - for (u32 i = 0; i < params.max_mip_level; i++) { - if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { - return {i}; - } - } - return {}; -} - -static std::optional TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) { - const std::size_t size{params.LayerMemorySize()}; - GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; - for (u32 i = 0; i < params.depth; i++) { - if (start == addr) { - return {i}; - } - start += size; - } - return {}; -} - -static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, - const Surface blitted_surface) { - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - const std::size_t src_memory_size = src_params.size_in_bytes; - const std::optional level = - TryFindBestMipMap(src_memory_size, dst_params, src_params.height); - if (level.has_value()) { - if (src_params.width == dst_params.MipWidthGobAligned(*level) && - src_params.height == dst_params.MipHeight(*level) && - src_params.block_height >= dst_params.MipBlockHeight(*level)) { - const std::optional slot = - TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); - if (slot.has_value()) { - glCopyImageSubData(render_surface->Texture().handle, - SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, - blitted_surface->Texture().handle, - SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, - dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); - blitted_surface->MarkAsModified(true, cache); - return true; - } - } - } - return false; -} - -static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { - const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); - const 
VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); - if (bound2 > bound1) - return true; - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - return (dst_params.component_type != src_params.component_type); -} - -static bool IsReinterpretInvalidSecond(const Surface render_surface, - const Surface blitted_surface) { - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - return (dst_params.height > src_params.height && dst_params.width > src_params.width); -} - -bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, - Surface intersect) { - if (IsReinterpretInvalid(triggering_surface, intersect)) { - Unregister(intersect); - return false; - } - if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { - if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { - Unregister(intersect); - return false; - } - FlushObject(intersect); - FlushObject(triggering_surface); - intersect->MarkForReload(true); - } - return true; -} - -void RasterizerCacheOpenGL::SignalPreDrawCall() { - if (texception && GLAD_GL_ARB_texture_barrier) { - glTextureBarrier(); - } - texception = false; -} - -void RasterizerCacheOpenGL::SignalPostDrawCall() { - for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { - if (current_color_buffers[i] != nullptr) { - Surface intersect = - CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); - if (intersect != nullptr) { - PartialReinterpretSurface(current_color_buffers[i], intersect); - texception = true; - } - } - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h deleted file mode 100644 index bbab79575..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ /dev/null @@ -1,575 +0,0 @@ -// Copyright 
2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/hash.h" -#include "common/math_util.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" -#include "video_core/textures/texture.h" - -namespace OpenGL { - -class CachedSurface; -using Surface = std::shared_ptr; -using SurfaceSurfaceRect_Tuple = std::tuple>; - -using SurfaceTarget = VideoCore::Surface::SurfaceTarget; -using SurfaceType = VideoCore::Surface::SurfaceType; -using PixelFormat = VideoCore::Surface::PixelFormat; -using ComponentType = VideoCore::Surface::ComponentType; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct SurfaceParams { - enum class SurfaceClass { - Uploaded, - RenderTarget, - DepthBuffer, - Copy, - }; - - static std::string SurfaceTargetName(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return "Texture1D"; - case SurfaceTarget::Texture2D: - return "Texture2D"; - case SurfaceTarget::Texture3D: - return "Texture3D"; - case SurfaceTarget::Texture1DArray: - return "Texture1DArray"; - case SurfaceTarget::Texture2DArray: - return "Texture2DArray"; - case SurfaceTarget::TextureCubemap: - return "TextureCubemap"; - case SurfaceTarget::TextureCubeArray: - return "TextureCubeArray"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); - UNREACHABLE(); - return fmt::format("TextureUnknown({})", static_cast(target)); - } - } - - u32 GetFormatBpp() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); - 
} - - /// Returns the rectangle corresponding to this surface - Common::Rectangle GetRect(u32 mip_level = 0) const; - - /// Returns the total size of this surface in bytes, adjusted for compression - std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { - const u32 compression_factor{GetCompressionFactor(pixel_format)}; - const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; - const size_t uncompressed_size{ - Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width, - height, depth, block_height, block_depth)}; - - // Divide by compression_factor^2, as height and width are factored by this - return uncompressed_size / (compression_factor * compression_factor); - } - - /// Returns the size of this surface as an OpenGL texture in bytes - std::size_t SizeInBytesGL() const { - return SizeInBytesRaw(true); - } - - /// Returns the size of this surface as a cube face in bytes - std::size_t SizeInBytesCubeFace() const { - return size_in_bytes / 6; - } - - /// Returns the size of this surface as an OpenGL cube face in bytes - std::size_t SizeInBytesCubeFaceGL() const { - return size_in_bytes_gl / 6; - } - - /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. - std::size_t MemorySize() const { - std::size_t size = InnerMemorySize(false, is_layered); - if (is_layered) - return size * depth; - return size; - } - - /// Returns true if the parameters constitute a valid rasterizer surface. - bool IsValid() const { - return gpu_addr && host_ptr && height && width; - } - - /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including - /// mipmaps. - std::size_t LayerMemorySize() const { - return InnerMemorySize(false, true); - } - - /// Returns the size of a layer of this surface in OpenGL. 
- std::size_t LayerSizeGL(u32 mip_level) const { - return InnerMipmapMemorySize(mip_level, true, is_layered, false); - } - - std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const { - std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed); - if (is_layered) - return size * depth; - return size; - } - - std::size_t GetMipmapLevelOffset(u32 mip_level) const { - std::size_t offset = 0; - for (u32 i = 0; i < mip_level; i++) - offset += InnerMipmapMemorySize(i, false, is_layered); - return offset; - } - - std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const { - std::size_t offset = 0; - for (u32 i = 0; i < mip_level; i++) - offset += InnerMipmapMemorySize(i, true, is_layered); - return offset; - } - - std::size_t GetMipmapSingleSize(u32 mip_level) const { - return InnerMipmapMemorySize(mip_level, false, is_layered); - } - - u32 MipWidth(u32 mip_level) const { - return std::max(1U, width >> mip_level); - } - - u32 MipWidthGobAligned(u32 mip_level) const { - return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); - } - - u32 MipHeight(u32 mip_level) const { - return std::max(1U, height >> mip_level); - } - - u32 MipDepth(u32 mip_level) const { - return is_layered ? 
depth : std::max(1U, depth >> mip_level); - } - - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c - u32 MipBlockHeight(u32 mip_level) const { - if (mip_level == 0) - return block_height; - u32 alt_height = MipHeight(mip_level); - u32 h = GetDefaultBlockHeight(pixel_format); - u32 blocks_in_y = (alt_height + h - 1) / h; - u32 bh = 16; - while (bh > 1 && blocks_in_y <= bh * 4) { - bh >>= 1; - } - return bh; - } - - u32 MipBlockDepth(u32 mip_level) const { - if (mip_level == 0) { - return block_depth; - } - - if (is_layered) { - return 1; - } - - const u32 mip_depth = MipDepth(mip_level); - u32 bd = 32; - while (bd > 1 && mip_depth * 2 <= bd) { - bd >>= 1; - } - - if (bd == 32) { - const u32 bh = MipBlockHeight(mip_level); - if (bh >= 4) { - return 16; - } - } - - return bd; - } - - u32 RowAlign(u32 mip_level) const { - const u32 m_width = MipWidth(mip_level); - const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); - const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); - return (1U << l2); - } - - /// Creates SurfaceParams from a texture configuration - static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry); - - /// Creates SurfaceParams from a framebuffer configuration - static SurfaceParams CreateForFramebuffer(std::size_t index); - - /// Creates SurfaceParams for a depth buffer configuration - static SurfaceParams CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); - - /// Creates SurfaceParams for a Fermi2D surface copy - static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - /// Checks if surfaces are compatible for caching - bool IsCompatibleSurface(const 
SurfaceParams& other) const { - if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) == - std::tie(other.pixel_format, other.type, other.width, other.height, other.target, - other.depth, other.is_tiled)) { - if (!is_tiled) - return true; - return std::tie(block_height, block_depth, tile_width_spacing) == - std::tie(other.block_height, other.block_depth, other.tile_width_spacing); - } - return false; - } - - /// Initializes parameters for caching, should be called after everything has been initialized - void InitCacheParameters(GPUVAddr gpu_addr); - - std::string TargetName() const { - switch (target) { - case SurfaceTarget::Texture1D: - return "1D"; - case SurfaceTarget::TextureBuffer: - return "Buffer"; - case SurfaceTarget::Texture2D: - return "2D"; - case SurfaceTarget::Texture3D: - return "3D"; - case SurfaceTarget::Texture1DArray: - return "1DArray"; - case SurfaceTarget::Texture2DArray: - return "2DArray"; - case SurfaceTarget::TextureCubemap: - return "Cube"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast(target)); - UNREACHABLE(); - return fmt::format("TUK({})", static_cast(target)); - } - } - - std::string ClassName() const { - switch (identity) { - case SurfaceClass::Uploaded: - return "UP"; - case SurfaceClass::RenderTarget: - return "RT"; - case SurfaceClass::DepthBuffer: - return "DB"; - case SurfaceClass::Copy: - return "CP"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast(identity)); - UNREACHABLE(); - return fmt::format("CUK({})", static_cast(identity)); - } - } - - std::string IdentityString() const { - return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 
'T' : 'L'); - } - - bool is_tiled; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - PixelFormat pixel_format; - ComponentType component_type; - SurfaceType type; - u32 width; - u32 height; - u32 depth; - u32 unaligned_height; - u32 pitch; - SurfaceTarget target; - SurfaceClass identity; - u32 max_mip_level; - bool is_layered; - bool is_array; - bool srgb_conversion; - // Parameters used for caching - u8* host_ptr; - GPUVAddr gpu_addr; - std::size_t size_in_bytes; - std::size_t size_in_bytes_gl; - - // Render target specific parameters, not used in caching - struct { - u32 index; - u32 array_mode; - u32 volume; - u32 layer_stride; - u32 base_layer; - } rt; - -private: - std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false, - bool uncompressed = false) const; - std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false, - bool uncompressed = false) const; -}; - -}; // namespace OpenGL - -/// Hashable variation of SurfaceParams, used for a key in the surface cache -struct SurfaceReserveKey : Common::HashableStruct { - static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { - SurfaceReserveKey res; - res.state = params; - res.state.identity = {}; // Ignore the origin of the texture - res.state.gpu_addr = {}; // Ignore GPU vaddr in caching - res.state.rt = {}; // Ignore rt config in caching - return res; - } -}; -namespace std { -template <> -struct hash { - std::size_t operator()(const SurfaceReserveKey& k) const { - return k.Hash(); - } -}; -} // namespace std - -namespace OpenGL { - -class RasterizerOpenGL; - -// This is used to store temporary big buffers, -// instead of creating/destroying all the time -struct RasterizerTemporaryMemory { - std::vector> gl_buffer; -}; - -class CachedSurface final : public RasterizerCacheObject { -public: - explicit CachedSurface(const SurfaceParams& params); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - 
std::size_t GetSizeInBytes() const override { - return cached_size_in_bytes; - } - - std::size_t GetMemorySize() const { - return memory_size; - } - - const OGLTexture& Texture() const { - return texture; - } - - const OGLTexture& Texture(bool as_array) { - if (params.is_array == as_array) { - return texture; - } else { - EnsureTextureDiscrepantView(); - return discrepant_view; - } - } - - GLenum Target() const { - return gl_target; - } - - const SurfaceParams& GetSurfaceParams() const { - return params; - } - - // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); - void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); - - // Upload data in gl_buffer to this surface's texture - void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, - GLuint draw_fb_handle); - - void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, - Tegra::Texture::SwizzleSource swizzle_y, - Tegra::Texture::SwizzleSource swizzle_z, - Tegra::Texture::SwizzleSource swizzle_w); - - void MarkReinterpreted() { - reinterpreted = true; - } - - bool IsReinterpreted() const { - return reinterpreted; - } - - void MarkForReload(bool reload) { - must_reload = reload; - } - - bool MustReload() const { - return must_reload; - } - - bool IsUploaded() const { - return params.identity == SurfaceParams::SurfaceClass::Uploaded; - } - -private: - void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, - GLuint read_fb_handle, GLuint draw_fb_handle); - - void EnsureTextureDiscrepantView(); - - OGLTexture texture; - OGLTexture discrepant_view; - OGLBuffer texture_buffer; - SurfaceParams params{}; - GLenum gl_target{}; - GLenum gl_internal_format{}; - std::size_t cached_size_in_bytes{}; - std::array swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; - std::size_t memory_size; - bool reinterpreted = false; - bool must_reload = false; - VAddr cpu_addr{}; -}; - -class 
RasterizerCacheOpenGL final : public RasterizerCache { -public: - explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer); - - /// Get a surface based on the texture configuration - Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry); - - /// Get the depth surface based on the framebuffer configuration - Surface GetDepthBufferSurface(bool preserve_contents); - - /// Get the color surface based on the framebuffer configuration and the specified render target - Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); - - /// Tries to find a framebuffer using on the provided CPU address - Surface TryFindFramebufferSurface(const u8* host_ptr) const; - - /// Copies the contents of one surface to another - void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle& src_rect, - const Common::Rectangle& dst_rect); - - void SignalPreDrawCall(); - void SignalPostDrawCall(); - -protected: - void FlushObjectInner(const Surface& object) override { - object->FlushGLBuffer(temporal_memory); - } - -private: - void LoadSurface(const Surface& surface); - Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); - - /// Gets an uncached surface, creating it if need be - Surface GetUncachedSurface(const SurfaceParams& params); - - /// Recreates a surface with new parameters - Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params); - - /// Reserves a unique surface that can be reused later - void ReserveSurface(const Surface& surface); - - /// Tries to get a reserved surface for the specified parameters - Surface TryGetReservedSurface(const SurfaceParams& params); - - // Partialy reinterpret a surface based on a triggering_surface that collides with it. - // returns true if the reinterpret was successful, false in case it was not. 
- bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); - - /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data - void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); - void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); - void FastCopySurface(const Surface& src_surface, const Surface& dst_surface); - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - const GLuint copy_pbo_handle, const GLenum src_attachment = 0, - const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0); - - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. - std::unordered_map surface_reserve; - - OGLFramebuffer read_framebuffer; - OGLFramebuffer draw_framebuffer; - - bool texception = false; - - /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one - /// using the new format. - OGLBuffer copy_pbo; - - std::array last_color_buffers; - std::array current_color_buffers; - Surface last_depth_buffer; - - RasterizerTemporaryMemory temporal_memory; - - using SurfaceIntervalCache = boost::icl::interval_map; - using SurfaceInterval = typename SurfaceIntervalCache::interval_type; - - static auto GetReinterpretInterval(const Surface& object) { - return SurfaceInterval::right_open(object->GetCacheAddr() + 1, - object->GetCacheAddr() + object->GetMemorySize() - 1); - } - - // Reinterpreted surfaces are very fragil as the game may keep rendering into them. 
- SurfaceIntervalCache reinterpreted_surfaces; - - void RegisterReinterpretSurface(Surface reinterpret_surface) { - auto interval = GetReinterpretInterval(reinterpret_surface); - reinterpreted_surfaces.insert({interval, reinterpret_surface}); - reinterpret_surface->MarkReinterpreted(); - } - - Surface CollideOnReinterpretedSurface(CacheAddr addr) const { - const SurfaceInterval interval{addr}; - for (auto& pair : - boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { - return pair.second; - } - return nullptr; - } - - void Register(const Surface& object) override { - RasterizerCache::Register(object); - } - - /// Unregisters an object from the cache - void Unregister(const Surface& object) override { - if (object->IsReinterpreted()) { - auto interval = GetReinterpretInterval(object); - reinterpreted_surfaces.erase(interval); - } - RasterizerCache::Unregister(object); - } -}; - -} // namespace OpenGL From 3dd76432141a5cbc97bed15788984b37e44aa4a5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 10:39:45 -0400 Subject: [PATCH 074/113] texture_cache: Use siblings textures on Rebuild and fix possible error on blitting --- .../renderer_opengl/gl_texture_cache.cpp | 2 +- src/video_core/texture_cache/texture_cache.h | 33 +++++++++++++------ 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7c1d14138..d30d04cd5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -522,7 +522,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - is_linear ? GL_LINEAR : GL_NEAREST); + is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? 
GL_LINEAR : GL_NEAREST); } void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 022416706..201c4d42e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -220,7 +220,6 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : system{system}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { @@ -233,6 +232,7 @@ protected: siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; siblings_table[PixelFormat::R16F] = PixelFormat::Z16; siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; + siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; } ~TextureCache() = default; @@ -385,15 +385,27 @@ private: * @param current_surface, the registered surface in the cache which we want to convert. * @param params, the new surface params which we'll use to recreate the surface. 
**/ - std::pair RebuildSurface(TSurface current_surface, - const SurfaceParams& params) { + std::pair RebuildSurface(TSurface current_surface, const SurfaceParams& params, + bool is_render) { const auto gpu_addr = current_surface->GetGpuAddr(); - TSurface new_surface = GetUncachedSurface(gpu_addr, params); const auto& cr_params = current_surface->GetSurfaceParams(); - if (cr_params.type != params.type || (cr_params.component_type != params.component_type)) { + TSurface new_surface; + if (cr_params.pixel_format != params.pixel_format && !is_render && + siblings_table[cr_params.pixel_format] == params.pixel_format) { + SurfaceParams new_params = params; + new_params.pixel_format = cr_params.pixel_format; + new_params.component_type = cr_params.component_type; + new_params.type = cr_params.type; + new_surface = GetUncachedSurface(gpu_addr, new_params); + } else { + new_surface = GetUncachedSurface(gpu_addr, params); + } + const auto& final_params = new_surface->GetSurfaceParams(); + if (cr_params.type != final_params.type || + (cr_params.component_type != final_params.component_type)) { BufferCopy(current_surface, new_surface); } else { - std::vector bricks = current_surface->BreakDown(params); + std::vector bricks = current_surface->BreakDown(final_params); for (auto& brick : bricks) { ImageCopy(current_surface, new_surface, brick); } @@ -426,7 +438,7 @@ private: if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { return match_check(); } - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } return match_check(); } @@ -539,7 +551,7 @@ private: if (s_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } } @@ -599,7 +611,8 @@ private: new_params.width = wh; new_params.height = hh; 
new_params.pixel_format = params.pixel_format; - std::pair pair = RebuildSurface(current_surface, new_params); + std::pair pair = + RebuildSurface(current_surface, new_params, is_render); std::optional mirage_view = pair.first->EmplaceView(params, gpu_addr, candidate_size); if (mirage_view) @@ -616,7 +629,7 @@ private: } // This is the case the texture is a part of the parent. if (current_surface->MatchesSubTexture(params, gpu_addr)) { - return RebuildSurface(current_surface, params); + return RebuildSurface(current_surface, params, is_render); } } else { // If there are many overlaps, odds are they are subtextures of the candidate From 7232a1ed16e46715c29d781fb143bdf799090bec Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 16:41:16 -0400 Subject: [PATCH 075/113] decoders: correct block calculation --- src/video_core/engines/engine_upload.cpp | 2 +- src/video_core/engines/engine_upload.h | 6 ++-- src/video_core/engines/fermi_2d.h | 6 ++-- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/engines/maxwell_dma.h | 4 +-- src/video_core/texture_cache/texture_cache.h | 16 +++++++++ src/video_core/textures/decoders.cpp | 34 +++++++++----------- 7 files changed, 41 insertions(+), 29 deletions(-) diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index c776b9a56..d44ad0cd8 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -39,7 +39,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); tmp_buffer.resize(dst_size); memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); 
Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index cb294aec3..462da419e 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -39,15 +39,15 @@ struct Registers { } u32 BlockWidth() const { - return block_width; + return block_width.Value(); } u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 BlockDepth() const { - return block_depth; + return block_depth.Value(); } } dest; }; diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0a4c7c5ad..05421d185 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -84,15 +84,15 @@ public: } u32 BlockWidth() const { - return block_width; + return block_width.Value(); } u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 BlockDepth() const { - return block_depth; + return block_depth.Value(); } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index afb9578d0..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 0); + ASSERT(regs.dst_params.BlockDepth() == 1); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 522fa97dc..17b015ca7 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -59,11 +59,11 @@ public: }; u32 BlockHeight() const { - return block_height; + return block_height.Value(); } u32 BlockDepth() const { - return 
block_depth; + return block_depth.Value(); } }; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 201c4d42e..7a9b4c27d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -335,6 +335,9 @@ private: if (untopological == MatchTopologyResult::CompressUnmatch) { return RecycleStrategy::Flush; } + if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { + return RecycleStrategy::Flush; + } return RecycleStrategy::Ignore; } @@ -372,6 +375,11 @@ private: } return InitializeSurface(gpu_addr, params, preserve_contents); } + case RecycleStrategy::BufferCopy: { + auto new_surface = GetUncachedSurface(gpu_addr, params); + BufferCopy(overlaps[0], new_surface); + return {new_surface, new_surface->GetMainView()}; + } default: { UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); return InitializeSurface(gpu_addr, params, do_load); @@ -520,6 +528,10 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; + if (gpu_addr == 0x00000001682F0000ULL) { + LOG_CRITICAL(HW_GPU, "Here's the texture!"); + } + // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid @@ -566,6 +578,10 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } + if (!params.is_tiled) { + return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, + MatchTopologyResult::FullMatch); + } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index f45fd175a..9a2f4198a 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -256,19 +256,18 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { - const u32 block_height_size{1U << block_height}; + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) { + const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; for (u32 line = 0; line < subrect_height; ++line) { const u32 gob_address_y = - (line / (gob_size_y * block_height_size)) * gob_size * block_height_size * - image_width_in_gobs + - ((line % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + + ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[line % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 gob_address = - gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height_size; + gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; u8* dest_addr = swizzled_data + swizzled_offset; @@ -279,19 +278,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, + u32 
bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit, u32 offset_x, u32 offset_y) { - const u32 block_height_size{1U << block_height}; + const u32 block_height = 1U << block_height_bit; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + offset_y; - const u32 gob_address_y = - (y2 / (gob_size_y * block_height_size)) * gob_size * block_height_size + - ((y2 % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + + ((y2 % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y2 % gob_size_y]; for (u32 x = 0; x < subrect_width; ++x) { const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = - gob_address_y + (x2 / gob_size_x) * gob_size * block_height_size; + const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; u8* source_addr = swizzled_data + swizzled_offset; @@ -302,20 +299,19 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 } void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, - const u32 block_height, const std::size_t copy_size, const u8* source_data, + const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, u8* swizzle_data) { - const u32 block_height_size{1U << block_height}; + const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; std::size_t count = 0; for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { const std::size_t gob_address_y = - (y / (gob_size_y * block_height_size)) * gob_size * block_height_size * - image_width_in_gobs + - ((y % (gob_size_y * block_height_size)) / gob_size_y) * gob_size; + (y / (gob_size_y 
* block_height)) * gob_size * block_height * image_width_in_gobs + + ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; const auto& table = legacy_swizzle_table[y % gob_size_y]; for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { const std::size_t gob_address = - gob_address_y + (x / gob_size_x) * gob_size * block_height_size; + gob_address_y + (x / gob_size_x) * gob_size * block_height; const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; const u8* source_line = source_data + count; u8* dest_addr = swizzle_data + swizzled_offset; From fac370625384373b2e5006ffbbae60d2de690f7f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 13 Jun 2019 23:03:20 -0400 Subject: [PATCH 076/113] gl_texture_cache: Correct Image Blit --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d30d04cd5..edb4e3177 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -479,7 +479,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, OpenGLState state; state.draw.read_framebuffer = src_framebuffer.handle; state.draw.draw_framebuffer = dst_framebuffer.handle; - state.ApplyFramebufferState(); + state.Apply(); u32 buffers{}; From 9422cf7c105106d794d4d2fb32822ab287e79422 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 12:51:13 -0400 Subject: [PATCH 077/113] gl_texture_cache: Use Stream Buffers instead of Persistant for Buffer Copies. 
--- src/video_core/renderer_opengl/gl_resource_manager.cpp | 5 ++--- src/video_core/renderer_opengl/gl_resource_manager.h | 2 +- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index a1f91d677..9f840a42e 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -148,12 +148,11 @@ void OGLBuffer::Release() { handle = 0; } -void OGLBuffer::MakePersistant(std::size_t buffer_size) { +void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { if (handle == 0 || buffer_size == 0) return; - const GLbitfield flags = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT | GL_MAP_READ_BIT; - glNamedBufferStorage(handle, static_cast(buffer_size), nullptr, flags); + glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); } void OGLSync::Create() { diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f2873ef96..b2aa558a1 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -187,7 +187,7 @@ public: void Release(); // Converts the buffer into a persistant storage buffer - void MakePersistant(std::size_t buffer_size); + void MakeStreamCopy(std::size_t buffer_size); GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index edb4e3177..892f286b7 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -599,7 +599,7 @@ GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { if (cp.handle == 0) { const std::size_t ceil_size = 1ULL << l2; cp.Create(); - cp.MakePersistant(ceil_size); + cp.MakeStreamCopy(ceil_size); } return 
cp.handle; } From 03d489dcf5dbe13dff1ff788c609f964dd24019c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 15:41:28 -0400 Subject: [PATCH 078/113] texture_cache: Initialize all siblings to invalid pixel format. --- src/video_core/texture_cache/texture_cache.h | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7a9b4c27d..8213f434d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -227,12 +227,18 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - siblings_table[PixelFormat::Z16] = PixelFormat::R16F; - siblings_table[PixelFormat::Z32F] = PixelFormat::R32F; - siblings_table[PixelFormat::Z32FS8] = PixelFormat::RG32F; - siblings_table[PixelFormat::R16F] = PixelFormat::Z16; - siblings_table[PixelFormat::R32F] = PixelFormat::Z32F; - siblings_table[PixelFormat::RG32F] = PixelFormat::Z32FS8; + auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + siblings_table[a] = b; + siblings_table[b] = a; + }); + const u32 max_formats = static_cast(PixelFormat::Max); + siblings_table.reserve(max_formats); + for (u32 i = 0; i < max_formats; i++) { + siblings_table[static_cast(i)] = PixelFormat::Invalid; + } + make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z32F, PixelFormat::R32F); + make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); } ~TextureCache() = default; @@ -766,6 +772,9 @@ private: // Guards the cache for protection conflicts. bool guard_cache{}; + // The siblings table is for formats that can inter exchange with one another + // without causing issues. This is only valid when a conflict occurs on a non + // rendering use. std::unordered_map siblings_table; // The internal Cache is different for the Texture Cache. 
It's based on buckets From 082740d34db0996a0af73d7680c57e1abb31c712 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 16:40:04 -0400 Subject: [PATCH 079/113] surface: Correct format S8Z24 --- src/video_core/engines/maxwell_dma.cpp | 2 +- src/video_core/surface.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.h | 4 ---- src/video_core/textures/texture.h | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..afb9578d0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } else { - ASSERT(regs.dst_params.BlockDepth() == 1); + ASSERT(regs.dst_params.BlockDepth() == 0); const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 52a79e4a7..c50f6354d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -308,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z16: return PixelFormat::Z16; - case Tegra::Texture::TextureFormat::Z24S8: - return PixelFormat::Z24S8; + case Tegra::Texture::TextureFormat::S8Z24: + return PixelFormat::S8Z24; case Tegra::Texture::TextureFormat::ZF32_X24S8: return PixelFormat::Z32FS8; case Tegra::Texture::TextureFormat::DXT1: diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8213f434d..a9e61cba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -534,10 +534,6 @@ private: const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; - if (gpu_addr == 0x00000001682F0000ULL) { - 
LOG_CRITICAL(HW_GPU, "Here's the texture!"); - } - // Step 0: guarantee a valid surface if (!cache_addr) { // Return a null surface if it's invalid diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index ddeed73d0..e3be018b9 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -52,9 +52,9 @@ enum class TextureFormat : u32 { DXT45 = 0x26, DXN1 = 0x27, DXN2 = 0x28, - Z24S8 = 0x29, + S8Z24 = 0x29, X8Z24 = 0x2a, - S8Z24 = 0x2b, + Z24S8 = 0x2b, X4V4Z24__COV4R4V = 0x2c, X4V4Z24__COV8R8V = 0x2d, V8Z24__COV4R12V = 0x2e, From fed773a86c96fc62f18181a1d3ba410b25c2edee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 14 Jun 2019 18:40:06 -0400 Subject: [PATCH 080/113] texture_cache: Implement Irregular Views in surfaces --- src/video_core/texture_cache/surface_base.cpp | 3 +++ src/video_core/texture_cache/surface_base.h | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 8c6edb04f..97bf9ad7a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -100,6 +100,9 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr candidate_gpu_addr) const { + if (gpu_addr == candidate_gpu_addr) { + return {{0,0}}; + } if (candidate_gpu_addr < gpu_addr) { return {}; } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 58265e9d3..662221adc 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -238,6 +238,26 @@ public: return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); } + std::optional EmplaceIrregularView(const SurfaceParams& view_params, + const GPUVAddr view_addr, + const std::size_t 
candidate_size, const u32 mipmap, + const u32 layer) { + const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; + if (!layer_mipmap) { + return {}; + } + const u32 end_layer{layer_mipmap->first}; + const u32 end_mipmap{layer_mipmap->second}; + if (layer != end_layer) { + if (mipmap == 0 && end_mipmap == 0) { + return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1)); + } + return {}; + } else { + return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + } + } + std::optional EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, const std::size_t candidate_size) { if (params.target == SurfaceTarget::Texture3D || @@ -252,10 +272,7 @@ public: const u32 layer{layer_mipmap->first}; const u32 mipmap{layer_mipmap->second}; if (GetMipmapSize(mipmap) != candidate_size) { - // TODO: The view may cover many mimaps, this case can still go on. - // This edge-case can be safely be ignored since it will just result in worse - // performance. - return {}; + return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); } return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); } From 198a0395bb1b1d19de12560ac146add0705ed00e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 11:08:11 -0400 Subject: [PATCH 081/113] texture_cache: Corrections to buffers and shadow formats use. 
--- .../texture_cache/surface_params.cpp | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f789da2c4..290ba438d 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -16,11 +16,13 @@ namespace VideoCommon { using VideoCore::Surface::ComponentTypeFromDepthFormat; using VideoCore::Surface::ComponentTypeFromRenderTarget; using VideoCore::Surface::ComponentTypeFromTexture; +using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; using VideoCore::Surface::PixelFormatFromTextureFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTargetFromTextureType; +using VideoCore::Surface::SurfaceType; SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { switch (type) { @@ -71,6 +73,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.tile_width_spacing = params.is_tiled ? 
(1 << config.tic.tile_width_spacing.Value()) : 1; params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); + params.type = GetFormatType(params.pixel_format); + if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { + switch (params.pixel_format) { + case PixelFormat::R16F: { + params.pixel_format = PixelFormat::Z16; + break; + } + case PixelFormat::R32F: { + params.pixel_format = PixelFormat::Z32F; + break; + } + default: { + UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", + static_cast(params.pixel_format)); + } + } + params.type = GetFormatType(params.pixel_format); + } params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); params.type = GetFormatType(params.pixel_format); // TODO: on 1DBuffer we should use the tic info. @@ -79,20 +99,24 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.width = config.tic.Width(); params.height = config.tic.Height(); params.depth = config.tic.Depth(); + params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { + params.depth *= 6; + } + params.num_levels = config.tic.max_mip_level + 1; + params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); + params.is_layered = params.IsLayered(); } else { params.target = SurfaceTarget::TextureBuffer; params.width = config.tic.Width(); - params.height = 0; - params.depth = 0; + params.pitch = params.width * params.GetBytesPerPixel(); + params.height = 1; + params.depth = 1; + params.num_levels = 1; + params.emulated_levels = 1; + params.is_layered = false; } - if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { - params.depth *= 6; - } - params.pitch = params.is_tiled ? 
0 : config.tic.Pitch(); - params.num_levels = config.tic.max_mip_level + 1; - params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); - params.is_layered = params.IsLayered(); return params; } From d7587842eb404a52eb75a12816028f0706821dd0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 15 Jun 2019 13:22:57 -0400 Subject: [PATCH 082/113] texture_cache: Implement texception detection and texture barriers. --- .../renderer_opengl/gl_rasterizer.cpp | 10 ++++- src/video_core/texture_cache/texture_cache.h | 37 ++++++++++++++++--- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 97c55f2ec..c9f3a35e6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -422,7 +422,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( } current_framebuffer_config_state = fb_config_state; - texture_cache.Guard(true); + texture_cache.GuardRenderTargets(true); View depth_surface{}; if (using_depth_fb) { @@ -500,7 +500,7 @@ std::pair RasterizerOpenGL::ConfigureFramebuffers( depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } - texture_cache.Guard(false); + texture_cache.GuardRenderTargets(false); current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); SyncViewport(current_state); @@ -651,7 +651,9 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexBuffer(vao); DrawParameters params = SetupDraw(); + texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); + texture_cache.GuardSamplers(false); ConfigureFramebuffers(state); @@ -660,6 +662,10 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); + if (texture_cache.TextureBarrier()) { + glTextureBarrier(); + } + params.DispatchDraw(); accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/texture_cache/texture_cache.h 
b/src/video_core/texture_cache/texture_cache.h index a9e61cba1..353fa4e31 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,8 +70,12 @@ public: * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. **/ - void Guard(bool new_guard) { - guard_cache = new_guard; + void GuardRenderTargets(bool new_guard) { + guard_render_targets = new_guard; + } + + void GuardSamplers(bool new_guard) { + guard_samplers = new_guard; } void FlushRegion(CacheAddr addr, std::size_t size) { @@ -98,7 +102,25 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - return GetSurface(gpu_addr, params, true, false).second; + auto pair = GetSurface(gpu_addr, params, true, false); + if (guard_samplers) { + if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { + sampled_textures_stack.resize(sampled_textures_stack.size() * 2); + } + sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; + sampled_textures_stack_pointer++; + } + return pair.second; + } + + bool TextureBarrier() { + bool must_do = false; + for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { + must_do |= sampled_textures_stack[i]->IsRenderTarget(); + sampled_textures_stack[i] = nullptr; + } + sampled_textures_stack_pointer = 0; + return must_do; } TView GetDepthBufferSurface(bool preserve_contents) { @@ -239,6 +261,7 @@ protected: make_siblings(PixelFormat::Z16, PixelFormat::R16F); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } ~TextureCache() = default; @@ -275,7 +298,7 @@ protected: } void Unregister(TSurface surface) { - if (guard_cache && surface->IsProtected()) { + if (guard_render_targets && surface->IsProtected()) { return; } const GPUVAddr gpu_addr = surface->GetGpuAddr(); @@ -766,7 +789,8 @@ private: 
u64 ticks{}; // Guards the cache for protection conflicts. - bool guard_cache{}; + bool guard_render_targets{}; + bool guard_samplers{}; // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non @@ -792,6 +816,9 @@ private: render_targets; FramebufferTargetInfo depth_buffer; + std::vector sampled_textures_stack{}; + u32 sampled_textures_stack_pointer{}; + StagingCache staging_cache; std::recursive_mutex mutex; }; From 6acdae0e4c9d0c20f668cd86250b5d5b0dbd70c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 17 Jun 2019 19:19:47 -0400 Subject: [PATCH 083/113] texture_cache: Correct format R16U as sibling --- src/video_core/texture_cache/surface_params.cpp | 1 + src/video_core/texture_cache/texture_cache.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 290ba438d..a670fc1a9 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -76,6 +76,7 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, params.type = GetFormatType(params.pixel_format); if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { + case PixelFormat::R16U: case PixelFormat::R16F: { params.pixel_format = PixelFormat::Z16; break; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 353fa4e31..78821503e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -258,7 +258,7 @@ protected: for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; } - make_siblings(PixelFormat::Z16, PixelFormat::R16F); + make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); 
make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); sampled_textures_stack.resize(64); From 97c8c9f49a3327f8f38dd460951071630c3e26fa Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 14:58:32 -0400 Subject: [PATCH 084/113] texture_cache: Eliminate linear textures fallthrough --- src/video_core/texture_cache/texture_cache.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 78821503e..d86ddeb76 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -603,10 +603,6 @@ private: return InitializeSurface(gpu_addr, params, preserve_contents); } - if (!params.is_tiled) { - return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, - MatchTopologyResult::FullMatch); - } // Step 3 // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. If it fails From 51ba60b27e54beec476416cd0c7334110bcdb274 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 15:02:53 -0400 Subject: [PATCH 085/113] shader_cache: Correct versioning and size calculation. 
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 ++++++- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 67789db73..02e217b8c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -103,15 +103,20 @@ constexpr std::tuple GetPrimitiveDescription(GLen /// Calculates the size of a program stream std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; + constexpr u64 key = 0xE2400FFFFF07000FULL; + constexpr u64 mask =0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { const u64 instruction = program[offset]; if (!IsSchedInstruction(offset, start_offset)) { - if (instruction == 0 || (instruction >> 52) == 0x50b) { + if ((instruction & mask) == key) { // End on Maxwell's "nop" instruction break; } + if (instruction == 0) { + break; + } } size += sizeof(u64); offset++; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5ec911adc..922c72590 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -34,7 +34,7 @@ enum class PrecompiledEntryKind : u32 { Dump, }; -constexpr u32 NativeVersion = 3; +constexpr u32 NativeVersion = 4; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); From d1812316e1b0f03af2ba10d4fe04be728e72725c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 20 Jun 2019 21:22:20 -0400 Subject: [PATCH 086/113] texture_cache: Style and Corrections --- .../renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/shader/node.h | 3 +- 
src/video_core/surface.h | 128 +++++++++--------- src/video_core/texture_cache/surface_base.cpp | 2 +- src/video_core/texture_cache/surface_base.h | 3 +- src/video_core/texture_cache/texture_cache.h | 1 + src/video_core/textures/decoders.cpp | 7 +- 7 files changed, 75 insertions(+), 71 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 02e217b8c..718703091 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -104,7 +104,7 @@ constexpr std::tuple GetPrimitiveDescription(GLen std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; constexpr u64 key = 0xE2400FFFFF07000FULL; - constexpr u64 mask =0xFFFFFFFFFF7FFFFFULL; + constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 2bf535928..0ac83fcf0 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -339,7 +339,8 @@ struct MetaImage { }; /// Parameters that modify an operation but are not part of any particular operand -using Meta = std::variant; +using Meta = + std::variant; /// Holds any kind of operation that can be done in the IR class OperationNode final { diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 959504cd3..ee9f00fa6 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -447,70 +447,70 @@ enum class SurfaceCompression : u8 { }; inline constexpr std::array compression_type_table = {{ - SurfaceCompression::None, // ABGR8U - SurfaceCompression::None, // ABGR8S - SurfaceCompression::None, // ABGR8UI - SurfaceCompression::None, // B5G6R5U - SurfaceCompression::None, // A2B10G10R10U - SurfaceCompression::None, // A1B5G5R5U - SurfaceCompression::None, // R8U 
- SurfaceCompression::None, // R8UI - SurfaceCompression::None, // RGBA16F - SurfaceCompression::None, // RGBA16U - SurfaceCompression::None, // RGBA16UI - SurfaceCompression::None, // R11FG11FB10F - SurfaceCompression::None, // RGBA32UI - SurfaceCompression::Compressed, // DXT1 - SurfaceCompression::Compressed, // DXT23 - SurfaceCompression::Compressed, // DXT45 - SurfaceCompression::Compressed, // DXN1 - SurfaceCompression::Compressed, // DXN2UNORM - SurfaceCompression::Compressed, // DXN2SNORM - SurfaceCompression::Compressed, // BC7U - SurfaceCompression::Compressed, // BC6H_UF16 - SurfaceCompression::Compressed, // BC6H_SF16 - SurfaceCompression::Converted, // ASTC_2D_4X4 - SurfaceCompression::None, // BGRA8 - SurfaceCompression::None, // RGBA32F - SurfaceCompression::None, // RG32F - SurfaceCompression::None, // R32F - SurfaceCompression::None, // R16F - SurfaceCompression::None, // R16U - SurfaceCompression::None, // R16S - SurfaceCompression::None, // R16UI - SurfaceCompression::None, // R16I - SurfaceCompression::None, // RG16 - SurfaceCompression::None, // RG16F - SurfaceCompression::None, // RG16UI - SurfaceCompression::None, // RG16I - SurfaceCompression::None, // RG16S - SurfaceCompression::None, // RGB32F - SurfaceCompression::None, // RGBA8_SRGB - SurfaceCompression::None, // RG8U - SurfaceCompression::None, // RG8S - SurfaceCompression::None, // RG32UI - SurfaceCompression::None, // R32UI - SurfaceCompression::Converted, // ASTC_2D_8X8 - SurfaceCompression::Converted, // ASTC_2D_8X5 - SurfaceCompression::Converted, // ASTC_2D_5X4 - SurfaceCompression::None, // BGRA8_SRGB - SurfaceCompression::Compressed, // DXT1_SRGB - SurfaceCompression::Compressed, // DXT23_SRGB - SurfaceCompression::Compressed, // DXT45_SRGB - SurfaceCompression::Compressed, // BC7U_SRGB - SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB - SurfaceCompression::Converted, // 
ASTC_2D_5X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X5 - SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X8 - SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB - SurfaceCompression::None, // Z32F - SurfaceCompression::None, // Z16 - SurfaceCompression::None, // Z24S8 - SurfaceCompression::Rearranged, // S8Z24 - SurfaceCompression::None, // Z32FS8 + SurfaceCompression::None, // ABGR8U + SurfaceCompression::None, // ABGR8S + SurfaceCompression::None, // ABGR8UI + SurfaceCompression::None, // B5G6R5U + SurfaceCompression::None, // A2B10G10R10U + SurfaceCompression::None, // A1B5G5R5U + SurfaceCompression::None, // R8U + SurfaceCompression::None, // R8UI + SurfaceCompression::None, // RGBA16F + SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, // RGBA16UI + SurfaceCompression::None, // R11FG11FB10F + SurfaceCompression::None, // RGBA32UI + SurfaceCompression::Compressed, // DXT1 + SurfaceCompression::Compressed, // DXT23 + SurfaceCompression::Compressed, // DXT45 + SurfaceCompression::Compressed, // DXN1 + SurfaceCompression::Compressed, // DXN2UNORM + SurfaceCompression::Compressed, // DXN2SNORM + SurfaceCompression::Compressed, // BC7U + SurfaceCompression::Compressed, // BC6H_UF16 + SurfaceCompression::Compressed, // BC6H_SF16 + SurfaceCompression::Converted, // ASTC_2D_4X4 + SurfaceCompression::None, // BGRA8 + SurfaceCompression::None, // RGBA32F + SurfaceCompression::None, // RG32F + SurfaceCompression::None, // R32F + SurfaceCompression::None, // R16F + SurfaceCompression::None, // R16U + SurfaceCompression::None, // R16S + SurfaceCompression::None, // R16UI + SurfaceCompression::None, // R16I + SurfaceCompression::None, // RG16 + SurfaceCompression::None, // RG16F + SurfaceCompression::None, // RG16UI + SurfaceCompression::None, // RG16I + SurfaceCompression::None, // RG16S + SurfaceCompression::None, // RGB32F + SurfaceCompression::None, // RGBA8_SRGB + SurfaceCompression::None, // 
RG8U + SurfaceCompression::None, // RG8S + SurfaceCompression::None, // RG32UI + SurfaceCompression::None, // R32UI + SurfaceCompression::Converted, // ASTC_2D_8X8 + SurfaceCompression::Converted, // ASTC_2D_8X5 + SurfaceCompression::Converted, // ASTC_2D_5X4 + SurfaceCompression::None, // BGRA8_SRGB + SurfaceCompression::Compressed, // DXT1_SRGB + SurfaceCompression::Compressed, // DXT23_SRGB + SurfaceCompression::Compressed, // DXT45_SRGB + SurfaceCompression::Compressed, // BC7U_SRGB + SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB + SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB + SurfaceCompression::Converted, // ASTC_2D_5X5 + SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB + SurfaceCompression::Converted, // ASTC_2D_10X8 + SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB + SurfaceCompression::None, // Z32F + SurfaceCompression::None, // Z16 + SurfaceCompression::None, // Z24S8 + SurfaceCompression::Rearranged, // S8Z24 + SurfaceCompression::None, // Z32FS8 }}; static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 97bf9ad7a..051014c6a 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -101,7 +101,7 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) std::optional> SurfaceBaseImpl::GetLayerMipmap( const GPUVAddr candidate_gpu_addr) const { if (gpu_addr == candidate_gpu_addr) { - return {{0,0}}; + return {{0, 0}}; } if (candidate_gpu_addr < gpu_addr) { return {}; diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 662221adc..252b18538 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -254,7 +254,8 
@@ public: } return {}; } else { - return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); + return GetView( + ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d86ddeb76..b720856f2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -18,6 +18,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 9a2f4198a..7e8295944 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -256,7 +256,8 @@ std::vector UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, } void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit) { + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 block_height_bit) { const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / gob_size_x}; @@ -278,8 +279,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height_bit, - u32 offset_x, u32 offset_y) { + u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; for (u32 line = 0; line < subrect_height; ++line) { const u32 y2 = line + 
offset_y; From de982deb25f685dd8fa67680fb8dd6c627f70859 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:47:09 -0300 Subject: [PATCH 087/113] common/alignment: Address feedback --- src/common/alignment.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/alignment.h b/src/common/alignment.h index 3379a6967..617b14d9b 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -20,8 +20,9 @@ constexpr T AlignDown(T value, std::size_t size) { } template -constexpr T AlignBits(T value, T align) { - return (value + ((1 << align) - 1)) >> align << align; +constexpr T AlignBits(T value, std::size_t align) { + static_assert(std::is_unsigned_v, "T must be an unsigned value."); + return static_cast((value + ((1ULL << align) - 1)) >> align << align); } template From 4d63f9794575f6180fbecb1d7dce7fcd3875439f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:51:02 -0300 Subject: [PATCH 088/113] shader_bytecode: Include missing --- src/video_core/engines/shader_bytecode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 54a1a04f9..404d4f5aa 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include From b8b05a484a16372b481e1820601cc823543fc58a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:56:38 -0300 Subject: [PATCH 089/113] gl_shader_decompiler: Address feedback --- .../renderer_opengl/gl_shader_decompiler.cpp | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ca04d8618..5f2f1510c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -459,7 +459,7 
@@ private: for (const auto& sampler : samplers) { const std::string name{GetSampler(sampler)}; const std::string description{"layout (binding = SAMPLER_BINDING_" + - std::to_string(sampler.GetIndex()) + ") uniform "}; + std::to_string(sampler.GetIndex()) + ") uniform"}; std::string sampler_type = [&]() { switch (sampler.GetType()) { case Tegra::Shader::TextureType::Texture1D: @@ -488,13 +488,13 @@ private: // preprocessor flag and use one or the other from the GPU state. This has to be // done because shaders don't have enough information to determine the texture type. EmitIfdefIsBuffer(sampler); - code.AddLine(description + "samplerBuffer " + name + ';'); + code.AddLine("{} samplerBuffer {};", description, name); code.AddLine("#else"); - code.AddLine(description + sampler_type + ' ' + name + ';'); + code.AddLine("{} {} {};", description, sampler_type, name); code.AddLine("#endif"); } else { // The other texture types (2D, 3D and cubes) don't have this issue. - code.AddLine(description + sampler_type + ' ' + name + ';'); + code.AddLine("{} {} {};", description, sampler_type, name); } } if (!samplers.empty()) { @@ -557,12 +557,13 @@ private: return "image1D"; } }(); - code.AddLine("layout (binding = IMAGE_BINDING_" + std::to_string(image.GetIndex()) + - ") coherent volatile writeonly uniform " + image_type + ' ' + - GetImage(image) + ';'); + code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform " + "{} {};", + image.GetIndex(), image_type, GetImage(image)); } - if (!images.empty()) + if (!images.empty()) { code.AddNewLine(); + } } void VisitBlock(const NodeBlock& bb) { @@ -1504,9 +1505,9 @@ private: const std::string tmp{code.GenerateTemporary()}; EmitIfdefIsBuffer(meta->sampler); - code.AddLine("float " + tmp + " = " + expr_buffer + ';'); + code.AddLine("float {} = {};", tmp, expr_buffer); code.AddLine("#else"); - code.AddLine("float " + tmp + " = " + expr + ';'); + code.AddLine("float {} = {};", tmp, expr); code.AddLine("#endif"); 
return tmp; @@ -1860,7 +1861,7 @@ private: } void EmitIfdefIsBuffer(const Sampler& sampler) { - code.AddLine(fmt::format("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex())); + code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); } std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { From 0b6df52109bc3d3d9161732131ffa29e6a51d976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 01:58:44 -0300 Subject: [PATCH 090/113] gl_shader_disk_cache: Address feedback --- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 7 +++++-- src/video_core/renderer_opengl/gl_shader_disk_cache.h | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 922c72590..10688397b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -285,18 +285,20 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(code_size)) { return {}; } - std::vector code(code_size); + + std::string code(code_size, '\0'); if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } ShaderDiskCacheDecompiled entry; - entry.code = std::string(reinterpret_cast(code.data()), code_size); + entry.code = std::move(code); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { return {}; } + for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; @@ -312,6 +314,7 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn if (!LoadObjectFromPrecompiled(samplers_count)) { return {}; } + for (u32 i = 0; i < samplers_count; ++i) { u64 offset{}; u64 index{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index aa12ffc71..4f296dda6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h 
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -93,7 +94,7 @@ namespace std { template <> struct hash { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { return static_cast(bindings.cbuf) ^ (static_cast(bindings.gmem) << 8) ^ (static_cast(bindings.sampler) << 16) ^ @@ -103,7 +104,7 @@ struct hash { template <> struct hash { - std::size_t operator()(const OpenGL::ProgramVariant& variant) const { + std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { return std::hash()(variant.base_bindings) ^ std::hash()(variant.texture_buffer_usage) ^ (static_cast(variant.primitive_mode) << 6); From 4b2ff1e00e5073cbb17456fea781f0c12f019f7f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:01:44 -0300 Subject: [PATCH 091/113] gl_texture_cache: Address some feedback --- src/video_core/renderer_opengl/gl_texture_cache.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index dda3bf715..cdc44a60c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include @@ -34,7 +36,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase { friend CachedSurfaceView; public: - explicit CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params); + explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); void UploadTexture(std::vector& staging_buffer) override; @@ -52,7 +54,7 @@ protected: void DecorateSurfaceName(); View CreateView(const ViewParams& view_key) override; - View CreateViewInner(const ViewParams& view_key, const bool is_proxy); + View CreateViewInner(const 
ViewParams& view_key, bool is_proxy); private: void UploadTextureMipmap(u32 level, std::vector& staging_buffer); From 4504302abcccfe5116bb5fed55e62472dc983ec5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:02:35 -0300 Subject: [PATCH 092/113] renderer_opengl/utils: Remove unused includes and unused forward declaration --- src/video_core/renderer_opengl/utils.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 77e8d53ba..4a752f3b4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -8,13 +8,9 @@ #include #include #include "common/common_types.h" -#include "common/math_util.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -class CachedSurfaceView; - class BindBuffersRangePushBuffer { public: BindBuffersRangePushBuffer(GLenum target); From 10a83653eed5a281cfe2aa8cd7615ba6d185526e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:05:05 -0300 Subject: [PATCH 093/113] decode/texture: Address feedback --- src/video_core/shader/decode/texture.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index b22831c64..cb480be9b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -597,6 +597,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { const Node array_register{is_array ? 
GetRegister(gpr8_cursor++) : nullptr}; std::vector coords; + coords.reserve(coord_count); for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(gpr8_cursor++)); } From 75de730e285d30779596c5fe6b1bbd265c4e54f8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:07:11 -0300 Subject: [PATCH 094/113] video_core/surface: Address feedback --- src/video_core/surface.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/surface.h b/src/video_core/surface.h index ee9f00fa6..bfdbc3b81 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -123,7 +123,7 @@ enum class SurfaceTarget { TextureCubeArray, }; -inline constexpr std::array compression_factor_shift_table = {{ +constexpr std::array compression_factor_shift_table = {{ 0, // ABGR8U 0, // ABGR8S 0, // ABGR8UI @@ -446,7 +446,7 @@ enum class SurfaceCompression : u8 { Rearranged = 3, }; -inline constexpr std::array compression_type_table = {{ +constexpr std::array compression_type_table = {{ SurfaceCompression::None, // ABGR8U SurfaceCompression::None, // ABGR8S SurfaceCompression::None, // ABGR8UI From 0837290992e0873f270cd032d2d0e5b91b643267 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:08:52 -0300 Subject: [PATCH 095/113] texture_cache/surface_base: Address feedback --- src/video_core/texture_cache/surface_base.cpp | 4 ++++ src/video_core/texture_cache/surface_base.h | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 051014c6a..7a0fdb19b 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -19,6 +19,10 @@ using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; using VideoCore::Surface::SurfaceCompression; +StagingCache::StagingCache() = default; + +StagingCache::~StagingCache() = default; + 
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 252b18538..d632630ce 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -40,13 +40,17 @@ enum class MatchTopologyResult : u32 { class StagingCache { public: - StagingCache() {} - ~StagingCache() = default; + explicit StagingCache(); + ~StagingCache(); std::vector& GetBuffer(std::size_t index) { return staging_buffer[index]; } + const std::vector& GetBuffer(std::size_t index) const { + return staging_buffer[index]; + } + void SetSize(std::size_t size) { staging_buffer.resize(size); } From 34841a41c308aa1336f71fbce3006302452302d1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:09:56 -0300 Subject: [PATCH 096/113] texture_cache/surface_view: Address feedback --- src/video_core/texture_cache/surface_view.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index 1ef4509ce..04ca5639b 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -44,7 +44,6 @@ struct ViewParams { class ViewBase { public: ViewBase(const ViewParams& params) : params{params} {} - ~ViewBase() = default; const ViewParams& GetViewParams() const { return params; From e723441e3716eb65f4d6fdc8cdaf76da752fe697 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:13:55 -0300 Subject: [PATCH 097/113] gl_texture_cache: Explicitly add indirect include --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp 
index 892f286b7..c5e3b7a94 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -13,6 +13,7 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/textures/convert.h" #include "video_core/textures/texture.h" From 7565389700a5741460a118d1fcc5e14fccb4b413 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 Jun 2019 02:15:57 -0300 Subject: [PATCH 098/113] texture_cache: Include "core/core.h" --- src/video_core/texture_cache/texture_cache.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b720856f2..a91b2a220 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -17,6 +17,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" +#include "core/core.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/fermi_2d.h" @@ -30,10 +31,6 @@ #include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_view.h" -namespace Core { -class System; -} - namespace Tegra::Texture { struct FullTextureInfo; } From 58c8a44e7aa18f768db39a36870d8b279257e1d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:26:00 -0400 Subject: [PATCH 099/113] texture_cache: Query MemoryManager from the system --- src/video_core/gpu.cpp | 1 - src/video_core/rasterizer_interface.h | 2 -- .../renderer_opengl/gl_rasterizer.cpp | 4 ---- src/video_core/renderer_opengl/gl_rasterizer.h | 2 -- src/video_core/texture_cache/texture_cache.h | 18 +++++++----------- 5 files changed, 7 insertions(+), 20 deletions(-) diff --git 
a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 619e06a0e..52706505b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -32,7 +32,6 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique(rasterizer); - rasterizer.InitMemoryMananger(*memory_manager); dma_pusher = std::make_unique(*this); maxwell_3d = std::make_unique(system, rasterizer, *memory_manager); fermi_2d = std::make_unique(rasterizer, *memory_manager); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6007e8c2e..5ee4f8e8e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -28,8 +28,6 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - virtual void InitMemoryMananger(Tegra::MemoryManager& memory_manager) = 0; - /// Draw the current batch of vertex arrays virtual void DrawArrays() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c9f3a35e6..f45a3c5ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,10 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind RasterizerOpenGL::~RasterizerOpenGL() {} -void RasterizerOpenGL::InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - texture_cache.InitMemoryMananger(memory_manager); -} - void RasterizerOpenGL::CheckExtensions() { if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { LOG_WARNING( diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 33582ac42..bf67e3a70 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h 
@@ -57,8 +57,6 @@ public: ScreenInfo& info); ~RasterizerOpenGL() override; - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) override; - void DrawArrays() override; void Clear() override; void FlushAll() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a91b2a220..1516fcea3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,10 +52,6 @@ class TextureCache { using IntervalType = typename IntervalMap::interval_type; public: - void InitMemoryMananger(Tegra::MemoryManager& memory_manager) { - this->memory_manager = &memory_manager; - } - void InvalidateRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; @@ -278,15 +274,16 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(memory_manager->GetPointer(gpu_addr)); + const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); - const std::optional cpu_addr = memory_manager->GpuToCpuAddress(gpu_addr); + const std::optional cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - bool continuouty = memory_manager->IsBlockContinuous(gpu_addr, size); + bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); surface->MarkAsContinuous(continuouty); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); @@ -552,7 +549,7 @@ private: std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{memory_manager->GetPointer(gpu_addr)}; + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto 
cache_addr{ToCacheAddr(host_ptr)}; // Step 0: guarantee a valid surface @@ -693,7 +690,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(*memory_manager, staging_cache); + surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -704,7 +701,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(*memory_manager, staging_cache); + surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); surface->MarkAsModified(false, Tick()); } @@ -778,7 +775,6 @@ private: }; VideoCore::RasterizerInterface& rasterizer; - Tegra::MemoryManager* memory_manager; u64 ticks{}; From 36665ce0b249a31305a90d1f628b6a4123d81517 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:32:04 -0400 Subject: [PATCH 100/113] gl_resource_manager: Correct MakeStreamCopy --- src/video_core/renderer_opengl/gl_resource_manager.cpp | 3 +-- src/video_core/renderer_opengl/gl_resource_manager.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 9f840a42e..a5f83a8f7 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -149,8 +149,7 @@ void OGLBuffer::Release() { } void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { - if (handle == 0 || buffer_size == 0) - return; + ASSERT_OR_EXECUTE((handle == 0 || buffer_size == 0), { return; }); glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); } diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index b2aa558a1..3a85a1d4c 100644 --- 
a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -186,7 +186,7 @@ public: /// Deletes the internal OpenGL resource void Release(); - // Converts the buffer into a persistant storage buffer + // Converts the buffer into a stream copy buffer with a fixed size void MakeStreamCopy(std::size_t buffer_size); GLuint handle = 0; From 18d24fbdd00ee2d05cd02c2794d56b396118f9d5 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:40:08 -0400 Subject: [PATCH 101/113] gl_texture_cache: Corrections and fixes --- .../renderer_opengl/gl_texture_cache.cpp | 16 ++++++---------- .../renderer_opengl/gl_texture_cache.h | 6 +++--- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c5e3b7a94..d539bf07c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -244,7 +244,6 @@ CachedSurface::~CachedSurface() { void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); - // TODO(Rodrigo): Optimize alignment SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -272,7 +271,6 @@ void CachedSurface::UploadTexture(std::vector& staging_buffer) { } void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { - // TODO(Rodrigo): Optimize alignment glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); @@ -421,10 +419,10 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou OGLTextureView CachedSurfaceView::CreateTextureView() const { const auto& owner_params = surface.GetSurfaceParams(); - OGLTextureView tv; - tv.Create(); + OGLTextureView texture_view; 
+ texture_view.Create(); - const GLuint handle{tv.handle}; + const GLuint handle{texture_view.handle}; const FormatTuple& tuple{ GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; @@ -433,7 +431,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { ApplyTextureDefaults(owner_params, handle); - return tv; + return texture_view; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, @@ -529,6 +527,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { const auto& src_params = src_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams(); + UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); @@ -591,10 +590,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) } GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - if (buffer_size < 0) { - UNREACHABLE(); - return 0; - } + ASSERT_OR_EXECUTE(buffer_size <= 0, { return 0; }); const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); OGLBuffer& cp = copy_pbo_cache[l2]; if (cp.handle == 0) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cdc44a60c..8da81dba3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -90,17 +90,17 @@ public: } u32 GetWidth() const { - const auto owner_params = GetSurfaceParams(); + const auto& owner_params = GetSurfaceParams(); return owner_params.GetMipWidth(params.base_level); } u32 GetHeight() const { - const auto owner_params = GetSurfaceParams(); + const auto& owner_params = GetSurfaceParams(); return 
owner_params.GetMipHeight(params.base_level); } u32 GetDepth() const { - const auto owner_params = GetSurfaceParams(); + const auto& owner_params = GetSurfaceParams(); return owner_params.GetMipDepth(params.base_level); } From fb234560b060e528d66a77815330766e5aa88594 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 17:42:50 -0400 Subject: [PATCH 102/113] copy_params: use constexpr for constructor --- src/video_core/texture_cache/copy_params.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h index 8cf010142..9c21a0649 100644 --- a/src/video_core/texture_cache/copy_params.h +++ b/src/video_core/texture_cache/copy_params.h @@ -9,13 +9,14 @@ namespace VideoCommon { struct CopyParams { - CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, u32 dest_z, - u32 source_level, u32 dest_level, u32 width, u32 height, u32 depth) + constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, + u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, + u32 depth) : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, dest_level{dest_level}, width{width}, height{height}, depth{depth} {} - CopyParams(u32 width, u32 height, u32 depth, u32 level) + constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, dest_level{level}, width{width}, height{height}, depth{depth} {} From c0abc7124d6ecd17f9da5ee5b3de9cb3dbf3ce1f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:03:25 -0400 Subject: [PATCH 103/113] surface_params: Corrections, asserts and documentation. 
--- .../texture_cache/surface_params.cpp | 4 +- src/video_core/texture_cache/surface_params.h | 97 +++++++++++-------- 2 files changed, 58 insertions(+), 43 deletions(-) diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index a670fc1a9..340ed2ca0 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -269,11 +269,11 @@ std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { } std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { - constexpr std::size_t rgb8_bpp = 4ULL; + constexpr std::size_t rgba8_bpp = 4ULL; const std::size_t width_t = GetMipWidth(level); const std::size_t height_t = GetMipHeight(level); const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); - return width_t * height_t * depth_t * rgb8_bpp; + return width_t * height_t * depth_t * rgba8_bpp; } std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index c51e174cd..4dfb882f0 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,25 +95,21 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; + /// returns the best possible row/pitch alignment for the surface. u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 
4 : GetBytesPerPixel(); return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); } - // Helper used for out of class size calculations - static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, - const u32 block_depth) { - return Common::AlignBits(out_size, - Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); - } - /// Returns the offset in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level. std::size_t GetHostMipmapLevelOffset(u32 level) const; + /// Returns the offset in bytes in host memory (linear) of a given mipmap level + // for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -139,40 +135,7 @@ public: return GetInnerMipmapMemorySize(level, true, false); } - static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); - const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); - return (width * bw2 + bw1 - 1) / bw1; - } - - static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, - VideoCore::Surface::PixelFormat pixel_format_to) { - const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); - const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); - return (height * bh2 + bh1 - 1) / bh1; - } - - // this finds the maximun possible width between 2 2D layers of different formats - static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bw1 = src_params.GetDefaultBlockWidth(); - const u32 bw2 = dst_params.GetDefaultBlockWidth(); 
- const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; - const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; - return std::min(t_src_width, t_dst_width); - } - - // this finds the maximun possible height between 2 2D layers of different formats - static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, - const u32 src_level, const u32 dst_level) { - const u32 bh1 = src_params.GetDefaultBlockHeight(); - const u32 bh2 = dst_params.GetDefaultBlockHeight(); - const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; - const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; - return std::min(t_src_height, t_dst_height); - } - + /// Returns the max possible mipmap that the texture can have in host gpu u32 MaxPossibleMipmap() const { const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; @@ -182,6 +145,7 @@ public: return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); } + /// Returns if the guest surface is a compressed surface. bool IsCompressed() const { return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; } @@ -212,16 +176,67 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } + /// Returns how the compression should be handled for this texture. Values + /// are: None(no compression), Compressed(texture is compressed), + /// Converted(texture is converted before upload/ after download), + /// Rearranged(texture is swizzled before upload/after download). SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } + /// Returns is the surface is a TextureBuffer type of surface. bool IsBuffer() const { return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; } + /// Returns the debug name of the texture for use in graphic debuggers. 
std::string TargetName() const; + // Helper used for out of class size calculations + static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, + const u32 block_depth) { + return Common::AlignBits(out_size, + Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); + } + + /// Converts a width from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); + const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); + return (width * bw2 + bw1 - 1) / bw1; + } + + /// Converts a height from a type of surface into another. This helps represent the + /// equivalent value between compressed/non-compressed textures. + static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, + VideoCore::Surface::PixelFormat pixel_format_to) { + const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); + const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); + return (height * bh2 + bh1 - 1) / bh1; + } + + // Finds the maximun possible width between 2 2D layers of different formats + static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bw1 = src_params.GetDefaultBlockWidth(); + const u32 bw2 = dst_params.GetDefaultBlockWidth(); + const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; + const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; + return std::min(t_src_width, t_dst_width); + } + + // Finds the maximun possible height between 2 2D layers of different formats + static u32 IntersectHeight(const SurfaceParams& src_params, const 
SurfaceParams& dst_params, + const u32 src_level, const u32 dst_level) { + const u32 bh1 = src_params.GetDefaultBlockHeight(); + const u32 bh2 = dst_params.GetDefaultBlockHeight(); + const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; + const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; + return std::min(t_src_height, t_dst_height); + } + bool is_tiled; bool srgb_conversion; bool is_layered; From 88bc39374fd7cffd2864229ae60bdab3aebb37ea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 18:36:19 -0400 Subject: [PATCH 104/113] texture_cache: Corrections, documentation and asserts --- src/video_core/texture_cache/texture_cache.h | 84 ++++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1516fcea3..fb6ca41ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -60,10 +60,10 @@ public: } } - /** + /*** * `Guard` guarantees that rendertargets don't unregister themselves if the * collide. Protection is currently only done on 3D slices. 
- **/ + ***/ void GuardRenderTargets(bool new_guard) { guard_render_targets = new_guard; } @@ -191,19 +191,21 @@ public: } void SetEmptyDepthBuffer() { - if (depth_buffer.target != nullptr) { - depth_buffer.target->MarkAsRenderTarget(false); - depth_buffer.target = nullptr; - depth_buffer.view = nullptr; + if (depth_buffer.target == nullptr) { + return; } + depth_buffer.target->MarkAsRenderTarget(false); + depth_buffer.target = nullptr; + depth_buffer.view = nullptr; } void SetEmptyColorBuffer(std::size_t index) { - if (render_targets[index].target != nullptr) { - render_targets[index].target->MarkAsRenderTarget(false); - render_targets[index].target = nullptr; - render_targets[index].view = nullptr; + if (render_targets[index].target == nullptr) { + return; } + render_targets[index].target->MarkAsRenderTarget(false); + render_targets[index].target = nullptr; + render_targets[index].view = nullptr; } void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, @@ -283,8 +285,8 @@ protected: gpu_addr); return; } - bool continuouty = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuouty); + const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); + surface->MarkAsContinuous(continuous); surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); @@ -381,8 +383,8 @@ private: const SurfaceParams& params, const GPUVAddr gpu_addr, const bool preserve_contents, const MatchTopologyResult untopological) { - const bool do_load = Settings::values.use_accurate_gpu_emulation && preserve_contents; - for (auto surface : overlaps) { + const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; + for (auto& surface : overlaps) { Unregister(surface); } switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { @@ -394,7 +396,7 @@ private: [](const TSurface& a, const TSurface& b) -> bool { return a->GetModificationTick() 
< b->GetModificationTick(); }); - for (auto surface : overlaps) { + for (auto& surface : overlaps) { FlushSurface(surface); } return InitializeSurface(gpu_addr, params, preserve_contents); @@ -460,19 +462,19 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - auto match_check = ([&]() -> std::pair { + const auto match_check = ([&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, current_surface->EmplaceOverview(params)}; }); - if (is_mirage) { - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { - return match_check(); - } - return RebuildSurface(current_surface, params, is_render); + if (!is_mirage) { + return match_check(); } - return match_check(); + if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + return match_check(); + } + return RebuildSurface(current_surface, params, is_render); } /** @@ -493,7 +495,7 @@ private: bool modified = false; TSurface new_surface = GetUncachedSurface(gpu_addr, params); u32 passed_tests = 0; - for (auto surface : overlaps) { + for (auto& surface : overlaps) { const SurfaceParams& src_params = surface->GetSurfaceParams(); if (src_params.is_layered || src_params.num_levels > 1) { // We send this cases to recycle as they are more complex to handle @@ -504,8 +506,7 @@ private: if (!mipmap_layer) { continue; } - const u32 layer{mipmap_layer->first}; - const u32 mipmap{mipmap_layer->second}; + const auto [layer, mipmap] = *mipmap_layer; if (new_surface->GetMipmapSize(mipmap) != candidate_size) { continue; } @@ -519,7 +520,7 @@ private: } if (passed_tests == 0) { return {}; - // In Accurate GPU all test should pass, else we recycle + // In Accurate GPU all tests should pass, else we recycle } else if 
(Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { return {}; } @@ -548,7 +549,6 @@ private: **/ std::pair GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; @@ -570,17 +570,17 @@ private: auto iter = l1_cache.find(cache_addr); if (iter != l1_cache.end()) { TSurface& current_surface = iter->second; - auto topological_result = current_surface->MatchesTopology(params); + const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { std::vector overlaps{current_surface}; return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } - MatchStructureResult s_result = current_surface->MatchesStructure(params); - if (s_result != MatchStructureResult::None && + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && (params.target != SurfaceTarget::Texture3D || current_surface->MatchTarget(params.target))) { - if (s_result == MatchStructureResult::FullMatch) { + if (struct_result == MatchStructureResult::FullMatch) { return ManageStructuralMatch(current_surface, params, is_render); } else { return RebuildSurface(current_surface, params, is_render); @@ -602,8 +602,8 @@ private: // Now we need to figure the relationship between the texture and its overlaps // we do a topological test to ensure we can find some relationship. 
If it fails // inmediatly recycle the texture - for (auto surface : overlaps) { - auto topological_result = surface->MatchesTopology(params); + for (const auto& surface : overlaps) { + const auto topological_result = surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); @@ -620,7 +620,7 @@ private: if (current_surface->GetGpuAddr() == gpu_addr) { std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -630,7 +630,7 @@ private: // Now we check if the candidate is a mipmap/layer of the overlap std::optional view = current_surface->EmplaceView(params, gpu_addr, candidate_size); - if (view.has_value()) { + if (view) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); if (is_mirage) { // On a mirage view, we need to recreate the surface under this new view @@ -669,7 +669,7 @@ private: // using the overlaps. If a single overlap fails, this will fail. 
std::optional> view = TryReconstructSurface(overlaps, params, gpu_addr); - if (view.has_value()) { + if (view) { return *view; } } @@ -738,16 +738,16 @@ private: std::vector surfaces; while (start <= end) { std::vector& list = registry[start]; - for (auto& s : list) { - if (!s->IsPicked() && s->Overlaps(cache_addr, cache_addr_end)) { - s->MarkAsPicked(true); - surfaces.push_back(s); + for (auto& surface : list) { + if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + surface->MarkAsPicked(true); + surfaces.push_back(surface); } } start++; } - for (auto& s : surfaces) { - s->MarkAsPicked(false); + for (auto& surface : surfaces) { + surface->MarkAsPicked(false); } return surfaces; } From 5aeabd9a1777de5d0d4ab540edbda325c8fec64e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 19:26:59 -0400 Subject: [PATCH 105/113] gl_texture_cache: Correct asserts --- src/video_core/renderer_opengl/gl_resource_manager.cpp | 2 +- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index a5f83a8f7..5c96c1d46 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -149,7 +149,7 @@ void OGLBuffer::Release() { } void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { - ASSERT_OR_EXECUTE((handle == 0 || buffer_size == 0), { return; }); + ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d539bf07c..672f26f37 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -590,7 +590,7 @@ void 
TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) } GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { - ASSERT_OR_EXECUTE(buffer_size <= 0, { return 0; }); + ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); const u32 l2 = Common::Log2Ceil64(static_cast(buffer_size)); OGLBuffer& cp = copy_pbo_cache[l2]; if (cp.handle == 0) { From 223ca8075399463e51d4afea1adb0c5b6fba8588 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 19:35:08 -0400 Subject: [PATCH 106/113] texture_cache: Correct variable naming. --- src/video_core/texture_cache/texture_cache.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fb6ca41ff..b5b0e91ef 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -225,9 +225,9 @@ public: } const CacheAddr page = cache_addr >> registry_page_bits; std::vector& list = registry[page]; - for (auto& s : list) { - if (s->GetCacheAddr() == cache_addr) { - return s; + for (auto& surface : list) { + if (surface->GetCacheAddr() == cache_addr) { + return surface; } } return nullptr; From 3f3c3ca5f96fd5742524703f20b531338fa2e5f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 17:29:39 -0300 Subject: [PATCH 107/113] texture_cache: Address feedback --- .../renderer_opengl/gl_texture_cache.cpp | 5 +---- .../renderer_opengl/gl_texture_cache.h | 15 ------------- src/video_core/texture_cache/surface_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 21 ++++++++++--------- 4 files changed, 13 insertions(+), 30 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 672f26f37..97014a676 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -236,10 +236,7 @@ 
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param true); } -CachedSurface::~CachedSurface() { - views.clear(); - main_view = nullptr; -} +CachedSurface::~CachedSurface() = default; void CachedSurface::DownloadTexture(std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8da81dba3..d4c6e9a30 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -89,21 +89,6 @@ public: return surface.GetSurfaceParams(); } - u32 GetWidth() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipWidth(params.base_level); - } - - u32 GetHeight() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipHeight(params.base_level); - } - - u32 GetDepth() const { - const auto& owner_params = GetSurfaceParams(); - return owner_params.GetMipDepth(params.base_level); - } - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, Tegra::Texture::SwizzleSource z_source, diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d632630ce..eaed6545d 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -294,8 +294,8 @@ protected: virtual TView CreateView(const ViewParams& view_key) = 0; - std::unordered_map views; TView main_view; + std::unordered_map views; private: TView GetView(const ViewParams& key) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b5b0e91ef..9436a5ff2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -79,10 +79,9 @@ public: if (surfaces.empty()) { return; } - std::sort(surfaces.begin(), surfaces.end(), - [](const 
TSurface& a, const TSurface& b) -> bool { - return a->GetModificationTick() < b->GetModificationTick(); - }); + std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { + return a->GetModificationTick() < b->GetModificationTick(); + }); for (const auto& surface : surfaces) { FlushSurface(surface); } @@ -181,13 +180,15 @@ public: } void MarkColorBufferInUse(std::size_t index) { - if (render_targets[index].target) - render_targets[index].target->MarkAsModified(true, Tick()); + if (auto& render_target = render_targets[index].target) { + render_target->MarkAsModified(true, Tick()); + } } void MarkDepthBufferInUse() { - if (depth_buffer.target) + if (depth_buffer.target) { depth_buffer.target->MarkAsModified(true, Tick()); + } } void SetEmptyDepthBuffer() { @@ -245,11 +246,11 @@ protected: } SetEmptyDepthBuffer(); staging_cache.SetSize(2); - auto make_siblings = ([this](PixelFormat a, PixelFormat b) { + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { siblings_table[a] = b; siblings_table[b] = a; - }); - const u32 max_formats = static_cast(PixelFormat::Max); + }; + const auto max_formats = static_cast(PixelFormat::Max); siblings_table.reserve(max_formats); for (u32 i = 0; i < max_formats; i++) { siblings_table[static_cast(i)] = PixelFormat::Invalid; From dd9ace502bfd2239ceddad8c5c41baf0e10e2144 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 18:54:13 -0300 Subject: [PATCH 108/113] texture_cache: Use std::array for siblings_table --- src/video_core/texture_cache/texture_cache.h | 23 +++++++++++--------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9436a5ff2..9fcf87744 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include #include @@ -244,20 +246,19 @@ protected: for 
(std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } + SetEmptyDepthBuffer(); staging_cache.SetSize(2); + const auto make_siblings = [this](PixelFormat a, PixelFormat b) { - siblings_table[a] = b; - siblings_table[b] = a; + siblings_table[static_cast(a)] = b; + siblings_table[static_cast(b)] = a; }; - const auto max_formats = static_cast(PixelFormat::Max); - siblings_table.reserve(max_formats); - for (u32 i = 0; i < max_formats; i++) { - siblings_table[static_cast(i)] = PixelFormat::Invalid; - } + std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); make_siblings(PixelFormat::Z16, PixelFormat::R16U); make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); + sampled_textures_stack.resize(64); } @@ -426,7 +427,8 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[cr_params.pixel_format] == params.pixel_format) { + siblings_table[static_cast(cr_params.pixel_format)] == + params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -472,7 +474,8 @@ private: if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[current_surface->GetFormat()] == params.pixel_format) { + if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == + params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -786,7 +789,7 @@ private: // The siblings table is for formats that can inter exchange with one another // without causing issues. This is only valid when a conflict occurs on a non // rendering use. 
- std::unordered_map siblings_table; + std::array(PixelFormat::Max)> siblings_table; // The internal Cache is different for the Texture Cache. It's based on buckets // of 1MB. This fits better for the purpose of this cache as textures are normaly From f6f1a8f26a302dc33df635625c490f0d65880059 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 19:52:37 -0300 Subject: [PATCH 109/113] texture_cache: Style changes --- src/video_core/surface.h | 18 +++++++++--------- src/video_core/texture_cache/surface_params.h | 9 +++------ src/video_core/texture_cache/texture_cache.h | 3 +-- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/src/video_core/surface.h b/src/video_core/surface.h index bfdbc3b81..83f31c12c 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -439,11 +439,11 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } -enum class SurfaceCompression : u8 { - None = 0, - Compressed = 1, - Converted = 2, - Rearranged = 3, +enum class SurfaceCompression { + None, // Not compressed + Compressed, // Texture is compressed + Converted, // Texture is converted before upload or after download + Rearranged, // Texture is swizzled before upload or after download }; constexpr std::array compression_type_table = {{ @@ -513,11 +513,11 @@ constexpr std::array compression_type_table SurfaceCompression::None, // Z32FS8 }}; -static constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { - if (format == PixelFormat::Invalid) +constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { + if (format == PixelFormat::Invalid) { return SurfaceCompression::None; - - ASSERT(static_cast(format) < compression_type_table.size()); + } + DEBUG_ASSERT(static_cast(format) < compression_type_table.size()); return compression_type_table[static_cast(format)]; } diff --git a/src/video_core/texture_cache/surface_params.h 
b/src/video_core/texture_cache/surface_params.h index 4dfb882f0..358d6757c 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -95,7 +95,7 @@ public: /// Returns the block depth of a given mipmap level. u32 GetMipBlockDepth(u32 level) const; - /// returns the best possible row/pitch alignment for the surface. + /// Returns the best possible row/pitch alignment for the surface. u32 GetRowAlignment(u32 level) const { const u32 bpp = GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); @@ -109,7 +109,7 @@ public: std::size_t GetHostMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level - // for a texture that is converted in host gpu. + /// for a texture that is converted in host gpu. std::size_t GetConvertedMipmapOffset(u32 level) const; /// Returns the size in bytes in guest memory of a given mipmap level. @@ -176,10 +176,7 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } - /// Returns how the compression should be handled for this texture. Values - /// are: None(no compression), Compressed(texture is compressed), - /// Converted(texture is converted before upload/ after download), - /// Rearranged(texture is swizzled before upload/after download). + /// Returns how the compression should be handled for this texture. SurfaceCompression GetCompressionType() const { return VideoCore::Surface::GetFormatCompressionType(pixel_format); } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9fcf87744..3df3e17dd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -571,8 +571,7 @@ private: // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. 
- auto iter = l1_cache.find(cache_addr); - if (iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { From 8eae66907e043e6e26d78cfc4b5cde7ea93a4f77 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:10:31 -0300 Subject: [PATCH 110/113] texture_cache: Use std::vector reservation for sampled_textures --- src/video_core/texture_cache/texture_cache.h | 27 ++++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3df3e17dd..8edae3d97 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -97,25 +97,19 @@ public: return {}; } const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; - auto pair = GetSurface(gpu_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, params, true, false); if (guard_samplers) { - if (sampled_textures_stack_pointer == sampled_textures_stack.size()) { - sampled_textures_stack.resize(sampled_textures_stack.size() * 2); - } - sampled_textures_stack[sampled_textures_stack_pointer] = pair.first; - sampled_textures_stack_pointer++; + sampled_textures.push_back(surface); } - return pair.second; + return view; } bool TextureBarrier() { - bool must_do = false; - for (u32 i = 0; i < sampled_textures_stack_pointer; i++) { - must_do |= sampled_textures_stack[i]->IsRenderTarget(); - sampled_textures_stack[i] = nullptr; - } - sampled_textures_stack_pointer = 0; - return must_do; + const bool any_rt = + std::any_of(sampled_textures.begin(), sampled_textures.end(), + [](const auto& surface) { return surface->IsRenderTarget(); }); + sampled_textures.clear(); + return any_rt; } TView 
GetDepthBufferSurface(bool preserve_contents) { @@ -259,7 +253,7 @@ protected: make_siblings(PixelFormat::Z32F, PixelFormat::R32F); make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); - sampled_textures_stack.resize(64); + sampled_textures.reserve(64); } ~TextureCache() = default; @@ -809,8 +803,7 @@ private: render_targets; FramebufferTargetInfo depth_buffer; - std::vector sampled_textures_stack{}; - u32 sampled_textures_stack_pointer{}; + std::vector sampled_textures; StagingCache staging_cache; std::recursive_mutex mutex; From 6e1db6b7038329a9716763c8bdf14cc5b578fec1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 Jun 2019 20:47:46 -0300 Subject: [PATCH 111/113] texture_cache: Pack sibling queries inside a method --- src/video_core/texture_cache/texture_cache.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8edae3d97..c9e72531a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -421,8 +421,7 @@ private: const auto& cr_params = current_surface->GetSurfaceParams(); TSurface new_surface; if (cr_params.pixel_format != params.pixel_format && !is_render && - siblings_table[static_cast(cr_params.pixel_format)] == - params.pixel_format) { + GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { SurfaceParams new_params = params; new_params.pixel_format = cr_params.pixel_format; new_params.component_type = cr_params.component_type; @@ -459,17 +458,16 @@ private: const SurfaceParams& params, bool is_render) { const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool matches_target = current_surface->MatchTarget(params.target); - const auto match_check = ([&]() -> std::pair { + const auto match_check = [&]() -> std::pair { if (matches_target) { return {current_surface, current_surface->GetMainView()}; } return {current_surface, 
current_surface->EmplaceOverview(params)}; - }); + }; if (!is_mirage) { return match_check(); } - if (!is_render && siblings_table[static_cast(current_surface->GetFormat())] == - params.pixel_format) { + if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { return match_check(); } return RebuildSurface(current_surface, params, is_render); @@ -766,6 +764,10 @@ private: return {}; } + constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { + return siblings_table[static_cast(format)]; + } + struct FramebufferTargetInfo { TSurface target; TView view; From 30b176f92b67ec7a9b1ce08cf89d50abd125f8a8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 19:38:19 -0400 Subject: [PATCH 112/113] texture_cache: Correct Texture Buffer Uploading --- .../renderer_opengl/gl_texture_cache.cpp | 17 +++++++++++++++-- .../renderer_opengl/gl_texture_cache.h | 1 + src/video_core/texture_cache/surface_params.cpp | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 97014a676..780526b66 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -141,6 +141,8 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType compon GLenum GetTextureTarget(const SurfaceTarget& target) { switch (target) { + case SurfaceTarget::TextureBuffer: + return GL_TEXTURE_BUFFER; case SurfaceTarget::Texture1D: return GL_TEXTURE_1D; case SurfaceTarget::Texture2D: @@ -191,7 +193,8 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { } } -OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format) { +OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, + OGLBuffer& texture_buffer) { OGLTexture texture; texture.Create(target); @@ -199,6 +202,11 @@ 
OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte case SurfaceTarget::Texture1D: glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); break; + case SurfaceTarget::TextureBuffer: + texture_buffer.Create(); + glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), + nullptr, GL_DYNAMIC_STORAGE_BIT); + glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); case SurfaceTarget::Texture2D: case SurfaceTarget::TextureCubemap: glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, @@ -229,7 +237,7 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param type = tuple.type; is_compressed = tuple.compressed; target = GetTextureTarget(params.target); - texture = CreateTexture(params, target, internal_format); + texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); main_view = CreateViewInner( ViewParams(params.target, 0, params.is_layered ? 
params.depth : 1, 0, params.num_levels), @@ -316,6 +324,11 @@ void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buff glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, buffer); break; + case SurfaceTarget::TextureBuffer: + ASSERT(level == 0); + glNamedBufferSubData(texture_buffer.handle, 0, + params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); + break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2D: glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index d4c6e9a30..e7cc66fbb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -67,6 +67,7 @@ private: u32 view_count{}; OGLTexture texture; + OGLBuffer texture_buffer; }; class CachedSurfaceView final : public VideoCommon::ViewBase { diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 340ed2ca0..9c56e2b4f 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -310,6 +310,8 @@ std::string SurfaceParams::TargetName() const { switch (target) { case SurfaceTarget::Texture1D: return "1D"; + case SurfaceTarget::TextureBuffer: + return "TexBuffer"; case SurfaceTarget::Texture2D: return "2D"; case SurfaceTarget::Texture3D: From 3b9d89839dc62e9e63a3cbe9636cf85276babdfb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 Jul 2019 21:10:59 -0400 Subject: [PATCH 113/113] texture_cache: Address Feedback --- src/common/CMakeLists.txt | 1 + src/common/binary_find.h | 21 +++++++++++++++++++ src/common/common_funcs.h | 10 --------- .../renderer_opengl/gl_shader_cache.cpp | 6 ++++-- .../renderer_opengl/gl_texture_cache.cpp | 6 +++--- .../renderer_opengl/gl_texture_cache.h | 9 ++++---- 
src/video_core/texture_cache/surface_base.h | 4 ++-- 7 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 src/common/binary_find.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 8ae05137b..2554add28 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -75,6 +75,7 @@ add_library(common STATIC assert.h detached_tasks.cpp detached_tasks.h + binary_find.h bit_field.h bit_util.h cityhash.cpp diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h @@ -0,0 +1,21 @@ +// Copyright 2019 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +namespace Common { + +template > +ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { + // Note: BOTH type T and the type after ForwardIt is dereferenced + // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. + // This is stricter than lower_bound requirement (see above) + + first = std::lower_bound(first, last, value, comp); + return first != last && !comp(value, *first) ? first : last; +} + +} // namespace Common diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 00a5698f3..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -61,14 +61,4 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } -template > -ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { - // Note: BOTH type T and the type after ForwardIt is dereferenced - // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. - // This is stricter than lower_bound requirement (see above) - - first = std::lower_bound(first, last, value, comp); - return first != last && !comp(value, *first) ? 
first : last; -} - } // namespace Common diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 718703091..1bd182d98 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -103,14 +103,16 @@ constexpr std::tuple GetPrimitiveDescription(GLen /// Calculates the size of a program stream std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { constexpr std::size_t start_offset = 10; - constexpr u64 key = 0xE2400FFFFF07000FULL; + // This is the encoded version of BRA that jumps to itself. All Nvidia + // shaders end with one. + constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; std::size_t offset = start_offset; std::size_t size = start_offset * sizeof(u64); while (offset < program.size()) { const u64 instruction = program[offset]; if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & mask) == key) { + if ((instruction & mask) == self_jumping_branch) { // End on Maxwell's "nop" instruction break; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 780526b66..08ae1a429 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -267,7 +267,7 @@ void CachedSurface::DownloadTexture(std::vector& staging_buffer) { } } -void CachedSurface::UploadTexture(std::vector& staging_buffer) { +void CachedSurface::UploadTexture(const std::vector& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Upload); SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -275,7 +275,7 @@ void CachedSurface::UploadTexture(std::vector& staging_buffer) { } } -void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buffer) { +void 
CachedSurface::UploadTextureMipmap(u32 level, const std::vector& staging_buffer) { glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(params.GetMipWidth(level))); @@ -284,7 +284,7 @@ void CachedSurface::UploadTextureMipmap(u32 level, std::vector& staging_buff const std::size_t mip_offset = compression_type == SurfaceCompression::Converted ? params.GetConvertedMipmapOffset(level) : params.GetHostMipmapLevelOffset(level); - u8* buffer{staging_buffer.data() + mip_offset}; + const u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast(params.GetHostMipmapSize(level))}; switch (params.target) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index e7cc66fbb..ff6ab6988 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -39,7 +39,7 @@ public: explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); - void UploadTexture(std::vector& staging_buffer) override; + void UploadTexture(const std::vector& staging_buffer) override; void DownloadTexture(std::vector& staging_buffer) override; GLenum GetTarget() const { @@ -57,7 +57,7 @@ protected: View CreateViewInner(const ViewParams& view_key, bool is_proxy); private: - void UploadTextureMipmap(u32 level, std::vector& staging_buffer); + void UploadTextureMipmap(u32 level, const std::vector& staging_buffer); GLenum internal_format{}; GLenum format{}; @@ -72,14 +72,13 @@ private: class CachedSurfaceView final : public VideoCommon::ViewBase { public: - explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, - const bool is_proxy); + explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); ~CachedSurfaceView(); /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void 
Attach(GLenum attachment, GLenum target) const; - GLuint GetTexture() { + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); } diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index eaed6545d..8ba386a8a 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -9,7 +9,7 @@ #include #include "common/assert.h" -#include "common/common_funcs.h" +#include "common/binary_find.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/morton.h" @@ -191,7 +191,7 @@ private: template class SurfaceBase : public SurfaceBaseImpl { public: - virtual void UploadTexture(std::vector& staging_buffer) = 0; + virtual void UploadTexture(const std::vector& staging_buffer) = 0; virtual void DownloadTexture(std::vector& staging_buffer) = 0;