Improve state management by splitting some of the state application into separate functions to avoid the overhead of a full apply

This commit is contained in:
Rodolfo Bogado 2018-11-07 22:27:47 -03:00
parent 4a6eff3b7b
commit 4e6c64bf8d
6 changed files with 40 additions and 39 deletions

View file

@ -140,7 +140,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
if (is_cache_miss) { if (is_cache_miss) {
VAO.Create(); VAO.Create();
state.draw.vertex_array = VAO.handle; state.draw.vertex_array = VAO.handle;
state.Apply(); state.ApplyVertexBufferState();
// The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
// around. // around.
@ -182,7 +182,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
} }
} }
state.draw.vertex_array = VAO.handle; state.draw.vertex_array = VAO.handle;
state.Apply(); state.ApplyVertexBufferState();
} }
void RasterizerOpenGL::SetupVertexBuffer() { void RasterizerOpenGL::SetupVertexBuffer() {
@ -342,8 +342,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
index++; index++;
} }
} }
state.Apply();
} }
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@ -412,8 +410,8 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta}); cached_pages.add({pages_interval, delta});
} }
void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
bool preserve_contents, bool using_depth_fb, bool preserve_contents,
std::optional<std::size_t> single_color_target) { std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer); MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@ -429,9 +427,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented"); ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
// Bind the framebuffer surfaces // Bind the framebuffer surfaces
state.draw.draw_framebuffer = framebuffer.handle; current_state.draw.draw_framebuffer = framebuffer.handle;
state.Apply(); current_state.ApplyFramebufferState();
state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
if (using_color_fb) { if (using_color_fb) {
if (single_color_target) { if (single_color_target) {
@ -509,10 +507,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0); 0);
} }
SyncViewport(current_state);
SyncViewport();
state.Apply();
} }
void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Clear() {
@ -525,22 +520,23 @@ void RasterizerOpenGL::Clear() {
bool use_stencil{}; bool use_stencil{};
OpenGLState clear_state; OpenGLState clear_state;
clear_state.draw.draw_framebuffer = framebuffer.handle;
clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) { regs.clear_buffers.A) {
use_color = true; use_color = true;
} }
if (use_color) {
clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
}
if (regs.clear_buffers.Z) { if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
use_depth = true; use_depth = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is // Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true. // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
// true.
clear_state.depth.test_enabled = true; clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS; clear_state.depth.test_func = GL_ALWAYS;
} }
@ -557,11 +553,8 @@ void RasterizerOpenGL::Clear() {
ScopeAcquireGLContext acquire_context{emu_window}; ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(use_color, use_depth || use_stencil, false, ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value()); regs.clear_buffers.RT.Value());
// Copy the sRGB setting to the clear state to avoid problem with
// specific driver implementations
clear_state.framebuffer_srgb.enabled = state.framebuffer_srgb.enabled;
clear_state.Apply(); clear_state.Apply();
if (use_color) { if (use_color) {
@ -587,7 +580,7 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context{emu_window}; ScopeAcquireGLContext acquire_context{emu_window};
ConfigureFramebuffers(); ConfigureFramebuffers(state);
SyncColorMask(); SyncColorMask();
SyncDepthTestState(); SyncDepthTestState();
SyncStencilTestState(); SyncStencilTestState();
@ -608,7 +601,7 @@ void RasterizerOpenGL::DrawArrays() {
const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
state.draw.vertex_buffer = buffer_cache.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply(); state.ApplyVertexBufferState();
std::size_t buffer_size = CalculateVertexArraysSize(); std::size_t buffer_size = CalculateVertexArraysSize();
@ -923,11 +916,11 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
return current_unit + static_cast<u32>(entries.size()); return current_unit + static_cast<u32>(entries.size());
} }
void RasterizerOpenGL::SyncViewport() { void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
auto& viewport = state.viewports[i]; auto& viewport = current_state.viewports[i];
viewport.x = viewport_rect.left; viewport.x = viewport_rect.left;
viewport.y = viewport_rect.bottom; viewport.y = viewport_rect.bottom;
viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
@ -1131,9 +1124,8 @@ void RasterizerOpenGL::CheckAlphaTests() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
LOG_CRITICAL( LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
Render_OpenGL, "this behavior is undefined.");
"Alpha Testing is enabled with Multiple Render Targets, this behavior is undefined.");
UNREACHABLE(); UNREACHABLE();
} }
} }

View file

@ -109,8 +109,8 @@ private:
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer. * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
* @param single_color_target Specifies if a single color buffer target should be used. * @param single_color_target Specifies if a single color buffer target should be used.
*/ */
void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
bool preserve_contents = true, bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {}); std::optional<std::size_t> single_color_target = {});
/* /*
@ -134,7 +134,7 @@ private:
GLenum primitive_mode, u32 current_unit); GLenum primitive_mode, u32 current_unit);
/// Syncs the viewport and depth range to match the guest state /// Syncs the viewport and depth range to match the guest state
void SyncViewport(); void SyncViewport(OpenGLState& current_state);
/// Syncs the clip enabled status to match the guest state /// Syncs the clip enabled status to match the guest state
void SyncClipEnabled(); void SyncClipEnabled();

View file

@ -580,7 +580,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
state.draw.draw_framebuffer = draw_fb_handle; state.draw.draw_framebuffer = draw_fb_handle;
// Set sRGB enabled if the destination surfaces need it // Set sRGB enabled if the destination surfaces need it
state.framebuffer_srgb.enabled = dst_params.srgb_conversion; state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
state.Apply(); state.ApplyFramebufferState();
u32 buffers{}; u32 buffers{};

View file

@ -427,7 +427,7 @@ void OpenGLState::ApplySamplers() const {
} }
} }
void OpenGLState::Apply() const { void OpenGLState::ApplyFramebufferState() const {
// Framebuffer // Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@ -435,7 +435,9 @@ void OpenGLState::Apply() const {
if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
} }
}
void OpenGLState::ApplyVertexBufferState() const {
// Vertex array // Vertex array
if (draw.vertex_array != cur_state.draw.vertex_array) { if (draw.vertex_array != cur_state.draw.vertex_array) {
glBindVertexArray(draw.vertex_array); glBindVertexArray(draw.vertex_array);
@ -445,7 +447,11 @@ void OpenGLState::Apply() const {
if (draw.vertex_buffer != cur_state.draw.vertex_buffer) { if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
} }
}
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexBufferState();
// Uniform buffer // Uniform buffer
if (draw.uniform_buffer != cur_state.draw.uniform_buffer) { if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer); glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);

View file

@ -181,6 +181,10 @@ public:
} }
/// Apply this state as the current OpenGL state /// Apply this state as the current OpenGL state
void Apply() const; void Apply() const;
/// Apply only the state affecting the framebuffer
void ApplyFramebufferState() const;
/// Apply only the state affecting the vertex buffer
void ApplyVertexBufferState() const;
/// Set the initial OpenGL state /// Set the initial OpenGL state
static void ApplyDefaultState(); static void ApplyDefaultState();
/// Resets any references to the given resource /// Resets any references to the given resource

View file

@ -159,8 +159,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
} }
} }
} }
LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
static_cast<u32>(filter_mode));
return GL_LINEAR; return GL_LINEAR;
} }
@ -206,7 +205,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
return GL_ALWAYS; return GL_ALWAYS;
} }
LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
static_cast<u32>(func)); static_cast<u32>(func));
return GL_GREATER; return GL_GREATER;
} }