glsl: Address rest of feedback
This commit is contained in:
parent
c5dfa0b630
commit
d36f667bc0
11 changed files with 86 additions and 38 deletions
|
@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) {
|
|||
}
|
||||
}
|
||||
|
||||
std::string_view ImageAccessQualifier(bool is_written, bool is_read) {
|
||||
if (is_written && !is_read) {
|
||||
return "writeonly ";
|
||||
}
|
||||
if (is_read && !is_written) {
|
||||
return "readonly ";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string_view GetTessMode(TessPrimitive primitive) {
|
||||
switch (primitive) {
|
||||
case TessPrimitive::Triangles:
|
||||
|
@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
|
|||
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
|
||||
const RuntimeInfo& runtime_info_)
|
||||
: info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
|
||||
if (profile.need_fastmath_off) {
|
||||
header += "#pragma optionNV(fastmath off)\n";
|
||||
}
|
||||
SetupExtensions();
|
||||
stage = program.stage;
|
||||
switch (program.stage) {
|
||||
|
@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||
}
|
||||
for (size_t index = 0; index < info.stores_generics.size(); ++index) {
|
||||
// TODO: Properly resolve attribute issues
|
||||
if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) {
|
||||
if (info.stores_generics[index] || StageInitializesVaryings()) {
|
||||
DefineGenericOutput(index, program.invocations);
|
||||
}
|
||||
}
|
||||
|
@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
|||
DefineConstants();
|
||||
}
|
||||
|
||||
bool EmitContext::StageInitializesVaryings() const noexcept {
|
||||
switch (stage) {
|
||||
case Stage::VertexA:
|
||||
case Stage::VertexB:
|
||||
case Stage::Geometry:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::SetupExtensions() {
|
||||
if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
|
||||
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
|
||||
|
@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() {
|
|||
header += "#extension GL_NV_shader_atomic_float : enable\n";
|
||||
}
|
||||
if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
|
||||
header += "#extension NV_shader_atomic_fp16_vector : enable\n";
|
||||
header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n";
|
||||
}
|
||||
if (info.uses_fp16) {
|
||||
if (profile.support_gl_nv_gpu_shader_5) {
|
||||
|
@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() {
|
|||
if (info.stores_viewport_mask && profile.support_viewport_mask) {
|
||||
header += "#extension GL_NV_viewport_array2 : enable\n";
|
||||
}
|
||||
if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) {
|
||||
if (info.uses_typeless_image_reads) {
|
||||
header += "#extension GL_EXT_shader_image_load_formatted : enable\n";
|
||||
}
|
||||
if (info.uses_derivatives && profile.support_gl_derivative_control) {
|
||||
|
@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
|
|||
"return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}["
|
||||
"uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}");
|
||||
}
|
||||
write_func += "}";
|
||||
write_func_64 += "}";
|
||||
write_func_128 += "}";
|
||||
write_func += '}';
|
||||
write_func_64 += '}';
|
||||
write_func_128 += '}';
|
||||
load_func += "return 0u;}";
|
||||
load_func_64 += "return uvec2(0);}";
|
||||
load_func_128 += "return uvec4(0);}";
|
||||
|
@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) {
|
|||
for (const auto& desc : info.image_buffer_descriptors) {
|
||||
image_buffers.push_back({bindings.image, desc.count});
|
||||
const auto format{ImageFormatString(desc.format)};
|
||||
const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
|
||||
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
|
||||
header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image,
|
||||
format, bindings.image, array_decorator);
|
||||
header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};",
|
||||
bindings.image, format, qualifier, bindings.image, array_decorator);
|
||||
bindings.image += desc.count;
|
||||
}
|
||||
images.reserve(info.image_descriptors.size());
|
||||
|
@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) {
|
|||
images.push_back({bindings.image, desc.count});
|
||||
const auto format{ImageFormatString(desc.format)};
|
||||
const auto image_type{ImageType(desc.type)};
|
||||
const auto qualifier{desc.is_written ? "" : "readonly "};
|
||||
const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)};
|
||||
const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""};
|
||||
header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format,
|
||||
qualifier, image_type, bindings.image, array_decorator);
|
||||
|
|
|
@ -136,6 +136,8 @@ public:
|
|||
code += '\n';
|
||||
}
|
||||
|
||||
[[nodiscard]] bool StageInitializesVaryings() const noexcept;
|
||||
|
||||
std::string header;
|
||||
std::string code;
|
||||
VarAlloc var_alloc;
|
||||
|
|
|
@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
|
|||
ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value);
|
||||
break;
|
||||
case IR::Attribute::FogCoordinate:
|
||||
ctx.Add("gl_FogFragCoord.x={};", value);
|
||||
ctx.Add("gl_FogFragCoord={};", value);
|
||||
break;
|
||||
case IR::Attribute::ClipDistance0:
|
||||
case IR::Attribute::ClipDistance1:
|
||||
|
|
|
@ -10,6 +10,17 @@
|
|||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
|
||||
namespace Shader::Backend::GLSL {
|
||||
namespace {
|
||||
void InitializeVaryings(EmitContext& ctx) {
|
||||
ctx.Add("gl_Position=vec4(0,0,0,1);");
|
||||
// TODO: Properly resolve attribute issues
|
||||
for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
|
||||
if (!ctx.info.stores_generics[index]) {
|
||||
ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void EmitPhi(EmitContext& ctx, IR::Inst& phi) {
|
||||
const size_t num_args{phi.NumArgs()};
|
||||
|
@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
|
|||
}
|
||||
|
||||
void EmitPrologue(EmitContext& ctx) {
|
||||
if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) {
|
||||
ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);");
|
||||
// TODO: Properly resolve attribute issues
|
||||
for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) {
|
||||
if (!ctx.info.stores_generics[index]) {
|
||||
ctx.Add("out_attr{}=vec4(0,0,0,1);", index);
|
||||
}
|
||||
}
|
||||
if (ctx.StageInitializesVaryings()) {
|
||||
InitializeVaryings(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {}
|
|||
|
||||
void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) {
|
||||
ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream));
|
||||
InitializeVaryings(ctx);
|
||||
}
|
||||
|
||||
void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) {
|
||||
|
|
|
@ -312,11 +312,14 @@ public:
|
|||
}
|
||||
|
||||
u32 Add(const ImageBufferDescriptor& desc) {
|
||||
return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
|
||||
const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) {
|
||||
return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index &&
|
||||
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
|
||||
desc.size_shift == existing.size_shift;
|
||||
});
|
||||
})};
|
||||
image_buffer_descriptors[index].is_written |= desc.is_written;
|
||||
image_buffer_descriptors[index].is_read |= desc.is_read;
|
||||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const TextureDescriptor& desc) {
|
||||
|
@ -339,6 +342,7 @@ public:
|
|||
desc.size_shift == existing.size_shift;
|
||||
})};
|
||||
image_descriptors[index].is_written |= desc.is_written;
|
||||
image_descriptors[index].is_read |= desc.is_read;
|
||||
return index;
|
||||
}
|
||||
|
||||
|
@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) {
|
|||
throw NotImplementedException("Unexpected separate sampler");
|
||||
}
|
||||
const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead};
|
||||
const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead};
|
||||
if (flags.type == TextureType::Buffer) {
|
||||
index = descriptors.Add(ImageBufferDescriptor{
|
||||
.format = flags.image_format,
|
||||
.is_written = is_written,
|
||||
.is_read = is_read,
|
||||
.cbuf_index = cbuf.index,
|
||||
.cbuf_offset = cbuf.offset,
|
||||
.count = cbuf.count,
|
||||
|
@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) {
|
|||
.type = flags.type,
|
||||
.format = flags.image_format,
|
||||
.is_written = is_written,
|
||||
.is_read = is_read,
|
||||
.cbuf_index = cbuf.index,
|
||||
.cbuf_offset = cbuf.offset,
|
||||
.count = cbuf.count,
|
||||
|
|
|
@ -97,6 +97,8 @@ struct Profile {
|
|||
/// Fragment outputs have to be declared even if they are not written to avoid undefined values.
|
||||
/// See Ori and the Blind Forest's main menu for reference.
|
||||
bool need_declared_frag_colors{};
|
||||
/// Prevents fast math optimizations that may cause inaccuracies
|
||||
bool need_fastmath_off{};
|
||||
|
||||
/// OpFClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
|
|
|
@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes
|
|||
struct ImageBufferDescriptor {
|
||||
ImageFormat format;
|
||||
bool is_written;
|
||||
bool is_read;
|
||||
u32 cbuf_index;
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
|
@ -99,6 +100,7 @@ struct ImageDescriptor {
|
|||
TextureType type;
|
||||
ImageFormat format;
|
||||
bool is_written;
|
||||
bool is_read;
|
||||
u32 cbuf_index;
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
|
|
|
@ -162,6 +162,7 @@ Device::Device() {
|
|||
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
||||
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
|
||||
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
|
||||
need_fastmath_off = is_nvidia;
|
||||
|
||||
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||
// uniform buffers as "push constants"
|
||||
|
|
|
@ -136,6 +136,10 @@ public:
|
|||
return warp_size_potentially_larger_than_guest;
|
||||
}
|
||||
|
||||
bool NeedsFastmathOff() const {
|
||||
return need_fastmath_off;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestPreciseBug();
|
||||
|
@ -171,6 +175,7 @@ private:
|
|||
bool has_amd_shader_half_float{};
|
||||
bool has_sparse_texture_2{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
bool need_fastmath_off{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
|
|||
std::ranges::transform(infos, stage_infos.begin(),
|
||||
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
|
||||
auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
|
||||
if (device.UseAssemblyShaders()) {
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
program.handle = glCreateProgram();
|
||||
}
|
||||
for (size_t stage = 0; stage < 5; ++stage) {
|
||||
const auto code{sources[stage]};
|
||||
if (code.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (device.UseAssemblyShaders()) {
|
||||
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
|
||||
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
|
||||
}
|
||||
} else {
|
||||
program.handle = glCreateProgram();
|
||||
for (size_t stage = 0; stage < 5; ++stage) {
|
||||
const auto code{sources[stage]};
|
||||
if (code.empty()) {
|
||||
continue;
|
||||
}
|
||||
AttachShader(Stage(stage), program.handle, code);
|
||||
}
|
||||
LinkProgram(program.handle);
|
||||
}
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderComplete();
|
||||
if (!device.UseAssemblyShaders()) {
|
||||
LinkProgram(program.handle);
|
||||
}
|
||||
u32 num_textures{};
|
||||
u32 num_images{};
|
||||
|
@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
|
|||
if (assembly_shaders && xfb_state) {
|
||||
GenerateTransformFeedbackState(*xfb_state);
|
||||
}
|
||||
if (shader_notify) {
|
||||
shader_notify->MarkShaderComplete();
|
||||
}
|
||||
is_built.store(true, std::memory_order_relaxed);
|
||||
}};
|
||||
if (thread_worker) {
|
||||
|
|
|
@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
|||
|
||||
.lower_left_origin_mode = true,
|
||||
.need_declared_frag_colors = true,
|
||||
.need_fastmath_off = device.NeedsFastmathOff(),
|
||||
|
||||
.has_broken_spirv_clamp = true,
|
||||
.has_broken_unsigned_image_offsets = true,
|
||||
|
|
Loading…
Reference in a new issue