glsl: Implement VOTE for host subgroup sizes potentially larger than the guest warp size
This commit is contained in:
parent
770b754afd
commit
e35ffbbeb0
5 changed files with 43 additions and 20 deletions
|
@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
|
||||||
|
|
||||||
void EmitContext::SetupExtensions(std::string&) {
|
void EmitContext::SetupExtensions(std::string&) {
|
||||||
header += "#extension GL_ARB_separate_shader_objects : enable\n";
|
header += "#extension GL_ARB_separate_shader_objects : enable\n";
|
||||||
|
if (stage != Stage::Compute) {
|
||||||
|
// TODO: track this usage
|
||||||
header += "#extension GL_ARB_sparse_texture2 : enable\n";
|
header += "#extension GL_ARB_sparse_texture2 : enable\n";
|
||||||
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
|
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
|
||||||
// header += "#extension GL_ARB_texture_cube_map_array : enable\n";
|
}
|
||||||
if (info.uses_int64) {
|
if (info.uses_int64) {
|
||||||
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
|
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
|
||||||
}
|
}
|
||||||
|
@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) {
|
||||||
info.uses_subgroup_shuffles || info.uses_fswzadd) {
|
info.uses_subgroup_shuffles || info.uses_fswzadd) {
|
||||||
header += "#extension GL_ARB_shader_ballot : enable\n";
|
header += "#extension GL_ARB_shader_ballot : enable\n";
|
||||||
header += "#extension GL_ARB_shader_group_vote : enable\n";
|
header += "#extension GL_ARB_shader_group_vote : enable\n";
|
||||||
|
header += "#extension GL_KHR_shader_subgroup_basic : enable\n";
|
||||||
|
if (!info.uses_int64) {
|
||||||
|
header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emits the GLSL statement for an "all" vote: the result is true only when `pred` holds on
/// every active invocation taking part in the vote.
///
/// @param ctx  Emit context; its profile selects the code path and it receives the statement.
/// @param inst IR instruction forwarded to ctx.AddU1 as the destination of the assignment.
/// @param pred GLSL expression of the per-invocation predicate.
void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        // allInvocationsARB is the "all true" vote. allInvocationsEqualARB would additionally
        // report true when every invocation evaluates the predicate to false.
        ctx.AddU1("{}=allInvocationsARB({});", inst, pred);
        return;
    }
    // The ballot is read as two 32-bit words. Select the word that holds this invocation's bit
    // (invocation index / 32); indexing the uvec2 with the raw invocation index is out of bounds
    // for every index above 1.
    constexpr std::string_view ballot_word{"gl_SubgroupInvocationID>>5"};
    const auto active_mask{fmt::format("uvec2(ballotARB(true))[{}]", ballot_word)};
    const auto ballot{fmt::format("uvec2(ballotARB({}))[{}]", pred, ballot_word)};
    // All active invocations voted true when the masked ballot equals the active mask.
    ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
}
|
||||||
|
|
||||||
/// Emits the GLSL statement for an "any" vote: the result is true when `pred` holds on at least
/// one active invocation taking part in the vote.
///
/// @param ctx  Emit context; its profile selects the code path and it receives the statement.
/// @param inst IR instruction forwarded to ctx.AddU1 as the destination of the assignment.
/// @param pred GLSL expression of the per-invocation predicate.
void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
        return;
    }
    // The ballot is read as two 32-bit words. Select the word that holds this invocation's bit
    // (invocation index / 32); indexing the uvec2 with the raw invocation index is out of bounds
    // for every index above 1.
    constexpr std::string_view ballot_word{"gl_SubgroupInvocationID>>5"};
    const auto active_mask{fmt::format("uvec2(ballotARB(true))[{}]", ballot_word)};
    const auto ballot{fmt::format("uvec2(ballotARB({}))[{}]", pred, ballot_word)};
    // Any set bit left after masking means some active invocation voted true. The format string
    // has exactly three placeholders, so exactly three arguments are passed.
    ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask);
}
|
||||||
|
|
||||||
/// Emits the GLSL statement for an "all equal" vote: the result is true when `pred` has the same
/// value on every active invocation taking part in the vote.
///
/// @param ctx  Emit context; its profile selects the code path and it receives the statement.
/// @param inst IR instruction forwarded to ctx.AddU1 as the destination of the assignment.
/// @param pred GLSL expression of the per-invocation predicate.
void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
        return;
    }
    // The ballot is read as two 32-bit words. Select the word that holds this invocation's bit
    // (invocation index / 32); indexing the uvec2 with the raw invocation index is out of bounds
    // for every index above 1.
    constexpr std::string_view ballot_word{"gl_SubgroupInvocationID>>5"};
    const auto active_mask{fmt::format("uvec2(ballotARB(true))[{}]", ballot_word)};
    const auto ballot{fmt::format("uvec2(ballotARB({}))[{}]", pred, ballot_word)};
    // ballot ^ active_mask is zero when the ballot matches the active mask (all voted true) and
    // equals active_mask when the ballot is zero (all voted false); both cases are "equal".
    const auto value{fmt::format("({}^{})", ballot, active_mask)};
    ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
}
|
||||||
|
|
||||||
/// Emits the GLSL statement that stores a 32-bit ballot of `pred` across the subgroup.
///
/// @param ctx  Emit context; its profile selects the code path and it receives the statement.
/// @param inst IR instruction forwarded to ctx.AddU32 as the destination of the assignment.
/// @param pred GLSL expression of the per-invocation predicate.
void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        // The low word of the ballot is taken directly on this path.
        ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
        return;
    }
    // Select the 32-bit ballot word that holds this invocation's bit (invocation index / 32);
    // indexing the uvec2 with the raw invocation index is out of bounds for every index above 1.
    ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID>>5];", inst, pred);
}
|
||||||
|
|
||||||
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
|
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
|
||||||
|
|
|
@ -160,6 +160,7 @@ Device::Device() {
|
||||||
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
|
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
|
||||||
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
|
has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
|
||||||
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
|
||||||
|
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
|
||||||
|
|
||||||
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||||
// uniform buffers as "push constants"
|
// uniform buffers as "push constants"
|
||||||
|
|
|
@ -128,6 +128,10 @@ public:
|
||||||
return has_amd_shader_half_float;
|
return has_amd_shader_half_float;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsWarpSizePotentiallyLargerThanGuest() const {
|
||||||
|
return warp_size_potentially_larger_than_guest;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool TestVariableAoffi();
|
static bool TestVariableAoffi();
|
||||||
static bool TestPreciseBug();
|
static bool TestPreciseBug();
|
||||||
|
@ -161,6 +165,7 @@ private:
|
||||||
bool has_depth_buffer_float{};
|
bool has_depth_buffer_float{};
|
||||||
bool has_nv_gpu_shader_5{};
|
bool has_nv_gpu_shader_5{};
|
||||||
bool has_amd_shader_half_float{};
|
bool has_amd_shader_half_float{};
|
||||||
|
bool warp_size_potentially_larger_than_guest{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
||||||
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
|
.support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
|
||||||
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
|
.support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
|
||||||
|
|
||||||
.warp_size_potentially_larger_than_guest = true,
|
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
|
||||||
|
|
||||||
.lower_left_origin_mode = true,
|
.lower_left_origin_mode = true,
|
||||||
.need_declared_frag_colors = true,
|
.need_declared_frag_colors = true,
|
||||||
|
|
Loading…
Reference in a new issue