Merge pull request #11544 from Kelebek1/reduce_stream_buffer_renderdoc
Allow GPUs without rebar to open multiple RenderDoc captures
This commit is contained in:
commit
15a5bdd979
4 changed files with 59 additions and 14 deletions
|
@ -24,25 +24,38 @@ using namespace Common::Literals;
|
||||||
|
|
||||||
// Maximum potential alignment of a Vulkan buffer
|
// Maximum potential alignment of a Vulkan buffer
|
||||||
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
|
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
|
||||||
// Maximum size to put elements in the stream buffer
|
|
||||||
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
|
|
||||||
// Stream buffer size in bytes
|
// Stream buffer size in bytes
|
||||||
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
|
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
|
||||||
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
|
|
||||||
|
|
||||||
size_t Region(size_t iterator) noexcept {
|
size_t GetStreamBufferSize(const Device& device) {
|
||||||
return iterator / REGION_SIZE;
|
VkDeviceSize size{0};
|
||||||
|
if (device.HasDebuggingToolAttached()) {
|
||||||
|
ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
|
||||||
|
size = std::max(size, heap.size);
|
||||||
|
});
|
||||||
|
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
|
||||||
|
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
|
||||||
|
// as the heap will be much larger.
|
||||||
|
if (size <= 256_MiB) {
|
||||||
|
size = size * 40 / 100;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
size = MAX_STREAM_BUFFER_SIZE;
|
||||||
|
}
|
||||||
|
return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
|
||||||
Scheduler& scheduler_)
|
Scheduler& scheduler_)
|
||||||
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
|
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
|
||||||
|
stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
|
||||||
|
StagingBufferPool::NUM_SYNCS} {
|
||||||
VkBufferCreateInfo stream_ci = {
|
VkBufferCreateInfo stream_ci = {
|
||||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.size = STREAM_BUFFER_SIZE,
|
.size = stream_buffer_size,
|
||||||
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
|
||||||
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
|
||||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
|
@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
|
||||||
StagingBufferPool::~StagingBufferPool() = default;
|
StagingBufferPool::~StagingBufferPool() = default;
|
||||||
|
|
||||||
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
|
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
|
||||||
if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
|
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
|
||||||
return GetStreamBuffer(size);
|
return GetStreamBuffer(size);
|
||||||
}
|
}
|
||||||
return GetStagingBuffer(size, usage, deferred);
|
return GetStagingBuffer(size, usage, deferred);
|
||||||
|
@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
|
||||||
used_iterator = iterator;
|
used_iterator = iterator;
|
||||||
free_iterator = std::max(free_iterator, iterator + size);
|
free_iterator = std::max(free_iterator, iterator + size);
|
||||||
|
|
||||||
if (iterator + size >= STREAM_BUFFER_SIZE) {
|
if (iterator + size >= stream_buffer_size) {
|
||||||
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
|
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
|
||||||
current_tick);
|
current_tick);
|
||||||
used_iterator = 0;
|
used_iterator = 0;
|
||||||
|
|
|
@ -90,6 +90,9 @@ private:
|
||||||
void ReleaseCache(MemoryUsage usage);
|
void ReleaseCache(MemoryUsage usage);
|
||||||
|
|
||||||
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
|
void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
|
||||||
|
size_t Region(size_t iter) const noexcept {
|
||||||
|
return iter / region_size;
|
||||||
|
}
|
||||||
|
|
||||||
const Device& device;
|
const Device& device;
|
||||||
MemoryAllocator& memory_allocator;
|
MemoryAllocator& memory_allocator;
|
||||||
|
@ -97,6 +100,8 @@ private:
|
||||||
|
|
||||||
vk::Buffer stream_buffer;
|
vk::Buffer stream_buffer;
|
||||||
std::span<u8> stream_pointer;
|
std::span<u8> stream_pointer;
|
||||||
|
VkDeviceSize stream_buffer_size;
|
||||||
|
VkDeviceSize region_size;
|
||||||
|
|
||||||
size_t iterator = 0;
|
size_t iterator = 0;
|
||||||
size_t used_iterator = 0;
|
size_t used_iterator = 0;
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/literals.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/polyfill_ranges.h"
|
#include "common/polyfill_ranges.h"
|
||||||
#include "video_core/vulkan_common/vma.h"
|
#include "video_core/vulkan_common/vma.h"
|
||||||
|
@ -69,8 +70,7 @@ struct Range {
|
||||||
case MemoryUsage::Download:
|
case MemoryUsage::Download:
|
||||||
return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
|
return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
|
||||||
case MemoryUsage::DeviceLocal:
|
case MemoryUsage::DeviceLocal:
|
||||||
return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
|
return {};
|
||||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
|
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
|
||||||
: device{device_}, allocator{device.GetAllocator()},
|
: device{device_}, allocator{device.GetAllocator()},
|
||||||
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
|
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
|
||||||
buffer_image_granularity{
|
buffer_image_granularity{
|
||||||
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
|
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
|
||||||
|
// GPUs not supporting rebar may only have a region with less than 256MB host visible/device
|
||||||
|
// local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
|
||||||
|
// the heap running out of memory. With RenderDoc attached and only a small host/device region,
|
||||||
|
// only allow the stream buffer in this memory heap.
|
||||||
|
if (device.HasDebuggingToolAttached()) {
|
||||||
|
using namespace Common::Literals;
|
||||||
|
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
|
||||||
|
if (heap.size <= 256_MiB) {
|
||||||
|
valid_memory_types &= ~(1u << index);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
MemoryAllocator::~MemoryAllocator() = default;
|
MemoryAllocator::~MemoryAllocator() = default;
|
||||||
|
|
||||||
|
@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa
|
||||||
.usage = MemoryUsageVma(usage),
|
.usage = MemoryUsageVma(usage),
|
||||||
.requiredFlags = 0,
|
.requiredFlags = 0,
|
||||||
.preferredFlags = MemoryUsagePreferedVmaFlags(usage),
|
.preferredFlags = MemoryUsagePreferedVmaFlags(usage),
|
||||||
.memoryTypeBits = 0,
|
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
|
||||||
.pool = VK_NULL_HANDLE,
|
.pool = VK_NULL_HANDLE,
|
||||||
.pUserData = nullptr,
|
.pUserData = nullptr,
|
||||||
.priority = 0.f,
|
.priority = 0.f,
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/vulkan_common/vulkan_device.h"
|
||||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||||
|
|
||||||
VK_DEFINE_HANDLE(VmaAllocator)
|
VK_DEFINE_HANDLE(VmaAllocator)
|
||||||
|
@ -26,6 +27,18 @@ enum class MemoryUsage {
|
||||||
Stream, ///< Requests device local host visible buffer, falling back host memory.
|
Stream, ///< Requests device local host visible buffer, falling back host memory.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename F>
|
||||||
|
void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
|
||||||
|
auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
|
||||||
|
for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
|
||||||
|
auto& memory_type = memory_props.memoryTypes[i];
|
||||||
|
if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
|
||||||
|
(memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
|
||||||
|
f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Ownership handle of a memory commitment.
|
/// Ownership handle of a memory commitment.
|
||||||
/// Points to a subregion of a memory allocation.
|
/// Points to a subregion of a memory allocation.
|
||||||
class MemoryCommit {
|
class MemoryCommit {
|
||||||
|
@ -124,6 +137,7 @@ private:
|
||||||
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
|
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
|
||||||
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
|
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
|
||||||
// and optimal images
|
// and optimal images
|
||||||
|
u32 valid_memory_types{~0u};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
Loading…
Reference in a new issue