From 798faad7e309de12bff235dd2ccc2297a4ddb6d2 Mon Sep 17 00:00:00 2001 From: Powei Feng Date: Fri, 15 Sep 2023 14:37:18 -0700 Subject: [PATCH] vulkan: cache vertex buffer info To reduce CPU cost in draw(), we move the VertexBuffer-related metadata out of draw() and into a cache. We store the cache info outside of the actual VulkanVertexBuffer class since VulkanVertexBuffer subclasses HwVertexBuffer, which is a Handle meant to be minimal. This means that we need to cache on the heap, but it should be ok since the caching is only done during scene set-up and not per-frame. --- filament/backend/src/vulkan/VulkanBlitter.cpp | 14 +++-- filament/backend/src/vulkan/VulkanDriver.cpp | 56 +++---------------- filament/backend/src/vulkan/VulkanHandles.cpp | 54 ++++++++++++++++-- filament/backend/src/vulkan/VulkanHandles.h | 44 ++++++++++++++- .../src/vulkan/VulkanPipelineCache.cpp | 15 +++-- .../backend/src/vulkan/VulkanPipelineCache.h | 5 +- 6 files changed, 124 insertions(+), 64 deletions(-) diff --git a/filament/backend/src/vulkan/VulkanBlitter.cpp b/filament/backend/src/vulkan/VulkanBlitter.cpp index a880fd6d009b..f1eb13fc4cdd 100644 --- a/filament/backend/src/vulkan/VulkanBlitter.cpp +++ b/filament/backend/src/vulkan/VulkanBlitter.cpp @@ -377,13 +377,19 @@ void VulkanBlitter::blitSlowDepth(VkFilter filter, const VkExtent2D srcExtent, V vkraster.colorTargetCount = 0; mPipelineCache.bindRasterState(vkraster); - VulkanPipelineCache::VertexArray varray = {}; VkBuffer buffers[1] = {}; VkDeviceSize offsets[1] = {}; buffers[0] = mTriangleBuffer->getGpuBuffer(); - varray.attributes[0] = { .location = 0, .binding = 0, .format = VK_FORMAT_R32G32_SFLOAT }; - varray.buffers[0] = { .binding = 0, .stride = sizeof(float) * 2 }; - mPipelineCache.bindVertexArray(varray); + VkVertexInputAttributeDescription attribDesc = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + }; + VkVertexInputBindingDescription bufferDesc = { + .binding = 0, + .stride = sizeof(float) * 2, + }; + 
mPipelineCache.bindVertexArray(&attribDesc, &bufferDesc, 1); // Select nearest filtering and clamp_to_edge. VkSampler vksampler = mSamplerCache.getSampler({}); diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 116451763e69..afce9ac3c17c 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -339,7 +339,8 @@ void VulkanDriver::destroyRenderPrimitive(Handle rph) { void VulkanDriver::createVertexBufferR(Handle vbh, uint8_t bufferCount, uint8_t attributeCount, uint32_t elementCount, AttributeArray attributes) { auto vertexBuffer = mResourceAllocator.construct(vbh, mContext, mStagePool, - &mResourceAllocator, bufferCount, attributeCount, elementCount, attributes); + &mResourceAllocator, bufferCount, attributeCount, elementCount, + attributes); mResourceManager.acquire(vertexBuffer); } @@ -370,7 +371,7 @@ void VulkanDriver::destroyIndexBuffer(Handle ibh) { void VulkanDriver::createBufferObjectR(Handle boh, uint32_t byteCount, BufferObjectBinding bindingType, BufferUsage usage) { auto bufferObject = mResourceAllocator.construct(boh, mAllocator, - mStagePool, byteCount, bindingType, usage); + mStagePool, byteCount, bindingType); mResourceManager.acquire(bufferObject); } @@ -1554,56 +1555,17 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle r mPipelineCache.setCurrentRasterState(vkraster); // Declare fixed-size arrays that get passed to the pipeCache and to vkCmdBindVertexBuffers. - VulkanPipelineCache::VertexArray varray = {}; - VkBuffer buffers[MAX_VERTEX_ATTRIBUTE_COUNT] = {}; - VkDeviceSize offsets[MAX_VERTEX_ATTRIBUTE_COUNT] = {}; - - // For each attribute, append to each of the above lists. 
- const uint32_t bufferCount = prim.vertexBuffer->attributes.size(); - for (uint32_t attribIndex = 0; attribIndex < bufferCount; attribIndex++) { - Attribute attrib = prim.vertexBuffer->attributes[attribIndex]; - - const bool isInteger = attrib.flags & Attribute::FLAG_INTEGER_TARGET; - const bool isNormalized = attrib.flags & Attribute::FLAG_NORMALIZED; - - VkFormat vkformat = getVkFormat(attrib.type, isNormalized, isInteger); - - // HACK: Re-use the positions buffer as a dummy buffer for disabled attributes. Filament's - // vertex shaders declare all attributes as either vec4 or uvec4 (the latter for bone - // indices), and positions are always at least 32 bits per element. Therefore we can assign - // a dummy type of either R8G8B8A8_UINT or R8G8B8A8_SNORM, depending on whether the shader - // expects to receive floats or ints. - if (attrib.buffer == Attribute::BUFFER_UNUSED) { - vkformat = isInteger ? VK_FORMAT_R8G8B8A8_UINT : VK_FORMAT_R8G8B8A8_SNORM; - attrib = prim.vertexBuffer->attributes[0]; - } - - const VulkanBuffer* buffer = prim.vertexBuffer->buffers[attrib.buffer]; - - // If the vertex buffer is missing a constituent buffer object, skip the draw call. - // There is no need to emit an error message because this is not explicitly forbidden. 
- if (buffer == nullptr) { - return; - } - - buffers[attribIndex] = buffer->getGpuBuffer(); - offsets[attribIndex] = attrib.offset; - varray.attributes[attribIndex] = { - .location = attribIndex, // matches the GLSL layout specifier - .binding = attribIndex, // matches the position within vkCmdBindVertexBuffers - .format = vkformat, - }; - varray.buffers[attribIndex] = { - .binding = attribIndex, - .stride = attrib.stride, - }; - } + uint32_t const bufferCount = prim.vertexBuffer->attributes.size(); + VkVertexInputAttributeDescription const* attribDesc = prim.vertexBuffer->getAttribDescriptions(); + VkVertexInputBindingDescription const* bufferDesc = prim.vertexBuffer->getBufferDescriptions(); + VkBuffer const* buffers = prim.vertexBuffer->getVkBuffers(); + VkDeviceSize const* offsets = prim.vertexBuffer->getOffsets(); // Push state changes to the VulkanPipelineCache instance. This is fast and does not make VK calls. mPipelineCache.bindProgram(*program); mPipelineCache.bindRasterState(mPipelineCache.getCurrentRasterState()); mPipelineCache.bindPrimitiveTopology(prim.primitiveTopology); - mPipelineCache.bindVertexArray(varray); + mPipelineCache.bindVertexArray(attribDesc, bufferDesc, bufferCount); // Query the program for the mapping from (SamplerGroupBinding,Offset) to (SamplerBinding), // where "SamplerBinding" is the integer in the GLSL, and SamplerGroupBinding is the abstract diff --git a/filament/backend/src/vulkan/VulkanHandles.cpp b/filament/backend/src/vulkan/VulkanHandles.cpp index 5ab3365d0276..963ca70f0f11 100644 --- a/filament/backend/src/vulkan/VulkanHandles.cpp +++ b/filament/backend/src/vulkan/VulkanHandles.cpp @@ -276,16 +276,62 @@ VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& uint32_t elementCount, AttributeArray const& attribs) : HwVertexBuffer(bufferCount, attributeCount, elementCount, attribs), VulkanResource(VulkanResourceType::VERTEX_BUFFER), - buffers(bufferCount, nullptr), - mResources(allocator) {} + 
mInfo(new PipelineInfo(attribs.size())), + mResources(allocator) { + auto attribDesc = mInfo->mSoa.data(); + auto bufferDesc = mInfo->mSoa.data(); + auto offsets = mInfo->mSoa.data(); + auto attribToBufferIndex = mInfo->mSoa.data(); + std::fill(mInfo->mSoa.begin(), + mInfo->mSoa.end(), -1); + + for (uint32_t attribIndex = 0; attribIndex < attribs.size(); attribIndex++) { + Attribute attrib = attribs[attribIndex]; + bool const isInteger = attrib.flags & Attribute::FLAG_INTEGER_TARGET; + bool const isNormalized = attrib.flags & Attribute::FLAG_NORMALIZED; + VkFormat vkformat = getVkFormat(attrib.type, isNormalized, isInteger); + + // HACK: Re-use the positions buffer as a dummy buffer for disabled attributes. Filament's + // vertex shaders declare all attributes as either vec4 or uvec4 (the latter for bone + // indices), and positions are always at least 32 bits per element. Therefore we can assign + // a dummy type of either R8G8B8A8_UINT or R8G8B8A8_SNORM, depending on whether the shader + // expects to receive floats or ints. + if (attrib.buffer == Attribute::BUFFER_UNUSED) { + vkformat = isInteger ? 
VK_FORMAT_R8G8B8A8_UINT : VK_FORMAT_R8G8B8A8_SNORM; + attrib = attribs[0]; + } + offsets[attribIndex] = attrib.offset; + attribDesc[attribIndex] = { + .location = attribIndex,// matches the GLSL layout specifier + .binding = attribIndex, // matches the position within vkCmdBindVertexBuffers + .format = vkformat, + }; + bufferDesc[attribIndex] = { + .binding = attribIndex, + .stride = attrib.stride, + }; + attribToBufferIndex[attribIndex] = attrib.buffer; + } +} + +VulkanVertexBuffer::~VulkanVertexBuffer() { + delete mInfo; +} void VulkanVertexBuffer::setBuffer(VulkanBufferObject* bufferObject, uint32_t index) { - buffers[index] = &bufferObject->buffer; + size_t count = attributes.size(); + auto vkbuffers = mInfo->mSoa.data(); + auto attribToBuffer = mInfo->mSoa.data(); + for (uint8_t attribIndex = 0; attribIndex < count; attribIndex++) { + if (attribToBuffer[attribIndex] == static_cast(index)) { + vkbuffers[attribIndex] = bufferObject->buffer.getGpuBuffer(); + } + } mResources.acquire(bufferObject); } VulkanBufferObject::VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool, - uint32_t byteCount, BufferObjectBinding bindingType, BufferUsage usage) + uint32_t byteCount, BufferObjectBinding bindingType) : HwBufferObject(byteCount), VulkanResource(VulkanResourceType::BUFFER_OBJECT), buffer(allocator, stagePool, getBufferObjectUsage(bindingType), byteCount), diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h index 276f2ab81968..1d3d032444de 100644 --- a/filament/backend/src/vulkan/VulkanHandles.h +++ b/filament/backend/src/vulkan/VulkanHandles.h @@ -30,6 +30,7 @@ #include "private/backend/SamplerGroup.h" #include +#include namespace filament::backend { @@ -94,11 +95,50 @@ struct VulkanVertexBuffer : public HwVertexBuffer, VulkanResource { VulkanResourceAllocator* allocator, uint8_t bufferCount, uint8_t attributeCount, uint32_t elementCount, AttributeArray const& attributes); + ~VulkanVertexBuffer(); + 
void setBuffer(VulkanBufferObject* bufferObject, uint32_t index); - utils::FixedCapacityVector buffers; + inline VkVertexInputAttributeDescription const* getAttribDescriptions() { + return mInfo->mSoa.data(); + } + + inline VkVertexInputBindingDescription const* getBufferDescriptions() { + return mInfo->mSoa.data(); + } + + inline VkBuffer const* getVkBuffers() const { + return mInfo->mSoa.data(); + } + + inline VkDeviceSize const* getOffsets() const { + return mInfo->mSoa.data(); + } private: + struct PipelineInfo { + PipelineInfo(size_t size) + : mSoa(size /* capacity */) { + mSoa.resize(size); + } + + // These corresponds to the index of the element in the SoA + static constexpr uint8_t ATTRIBUTE_DESCRIPTION = 0; + static constexpr uint8_t BUFFER_DESCRIPTION = 1; + static constexpr uint8_t VK_BUFFER = 2; + static constexpr uint8_t OFFSETS = 3; + static constexpr uint8_t ATTRIBUTE_TO_BUFFER_INDEX = 4; + + utils::StructureOfArrays< + VkVertexInputAttributeDescription, + VkVertexInputBindingDescription, + VkBuffer, + VkDeviceSize, + int8_t + > mSoa; + }; + + PipelineInfo* mInfo; FixedSizeVulkanResourceManager mResources; }; @@ -116,7 +156,7 @@ struct VulkanIndexBuffer : public HwIndexBuffer, VulkanResource { struct VulkanBufferObject : public HwBufferObject, VulkanResource { VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool, uint32_t byteCount, - BufferObjectBinding bindingType, BufferUsage usage); + BufferObjectBinding bindingType); VulkanBuffer buffer; const BufferObjectBinding bindingType; diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.cpp b/filament/backend/src/vulkan/VulkanPipelineCache.cpp index 233bef3a32f6..b554af2b54b3 100644 --- a/filament/backend/src/vulkan/VulkanPipelineCache.cpp +++ b/filament/backend/src/vulkan/VulkanPipelineCache.cpp @@ -580,14 +580,21 @@ void VulkanPipelineCache::bindPrimitiveTopology(VkPrimitiveTopology topology) no mPipelineRequirements.topology = topology; } -void 
VulkanPipelineCache::bindVertexArray(const VertexArray& varray) noexcept { +void VulkanPipelineCache::bindVertexArray(VkVertexInputAttributeDescription const* attribDesc, + VkVertexInputBindingDescription const* bufferDesc, uint8_t count) { for (size_t i = 0; i < VERTEX_ATTRIBUTE_COUNT; i++) { - mPipelineRequirements.vertexAttributes[i] = varray.attributes[i]; - mPipelineRequirements.vertexBuffers[i] = varray.buffers[i]; + if (i < count) { + mPipelineRequirements.vertexAttributes[i] = attribDesc[i]; + mPipelineRequirements.vertexBuffers[i] = bufferDesc[i]; + } else { + mPipelineRequirements.vertexAttributes[i] = {}; + mPipelineRequirements.vertexBuffers[i] = {}; + } } } -VulkanPipelineCache::UniformBufferBinding VulkanPipelineCache::getUniformBufferBinding(uint32_t bindingIndex) const noexcept { +VulkanPipelineCache::UniformBufferBinding VulkanPipelineCache::getUniformBufferBinding( + uint32_t bindingIndex) const noexcept { auto& key = mDescriptorRequirements; return { key.uniformBuffers[bindingIndex], diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.h b/filament/backend/src/vulkan/VulkanPipelineCache.h index 2c9bf09350e5..b7248420f8ee 100644 --- a/filament/backend/src/vulkan/VulkanPipelineCache.h +++ b/filament/backend/src/vulkan/VulkanPipelineCache.h @@ -80,8 +80,6 @@ class VulkanPipelineCache : public CommandBufferObserver { // as a vertex assembler configuration. For simplicity it contains fixed-size arrays and does // not store sizes; all unused entries are simply zeroed out. struct VertexArray { - VkVertexInputAttributeDescription attributes[VERTEX_ATTRIBUTE_COUNT]; - VkVertexInputBindingDescription buffers[VERTEX_ATTRIBUTE_COUNT]; }; // The ProgramBundle contains weak references to the compiled vertex and fragment shaders. 
@@ -163,7 +161,8 @@ class VulkanPipelineCache : public CommandBufferObserver { void bindSamplers(VkDescriptorImageInfo samplers[SAMPLER_BINDING_COUNT], VulkanTexture* textures[SAMPLER_BINDING_COUNT], UsageFlags flags) noexcept; void bindInputAttachment(uint32_t bindingIndex, VkDescriptorImageInfo imageInfo) noexcept; - void bindVertexArray(const VertexArray& varray) noexcept; + void bindVertexArray(VkVertexInputAttributeDescription const* attribDesc, + VkVertexInputBindingDescription const* bufferDesc, uint8_t count); // Gets the current UBO at the given slot, useful for push / pop. UniformBufferBinding getUniformBufferBinding(uint32_t bindingIndex) const noexcept;