Skip to content

Commit

Permalink
Improved input layout automatic offset and stride handling
Browse files Browse the repository at this point in the history
  • Loading branch information
TheMostDiligent committed Oct 7, 2023
1 parent 2ad7e7a commit 00464ed
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 93 deletions.
13 changes: 12 additions & 1 deletion Graphics/GraphicsAccessories/interface/GraphicsAccessories.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019-2022 Diligent Graphics LLC
* Copyright 2019-2023 Diligent Graphics LLC
* Copyright 2015-2019 Egor Yusov
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -30,6 +30,8 @@
/// \file
/// Defines graphics engine utilities

#include <vector>

#include "../../GraphicsEngine/interface/GraphicsTypes.h"
#include "../../GraphicsEngine/interface/Shader.h"
#include "../../GraphicsEngine/interface/Texture.h"
Expand Down Expand Up @@ -435,6 +437,11 @@ String GetShaderCodeBufferDescString(const ShaderCodeBufferDesc& Desc, size_t Gl
/// Returns the string containing the shader code variable description.
String GetShaderCodeVariableDescString(const ShaderCodeVariableDesc& Desc, size_t GlobalIdent = 0, size_t MemberIdent = 2);

/// Returns the string representation of the input element frequency
/// (e.g. "per-vertex" or "per-instance").
const char* GetInputElementFrequencyString(INPUT_ELEMENT_FREQUENCY Frequency);

/// Returns the string containing the layout element description.
String GetLayoutElementString(const LayoutElement& Element);

PIPELINE_RESOURCE_FLAGS GetValidPipelineResourceFlags(SHADER_RESOURCE_TYPE ResourceType);

PIPELINE_RESOURCE_FLAGS ShaderVariableFlagsToPipelineResourceFlags(SHADER_VARIABLE_FLAGS Flags);
Expand Down Expand Up @@ -754,4 +761,8 @@ inline uint3 GetNumSparseTilesInMipLevel(const TextureDesc& Desc,
/// Returns true if the Mapping defines an identity texture component swizzle
bool IsIdentityComponentMapping(const TextureComponentMapping& Mapping);

/// Resolves LAYOUT_ELEMENT_AUTO_OFFSET and LAYOUT_ELEMENT_AUTO_STRIDE values in the input layout,
/// and returns an array of buffer strides for each used input buffer slot.
///
/// \param [in,out] pLayoutElements - Array of layout elements. The elements are updated in place:
///                                   automatic offsets and strides are replaced with resolved values.
/// \param [in]     NumElements     - Number of elements in the pLayoutElements array.
///
/// \return     A vector of strides indexed by buffer slot. Its size is the largest
///             buffer slot referenced by the elements plus one (empty if NumElements is 0).
///             Slots that are not referenced by any element are assigned stride 0.
///
/// \remarks    Elements are processed in array order: an element with automatic offset is placed
///             at the end of the data occupied so far by the preceding elements of the same slot.
///             IsNormalized is reset to false for VT_FLOAT32 and VT_FLOAT16 elements.
///             Inconsistent explicit strides and explicit strides that are too small to fit
///             all elements of a slot are reported as error messages.
std::vector<Uint32> ResolveInputLayoutAutoOffsetsAndStrides(LayoutElement* pLayoutElements, Uint32 NumElements);

} // namespace Diligent
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019-2022 Diligent Graphics LLC
* Copyright 2019-2023 Diligent Graphics LLC
* Copyright 2015-2019 Egor Yusov
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -69,6 +69,7 @@ DEFINE_TYPE_PRINTER(AttachmentReference, GetAttachmentReferenceString)
DEFINE_TYPE_PRINTER(ShaderDesc, GetShaderDescString)
DEFINE_TYPE_PRINTER(ShaderCodeBufferDesc, GetShaderCodeBufferDescString)
DEFINE_TYPE_PRINTER(ShaderCodeVariableDesc, GetShaderCodeVariableDescString)
DEFINE_TYPE_PRINTER(LayoutElement, GetLayoutElementString)
#undef DEFINE_TYPE_PRINTER

} // namespace Diligent
109 changes: 109 additions & 0 deletions Graphics/GraphicsAccessories/src/GraphicsAccessories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,36 @@ String GetShaderCodeVariableDescString(const ShaderCodeVariableDesc& Desc, size_
return ss.str();
}

/// Returns a human-readable name of the input element frequency.
///
/// Unknown values trigger UNEXPECTED() and yield "UNKNOWN".
const char* GetInputElementFrequencyString(INPUT_ELEMENT_FREQUENCY Frequency)
{
    if (Frequency == INPUT_ELEMENT_FREQUENCY_UNDEFINED)
        return "undefined";

    if (Frequency == INPUT_ELEMENT_FREQUENCY_PER_VERTEX)
        return "per-vertex";

    if (Frequency == INPUT_ELEMENT_FREQUENCY_PER_INSTANCE)
        return "per-instance";

    // None of the known frequencies matched
    UNEXPECTED("Unknown/unsupported input element frequency");
    return "UNKNOWN";
}
/// Returns a single-line, comma-separated description of the layout element.
///
/// Automatic offset and stride values (LAYOUT_ELEMENT_AUTO_OFFSET and
/// LAYOUT_ELEMENT_AUTO_STRIDE) are printed as "auto". A null HLSL semantic
/// is printed as "<null>".
String GetLayoutElementString(const LayoutElement& Element)
{
    // Inserting a null C-string into a stream is undefined behavior (in practice it
    // puts the stream into a failed state and drops the rest of the output),
    // so substitute a placeholder to keep this diagnostic helper safe.
    const char* const Semantic = Element.HLSLSemantic != nullptr ? Element.HLSLSemantic : "<null>";

    std::stringstream ss;
    ss << "HLSLSemantic: " << Semantic
       << ", InputIndex: " << Element.InputIndex
       << ", BufferSlot: " << Element.BufferSlot
       << ", NumComponents: " << Element.NumComponents
       << ", ValueType: " << GetValueTypeString(Element.ValueType)
       << ", IsNormalized: " << Element.IsNormalized
       << ", RelativeOffset: " << (Element.RelativeOffset == LAYOUT_ELEMENT_AUTO_OFFSET ? "auto" : std::to_string(Element.RelativeOffset))
       << ", Stride: " << (Element.Stride == LAYOUT_ELEMENT_AUTO_STRIDE ? "auto" : std::to_string(Element.Stride))
       << ", Frequency: " << GetInputElementFrequencyString(Element.Frequency)
       << ", InstanceDataStepRate: " << Element.InstanceDataStepRate;

    return ss.str();
}

PIPELINE_RESOURCE_FLAGS GetValidPipelineResourceFlags(SHADER_RESOURCE_TYPE ResourceType)
{
static_assert(SHADER_RESOURCE_TYPE_LAST == 8, "Please update the switch below to handle the new shader resource type");
Expand Down Expand Up @@ -2362,4 +2392,83 @@ bool IsIdentityComponentMapping(const TextureComponentMapping& Mapping)
(Mapping.A == TEXTURE_COMPONENT_SWIZZLE_IDENTITY || Mapping.A == TEXTURE_COMPONENT_SWIZZLE_A));
}


// Replaces automatic offsets and strides of the layout elements with concrete values
// (the elements are modified in place) and returns the resulting stride of every
// buffer slot in the range [0, max BufferSlot]. Slots without elements get stride 0.
std::vector<Uint32> ResolveInputLayoutAutoOffsetsAndStrides(LayoutElement* pLayoutElements, Uint32 NumElements)
{
    // The number of slots is defined by the largest slot index referenced by any element
    Uint32 NumSlots = 0;
    for (Uint32 ElemIdx = 0; ElemIdx < NumElements; ++ElemIdx)
    {
        const Uint32 SlotEnd = pLayoutElements[ElemIdx].BufferSlot + 1;
        if (SlotEnd > NumSlots)
            NumSlots = SlotEnd;
    }

    // Minimal stride of each slot that fits all elements processed so far
    std::vector<Uint32> PackedSizes(NumSlots);
    // Zero is a legitimate stride an application may request, so the
    // 'not specified' state is marked by the automatic stride value instead.
    std::vector<Uint32> SlotStrides(NumSlots, LAYOUT_ELEMENT_AUTO_STRIDE);

    // Pass 1: resolve offsets, gather explicit strides and compute packed sizes
    for (Uint32 ElemIdx = 0; ElemIdx < NumElements; ++ElemIdx)
    {
        LayoutElement& Elem = pLayoutElements[ElemIdx];

        // Normalization makes no sense for floating point values
        const bool IsFloatingPoint = (Elem.ValueType == VT_FLOAT32 || Elem.ValueType == VT_FLOAT16);
        if (IsFloatingPoint)
            Elem.IsNormalized = false;

        const Uint32 Slot = Elem.BufferSlot;

        // An element with automatic offset goes right after the data
        // occupied by the previous elements of the same slot
        if (Elem.RelativeOffset == LAYOUT_ELEMENT_AUTO_OFFSET)
            Elem.RelativeOffset = PackedSizes[Slot];

        if (Elem.Stride != LAYOUT_ELEMENT_AUTO_STRIDE)
        {
            // Explicit stride overrides the slot stride, but must agree
            // with the stride specified by preceding elements, if any
            Uint32&    SlotStride = SlotStrides[Slot];
            const bool Conflicts  = SlotStride != LAYOUT_ELEMENT_AUTO_STRIDE && SlotStride != Elem.Stride;
            if (Conflicts)
            {
                LOG_ERROR_MESSAGE("Inconsistent strides are specified for buffer slot ", Slot,
                                  ". Input element at index ", Elem.InputIndex, " explicitly specifies stride ",
                                  Elem.Stride, ", while current value is ", SlotStride,
                                  ". Specify consistent strides or use LAYOUT_ELEMENT_AUTO_STRIDE to allow "
                                  "the engine compute strides automatically.");
            }
            SlotStride = Elem.Stride;
        }

        // Extend the packed size of the slot to cover this element
        const Uint32 ElemEnd = Elem.RelativeOffset + Elem.NumComponents * GetValueSize(Elem.ValueType);
        PackedSizes[Slot]    = std::max(PackedSizes[Slot], ElemEnd);
    }

    // Pass 2: finalize slot strides and write them to elements with automatic stride
    for (Uint32 ElemIdx = 0; ElemIdx < NumElements; ++ElemIdx)
    {
        LayoutElement& Elem = pLayoutElements[ElemIdx];

        const Uint32 Slot       = Elem.BufferSlot;
        const Uint32 MinStride  = PackedSizes[Slot];
        Uint32&      SlotStride = SlotStrides[Slot];

        if (SlotStride == LAYOUT_ELEMENT_AUTO_STRIDE)
        {
            // No element provided explicit stride for this slot - use the packed size
            SlotStride = MinStride;
        }
        else if (SlotStride < MinStride)
        {
            LOG_ERROR_MESSAGE("Stride ", SlotStride, " explicitly specified for slot ", Slot,
                              " is smaller than the minimum stride ", MinStride,
                              " required to accommodate all input elements.");
        }

        if (Elem.Stride == LAYOUT_ELEMENT_AUTO_STRIDE)
            Elem.Stride = SlotStride;
    }

    // Slots that are not referenced by any element still hold the marker value: reset them to 0
    for (size_t Slot = 0; Slot < SlotStrides.size(); ++Slot)
    {
        if (SlotStrides[Slot] == LAYOUT_ELEMENT_AUTO_STRIDE)
            SlotStrides[Slot] = 0;
    }

    return SlotStrides;
}

} // namespace Diligent
115 changes: 25 additions & 90 deletions Graphics/GraphicsEngine/include/PipelineStateBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,17 +542,20 @@ class PipelineStateBase : public DeviceObjectBase<typename EngineImplTraits::Pip
ReserveResourceLayout(CreateInfo.PSODesc.ResourceLayout, MemPool);
ReserveResourceSignatures(CreateInfo, MemPool);

const auto& InputLayout = CreateInfo.GraphicsPipeline.InputLayout;
Uint32 BufferSlotsUsed = 0;
MemPool.AddSpace<LayoutElement>(InputLayout.NumElements);
for (Uint32 i = 0; i < InputLayout.NumElements; ++i)
const auto& InputLayout = CreateInfo.GraphicsPipeline.InputLayout;
if (InputLayout.NumElements > 0)
{
auto& LayoutElem = InputLayout.LayoutElements[i];
MemPool.AddSpaceForString(LayoutElem.HLSLSemantic);
BufferSlotsUsed = std::max(BufferSlotsUsed, LayoutElem.BufferSlot + 1);
}
Uint32 BufferSlotsUsed = 0;
MemPool.AddSpace<LayoutElement>(InputLayout.NumElements);
for (Uint32 i = 0; i < InputLayout.NumElements; ++i)
{
auto& LayoutElem = InputLayout.LayoutElements[i];
MemPool.AddSpaceForString(LayoutElem.HLSLSemantic);
BufferSlotsUsed = std::max(BufferSlotsUsed, LayoutElem.BufferSlot + 1);
}

MemPool.AddSpace<Uint32>(BufferSlotsUsed);
MemPool.AddSpace<Uint32>(BufferSlotsUsed);
}

static_assert(std::is_trivially_destructible<decltype(*InputLayout.LayoutElements)>::value, "Add destructor for this object to Destruct()");
}
Expand Down Expand Up @@ -819,93 +822,25 @@ class PipelineStateBase : public DeviceObjectBase<typename EngineImplTraits::Pip
}

const auto& InputLayout = GraphicsPipeline.InputLayout;
LayoutElement* pLayoutElements = MemPool.ConstructArray<LayoutElement>(InputLayout.NumElements);
for (size_t Elem = 0; Elem < InputLayout.NumElements; ++Elem)
{
const auto& SrcElem = InputLayout.LayoutElements[Elem];
pLayoutElements[Elem] = SrcElem;
VERIFY_EXPR(SrcElem.HLSLSemantic != nullptr);
pLayoutElements[Elem].HLSLSemantic = MemPool.CopyString(SrcElem.HLSLSemantic);
}
GraphicsPipeline.InputLayout.LayoutElements = pLayoutElements;


// Correct description and compute offsets and tight strides
std::array<Uint32, MAX_BUFFER_SLOTS> Strides, TightStrides = {};
// Set all strides to an invalid value because an application may want to use 0 stride
Strides.fill(LAYOUT_ELEMENT_AUTO_STRIDE);

for (Uint32 i = 0; i < InputLayout.NumElements; ++i)
{
auto& LayoutElem = pLayoutElements[i];

if (LayoutElem.ValueType == VT_FLOAT32 || LayoutElem.ValueType == VT_FLOAT16)
LayoutElem.IsNormalized = false; // Floating point values cannot be normalized

auto BuffSlot = LayoutElem.BufferSlot;
if (BuffSlot >= Strides.size())
{
UNEXPECTED("Buffer slot (", BuffSlot, ") exceeds the maximum allowed value (", Strides.size() - 1, ")");
continue;
}
BufferSlotsUsed = std::max(BufferSlotsUsed, static_cast<Uint8>(BuffSlot + 1));

auto& CurrAutoStride = TightStrides[BuffSlot];
// If offset is not explicitly specified, use current auto stride value
if (LayoutElem.RelativeOffset == LAYOUT_ELEMENT_AUTO_OFFSET)
{
LayoutElem.RelativeOffset = CurrAutoStride;
}

// If stride is explicitly specified, use it for the current buffer slot
if (LayoutElem.Stride != LAYOUT_ELEMENT_AUTO_STRIDE)
{
// Verify that the value is consistent with the previously specified stride, if any
if (Strides[BuffSlot] != LAYOUT_ELEMENT_AUTO_STRIDE && Strides[BuffSlot] != LayoutElem.Stride)
{
LOG_ERROR_MESSAGE("Inconsistent strides are specified for buffer slot ", BuffSlot,
". Input element at index ", LayoutElem.InputIndex, " explicitly specifies stride ",
LayoutElem.Stride, ", while current value is ", Strides[BuffSlot],
". Specify consistent strides or use LAYOUT_ELEMENT_AUTO_STRIDE to allow "
"the engine compute strides automatically.");
}
Strides[BuffSlot] = LayoutElem.Stride;
}

CurrAutoStride = std::max(CurrAutoStride, LayoutElem.RelativeOffset + LayoutElem.NumComponents * GetValueSize(LayoutElem.ValueType));
}

for (Uint32 i = 0; i < InputLayout.NumElements; ++i)
LayoutElement* pLayoutElements = nullptr;
if (InputLayout.NumElements > 0)
{
auto& LayoutElem = pLayoutElements[i];

auto BuffSlot = LayoutElem.BufferSlot;
// If no input elements explicitly specified stride for this buffer slot, use automatic stride
if (Strides[BuffSlot] == LAYOUT_ELEMENT_AUTO_STRIDE)
pLayoutElements = MemPool.ConstructArray<LayoutElement>(InputLayout.NumElements);
for (size_t Elem = 0; Elem < InputLayout.NumElements; ++Elem)
{
Strides[BuffSlot] = TightStrides[BuffSlot];
const auto& SrcElem = InputLayout.LayoutElements[Elem];
pLayoutElements[Elem] = SrcElem;
VERIFY_EXPR(SrcElem.HLSLSemantic != nullptr);
pLayoutElements[Elem].HLSLSemantic = MemPool.CopyString(SrcElem.HLSLSemantic);
}
else
{
if (Strides[BuffSlot] < TightStrides[BuffSlot])
{
LOG_ERROR_MESSAGE("Stride ", Strides[BuffSlot], " explicitly specified for slot ", BuffSlot,
" is smaller than the minimum stride ", TightStrides[BuffSlot],
" required to accommodate all input elements.");
}
}
if (LayoutElem.Stride == LAYOUT_ELEMENT_AUTO_STRIDE)
LayoutElem.Stride = Strides[BuffSlot];
}

pStrides = MemPool.ConstructArray<Uint32>(BufferSlotsUsed);
// Correct description and compute offsets and tight strides
const auto Strides = ResolveInputLayoutAutoOffsetsAndStrides(pLayoutElements, InputLayout.NumElements);
BufferSlotsUsed = static_cast<Uint8>(Strides.size());

// Set strides for all unused slots to 0
for (Uint32 i = 0; i < BufferSlotsUsed; ++i)
{
auto Stride = Strides[i];
pStrides[i] = Stride != LAYOUT_ELEMENT_AUTO_STRIDE ? Stride : 0;
pStrides = MemPool.CopyConstructArray<Uint32>(Strides.data(), BufferSlotsUsed);
}
GraphicsPipeline.InputLayout.LayoutElements = pLayoutElements;
}

void InitializePipelineDesc(const ComputePipelineStateCreateInfo& CreateInfo,
Expand Down
7 changes: 7 additions & 0 deletions Graphics/GraphicsEngine/interface/GraphicsTypesX.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "RenderDevice.h"
#include "../../../Platforms/Basic/interface/DebugUtilities.hpp"
#include "../../../Common/interface/RefCntAutoPtr.hpp"
#include "../../GraphicsAccessories/interface/GraphicsAccessories.hpp"

namespace Diligent
{
Expand Down Expand Up @@ -488,6 +489,12 @@ struct InputLayoutDescX
return Elements[Index];
}

/// Resolves automatic offsets and strides of the layout elements stored in this object
/// by calling ResolveInputLayoutAutoOffsetsAndStrides() on the Elements array,
/// and returns the vector of strides produced by that function.
std::vector<Uint32> ResolveAutoOffsetsAndStrides()
{
// The element count in Desc must match the Elements storage that is passed below
VERIFY_EXPR(Desc.NumElements == Elements.size());
return ResolveInputLayoutAutoOffsetsAndStrides(Elements.data(), Desc.NumElements);
}

private:
void SyncDesc(bool CopyStrings = false)
{
Expand Down
11 changes: 11 additions & 0 deletions Graphics/GraphicsEngine/src/PipelineStateBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ void ValidateGraphicsPipelineDesc(const PipelineStateDesc& PSODesc, const Graphi
LOG_PSO_ERROR_AND_THROW("Multiple viewports with variable shading rate require SHADING_RATE_CAP_FLAG_PER_PRIMITIVE_WITH_MULTIPLE_VIEWPORTS capability");
}
}

const auto& InputLayout = GraphicsPipeline.InputLayout;
if (InputLayout.NumElements > 0 && GraphicsPipeline.InputLayout.LayoutElements == nullptr)
LOG_PSO_ERROR_AND_THROW("InputLayout.LayoutElements must not be null when InputLayout.NumElements (", InputLayout.NumElements, ") is not zero.");

for (Uint32 i = 0; i < GraphicsPipeline.InputLayout.NumElements; ++i)
{
const auto& Elem = GraphicsPipeline.InputLayout.LayoutElements[i];
if (Elem.BufferSlot >= MAX_BUFFER_SLOTS)
LOG_PSO_ERROR_AND_THROW("InputLayout.LayoutElements[", i, "].BufferSlot (", Elem.BufferSlot, ") exceeds the limit (", MAX_BUFFER_SLOTS, ").");
}
}

void CorrectDepthStencilDesc(GraphicsPipelineDesc& GraphicsPipeline) noexcept
Expand Down
Loading

0 comments on commit 00464ed

Please sign in to comment.