From c552fa900c0f97a2beb0de3684569d9bb31f1e69 Mon Sep 17 00:00:00 2001 From: Autumn Ashton Date: Wed, 4 Dec 2024 16:29:13 +0000 Subject: [PATCH 1/2] [d3d9] Spec-constant out writes to clip distances when disabled Add a new spec constant with a mask of the enabled clip planes such that they can be optimized out to improve performance. For GPL shaders, override what we return here so it's always true and don't bother putting the mask in the UBO. Signed-off-by: Autumn Ashton --- src/d3d9/d3d9_device.cpp | 12 +++++++++--- src/d3d9/d3d9_fixed_function.cpp | 15 ++++++++++----- src/d3d9/d3d9_spec_constants.h | 13 ++++++++++--- src/d3d9/d3d9_state.h | 8 ++++++++ src/dxso/dxso_compiler.cpp | 9 ++++++++- 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 068e895197d..1e2eee49785 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -5783,7 +5783,7 @@ namespace dxvk { VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, getSpecConstantBufferSlot(), - sizeof(D3D9SpecializationInfo)); + D3D9SpecializationInfo::UBOSize); } } @@ -5933,11 +5933,18 @@ namespace dxvk { auto mapPtr = m_vsClipPlanes.AllocSlice(); auto dst = reinterpret_cast(mapPtr); + uint32_t clipPlaneMask = 0u; for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) ? m_state.clipPlanes[i] : D3D9ClipPlane(); + + if (dst[i] != D3D9ClipPlane()) + clipPlaneMask |= 1u << i; } + + if (m_specInfo.set(clipPlaneMask)) + m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries); } @@ -8589,8 +8596,7 @@ namespace dxvk { if (m_usingGraphicsPipelines) { // TODO: Make uploading specialization information less naive. auto mapPtr = m_specBuffer.AllocSlice(); - auto dst = reinterpret_cast(mapPtr); - *dst = m_specInfo; + memcpy(mapPtr, m_specInfo.data.data(), D3D9SpecializationInfo::UBOSize); } m_flags.clr(D3D9DeviceFlag::DirtySpecializationEntries); diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp index 46cdd4ec9db..4451d8b26a7 100644 --- a/src/d3d9/d3d9_fixed_function.cpp +++ b/src/d3d9/d3d9_fixed_function.cpp @@ -2366,6 +2366,7 @@ namespace dxvk { uint32_t floatType = m_module.defFloatType(32); uint32_t vec4Type = m_module.defVectorType(floatType, 4); + uint32_t boolType = m_module.defBoolType(); // Declare uniform buffer containing clip planes uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId); @@ -2419,12 +2420,16 @@ namespace dxvk { clipPlaneBlock, blockMembers.size(), blockMembers.data())); uint32_t distId = m_module.opDot(floatType, worldPos, planeId); + + // Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value. + uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff)); + uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0)); + + uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, distId, m_module.constf32(0.0f)); - m_module.opStore( - m_module.opAccessChain( - m_module.defPointerType(floatType, spv::StorageClassOutput), - clipDistArray, 1, &blockMembers[1]), - distId); + m_module.opStore(m_module.opAccessChain( + m_module.defPointerType(floatType, spv::StorageClassOutput), + clipDistArray, 1, &blockMembers[1]), value); } } diff --git a/src/d3d9/d3d9_spec_constants.h b/src/d3d9/d3d9_spec_constants.h index 4ecf710c9d0..835717bd13b 100644 --- a/src/d3d9/d3d9_spec_constants.h +++ b/src/d3d9/d3d9_spec_constants.h @@ -30,6 +30,8 @@ namespace dxvk { SpecDrefClamp, // 1 bit for 16 PS samplers | Bits: 16 SpecFetch4, // 1 bit for 16 PS samplers | Bits: 16 + SpecClipPlaneMask, // 6 bits for 6 clip planes | Bits : 6 + SpecConstantCount, }; @@ -44,7 +46,10 @@ namespace dxvk { }; struct D3D9SpecializationInfo { - static constexpr uint32_t MaxSpecDwords = 5; + static constexpr uint32_t MaxSpecDwords = 6; + + static constexpr uint32_t MaxUBODwords = 5; + static constexpr size_t UBOSize = MaxUBODwords * sizeof(uint32_t); static constexpr std::array Layout{{ { 0, 0, 32 }, // SamplerType @@ -65,6 +70,8 @@ namespace dxvk { { 4, 0, 16 }, // DrefClamp { 4, 16, 16 }, // Fetch4 + + { 5, 0, 6 }, // ClipPlaneEnabled }}; template @@ -97,13 +104,13 @@ namespace dxvk { return get(module, specUbo, id, 0, 32); } - uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount) { + uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount, uint32_t uboOverride = 0) { const auto &layout = D3D9SpecializationInfo::Layout[id]; uint32_t uintType = module.defIntType(32, 0); uint32_t optimized = getOptimizedBool(module); - uint32_t quickValue = getSpecUBODword(module, specUbo, layout.dwordOffset); + uint32_t quickValue = uboOverride ? uboOverride : getSpecUBODword(module, specUbo, layout.dwordOffset); uint32_t optimizedValue = getSpecConstDword(module, layout.dwordOffset); uint32_t val = module.opSelect(uintType, optimized, optimizedValue, quickValue); diff --git a/src/d3d9/d3d9_state.h b/src/d3d9/d3d9_state.h index ddd3eaa5f85..79aa0d9d7e4 100644 --- a/src/d3d9/d3d9_state.h +++ b/src/d3d9/d3d9_state.h @@ -28,6 +28,14 @@ namespace dxvk { struct D3D9ClipPlane { float coeff[4] = {}; + + bool operator == (const D3D9ClipPlane& other) { + return std::memcmp(this, &other, sizeof(D3D9ClipPlane)) == 0; + } + + bool operator != (const D3D9ClipPlane& other) { + return !this->operator == (other); + } }; struct D3D9RenderStateInfo { diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index 10a7bceb962..ca171ace4ed 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -3482,6 +3482,7 @@ void DxsoCompiler::emitControlFlowGenericLoop( uint32_t floatType = m_module.defFloatType(32); uint32_t vec4Type = m_module.defVectorType(floatType, 4); + uint32_t boolType = m_module.defBoolType(); // Declare uniform buffer containing clip planes uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId); @@ -3551,9 +3552,15 @@ void DxsoCompiler::emitControlFlowGenericLoop( DxsoRegisterValue dist = emitDot(position, plane); + // Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value. + uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff)); + uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0)); + + uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, dist.id, m_module.constf32(0.0f)); + m_module.opStore(m_module.opAccessChain( m_module.defPointerType(floatType, spv::StorageClassOutput), - clipDistArray, 1, &blockMembers[1]), dist.id); + clipDistArray, 1, &blockMembers[1]), value); } } From f54964a8b428f306d499050d7ae166386a8f02fe Mon Sep 17 00:00:00 2001 From: Autumn Ashton Date: Wed, 4 Dec 2024 16:41:48 +0000 Subject: [PATCH 2/2] [d3d9] Clip plane compaction Compact clip planes to the smallest amount that are enabled. Signed-off-by: Autumn Ashton --- src/d3d9/d3d9_device.cpp | 14 +++++++++----- src/d3d9/d3d9_fixed_function.cpp | 7 ++++--- src/d3d9/d3d9_spec_constants.h | 4 ++-- src/dxso/dxso_compiler.cpp | 7 ++++--- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 1e2eee49785..d2e2557988b 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -5933,17 +5933,21 @@ namespace dxvk { auto mapPtr = m_vsClipPlanes.AllocSlice(); auto dst = reinterpret_cast(mapPtr); - uint32_t clipPlaneMask = 0u; + uint32_t clipPlaneCount = 0u; for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { - dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) + D3D9ClipPlane clipPlane = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i)) ? m_state.clipPlanes[i] : D3D9ClipPlane(); - if (dst[i] != D3D9ClipPlane()) - clipPlaneMask |= 1u << i; + if (clipPlane != D3D9ClipPlane()) + dst[clipPlaneCount++] = clipPlane; } - if (m_specInfo.set(clipPlaneMask)) + // Write the rest to 0 for GPL. + for (uint32_t i = clipPlaneCount; i < caps::MaxClipPlanes; i++) + dst[i] = D3D9ClipPlane(); + + if (m_specInfo.set(clipPlaneCount)) m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries); } diff --git a/src/d3d9/d3d9_fixed_function.cpp b/src/d3d9/d3d9_fixed_function.cpp index 4451d8b26a7..a47f89cad24 100644 --- a/src/d3d9/d3d9_fixed_function.cpp +++ b/src/d3d9/d3d9_fixed_function.cpp @@ -2407,6 +2407,9 @@ namespace dxvk { m_module.decorateBuiltIn(clipDistArray, spv::BuiltInClipDistance); + // Always consider clip planes enabled when doing GPL by forcing 6 for the quick value. + uint32_t clipPlaneCount = m_spec.get(m_module, m_specUbo, SpecClipPlaneCount, 0, 32, m_module.constu32(caps::MaxClipPlanes)); + // Compute clip distances for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { std::array blockMembers = {{ @@ -2421,9 +2424,7 @@ namespace dxvk { uint32_t distId = m_module.opDot(floatType, worldPos, planeId); - // Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value. - uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff)); - uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0)); + uint32_t clipPlaneEnabled = m_module.opULessThan(boolType, m_module.constu32(i), clipPlaneCount); uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, distId, m_module.constf32(0.0f)); diff --git a/src/d3d9/d3d9_spec_constants.h b/src/d3d9/d3d9_spec_constants.h index 835717bd13b..8732d7c4343 100644 --- a/src/d3d9/d3d9_spec_constants.h +++ b/src/d3d9/d3d9_spec_constants.h @@ -30,7 +30,7 @@ namespace dxvk { SpecDrefClamp, // 1 bit for 16 PS samplers | Bits: 16 SpecFetch4, // 1 bit for 16 PS samplers | Bits: 16 - SpecClipPlaneMask, // 6 bits for 6 clip planes | Bits : 6 + SpecClipPlaneCount, // 3 bits for 6 clip planes | Bits : 3 SpecConstantCount, }; @@ -71,7 +71,7 @@ namespace dxvk { { 4, 0, 16 }, // DrefClamp { 4, 16, 16 }, // Fetch4 - { 5, 0, 6 }, // ClipPlaneEnabled + { 5, 0, 3 }, // ClipPlaneCount }}; template diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index ca171ace4ed..29efcb151e4 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -3537,6 +3537,9 @@ void DxsoCompiler::emitControlFlowGenericLoop( DxsoRegisterValue position; position.type = { DxsoScalarType::Float32, 4 }; position.id = m_module.opLoad(vec4Type, positionPtr); + + // Always consider clip planes enabled when doing GPL by forcing 6 for the quick value. + uint32_t clipPlaneCount = m_spec.get(m_module, m_specUbo, SpecClipPlaneCount, 0, 32, m_module.constu32(caps::MaxClipPlanes)); for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) { std::array blockMembers = {{ @@ -3552,9 +3555,7 @@ void DxsoCompiler::emitControlFlowGenericLoop( DxsoRegisterValue dist = emitDot(position, plane); - // Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value. - uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff)); - uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0)); + uint32_t clipPlaneEnabled = m_module.opULessThan(boolType, m_module.constu32(i), clipPlaneCount); uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, dist.id, m_module.constf32(0.0f));