Skip to content

Commit

Permalink
[d3d9] Spec-constant out writes to clip distances when disabled
Browse files Browse the repository at this point in the history
Add a new spec constant with a mask of the enabled clip planes such that they can be optimized out to improve performance.

For GPL shaders, override what we return here so it's always true and don't bother putting the mask in the UBO.

Signed-off-by: Autumn Ashton <[email protected]>
  • Loading branch information
misyltoad committed Dec 5, 2024
1 parent 027fe59 commit 8c4c814
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 12 deletions.
12 changes: 9 additions & 3 deletions src/d3d9/d3d9_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5783,7 +5783,7 @@ namespace dxvk {
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
getSpecConstantBufferSlot(),
sizeof(D3D9SpecializationInfo));
D3D9SpecializationInfo::UBOSize);
}
}

Expand Down Expand Up @@ -5933,11 +5933,18 @@ namespace dxvk {
auto mapPtr = m_vsClipPlanes.AllocSlice();
auto dst = reinterpret_cast<D3D9ClipPlane*>(mapPtr);

uint32_t clipPlaneMask = 0u;
for (uint32_t i = 0; i < caps::MaxClipPlanes; i++) {
dst[i] = (m_state.renderStates[D3DRS_CLIPPLANEENABLE] & (1 << i))
? m_state.clipPlanes[i]
: D3D9ClipPlane();

if (dst[i] != D3D9ClipPlane())
clipPlaneMask |= 1u << i;
}

if (m_specInfo.set<SpecClipPlaneMask>(clipPlaneMask))
m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries);
}


Expand Down Expand Up @@ -8589,8 +8596,7 @@ namespace dxvk {
if (m_usingGraphicsPipelines) {
// TODO: Make uploading specialization information less naive.
auto mapPtr = m_specBuffer.AllocSlice();
auto dst = reinterpret_cast<D3D9SpecializationInfo*>(mapPtr);
*dst = m_specInfo;
memcpy(mapPtr, m_specInfo.data.data(), D3D9SpecializationInfo::UBOSize);
}

m_flags.clr(D3D9DeviceFlag::DirtySpecializationEntries);
Expand Down
15 changes: 10 additions & 5 deletions src/d3d9/d3d9_fixed_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,7 @@ namespace dxvk {

uint32_t floatType = m_module.defFloatType(32);
uint32_t vec4Type = m_module.defVectorType(floatType, 4);
uint32_t boolType = m_module.defBoolType();

// Declare uniform buffer containing clip planes
uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId);
Expand Down Expand Up @@ -2419,12 +2420,16 @@ namespace dxvk {
clipPlaneBlock, blockMembers.size(), blockMembers.data()));

uint32_t distId = m_module.opDot(floatType, worldPos, planeId);

// Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value.
uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff));
uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0));

uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, distId, m_module.constf32(0.0f));

m_module.opStore(
m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]),
distId);
m_module.opStore(m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]), value);
}
}

Expand Down
13 changes: 10 additions & 3 deletions src/d3d9/d3d9_spec_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ namespace dxvk {
SpecDrefClamp, // 1 bit for 16 PS samplers | Bits: 16
SpecFetch4, // 1 bit for 16 PS samplers | Bits: 16

SpecClipPlaneMask, // 6 bits for 6 clip planes | Bits : 6

SpecConstantCount,
};

Expand All @@ -44,7 +46,10 @@ namespace dxvk {
};

struct D3D9SpecializationInfo {
static constexpr uint32_t MaxSpecDwords = 5;
static constexpr uint32_t MaxSpecDwords = 6;

static constexpr uint32_t MaxUBODwords = 5;
static constexpr size_t UBOSize = MaxUBODwords * sizeof(uint32_t);

static constexpr std::array<BitfieldPosition, SpecConstantCount> Layout{{
{ 0, 0, 32 }, // SamplerType
Expand All @@ -65,6 +70,8 @@ namespace dxvk {

{ 4, 0, 16 }, // DrefClamp
{ 4, 16, 16 }, // Fetch4

{ 5, 0, 6 }, // ClipPlaneEnabled
}};

template <D3D9SpecConstantId Id, typename T>
Expand Down Expand Up @@ -97,13 +104,13 @@ namespace dxvk {
return get(module, specUbo, id, 0, 32);
}

uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount) {
uint32_t get(SpirvModule &module, uint32_t specUbo, D3D9SpecConstantId id, uint32_t bitOffset, uint32_t bitCount, uint32_t uboOverride = 0) {
const auto &layout = D3D9SpecializationInfo::Layout[id];

uint32_t uintType = module.defIntType(32, 0);
uint32_t optimized = getOptimizedBool(module);

uint32_t quickValue = getSpecUBODword(module, specUbo, layout.dwordOffset);
uint32_t quickValue = uboOverride ? uboOverride : getSpecUBODword(module, specUbo, layout.dwordOffset);
uint32_t optimizedValue = getSpecConstDword(module, layout.dwordOffset);

uint32_t val = module.opSelect(uintType, optimized, optimizedValue, quickValue);
Expand Down
8 changes: 8 additions & 0 deletions src/d3d9/d3d9_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ namespace dxvk {

/**
 * \brief User clip plane
 *
 * Holds the four plane-equation coefficients of a single
 * clip plane. A default-constructed plane is all zeros.
 */
struct D3D9ClipPlane {
  float coeff[4] = {};

  /**
   * \brief Checks two clip planes for bitwise equality
   *
   * The comparison is performed on the raw bytes of the
   * coefficients rather than on their floating point values,
   * so \c -0.0f is not considered equal to \c 0.0f, and two
   * NaNs with identical bit patterns compare equal.
   *
   * Const-qualified so that it can be invoked on const
   * objects and temporaries, and so that it does not cause
   * ambiguities with reversed comparison candidates.
   *
   * \param [in] other The plane to compare against
   * \returns \c true if all coefficients are bit-identical
   */
  bool operator == (const D3D9ClipPlane& other) const {
    return std::memcmp(this, &other, sizeof(D3D9ClipPlane)) == 0;
  }

  /**
   * \brief Checks two clip planes for bitwise inequality
   *
   * \param [in] other The plane to compare against
   * \returns \c true if any coefficient differs bitwise
   */
  bool operator != (const D3D9ClipPlane& other) const {
    return !(*this == other);
  }
};

struct D3D9RenderStateInfo {
Expand Down
9 changes: 8 additions & 1 deletion src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3482,6 +3482,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(

uint32_t floatType = m_module.defFloatType(32);
uint32_t vec4Type = m_module.defVectorType(floatType, 4);
uint32_t boolType = m_module.defBoolType();

// Declare uniform buffer containing clip planes
uint32_t clipPlaneArray = m_module.defArrayTypeUnique(vec4Type, clipPlaneCountId);
Expand Down Expand Up @@ -3551,9 +3552,15 @@ void DxsoCompiler::emitControlFlowGenericLoop(

DxsoRegisterValue dist = emitDot(position, plane);

// Always consider clip planes enabled when doing GPL by forcing a mask of 0xffffffff for the quick value.
uint32_t clipPlaneEnabledBit = m_spec.get(m_module, m_specUbo, SpecClipPlaneMask, i, 1, m_module.constu32(0xffffffff));
uint32_t clipPlaneEnabled = m_module.opINotEqual(boolType, clipPlaneEnabledBit, m_module.constu32(0));

uint32_t value = m_module.opSelect(floatType, clipPlaneEnabled, dist.id, m_module.constf32(0.0f));

m_module.opStore(m_module.opAccessChain(
m_module.defPointerType(floatType, spv::StorageClassOutput),
clipDistArray, 1, &blockMembers[1]), dist.id);
clipDistArray, 1, &blockMembers[1]), value);
}
}

Expand Down

0 comments on commit 8c4c814

Please sign in to comment.