diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp index 9398e4842088..f0df5610739f 100644 --- a/src/d3d11/d3d11_device.cpp +++ b/src/d3d11/d3d11_device.cpp @@ -15,6 +15,7 @@ #include "d3d11_device.h" #include "d3d11_fence.h" #include "d3d11_input_layout.h" +#include "d3d11_interfaces.h" #include "d3d11_interop.h" #include "d3d11_query.h" #include "d3d11_resource.h" @@ -2469,12 +2470,14 @@ namespace dxvk { return deviceFeatures.nvxBinaryImport && deviceFeatures.vk12.bufferDeviceAddress; + case D3D11_VK_NV_LOW_LATENCY_2: + return deviceFeatures.nvLowLatency2; + default: return false; } } - - + bool STDMETHODCALLTYPE D3D11DeviceExt::GetCudaTextureObjectNVX(uint32_t srvDriverHandle, uint32_t samplerDriverHandle, uint32_t* pCudaTextureHandle) { ID3D11ShaderResourceView* srv = HandleToSrvNVX(srvDriverHandle); @@ -2783,8 +2786,133 @@ namespace dxvk { + + D3D11LowLatencyDevice::D3D11LowLatencyDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice) + : m_container(pContainer), m_device(pDevice) { + + } + ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::AddRef() { + return m_container->AddRef(); + } + + + ULONG STDMETHODCALLTYPE D3D11LowLatencyDevice::Release() { + return m_container->Release(); + } + + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::QueryInterface( + REFIID riid, + void** ppvObject) { + return m_container->QueryInterface(riid, ppvObject); + } + + BOOL STDMETHODCALLTYPE D3D11LowLatencyDevice::SupportsLowLatency() { + return m_device->GetDXVKDevice()->features().nvLowLatency2; + } + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::LatencySleep() { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + VkResult res = pSwapChain->LatencySleep(); + if (res != VK_SUCCESS) { + return S_FALSE; + } + } + + return S_OK; + } + + HRESULT STDMETHODCALLTYPE 
D3D11LowLatencyDevice::SetLatencySleepMode(BOOL lowLatencyMode, BOOL lowLatencyBoost, uint32_t minimumIntervalUs) { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + if (pSwapChain) { + VkResult res = pSwapChain->SetLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); + if (res != VK_SUCCESS) { + return S_FALSE; + } + } + + return S_OK; + } + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::SetLatencyMarker(uint64_t frameID, uint32_t markerType) { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + VkLatencyMarkerNV marker = static_cast(markerType); + uint64_t internalFrameId = frameID + DXGI_MAX_SWAP_CHAIN_BUFFERS; + + m_device->GetDXVKDevice()->setLatencyMarker(marker, internalFrameId); + + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + pSwapChain->SetLatencyMarker(marker, internalFrameId); + } + + return S_OK; + } + + HRESULT STDMETHODCALLTYPE D3D11LowLatencyDevice::GetLatencyInfo(D3D11_LATENCY_RESULTS* latency_results) + { + if (!m_device->GetDXVKDevice()->features().nvLowLatency2) { + return E_NOINTERFACE; + } + + constexpr uint32_t frameReportSize = 64; + D3D11SwapChain* pSwapChain = m_device->GetLowLatencySwapChain(); + + if (pSwapChain && pSwapChain->LowLatencyEnabled()) { + std::vector frameReports; + pSwapChain->GetLatencyTimings(frameReports); + + if (frameReports.size() >= frameReportSize) { + for (uint32_t i = 0; i < frameReportSize; i++) { + VkLatencyTimingsFrameReportNV& frameReport = frameReports[i]; + latency_results->frame_reports[i].frameID = frameReport.presentID - DXGI_MAX_SWAP_CHAIN_BUFFERS; + latency_results->frame_reports[i].inputSampleTime = frameReport.inputSampleTimeUs; + latency_results->frame_reports[i].simStartTime = frameReport.simStartTimeUs; + latency_results->frame_reports[i].simEndTime = 
frameReport.simEndTimeUs; + latency_results->frame_reports[i].renderSubmitStartTime = frameReport.renderSubmitStartTimeUs; + latency_results->frame_reports[i].renderSubmitEndTime = frameReport.renderSubmitEndTimeUs; + latency_results->frame_reports[i].presentStartTime = frameReport.presentStartTimeUs; + latency_results->frame_reports[i].presentEndTime = frameReport.presentEndTimeUs; + latency_results->frame_reports[i].driverStartTime = frameReport.driverStartTimeUs; + latency_results->frame_reports[i].driverEndTime = frameReport.driverEndTimeUs; + latency_results->frame_reports[i].osRenderQueueStartTime = frameReport.osRenderQueueStartTimeUs; + latency_results->frame_reports[i].osRenderQueueEndTime = frameReport.osRenderQueueEndTimeUs; + latency_results->frame_reports[i].gpuRenderStartTime = frameReport.gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuRenderEndTime = frameReport.gpuRenderEndTimeUs; + latency_results->frame_reports[i].gpuActiveRenderTimeUs = + frameReport.gpuRenderEndTimeUs - frameReport.gpuRenderStartTimeUs; + latency_results->frame_reports[i].gpuFrameTimeUs = 0; + + if (i) { + latency_results->frame_reports[i].gpuFrameTimeUs = + frameReports[i].gpuRenderEndTimeUs - frameReports[i - 1].gpuRenderEndTimeUs; + } + } + } + } + + return S_OK; + } + + + + D3D11VideoDevice::D3D11VideoDevice( D3D11DXGIDevice* pContainer, D3D11Device* pDevice) @@ -3021,7 +3149,11 @@ namespace dxvk { Com presenter = new D3D11SwapChain( m_container, m_device, pSurfaceFactory, pDesc); - + + if (m_device->GetDXVKDevice()->features().nvLowLatency2) { + m_device->AddSwapchain(presenter.ref()); + } + *ppSwapChain = presenter.ref(); return S_OK; } catch (const DxvkError& e) { @@ -3078,17 +3210,18 @@ namespace dxvk { Rc pDxvkDevice, D3D_FEATURE_LEVEL FeatureLevel, UINT FeatureFlags) - : m_dxgiAdapter (pAdapter), - m_dxvkInstance (pDxvkInstance), - m_dxvkAdapter (pDxvkAdapter), - m_dxvkDevice (pDxvkDevice), - m_d3d11Device (this, FeatureLevel, FeatureFlags), - 
m_d3d11DeviceExt(this, &m_d3d11Device), - m_d3d11Interop (this, &m_d3d11Device), - m_d3d11Video (this, &m_d3d11Device), - m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), - m_metaDevice (this), - m_dxvkFactory (this, &m_d3d11Device) { + : m_dxgiAdapter (pAdapter), + m_dxvkInstance (pDxvkInstance), + m_dxvkAdapter (pDxvkAdapter), + m_dxvkDevice (pDxvkDevice), + m_d3d11Device (this, FeatureLevel, FeatureFlags), + m_d3d11DeviceExt (this, &m_d3d11Device), + m_d3d11Interop (this, &m_d3d11Device), + m_d3dLowLatencyDevice (this, &m_d3d11Device), + m_d3d11Video (this, &m_d3d11Device), + m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue), + m_metaDevice (this), + m_dxvkFactory (this, &m_d3d11Device) { } @@ -3142,7 +3275,12 @@ namespace dxvk { *ppvObject = ref(&m_d3d11DeviceExt); return S_OK; } - + + if (riid == __uuidof(ID3DLowLatencyDevice)) { + *ppvObject = ref(&m_d3dLowLatencyDevice); + return S_OK; + } + if (riid == __uuidof(IDXGIDXVKDevice)) { *ppvObject = ref(&m_metaDevice); return S_OK; diff --git a/src/d3d11/d3d11_device.h b/src/d3d11/d3d11_device.h index 7a44b5ad99cc..f36c32d1b524 100644 --- a/src/d3d11/d3d11_device.h +++ b/src/d3d11/d3d11_device.h @@ -24,6 +24,7 @@ #include "d3d11_options.h" #include "d3d11_shader.h" #include "d3d11_state.h" +#include "d3d11_swapchain.h" #include "d3d11_util.h" namespace dxvk { @@ -428,6 +429,22 @@ namespace dxvk { bool Is11on12Device() const; + void AddSwapchain(D3D11SwapChain* swapchain) { + m_swapchains.push_back(swapchain); + } + + void RemoveSwapchain(D3D11SwapChain* swapchain) { /* erase-remove: std::remove alone only compacts the range and leaves the vector size unchanged */ + m_swapchains.erase(std::remove(m_swapchains.begin(), m_swapchains.end(), swapchain), m_swapchains.end()); + } + + UINT GetSwapchainCount() { + return m_swapchains.size(); + } + + D3D11SwapChain* GetLowLatencySwapChain() { + return (m_swapchains.size()) == 1 ? 
m_swapchains[0] : nullptr; + } + static D3D_FEATURE_LEVEL GetMaxFeatureLevel( const Rc& Instance, const Rc& Adapter); @@ -464,6 +481,8 @@ namespace dxvk { D3D_FEATURE_LEVEL m_maxFeatureLevel; D3D11DeviceFeatures m_deviceFeatures; + std::vector m_swapchains; + HRESULT CreateShaderModule( D3D11CommonShader* pShaderModule, DxvkShaderKey ShaderKey, @@ -545,28 +564,28 @@ namespace dxvk { uint64_t* gpuVAStart, uint64_t* gpuVASize); - bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateUnorderedAccessViewAndGetDriverHandleNVX( ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAV, uint32_t* pDriverHandle); - bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateShaderResourceViewAndGetDriverHandleNVX( ID3D11Resource* pResource, const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, ID3D11ShaderResourceView** ppSRV, uint32_t* pDriverHandle); - bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( + bool STDMETHODCALLTYPE CreateSamplerStateAndGetDriverHandleNVX( const D3D11_SAMPLER_DESC* pSamplerDesc, ID3D11SamplerState** ppSamplerState, uint32_t* pDriverHandle); - + private: D3D11DXGIDevice* m_container; D3D11Device* m_device; - + void AddSamplerAndHandleNVX( ID3D11SamplerState* pSampler, uint32_t Handle); @@ -586,6 +605,46 @@ namespace dxvk { std::unordered_map m_srvHandleToPtr; }; + /** + * \brief Low latency device, implements ID3DLowLatencyDevice + */ + class D3D11LowLatencyDevice : public ID3DLowLatencyDevice { + + public: + + D3D11LowLatencyDevice( + D3D11DXGIDevice* pContainer, + D3D11Device* pDevice); + + ULONG STDMETHODCALLTYPE AddRef(); + + ULONG STDMETHODCALLTYPE Release(); + + HRESULT STDMETHODCALLTYPE QueryInterface( + REFIID riid, + void** ppvObject); + + BOOL STDMETHODCALLTYPE SupportsLowLatency(); + + HRESULT STDMETHODCALLTYPE LatencySleep(); + + HRESULT STDMETHODCALLTYPE SetLatencySleepMode( + BOOL lowLatencyMode, + BOOL 
lowLatencyBoost, + uint32_t minimumIntervalUs); + + HRESULT STDMETHODCALLTYPE SetLatencyMarker( + uint64_t frameID, + uint32_t markerType); + + HRESULT STDMETHODCALLTYPE GetLatencyInfo( + D3D11_LATENCY_RESULTS* latency_results); + + private: + + D3D11DXGIDevice* m_container; + D3D11Device* m_device; + }; /** * \brief D3D11 video device @@ -856,12 +915,13 @@ namespace dxvk { Rc m_dxvkAdapter; Rc m_dxvkDevice; - D3D11Device m_d3d11Device; - D3D11DeviceExt m_d3d11DeviceExt; - D3D11VkInterop m_d3d11Interop; - D3D11VideoDevice m_d3d11Video; - D3D11on12Device m_d3d11on12; - DXGIDXVKDevice m_metaDevice; + D3D11Device m_d3d11Device; + D3D11DeviceExt m_d3d11DeviceExt; + D3D11VkInterop m_d3d11Interop; + D3D11LowLatencyDevice m_d3dLowLatencyDevice; + D3D11VideoDevice m_d3d11Video; + D3D11on12Device m_d3d11on12; + DXGIDXVKDevice m_metaDevice; DXGIVkSwapChainFactory m_dxvkFactory; diff --git a/src/d3d11/d3d11_interfaces.h b/src/d3d11/d3d11_interfaces.h index 587cde1394e9..f9acbf75c102 100644 --- a/src/d3d11/d3d11_interfaces.h +++ b/src/d3d11/d3d11_interfaces.h @@ -16,6 +16,7 @@ enum D3D11_VK_EXTENSION : uint32_t { D3D11_VK_EXT_BARRIER_CONTROL = 3, D3D11_VK_NVX_BINARY_IMPORT = 4, D3D11_VK_NVX_IMAGE_VIEW_HANDLE = 5, + D3D11_VK_NV_LOW_LATENCY_2 = 6 }; @@ -27,6 +28,33 @@ enum D3D11_VK_BARRIER_CONTROL : uint32_t { D3D11_VK_BARRIER_CONTROL_IGNORE_GRAPHICS_UAV = 1 << 1, }; +/** + * \brief Frame Report Info + */ +typedef struct D3D11_LATENCY_RESULTS +{ + UINT32 version; + struct D3D11_FRAME_REPORT { + UINT64 frameID; + UINT64 inputSampleTime; + UINT64 simStartTime; + UINT64 simEndTime; + UINT64 renderSubmitStartTime; + UINT64 renderSubmitEndTime; + UINT64 presentStartTime; + UINT64 presentEndTime; + UINT64 driverStartTime; + UINT64 driverEndTime; + UINT64 osRenderQueueStartTime; + UINT64 osRenderQueueEndTime; + UINT64 gpuRenderStartTime; + UINT64 gpuRenderEndTime; + UINT32 gpuActiveRenderTimeUs; + UINT32 gpuFrameTimeUs; + UINT8 rsvd[120]; + } frame_reports[64]; + UINT8 rsvd[32]; +} 
D3D11_LATENCY_RESULTS; /** * \brief Extended shader interface @@ -114,6 +142,33 @@ ID3D11VkExtDevice1 : public ID3D11VkExtDevice { uint32_t* pCudaTextureHandle) = 0; }; +/** + * \brief Low latency device interface + * + * Introduces methods to query low latency support, set + * the latency sleep mode, sleep, set latency markers + * and retrieve latency timing info. + */ +MIDL_INTERFACE("f3112584-41f9-348d-a59b-00b7e1d285d6") +ID3DLowLatencyDevice : public IUnknown { + static const GUID guid; + + virtual BOOL STDMETHODCALLTYPE SupportsLowLatency() = 0; + + virtual HRESULT STDMETHODCALLTYPE LatencySleep() = 0; + + virtual HRESULT STDMETHODCALLTYPE SetLatencySleepMode( + BOOL lowLatencyMode, + BOOL lowLatencyBoost, + uint32_t minimumIntervalUs) = 0; + + virtual HRESULT STDMETHODCALLTYPE SetLatencyMarker( + uint64_t frameID, + uint32_t markerType) = 0; + + virtual HRESULT STDMETHODCALLTYPE GetLatencyInfo( + D3D11_LATENCY_RESULTS* latency_results) = 0; +}; /** * \brief Extended D3D11 context @@ -182,17 +237,18 @@ ID3D11VkExtContext1 : public ID3D11VkExtContext { uint32_t numWriteResources) = 0; }; - #ifdef _MSC_VER struct __declspec(uuid("bb8a4fb9-3935-4762-b44b-35189a26414a")) ID3D11VkExtShader; struct __declspec(uuid("8a6e3c42-f74c-45b7-8265-a231b677ca17")) ID3D11VkExtDevice; struct __declspec(uuid("cfcf64ef-9586-46d0-bca4-97cf2ca61b06")) ID3D11VkExtDevice1; struct __declspec(uuid("fd0bca13-5cb6-4c3a-987e-4750de2ca791")) ID3D11VkExtContext; struct __declspec(uuid("874b09b2-ae0b-41d8-8476-5f3b7a0e879d")) ID3D11VkExtContext1; +struct __declspec(uuid("f3112584-41f9-348d-a59b-00b7e1d285d6")) ID3DLowLatencyDevice; #else __CRT_UUID_DECL(ID3D11VkExtShader, 0xbb8a4fb9,0x3935,0x4762,0xb4,0x4b,0x35,0x18,0x9a,0x26,0x41,0x4a); __CRT_UUID_DECL(ID3D11VkExtDevice, 0x8a6e3c42,0xf74c,0x45b7,0x82,0x65,0xa2,0x31,0xb6,0x77,0xca,0x17); __CRT_UUID_DECL(ID3D11VkExtDevice1, 0xcfcf64ef,0x9586,0x46d0,0xbc,0xa4,0x97,0xcf,0x2c,0xa6,0x1b,0x06); __CRT_UUID_DECL(ID3D11VkExtContext, 
0xfd0bca13,0x5cb6,0x4c3a,0x98,0x7e,0x47,0x50,0xde,0x2c,0xa7,0x91); __CRT_UUID_DECL(ID3D11VkExtContext1, 0x874b09b2,0xae0b,0x41d8,0x84,0x76,0x5f,0x3b,0x7a,0x0e,0x87,0x9d); +__CRT_UUID_DECL(ID3DLowLatencyDevice, 0xf3112584,0x41f9,0x348d,0xa5,0x9b,0x00,0xb7,0xe1,0xd2,0x85,0xd6); #endif diff --git a/src/d3d11/d3d11_swapchain.cpp b/src/d3d11/d3d11_swapchain.cpp index 0e823f410ef3..34e79f28d429 100644 --- a/src/d3d11/d3d11_swapchain.cpp +++ b/src/d3d11/d3d11_swapchain.cpp @@ -351,6 +351,34 @@ namespace dxvk { *pFrameStatistics = m_frameStatistics; } + VkResult D3D11SwapChain::SetLatencySleepMode( + bool lowLatencyMode, + bool lowLatencyBoost, + uint32_t minimumIntervalUs) { + if (lowLatencyMode && !LowLatencyEnabled()) { + RecreateSwapChain(); + } + return m_presenter->setLatencySleepMode(lowLatencyMode, lowLatencyBoost, minimumIntervalUs); + } + + VkResult D3D11SwapChain::LatencySleep() { + return m_presenter->latencySleep(); + } + + void D3D11SwapChain::SetLatencyMarker( + VkLatencyMarkerNV marker, + uint64_t presentId) { + m_presenter->setLatencyMarker(marker, presentId); + } + + VkResult D3D11SwapChain::GetLatencyTimings( + std::vector& frameReports) { + return m_presenter->getLatencyTimings(frameReports); + } + + bool D3D11SwapChain::LowLatencyEnabled() { + return m_presenter->lowLatencyEnabled(); + } HRESULT D3D11SwapChain::PresentImage(UINT SyncInterval) { // Flush pending rendering commands before @@ -410,9 +438,11 @@ namespace dxvk { uint32_t Repeat) { auto lock = pContext->LockContext(); - // Bump frame ID as necessary - if (!Repeat) - m_frameId += 1; + if (!Repeat) { + m_frameId = m_presenter->lowLatencyEnabled() ? + m_device->getLatencyMarkers().present : + m_frameId + 1; + } // Present from CS thread so that we don't // have to synchronize with it first. 
diff --git a/src/d3d11/d3d11_swapchain.h b/src/d3d11/d3d11_swapchain.h index 00073d7690e3..a3ecf6343815 100644 --- a/src/d3d11/d3d11_swapchain.h +++ b/src/d3d11/d3d11_swapchain.h @@ -86,6 +86,22 @@ namespace dxvk { void STDMETHODCALLTYPE GetFrameStatistics( DXGI_VK_FRAME_STATISTICS* pFrameStatistics); + VkResult SetLatencySleepMode( + bool lowLatencyMode, + bool lowLatencyBoost, + uint32_t minimumIntervalUs); + + VkResult LatencySleep(); + + void SetLatencyMarker( + VkLatencyMarkerNV marker, + uint64_t presentId); + + VkResult GetLatencyTimings( + std::vector& frameReports); + + bool LowLatencyEnabled(); + private: enum BindingIds : uint32_t { @@ -176,4 +192,4 @@ namespace dxvk { }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index cf4c3cce68f5..6a3b4f08180e 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -927,6 +927,9 @@ namespace dxvk { m_deviceFeatures.khrPresentWait.pNext = std::exchange(m_deviceFeatures.core.pNext, &m_deviceFeatures.khrPresentWait); } + if (m_deviceExtensions.supports(VK_NV_LOW_LATENCY_2_EXTENSION_NAME)) + m_deviceFeatures.nvLowLatency2 = VK_TRUE; + if (m_deviceExtensions.supports(VK_NVX_BINARY_IMPORT_EXTENSION_NAME)) m_deviceFeatures.nvxBinaryImport = VK_TRUE; @@ -994,6 +997,7 @@ namespace dxvk { &devExtensions.khrPresentWait, &devExtensions.khrSwapchain, &devExtensions.khrWin32KeyedMutex, + &devExtensions.nvLowLatency2, &devExtensions.nvxBinaryImport, &devExtensions.nvxImageViewHandle, }}; @@ -1133,8 +1137,13 @@ namespace dxvk { enabledFeatures.khrPresentWait.pNext = std::exchange(enabledFeatures.core.pNext, &enabledFeatures.khrPresentWait); } - if (devExtensions.nvxBinaryImport) + if (devExtensions.nvxBinaryImport) { enabledFeatures.nvxBinaryImport = VK_TRUE; + } + + if (devExtensions.nvLowLatency2) { + enabledFeatures.nvLowLatency2 = VK_TRUE; + } if (devExtensions.nvxImageViewHandle) enabledFeatures.nvxImageViewHandle = VK_TRUE; @@ -1279,6 +1288,8 @@ 
namespace dxvk { "\n presentId : ", features.khrPresentId.presentId ? "1" : "0", "\n", VK_KHR_PRESENT_WAIT_EXTENSION_NAME, "\n presentWait : ", features.khrPresentWait.presentWait ? "1" : "0", + "\n", VK_NV_LOW_LATENCY_2_EXTENSION_NAME, + "\n extension supported : ", features.nvLowLatency2 ? "1" : "0", "\n", VK_NVX_BINARY_IMPORT_EXTENSION_NAME, "\n extension supported : ", features.nvxBinaryImport ? "1" : "0", "\n", VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp index 3bd3aa953d46..54b50ea533c6 100644 --- a/src/dxvk/dxvk_cmdlist.cpp +++ b/src/dxvk/dxvk_cmdlist.cpp @@ -56,10 +56,12 @@ namespace dxvk { VkResult DxvkCommandSubmission::submit( DxvkDevice* device, - VkQueue queue) { + VkQueue queue, + uint64_t frameId) { auto vk = device->vkd(); VkSubmitInfo2 submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO_2 }; + VkLatencySubmissionPresentIdNV latencySubmitInfo = { VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV }; if (!m_semaphoreWaits.empty()) { submitInfo.waitSemaphoreInfoCount = m_semaphoreWaits.size(); @@ -76,6 +78,11 @@ namespace dxvk { submitInfo.pSignalSemaphoreInfos = m_semaphoreSignals.data(); } + if (device->features().nvLowLatency2 && frameId && !m_commandBuffers.empty()) { + latencySubmitInfo.presentID = frameId; + latencySubmitInfo.pNext = std::exchange(submitInfo.pNext, &latencySubmitInfo); + } + VkResult vr = VK_SUCCESS; if (!this->isEmpty()) @@ -206,7 +213,7 @@ namespace dxvk { } - VkResult DxvkCommandList::submit() { + VkResult DxvkCommandList::submit(uint64_t frameId) { VkResult status = VK_SUCCESS; const auto& graphics = m_device->queues().graphics; @@ -238,7 +245,7 @@ namespace dxvk { // for any prior submissions, then block any subsequent ones m_commandSubmission.signalSemaphore(m_bindSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); - if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, 
frameId))) return status; sparseBind->waitSemaphore(m_bindSemaphore, 0); @@ -259,7 +266,7 @@ namespace dxvk { if (m_device->hasDedicatedTransferQueue() && !m_commandSubmission.isEmpty()) { m_commandSubmission.signalSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT); - if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, transfer.queueHandle, frameId))) return status; m_commandSubmission.waitSemaphore(m_sdmaSemaphore, 0, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT); @@ -297,7 +304,7 @@ namespace dxvk { } // Finally, submit all graphics commands of the current submission - if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle))) + if ((status = m_commandSubmission.submit(m_device, graphics.queueHandle, frameId))) return status; } diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index b9b9a165dd36..f9527516e171 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -94,7 +94,8 @@ namespace dxvk { */ VkResult submit( DxvkDevice* device, - VkQueue queue); + VkQueue queue, + uint64_t frameId); /** * \brief Resets object @@ -199,7 +200,7 @@ namespace dxvk { * \brief Submits command list * \returns Submission status */ - VkResult submit(); + VkResult submit(uint64_t frameId); /** * \brief Stat counters diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index 9a053791a7b9..44d208c41aa4 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -18,6 +18,7 @@ namespace dxvk { m_properties (adapter->devicePropertiesExt()), m_perfHints (getPerfHints()), m_objects (this), + m_latencyMarkers ({}), m_queues (queues), m_submissionQueue (this, queueCallback) { @@ -274,6 +275,7 @@ namespace dxvk { DxvkSubmitStatus* status) { DxvkSubmitInfo submitInfo = { }; submitInfo.cmdList = commandList; + submitInfo.frameId = m_latencyMarkers.render; m_submissionQueue.submit(submitInfo, status); std::lock_guard statLock(m_statLock); diff 
--git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index a24ee311bf50..c34c7a510433 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -66,7 +66,16 @@ namespace dxvk { DxvkDeviceQueue transfer; DxvkDeviceQueue sparse; }; - + + /** + * \brief Latency marker frame ids + */ + struct DxvkDeviceLowLatencyMarkers { + uint64_t simulation; + uint64_t render; + uint64_t present; + }; + /** * \brief DXVK device * @@ -534,6 +543,37 @@ namespace dxvk { * used by the GPU can be safely destroyed. */ void waitForIdle(); + + /** + * \brief Updates the frame id for the given frame marker + * + * \param [in] marker The marker to set the frame ID for + * \param [in] id The frame ID to set + */ + void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t id) { + switch (marker) { + case VK_LATENCY_MARKER_SIMULATION_START_NV: + m_latencyMarkers.simulation = id; + break; + case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: + m_latencyMarkers.render = id; + break; + case VK_LATENCY_MARKER_PRESENT_START_NV: + m_latencyMarkers.present = id; + break; + default: + break; + } + } + + /** + * \brief Returns the current set of latency marker frame IDs + * + * \returns The current set of frame marker IDs + */ + DxvkDeviceLowLatencyMarkers getLatencyMarkers() { + return m_latencyMarkers; + } private: @@ -549,6 +589,8 @@ namespace dxvk { DxvkDevicePerfHints m_perfHints; DxvkObjects m_objects; + DxvkDeviceLowLatencyMarkers m_latencyMarkers; + sync::Spinlock m_statLock; DxvkStatCounters m_statCounters; diff --git a/src/dxvk/dxvk_device_info.h b/src/dxvk/dxvk_device_info.h index e23a0e1812e6..ea1a074fd598 100644 --- a/src/dxvk/dxvk_device_info.h +++ b/src/dxvk/dxvk_device_info.h @@ -1,6 +1,7 @@ #pragma once #include "dxvk_include.h" +#include namespace dxvk { @@ -68,9 +69,10 @@ namespace dxvk { VkPhysicalDeviceMaintenance5FeaturesKHR khrMaintenance5; VkPhysicalDevicePresentIdFeaturesKHR khrPresentId; VkPhysicalDevicePresentWaitFeaturesKHR khrPresentWait; + VkBool32 nvLowLatency2; 
VkBool32 nvxBinaryImport; VkBool32 nvxImageViewHandle; VkBool32 khrWin32KeyedMutex; }; -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_extensions.h b/src/dxvk/dxvk_extensions.h index 8164ccf6ad67..041d00c3cee9 100644 --- a/src/dxvk/dxvk_extensions.h +++ b/src/dxvk/dxvk_extensions.h @@ -325,6 +325,7 @@ namespace dxvk { DxvkExt khrPresentWait = { VK_KHR_PRESENT_WAIT_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt khrSwapchain = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, DxvkExtMode::Required }; DxvkExt khrWin32KeyedMutex = { VK_KHR_WIN32_KEYED_MUTEX_EXTENSION_NAME, DxvkExtMode::Optional }; + DxvkExt nvLowLatency2 = { VK_NV_LOW_LATENCY_2_EXTENSION_NAME, DxvkExtMode::Optional }; DxvkExt nvxBinaryImport = { VK_NVX_BINARY_IMPORT_EXTENSION_NAME, DxvkExtMode::Disabled }; DxvkExt nvxImageViewHandle = { VK_NVX_IMAGE_VIEW_HANDLE_EXTENSION_NAME, DxvkExtMode::Disabled }; }; diff --git a/src/dxvk/dxvk_presenter.cpp b/src/dxvk/dxvk_presenter.cpp index 10f13da2783d..969331d53156 100644 --- a/src/dxvk/dxvk_presenter.cpp +++ b/src/dxvk/dxvk_presenter.cpp @@ -18,6 +18,15 @@ namespace dxvk { // with present operations and periodically signals the event if (m_device->features().khrPresentWait.presentWait && m_signal != nullptr) m_frameThread = dxvk::thread([this] { runFrameThread(); }); + + // If nvLowLatency2 is supported create the fence + if (m_device->features().nvLowLatency2) { + DxvkFenceCreateInfo info = {}; + info.initialValue = 0; + info.sharedType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM; + + m_lowLatencyFence = DxvkFenceValuePair(m_device->createFence(info), 0u); + } } @@ -48,6 +57,7 @@ namespace dxvk { VkResult Presenter::acquireNextImage(PresenterSync& sync, uint32_t& index) { + std::lock_guard lock(m_lowLatencyMutex); sync = m_semaphores.at(m_frameIndex); // Don't acquire more than one image at a time @@ -68,11 +78,13 @@ namespace dxvk { VkResult Presenter::presentImage( VkPresentModeKHR mode, uint64_t frameId) { + std::lock_guard 
lock(m_lowLatencyMutex); + PresenterSync sync = m_semaphores.at(m_frameIndex); VkPresentIdKHR presentId = { VK_STRUCTURE_TYPE_PRESENT_ID_KHR }; presentId.swapchainCount = 1; - presentId.pPresentIds = &frameId; + presentId.pPresentIds = &frameId; VkSwapchainPresentModeInfoEXT modeInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT }; modeInfo.swapchainCount = 1; @@ -151,6 +163,8 @@ namespace dxvk { VkResult Presenter::recreateSwapChain(const PresenterDesc& desc) { + std::lock_guard lock(m_lowLatencyMutex); + if (m_swapchain) destroySwapchain(); @@ -293,6 +307,9 @@ namespace dxvk { modeInfo.presentModeCount = compatibleModes.size(); modeInfo.pPresentModes = compatibleModes.data(); + VkSwapchainLatencyCreateInfoNV lowLatencyInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV }; + lowLatencyInfo.latencyModeEnable = VK_TRUE; + VkSwapchainCreateInfoKHR swapInfo = { VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR }; swapInfo.surface = m_surface; swapInfo.minImageCount = m_info.imageCount; @@ -314,6 +331,9 @@ namespace dxvk { if (m_device->features().extSwapchainMaintenance1.swapchainMaintenance1) modeInfo.pNext = std::exchange(swapInfo.pNext, &modeInfo); + if (m_device->features().nvLowLatency2) + lowLatencyInfo.pNext = std::exchange(swapInfo.pNext, &lowLatencyInfo); + Logger::info(str::format( "Presenter: Actual swap chain properties:" "\n Format: ", m_info.format.format, @@ -322,11 +342,21 @@ namespace dxvk { "\n Buffer size: ", m_info.imageExtent.width, "x", m_info.imageExtent.height, "\n Image count: ", m_info.imageCount, "\n Exclusive FS: ", desc.fullScreenExclusive)); - + if ((status = m_vkd->vkCreateSwapchainKHR(m_vkd->device(), &swapInfo, nullptr, &m_swapchain))) return status; - + + if (m_device->features().nvLowLatency2) { + VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV }; + sleepModeInfo.lowLatencyMode = m_lowLatencyEnabled; + sleepModeInfo.lowLatencyBoost = m_lowLatencyBoost; + 
sleepModeInfo.minimumIntervalUs = m_minimumIntervalUs; + + if ((status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo))) + return status; + } + // Acquire images and create views std::vector images; @@ -422,6 +452,66 @@ namespace dxvk { m_vkd->vkSetHdrMetadataEXT(m_vkd->device(), 1, &m_swapchain, &hdrMetadata); } + VkResult Presenter::setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs) { + VkLatencySleepModeInfoNV sleepModeInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV }; + sleepModeInfo.lowLatencyMode = lowLatencyMode; + sleepModeInfo.lowLatencyBoost = lowLatencyBoost; + sleepModeInfo.minimumIntervalUs = minimumIntervalUs; + + std::lock_guard lock(m_lowLatencyMutex); + VkResult status = m_vkd->vkSetLatencySleepModeNV(m_vkd->device(), m_swapchain, &sleepModeInfo); + + m_lowLatencyEnabled = lowLatencyMode; + m_lowLatencyBoost = lowLatencyBoost; + m_minimumIntervalUs = minimumIntervalUs; + + return status; + } + + VkResult Presenter::latencySleep() { + VkSemaphore sem = m_lowLatencyFence.fence->handle(); + uint64_t waitValue = m_lowLatencyFence.value + 1; + m_lowLatencyFence.value++; + + VkLatencySleepInfoNV sleepInfo = { VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV }; + sleepInfo.signalSemaphore = sem; + sleepInfo.value = waitValue; + + VkResult status; /* result of the sleep request, reported to the caller instead of an unconditional VK_SUCCESS */ + { + std::lock_guard lock(m_lowLatencyMutex); + status = m_vkd->vkLatencySleepNV(m_vkd->device(), m_swapchain, &sleepInfo); + } + if (status == VK_SUCCESS) /* a failed request is not expected to signal the semaphore, so waiting on it could block forever */ + m_lowLatencyFence.fence->wait(waitValue); + return status; + } + + void Presenter::setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId) { + VkSetLatencyMarkerInfoNV markerInfo = { VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV }; + markerInfo.presentID = presentId; + markerInfo.marker = marker; + + std::lock_guard lock(m_lowLatencyMutex); + m_vkd->vkSetLatencyMarkerNV(m_vkd->device(), m_swapchain, &markerInfo); + } + + VkResult Presenter::getLatencyTimings(std::vector& frameReports) { + VkGetLatencyMarkerInfoNV markerInfo = { 
VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV }; + uint32_t timingCount = 0; + + std::lock_guard lock(m_lowLatencyMutex); + m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo); + + if (timingCount != 0) { + frameReports.resize(timingCount, { VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV }); /* each report element carries its own sType, distinct from the parent marker-info struct */ + markerInfo.pTimings = frameReports.data(); + + m_vkd->vkGetLatencyTimingsNV(m_vkd->device(), m_swapchain, &timingCount, &markerInfo); + } + + return VK_SUCCESS; + } VkResult Presenter::getSupportedFormats(std::vector& formats, VkFullScreenExclusiveEXT fullScreenExclusive) const { uint32_t numFormats = 0; diff --git a/src/dxvk/dxvk_presenter.h b/src/dxvk/dxvk_presenter.h index c5ba12733646..aa52b97b4ced 100644 --- a/src/dxvk/dxvk_presenter.h +++ b/src/dxvk/dxvk_presenter.h @@ -15,6 +15,7 @@ #include "../vulkan/vulkan_loader.h" #include "dxvk_format.h" +#include "dxvk_fence.h" namespace dxvk { @@ -224,6 +225,42 @@ namespace dxvk { */ void setHdrMetadata(const VkHdrMetadataEXT& hdrMetadata); + /** + * \brief Set the latency mode of the swapchain + * + * \param [in] lowLatencyMode Determines if the low latency + * mode should be enabled or disabled + */ + VkResult setLatencySleepMode(bool lowLatencyMode, bool lowLatencyBoost, uint32_t minimumIntervalUs); + + /** + * \brief Delay rendering work for lower latency + */ + VkResult latencySleep(); + + /** + * \brief Set a latency marker for the given stage + * + * \param [in] marker The stage this marker is for + * \param [in] presentId The presentId this marker is for + */ + void setLatencyMarker(VkLatencyMarkerNV marker, uint64_t presentId); + + /** + * \brief Get the low latency timing info + * + * \param [out] frameReports The vector to place + * the latency timings into + */ + VkResult getLatencyTimings(std::vector& frameReports); + + /** + * \brief Returns the low latency enabled state + */ + bool lowLatencyEnabled() { + return m_lowLatencyEnabled; + } + private: Rc m_device; @@ -237,6 +274,11 
@@ namespace dxvk { VkSurfaceKHR m_surface = VK_NULL_HANDLE; VkSwapchainKHR m_swapchain = VK_NULL_HANDLE; + DxvkFenceValuePair m_lowLatencyFence = {}; + bool m_lowLatencyEnabled = false; + bool m_lowLatencyBoost = false; + uint32_t m_minimumIntervalUs = 0; + std::vector m_images; std::vector m_semaphores; @@ -250,6 +292,7 @@ namespace dxvk { FpsLimiter m_fpsLimiter; dxvk::mutex m_frameMutex; + dxvk::mutex m_lowLatencyMutex; dxvk::condition_variable m_frameCond; dxvk::thread m_frameThread; std::queue m_frameQueue; diff --git a/src/dxvk/dxvk_queue.cpp b/src/dxvk/dxvk_queue.cpp index 7273a37d6088..546a1f838b8e 100644 --- a/src/dxvk/dxvk_queue.cpp +++ b/src/dxvk/dxvk_queue.cpp @@ -126,7 +126,7 @@ namespace dxvk { m_callback(true); if (entry.submit.cmdList != nullptr) - entry.result = entry.submit.cmdList->submit(); + entry.result = entry.submit.cmdList->submit(entry.submit.frameId); else if (entry.present.presenter != nullptr) entry.result = entry.present.presenter->presentImage(entry.present.presentMode, entry.present.frameId); @@ -226,4 +226,4 @@ namespace dxvk { } } -} \ No newline at end of file +} diff --git a/src/dxvk/dxvk_queue.h b/src/dxvk/dxvk_queue.h index 38d91f5dd090..a3c6e581b31c 100644 --- a/src/dxvk/dxvk_queue.h +++ b/src/dxvk/dxvk_queue.h @@ -32,6 +32,7 @@ namespace dxvk { */ struct DxvkSubmitInfo { Rc cmdList; + uint64_t frameId; }; diff --git a/src/vulkan/vulkan_loader.h b/src/vulkan/vulkan_loader.h index 1741ccb8722b..6b0f80ea248f 100644 --- a/src/vulkan/vulkan_loader.h +++ b/src/vulkan/vulkan_loader.h @@ -452,6 +452,14 @@ namespace dxvk::vk { VULKAN_FN(wine_vkAcquireKeyedMutex); VULKAN_FN(wine_vkReleaseKeyedMutex); #endif + + #ifdef VK_NV_LOW_LATENCY_2_EXTENSION_NAME + VULKAN_FN(vkSetLatencySleepModeNV); + VULKAN_FN(vkLatencySleepNV); + VULKAN_FN(vkSetLatencyMarkerNV); + VULKAN_FN(vkGetLatencyTimingsNV); + VULKAN_FN(vkQueueNotifyOutOfBandNV); + #endif }; }