From bc40dfb65e6c7a767063d8262bb3b17e658bd5ee Mon Sep 17 00:00:00 2001 From: Victor P Date: Thu, 25 May 2023 05:10:19 -0700 Subject: [PATCH] [UWP] Use a shared output queue for sw av1 and vpx decoder Reorganize output queue. Use one large preallocated hw buffer for both av1 & vpx sw decoders b/249739051 Change-Id: I6430ae1ba5d288ed2495f3056cc7283f5c189f49 --- .../shared/uwp/extended_resources_manager.cc | 89 ++++++++++++++++--- .../shared/uwp/extended_resources_manager.h | 5 +- .../shared/uwp/player_components_factory.cc | 8 +- 3 files changed, 85 insertions(+), 17 deletions(-) diff --git a/starboard/shared/uwp/extended_resources_manager.cc b/starboard/shared/uwp/extended_resources_manager.cc index 6694d3b9c309..988c7656526d 100644 --- a/starboard/shared/uwp/extended_resources_manager.cc +++ b/starboard/shared/uwp/extended_resources_manager.cc @@ -47,6 +47,29 @@ using ::starboard::xb1::shared::VpxVideoDecoder; const SbTime kReleaseTimeout = kSbTimeSecond; +// The size of gpu memory heap for common use by vpx & av1 underlying decoders. +// This value must be greater than max(av1_min_value, vpx_min_value), where +// av1_min_value & vpx_min_value are minimal possible memory size for sw av1 & +// vpx decoders. Both decoders need some memory for internal buffers and some +// minimal memory for output queue. This value depends on preroll size. Let's +// for instance assume preroll size is preroll_size = 8. +// +// vpx underlying decoder needs 13 internal frame buffers for work and minimum +// 8 buffers in internal output queue for preroll. +// The size of fb is 13762560 for 4K SDR and 12976128 for 2K HDR +// So, vpx decoder needs minimum 13762560 * (13 + preroll_size) = 289013760 +// bytes +// +// av1 underlying decoder needs 13 internal buffers and 8 buffers for preroll. +// The size of fb is 5996544 for 2K SDR and 11993088 for 2K HDR +// av1 decoder needs minimum 11993088 * (13 + preroll_size) = 251854848 bytes. 
+// +// So, the value 289013760 is the minimum for the decoders to work reliably. +// +// To make playback smoother it is better to increase the output queue size +// up to 30-50 frames. On the other hand, the existing memory budget can't be +// exceeded. So, the value of 440 MiB is chosen as a compromise. +const uint64_t kFrameBuffersPoolMemorySize = 440 * 1024 * 1024; bool IsExtendedResourceModeRequired() { if (!::starboard::xb1::shared::CanAcquire()) { return false; @@ -150,6 +173,7 @@ void ExtendedResourcesManager::Quit() { bool ExtendedResourcesManager::GetD3D12Objects( Microsoft::WRL::ComPtr* device, + Microsoft::WRL::ComPtr* buffer_heap, void** command_queue) { if (HasNonrecoverableFailure()) { SB_LOG(WARNING) << "The D3D12 device has encountered a nonrecoverable " @@ -184,8 +208,8 @@ bool ExtendedResourcesManager::GetD3D12Objects( D3D12_HEAP_PROPERTIES prop = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); D3D12_RESOURCE_DESC desc = CD3DX12_RESOURCE_DESC::Buffer(1024 * 1024); HRESULT result = d3d12device_->CreateCommittedResource( - &prop, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, IID_PPV_ARGS(&res)); + &prop, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, nullptr, + IID_PPV_ARGS(&res)); if (result != S_OK) { SB_LOG(WARNING) << "The D3D12 device is not in a good state, can not use " "GPU based decoders."; @@ -196,11 +220,25 @@ bool ExtendedResourcesManager::GetD3D12Objects( *device = d3d12device_; *command_queue = d3d12queue_.Get(); + *buffer_heap = d3d12FrameBuffersHeap_.Get(); return true; } bool ExtendedResourcesManager::GetD3D12ObjectsInternal() { if (!d3d12device_) { + UINT dxgiFactoryFlags = 0; +#if defined(_DEBUG) + { + // This can help to debug DX issues. 
If something goes wrong in DX, + // Debug Layer outputs detailed log + ComPtr debugController; + HRESULT hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)); + if (SUCCEEDED(hr)) { + debugController->EnableDebugLayer(); + } + } +#endif + if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&d3d12device_)))) { // GPU based vp9 decoding will be temporarily disabled. @@ -221,8 +259,26 @@ bool ExtendedResourcesManager::GetD3D12ObjectsInternal() { } SB_DCHECK(d3d12queue_); } + if (!d3d12FrameBuffersHeap_) { + D3D12_HEAP_DESC heap_desc; + heap_desc.SizeInBytes = kFrameBuffersPoolMemorySize; + heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heap_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heap_desc.Properties.CreationNodeMask = 0; + heap_desc.Properties.VisibleNodeMask = 0; + heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; + heap_desc.Flags = D3D12_HEAP_FLAG_NONE; + + if (FAILED(d3d12device_->CreateHeap( + &heap_desc, IID_PPV_ARGS(&d3d12FrameBuffersHeap_)))) { + SB_LOG(WARNING) << "Failed to create d3d12 buffer."; + return false; + } + SB_DCHECK(d3d12FrameBuffersHeap_); + } - return d3d12device_ && d3d12queue_; + return d3d12device_ && d3d12queue_ && d3d12FrameBuffersHeap_; } bool ExtendedResourcesManager::AcquireExtendedResourcesInternal() { @@ -335,7 +391,8 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() { "shader compile."; return; } - if (Av1VideoDecoder::CompileShaders(d3d12device_, d3d12queue_.Get())) { + if (Av1VideoDecoder::CompileShaders(d3d12device_, d3d12FrameBuffersHeap_, + d3d12queue_.Get())) { is_av1_shader_compiled_ = true; SB_LOG(INFO) << "Gpu based AV1 decoder finished compiling its shaders."; } else { @@ -352,7 +409,8 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() { return; } - if (VpxVideoDecoder::CompileShaders(d3d12device_, d3d12queue_.Get())) { + if 
(VpxVideoDecoder::CompileShaders(d3d12device_, d3d12FrameBuffersHeap_, + d3d12queue_.Get())) { is_vp9_shader_compiled_ = true; SB_LOG(INFO) << "Gpu based VP9 decoder finished compiling its shaders."; } else { @@ -372,10 +430,6 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() { void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() { SB_DCHECK(thread_checker_.CalledOnValidThread()); -#if defined(INTERNAL_BUILD) - Av1VideoDecoder::ClearFrameBufferPool(); -#endif // defined(INTERNAL_BUILD) - ScopedLock scoped_lock(mutex_); if (!is_extended_resources_acquired_.load()) { SB_LOG(INFO) << "Extended resources hasn't been acquired," @@ -424,8 +478,7 @@ void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() { #if !defined(COBALT_BUILD_TYPE_GOLD) d3d12queue_->AddRef(); ULONG reference_count = d3d12queue_->Release(); - SB_DLOG(INFO) << "Reference count of |d3d12queue_| is " - << reference_count; + SB_LOG(INFO) << "Reference count of |d3d12queue_| is " << reference_count; #endif d3d12queue_.Reset(); } @@ -434,11 +487,21 @@ void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() { #if !defined(COBALT_BUILD_TYPE_GOLD) d3d12device_->AddRef(); ULONG reference_count = d3d12device_->Release(); - SB_DLOG(INFO) << "Reference count of |d3d12device_| is " - << reference_count; + SB_LOG(INFO) << "Reference count of |d3d12device_| is " + << reference_count; #endif d3d12device_.Reset(); } + if (d3d12FrameBuffersHeap_) { +#if !defined(COBALT_BUILD_TYPE_GOLD) + d3d12FrameBuffersHeap_->AddRef(); + ULONG reference_count = d3d12FrameBuffersHeap_->Release(); + SB_LOG(INFO) << "Reference count of |d3d12FrameBuffersHeap_| is " + << reference_count; +#endif + d3d12FrameBuffersHeap_.Reset(); + } + } catch (const std::exception& e) { SB_LOG(ERROR) << "Exception on releasing extended resources: " << e.what(); OnNonrecoverableFailure(); diff --git a/starboard/shared/uwp/extended_resources_manager.h b/starboard/shared/uwp/extended_resources_manager.h index 
fbd8a5b670bc..92f7083727a3 100644 --- a/starboard/shared/uwp/extended_resources_manager.h +++ b/starboard/shared/uwp/extended_resources_manager.h @@ -47,8 +47,9 @@ class ExtendedResourcesManager { void ReleaseExtendedResources(); void Quit(); - // Returns true when the d3d12 device and command queue can be used. + // Returns true when the d3d12 device, command queue & D3D12 heap can be used. bool GetD3D12Objects(Microsoft::WRL::ComPtr* device, + Microsoft::WRL::ComPtr* buffer_heap, void** command_queue); bool IsGpuDecoderReady() const { @@ -91,6 +92,8 @@ class ExtendedResourcesManager { Queue event_queue_; Microsoft::WRL::ComPtr d3d12device_; Microsoft::WRL::ComPtr d3d12queue_; + // heap for frame buffers (for the decoder and output queue) memory allocation + Microsoft::WRL::ComPtr d3d12FrameBuffersHeap_; // This is set to true when a release of extended resources is requested. // Anything delaying the release should be expedited when this is set. diff --git a/starboard/shared/uwp/player_components_factory.cc b/starboard/shared/uwp/player_components_factory.cc index 44d43f522b1c..cb3d307dfb62 100644 --- a/starboard/shared/uwp/player_components_factory.cc +++ b/starboard/shared/uwp/player_components_factory.cc @@ -236,9 +236,10 @@ class PlayerComponentsFactory : public PlayerComponents::Factory { SB_DCHECK(output_mode == kSbPlayerOutputModeDecodeToTexture); Microsoft::WRL::ComPtr d3d12device; + Microsoft::WRL::ComPtr d3d12buffer_heap; void* d3d12queue = nullptr; if (!uwp::ExtendedResourcesManager::GetInstance()->GetD3D12Objects( - &d3d12device, &d3d12queue)) { + &d3d12device, &d3d12buffer_heap, &d3d12queue)) { // Somehow extended resources get lost. Returns directly to trigger an // error to the player. 
*error_message = @@ -248,6 +249,7 @@ class PlayerComponentsFactory : public PlayerComponents::Factory { return false; } SB_DCHECK(d3d12device); + SB_DCHECK(d3d12buffer_heap); SB_DCHECK(d3d12queue); #if defined(INTERNAL_BUILD) @@ -258,14 +260,14 @@ class PlayerComponentsFactory : public PlayerComponents::Factory { video_decoder->reset(new GpuVp9VideoDecoder( creation_parameters.decode_target_graphics_context_provider(), creation_parameters.video_stream_info(), is_hdr_video, d3d12device, - d3d12queue)); + d3d12buffer_heap, d3d12queue)); } if (video_codec == kSbMediaVideoCodecAv1) { video_decoder->reset(new GpuAv1VideoDecoder( creation_parameters.decode_target_graphics_context_provider(), creation_parameters.video_stream_info(), is_hdr_video, d3d12device, - d3d12queue)); + d3d12buffer_heap, d3d12queue)); } #endif // defined(INTERNAL_BUILD)