Skip to content

Commit

Permalink
[UWP] Use a shared output queue for sw av1 and vpx decoder
Browse files Browse the repository at this point in the history
Reorganize output queue. Use one large preallocated hw buffer for both
av1 & vpx sw decoders

b/249739051

Change-Id: I6430ae1ba5d288ed2495f3056cc7283f5c189f49
  • Loading branch information
victorpasoshnikov committed Jun 23, 2023
1 parent 48939ab commit bc40dfb
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 17 deletions.
89 changes: 76 additions & 13 deletions starboard/shared/uwp/extended_resources_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,29 @@ using ::starboard::xb1::shared::VpxVideoDecoder;

const SbTime kReleaseTimeout = kSbTimeSecond;

// Size, in bytes, of the GPU memory heap shared by the underlying software
// vpx and av1 decoders.
//
// This value must be greater than max(av1_min_value, vpx_min_value), where
// av1_min_value and vpx_min_value are the minimum memory sizes required by the
// sw av1 and vpx decoders. Both decoders need memory for their internal
// buffers plus a minimal amount for the output queue, which depends on the
// preroll size. For instance, assume preroll_size = 8.
//
// The vpx underlying decoder needs 13 internal frame buffers to work and at
// least 8 buffers in its internal output queue for preroll.
// The size of a frame buffer is 13762560 for 4K SDR and 12976128 for 2K HDR,
// so the vpx decoder needs at least
// 13762560 * (13 + preroll_size) = 289013760 bytes.
//
// The av1 underlying decoder needs 13 internal buffers and 8 buffers for
// preroll. The size of a frame buffer is 5996544 for 2K SDR and 11993088 for
// 2K HDR, so the av1 decoder needs at least
// 11993088 * (13 + preroll_size) = 251854848 bytes.
//
// So 289013760 bytes is the minimum for both decoders to work reliably.
//
// To make playback smoother it is better to increase the output queue size
// to 30-50 frames. On the other hand, the existing memory budget must not be
// exceeded, so 440 MiB is chosen as a compromise.
//
// The product is computed in unsigned 64-bit arithmetic so that adjusting the
// budget can never overflow `int` before the conversion to uint64_t.
constexpr uint64_t kFrameBuffersPoolMemorySize = 440ULL * 1024 * 1024;
static_assert(kFrameBuffersPoolMemorySize >= 13762560ULL * (13 + 8),
              "Frame buffer pool is smaller than the documented minimum "
              "required by the sw vpx/av1 decoders.");
bool IsExtendedResourceModeRequired() {
if (!::starboard::xb1::shared::CanAcquire()) {
return false;
Expand Down Expand Up @@ -150,6 +173,7 @@ void ExtendedResourcesManager::Quit() {

bool ExtendedResourcesManager::GetD3D12Objects(
Microsoft::WRL::ComPtr<ID3D12Device>* device,
Microsoft::WRL::ComPtr<ID3D12Heap>* buffer_heap,
void** command_queue) {
if (HasNonrecoverableFailure()) {
SB_LOG(WARNING) << "The D3D12 device has encountered a nonrecoverable "
Expand Down Expand Up @@ -184,8 +208,8 @@ bool ExtendedResourcesManager::GetD3D12Objects(
D3D12_HEAP_PROPERTIES prop = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
D3D12_RESOURCE_DESC desc = CD3DX12_RESOURCE_DESC::Buffer(1024 * 1024);
HRESULT result = d3d12device_->CreateCommittedResource(
&prop, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COPY_DEST,
nullptr, IID_PPV_ARGS(&res));
&prop, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, nullptr,
IID_PPV_ARGS(&res));
if (result != S_OK) {
SB_LOG(WARNING) << "The D3D12 device is not in a good state, can not use "
"GPU based decoders.";
Expand All @@ -196,11 +220,25 @@ bool ExtendedResourcesManager::GetD3D12Objects(

*device = d3d12device_;
*command_queue = d3d12queue_.Get();
*buffer_heap = d3d12FrameBuffersHeap_.Get();
return true;
}

bool ExtendedResourcesManager::GetD3D12ObjectsInternal() {
if (!d3d12device_) {
UINT dxgiFactoryFlags = 0;
#if defined(_DEBUG)
{
// Enabling the debug layer helps to debug DX issues: if something goes
// wrong in DX, the debug layer outputs a detailed log.
ComPtr<ID3D12Debug> debugController;
HRESULT hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController));
if (SUCCEEDED(hr)) {
debugController->EnableDebugLayer();
}
}
#endif

if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0,
IID_PPV_ARGS(&d3d12device_)))) {
// GPU based vp9 decoding will be temporarily disabled.
Expand All @@ -221,8 +259,26 @@ bool ExtendedResourcesManager::GetD3D12ObjectsInternal() {
}
SB_DCHECK(d3d12queue_);
}
if (!d3d12FrameBuffersHeap_) {
D3D12_HEAP_DESC heap_desc;
heap_desc.SizeInBytes = kFrameBuffersPoolMemorySize;
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
heap_desc.Properties.CreationNodeMask = 0;
heap_desc.Properties.VisibleNodeMask = 0;
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
heap_desc.Flags = D3D12_HEAP_FLAG_NONE;

if (FAILED(d3d12device_->CreateHeap(
&heap_desc, IID_PPV_ARGS(&d3d12FrameBuffersHeap_)))) {
SB_LOG(WARNING) << "Failed to create d3d12 buffer.";
return false;
}
SB_DCHECK(d3d12FrameBuffersHeap_);
}

return d3d12device_ && d3d12queue_;
return d3d12device_ && d3d12queue_ && d3d12FrameBuffersHeap_;
}

bool ExtendedResourcesManager::AcquireExtendedResourcesInternal() {
Expand Down Expand Up @@ -335,7 +391,8 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() {
"shader compile.";
return;
}
if (Av1VideoDecoder::CompileShaders(d3d12device_, d3d12queue_.Get())) {
if (Av1VideoDecoder::CompileShaders(d3d12device_, d3d12FrameBuffersHeap_,
d3d12queue_.Get())) {
is_av1_shader_compiled_ = true;
SB_LOG(INFO) << "Gpu based AV1 decoder finished compiling its shaders.";
} else {
Expand All @@ -352,7 +409,8 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() {
return;
}

if (VpxVideoDecoder::CompileShaders(d3d12device_, d3d12queue_.Get())) {
if (VpxVideoDecoder::CompileShaders(d3d12device_, d3d12FrameBuffersHeap_,
d3d12queue_.Get())) {
is_vp9_shader_compiled_ = true;
SB_LOG(INFO) << "Gpu based VP9 decoder finished compiling its shaders.";
} else {
Expand All @@ -372,10 +430,6 @@ void ExtendedResourcesManager::CompileShadersAsynchronously() {

void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() {
SB_DCHECK(thread_checker_.CalledOnValidThread());
#if defined(INTERNAL_BUILD)
Av1VideoDecoder::ClearFrameBufferPool();
#endif // defined(INTERNAL_BUILD)

ScopedLock scoped_lock(mutex_);
if (!is_extended_resources_acquired_.load()) {
SB_LOG(INFO) << "Extended resources hasn't been acquired,"
Expand Down Expand Up @@ -424,8 +478,7 @@ void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() {
#if !defined(COBALT_BUILD_TYPE_GOLD)
d3d12queue_->AddRef();
ULONG reference_count = d3d12queue_->Release();
SB_DLOG(INFO) << "Reference count of |d3d12queue_| is "
<< reference_count;
SB_LOG(INFO) << "Reference count of |d3d12queue_| is " << reference_count;
#endif
d3d12queue_.Reset();
}
Expand All @@ -434,11 +487,21 @@ void ExtendedResourcesManager::ReleaseExtendedResourcesInternal() {
#if !defined(COBALT_BUILD_TYPE_GOLD)
d3d12device_->AddRef();
ULONG reference_count = d3d12device_->Release();
SB_DLOG(INFO) << "Reference count of |d3d12device_| is "
<< reference_count;
SB_LOG(INFO) << "Reference count of |d3d12device_| is "
<< reference_count;
#endif
d3d12device_.Reset();
}
if (d3d12FrameBuffersHeap_) {
#if !defined(COBALT_BUILD_TYPE_GOLD)
d3d12FrameBuffersHeap_->AddRef();
ULONG reference_count = d3d12FrameBuffersHeap_->Release();
SB_LOG(INFO) << "Reference count of |d3d12FrameBuffersHeap_| is "
<< reference_count;
#endif
d3d12FrameBuffersHeap_.Reset();
}

} catch (const std::exception& e) {
SB_LOG(ERROR) << "Exception on releasing extended resources: " << e.what();
OnNonrecoverableFailure();
Expand Down
5 changes: 4 additions & 1 deletion starboard/shared/uwp/extended_resources_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ class ExtendedResourcesManager {
void ReleaseExtendedResources();
void Quit();

// Returns true when the d3d12 device and command queue can be used.
// Returns true when the d3d12 device, command queue & D3D12 heap can be used.
bool GetD3D12Objects(Microsoft::WRL::ComPtr<ID3D12Device>* device,
Microsoft::WRL::ComPtr<ID3D12Heap>* buffer_heap,
void** command_queue);

bool IsGpuDecoderReady() const {
Expand Down Expand Up @@ -91,6 +92,8 @@ class ExtendedResourcesManager {
Queue<Event> event_queue_;
Microsoft::WRL::ComPtr<ID3D12Device> d3d12device_;
Microsoft::WRL::ComPtr<ID3D12CommandQueue> d3d12queue_;
// Heap from which frame buffers (decoder and output queue) are allocated.
Microsoft::WRL::ComPtr<ID3D12Heap> d3d12FrameBuffersHeap_;

// This is set to true when a release of extended resources is requested.
// Anything delaying the release should be expedited when this is set.
Expand Down
8 changes: 5 additions & 3 deletions starboard/shared/uwp/player_components_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,10 @@ class PlayerComponentsFactory : public PlayerComponents::Factory {
SB_DCHECK(output_mode == kSbPlayerOutputModeDecodeToTexture);

Microsoft::WRL::ComPtr<ID3D12Device> d3d12device;
Microsoft::WRL::ComPtr<ID3D12Heap> d3d12buffer_heap;
void* d3d12queue = nullptr;
if (!uwp::ExtendedResourcesManager::GetInstance()->GetD3D12Objects(
&d3d12device, &d3d12queue)) {
&d3d12device, &d3d12buffer_heap, &d3d12queue)) {
// Somehow extended resources get lost. Returns directly to trigger an
// error to the player.
*error_message =
Expand All @@ -248,6 +249,7 @@ class PlayerComponentsFactory : public PlayerComponents::Factory {
return false;
}
SB_DCHECK(d3d12device);
SB_DCHECK(d3d12buffer_heap);
SB_DCHECK(d3d12queue);

#if defined(INTERNAL_BUILD)
Expand All @@ -258,14 +260,14 @@ class PlayerComponentsFactory : public PlayerComponents::Factory {
video_decoder->reset(new GpuVp9VideoDecoder(
creation_parameters.decode_target_graphics_context_provider(),
creation_parameters.video_stream_info(), is_hdr_video, d3d12device,
d3d12queue));
d3d12buffer_heap, d3d12queue));
}

if (video_codec == kSbMediaVideoCodecAv1) {
video_decoder->reset(new GpuAv1VideoDecoder(
creation_parameters.decode_target_graphics_context_provider(),
creation_parameters.video_stream_info(), is_hdr_video, d3d12device,
d3d12queue));
d3d12buffer_heap, d3d12queue));
}
#endif // defined(INTERNAL_BUILD)

Expand Down

0 comments on commit bc40dfb

Please sign in to comment.