From a121bf3cbb507bdfb16cbdd4754d1f9ea851ed43 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 15 Jul 2024 22:44:04 +0200 Subject: [PATCH 1/3] h264: Use async decoding if possible This allows us to hide the latency of the software decoder when operating in buffered mode which should help games that are picky about slow decoding times --- src/Cafe/CMakeLists.txt | 2 + .../OS/libs/coreinit/coreinit_SysHeap.cpp | 12 +- src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h | 3 + src/Cafe/OS/libs/h264_avc/H264Dec.cpp | 745 +++--------------- .../OS/libs/h264_avc/H264DecBackendAVC.cpp | 502 ++++++++++++ src/Cafe/OS/libs/h264_avc/H264DecInternal.h | 139 ++++ .../OS/libs/h264_avc/parser/H264Parser.cpp | 17 +- src/Cafe/OS/libs/h264_avc/parser/H264Parser.h | 2 + 8 files changed, 782 insertions(+), 640 deletions(-) create mode 100644 src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp create mode 100644 src/Cafe/OS/libs/h264_avc/H264DecInternal.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 1583bdd7a..6da4155f8 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -374,7 +374,9 @@ add_library(CemuCafe OS/libs/gx2/GX2_Texture.h OS/libs/gx2/GX2_TilingAperture.cpp OS/libs/h264_avc/H264Dec.cpp + OS/libs/h264_avc/H264DecBackendAVC.cpp OS/libs/h264_avc/h264dec.h + OS/libs/h264_avc/H264DecInternal.h OS/libs/h264_avc/parser OS/libs/h264_avc/parser/H264Parser.cpp OS/libs/h264_avc/parser/H264Parser.h diff --git a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp index e37949d77..2f819c500 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.cpp @@ -14,13 +14,10 @@ namespace coreinit return coreinit::MEMAllocFromExpHeapEx(_sysHeapHandle, size, alignment); } - void export_OSAllocFromSystem(PPCInterpreter_t* hCPU) + void OSFreeToSystem(void* ptr) { - ppcDefineParamU32(size, 0); - ppcDefineParamS32(alignment, 1); - MEMPTR mem = OSAllocFromSystem(size, alignment); - cemuLog_logDebug(LogType::Force, "OSAllocFromSystem(0x{:x}, {}) -> 0x{:08x}", size, alignment, mem.GetMPTR()); - osLib_returnFromFunction(hCPU, mem.GetMPTR()); + _sysHeapFreeCounter++; + coreinit::MEMFreeToExpHeap(_sysHeapHandle, ptr); } void InitSysHeap() @@ -34,7 +31,8 @@ namespace coreinit void InitializeSysHeap() { - osLib_addFunction("coreinit", "OSAllocFromSystem", export_OSAllocFromSystem); + cafeExportRegister("h264", OSAllocFromSystem, LogType::CoreinitMem); + cafeExportRegister("h264", OSFreeToSystem, LogType::CoreinitMem); } } diff --git a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h index 428224af2..ad1157547 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h +++ b/src/Cafe/OS/libs/coreinit/coreinit_SysHeap.h @@ -4,5 +4,8 @@ namespace coreinit { void InitSysHeap(); + void* OSAllocFromSystem(uint32 size, uint32 alignment); + void OSFreeToSystem(void* ptr); + void InitializeSysHeap(); } \ No newline at end of file diff --git a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp index 024965fd0..82db039b6 100644 --- a/src/Cafe/OS/libs/h264_avc/H264Dec.cpp +++ b/src/Cafe/OS/libs/h264_avc/H264Dec.cpp @@ -1,17 +1,12 @@ #include "Cafe/OS/common/OSCommon.h" #include "Cafe/HW/Espresso/PPCCallback.h" #include "Cafe/OS/libs/h264_avc/parser/H264Parser.h" +#include "Cafe/OS/libs/h264_avc/H264DecInternal.h" #include "util/highresolutiontimer/HighResolutionTimer.h" #include "Cafe/CafeSystem.h" #include "h264dec.h" -extern "C" -{ -#include "../dependencies/ih264d/common/ih264_typedefs.h" -#include "../dependencies/ih264d/decoder/ih264d.h" -}; - enum class H264DEC_STATUS : uint32 { SUCCESS = 0x0, @@ -33,10 +28,35 @@ namespace H264 return false; } + struct H264Context + { + struct + { + MEMPTR ptr{ nullptr }; + uint32be length{ 0 }; + float64be timestamp; + }BitStream; + struct + { + MEMPTR outputFunc{ nullptr }; + uint8be outputPerFrame{ 0 }; // whats the default? + MEMPTR userMemoryParam{ nullptr }; + }Param; + // misc + uint32be sessionHandle; + + // decoder state + struct + { + uint32 numFramesInFlight{0}; + }decoderState; + }; + uint32 H264DECMemoryRequirement(uint32 codecProfile, uint32 codecLevel, uint32 width, uint32 height, uint32be* sizeRequirementOut) { if (H264_IsBotW()) { + static_assert(sizeof(H264Context) < 256); *sizeRequirementOut = 256; return 0; } @@ -169,590 +189,47 @@ namespace H264 return H264DEC_STATUS::BAD_STREAM; } - struct H264Context - { - struct - { - MEMPTR ptr{ nullptr }; - uint32be length{ 0 }; - float64be timestamp; - }BitStream; - struct - { - MEMPTR outputFunc{ nullptr }; - uint8be outputPerFrame{ 0 }; // whats the default? - MEMPTR userMemoryParam{ nullptr }; - }Param; - // misc - uint32be sessionHandle; - }; - - class H264AVCDecoder + H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight) { - static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size) - { -#ifdef _WIN32 - return _aligned_malloc(size, alignment); -#else - // alignment is atleast sizeof(void*) - alignment = std::max(alignment, sizeof(void*)); - - //smallest multiple of 2 at least as large as alignment - alignment--; - alignment |= alignment << 1; - alignment |= alignment >> 1; - alignment |= alignment >> 2; - alignment |= alignment >> 4; - alignment |= alignment >> 8; - alignment |= alignment >> 16; - alignment ^= (alignment >> 1); - - void* temp; - posix_memalign(&temp, (size_t)alignment, (size_t)size); - return temp; -#endif - } - - static void ivd_aligned_free(void* ctxt, void* buf) - { -#ifdef _WIN32 - _aligned_free(buf); -#else - free(buf); -#endif - return; - } - - public: - struct DecodeResult - { - bool frameReady{ false }; - double timestamp; - void* imageOutput; - ivd_video_decode_op_t decodeOutput; - }; - - void Init(bool isBufferedMode) - { - ih264d_create_ip_t s_create_ip{ 0 }; - ih264d_create_op_t s_create_op{ 0 }; - - s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); - s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; - s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?) - - s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); - s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; - s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; - s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; - s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; - - WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op); - cemu_assert(!status); - - m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; - m_codecCtx->pv_fxns = (void*)&ih264d_api_function; - m_codecCtx->u4_size = sizeof(iv_obj_t); - - SetDecoderCoreCount(1); - - m_isBufferedMode = isBufferedMode; - - UpdateParameters(false); - - m_bufferedResults.clear(); - m_numDecodedFrames = 0; - m_hasBufferSizeInfo = false; - m_timestampIndex = 0; - } - - void Destroy() - { - if (!m_codecCtx) - return; - ih264d_delete_ip_t s_delete_ip{ 0 }; - ih264d_delete_op_t s_delete_op{ 0 }; - s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); - s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; - s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); - WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op); - cemu_assert_debug(!status); - m_codecCtx = nullptr; - } - - void SetDecoderCoreCount(uint32 coreCount) - { - ih264d_ctl_set_num_cores_ip_t s_set_cores_ip; - ih264d_ctl_set_num_cores_op_t s_set_cores_op; - s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES; - s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4 - s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); - s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); - IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op); - cemu_assert(status == IV_SUCCESS); - } - - static bool GetImageInfo(uint8* stream, uint32 length, uint32& imageWidth, uint32& imageHeight) - { - // create temporary decoder - ih264d_create_ip_t s_create_ip{ 0 }; - ih264d_create_op_t s_create_op{ 0 }; - s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); - s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; - s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 0; - s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); - s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; - s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; - s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; - s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; - iv_obj_t* ctx = nullptr; - WORD32 status = ih264d_api_function(ctx, &s_create_ip, &s_create_op); - cemu_assert_debug(!status); - if (status != IV_SUCCESS) - return false; - ctx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; - ctx->pv_fxns = (void*)&ih264d_api_function; - ctx->u4_size = sizeof(iv_obj_t); - // set header-only mode - ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; - ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; - ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; - ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; - ps_ctl_ip->u4_disp_wd = 0; - ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; - ps_ctl_ip->e_frm_out_mode = IVD_DISPLAY_FRAME_OUT; - ps_ctl_ip->e_vid_dec_mode = IVD_DECODE_HEADER; - ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; - ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; - ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); - ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); - status = ih264d_api_function(ctx, &s_h264d_ctl_ip, &s_h264d_ctl_op); - cemu_assert(!status); - // decode stream - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = stream; - s_dec_ip.u4_num_Bytes = length; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - - s_dec_op.u4_raw_wd = 0; - s_dec_op.u4_raw_ht = 0; - - status = ih264d_api_function(ctx, &s_dec_ip, &s_dec_op); - //cemu_assert(status == 0); -> This errors when not both the headers are present, but it will still set the parameters we need - bool isValid = false; - if (true)//status == 0) - { - imageWidth = s_dec_op.u4_raw_wd; - imageHeight = s_dec_op.u4_raw_ht; - cemu_assert_debug(imageWidth != 0 && imageHeight != 0); - isValid = true; - } - // destroy decoder - ih264d_delete_ip_t s_delete_ip{ 0 }; - ih264d_delete_op_t s_delete_op{ 0 }; - s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); - s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; - s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); - status = ih264d_api_function(ctx, &s_delete_ip, &s_delete_op); - cemu_assert_debug(!status); - return isValid; - } - - void Decode(void* data, uint32 length, double timestamp, void* imageOutput, DecodeResult& decodeResult) - { - if (!m_hasBufferSizeInfo) - { - uint32 numByteConsumed = 0; - if (!DetermineBufferSizes(data, length, numByteConsumed)) - { - cemuLog_log(LogType::Force, "H264: Unable to determine picture size. Ignoring decode input"); - decodeResult.frameReady = false; - return; - } - length -= numByteConsumed; - data = (uint8*)data + numByteConsumed; - m_hasBufferSizeInfo = true; - } - - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - - // remember timestamp and associated output buffer - m_timestamps[m_timestampIndex] = timestamp; - m_imageBuffers[m_timestampIndex] = imageOutput; - s_dec_ip.u4_ts = m_timestampIndex; - m_timestampIndex = (m_timestampIndex + 1) % 64; - - s_dec_ip.pv_stream_buffer = (uint8*)data; - s_dec_ip.u4_num_Bytes = length; - - s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - - BenchmarkTimer bt; - bt.Start(); - WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED) - { - // resolution change - ResetDecoder(); - m_hasBufferSizeInfo = false; - Decode(data, length, timestamp, imageOutput, decodeResult); - return; - } - else if (status != 0) - { - cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status); - decodeResult.frameReady = false; - return; - } - - bt.Stop(); - double decodeTime = bt.GetElapsedMilliseconds(); - - cemu_assert(s_dec_op.u4_frame_decoded_flag); - cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == length); - - cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?) - - if (s_dec_op.u4_output_present) - { - cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV); - if (H264_IsBotW()) - { - if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) - s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; - } - DecodeResult tmpResult; - tmpResult.frameReady = s_dec_op.u4_output_present != 0; - tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts]; - tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts]; - tmpResult.decodeOutput = s_dec_op; - AddBufferedResult(tmpResult); - // transfer image to PPC output buffer and also correct stride - bt.Start(); - CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op); - bt.Stop(); - double copyTime = bt.GetElapsedMilliseconds(); - // release buffer - sint32 bufferId = -1; - for (size_t i = 0; i < m_displayBuf.size(); i++) - { - if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size())) - { - bufferId = (sint32)i; - break; - } - } - cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id); - cemu_assert(bufferId >= 0); - ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; - ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; - s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; - s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); - s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); - s_video_rel_disp_ip.u4_disp_buf_id = bufferId; - status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); - cemu_assert(!status); - - cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime); - } - else - { - cemuLog_log(LogType::H264, "H264Bench | DecodeTime{}ms", decodeTime); - } - - if (s_dec_op.u4_frame_decoded_flag) - m_numDecodedFrames++; - - if (m_isBufferedMode) - { - // in buffered mode, always buffer 5 frames regardless of actual reordering and decoder latency - if (m_numDecodedFrames > 5) - GetCurrentBufferedResult(decodeResult); - } - else if(m_numDecodedFrames > 0) - GetCurrentBufferedResult(decodeResult); - - // get VUI - //ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip; - //ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op; - - //s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL; - //s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS; - //s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t); - //s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t); - - //status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op); - //cemu_assert(status == 0); - } - - std::vector Flush() - { - std::vector results; - // set flush mode - ivd_ctl_flush_ip_t s_video_flush_ip{ 0 }; - ivd_ctl_flush_op_t s_video_flush_op{ 0 }; - s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; - s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); - s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t); - WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op); - if (status != 0) - cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status); - // get all frames from the codec - while (true) - { - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = NULL; - s_dec_ip.u4_num_Bytes = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; - s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0) - break; - cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be zero? - if(s_dec_op.u4_output_present == 0) - continue; - if (H264_IsBotW()) - { - if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) - s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; - } - DecodeResult tmpResult; - tmpResult.frameReady = s_dec_op.u4_output_present != 0; - tmpResult.timestamp = m_timestamps[s_dec_op.u4_ts]; - tmpResult.imageOutput = m_imageBuffers[s_dec_op.u4_ts]; - tmpResult.decodeOutput = s_dec_op; - AddBufferedResult(tmpResult); - CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_imageBuffers[s_dec_op.u4_ts], s_dec_op); - } - results = std::move(m_bufferedResults); - return results; - } - - void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo) - { - uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd; - uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht; - - size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd; - size_t outputStride = (imageWidth + 0xFF) & ~0xFF; - - // copy Y - uint8* yOut = bufOut; - for (uint32 row = 0; row < imageHeight; row++) - { - memcpy(yOut, yIn, imageWidth); - yIn += inputStride; - yOut += outputStride; - } - - // copy UV - uint8* uvOut = bufOut + outputStride * imageHeight; - for (uint32 row = 0; row < imageHeight/2; row++) - { - memcpy(uvOut, uvIn, imageWidth); - uvIn += inputStride; - uvOut += outputStride; - } - } - - private: - - bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed) - { - numByteConsumed = 0; - UpdateParameters(true); - - ivd_video_decode_ip_t s_dec_ip{ 0 }; - ivd_video_decode_op_t s_dec_op{ 0 }; - s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); - s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); - - s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; - s_dec_ip.pv_stream_buffer = (uint8*)data; - s_dec_ip.u4_num_Bytes = length; - s_dec_ip.s_out_buffer.u4_num_bufs = 0; - WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); - if (status != 0) - { - cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream"); - return false; - } - numByteConsumed = s_dec_op.u4_num_bytes_consumed; - cemu_assert(status == 0); - if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0) - return false; - UpdateParameters(false); - ReinitBuffers(); - return true; - } - - void ReinitBuffers() + if(!stream || length < 4 || !outputWidth || !outputHeight) + return H264DEC_STATUS::INVALID_PARAM; + if( (offset+4) > length ) + return H264DEC_STATUS::INVALID_PARAM; + uint8* cur = stream + offset; + uint8* end = stream + length; + cur += 2; // we access cur[-2] and cur[-1] so we need to start at offset 2 + while(cur < end-2) { - ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 }; - ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 }; - WORD32 outlen = 0; - - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; - s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); - s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op); - cemu_assert(!status); - - // allocate - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + // check for start code + if(*cur != 1) { - m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]); + cur++; + continue; } - // set - ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs - ivd_set_display_frame_op_t s_set_display_frame_op{ 0 }; - - s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; - s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t); - s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); - - cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2); - cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0); - - s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs; - - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + // check if this is a valid NAL header + if(cur[-2] != 0 || cur[-1] != 0 || cur[0] != 1) { - s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2; - s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0]; - s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1]; - s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0; - s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0]; + cur++; + continue; } - - status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op); - cemu_assert(!status); - - - // mark all as released (available) - for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + uint8 nalHeader = cur[1]; + if((nalHeader & 0x1F) != 7) { - ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; - ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; - - s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; - s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); - s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); - s_video_rel_disp_ip.u4_disp_buf_id = i; - - status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); - cemu_assert(!status); - } - } - - void ResetDecoder() - { - ivd_ctl_reset_ip_t s_ctl_ip; - ivd_ctl_reset_op_t s_ctl_op; - - s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; - s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; - s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); - s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op); - cemu_assert_debug(status == 0); - } - - void UpdateParameters(bool headerDecodeOnly) - { - ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; - ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; - ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; - ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; - - ps_ctl_ip->u4_disp_wd = 0; - ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; - ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT; - ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME; - ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; - ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; - ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); - ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); - - WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op); - cemu_assert(status == 0); - } - - /* In non-flush mode we have a delay of (at least?) 5 frames */ - void AddBufferedResult(DecodeResult& decodeResult) - { - if (decodeResult.frameReady) - m_bufferedResults.emplace_back(decodeResult); - } - - void GetCurrentBufferedResult(DecodeResult& decodeResult) - { - cemu_assert(!m_bufferedResults.empty()); - if (m_bufferedResults.empty()) - { - decodeResult.frameReady = false; - return; + cur++; + continue; } - decodeResult = m_bufferedResults.front(); - m_bufferedResults.erase(m_bufferedResults.begin()); - } - private: - iv_obj_t* m_codecCtx{nullptr}; - bool m_hasBufferSizeInfo{ false }; - bool m_isBufferedMode{ false }; - double m_timestamps[64]; - void* m_imageBuffers[64]; - uint32 m_timestampIndex{0}; - std::vector m_bufferedResults; - uint32 m_numDecodedFrames{0}; - std::vector> m_displayBuf; - }; - - H264DEC_STATUS H264DECGetImageSize(uint8* stream, uint32 length, uint32 offset, uint32be* outputWidth, uint32be* outputHeight) - { - cemu_assert(offset <= length); - - uint32 imageWidth, imageHeight; - - if (H264AVCDecoder::GetImageInfo(stream, length, imageWidth, imageHeight)) - { - if (H264_IsBotW()) + h264State_seq_parameter_set_t psp; + bool r = h264Parser_ParseSPS(cur+2, end-cur-2, psp); + if(!r) { - if (imageWidth == 1920 && imageHeight == 1088) - imageHeight = 1080; + cemu_assert_suspicious(); // should not happen + return H264DEC_STATUS::BAD_STREAM; } - *outputWidth = imageWidth; - *outputHeight = imageHeight; - } - else - { - *outputWidth = 0; - *outputHeight = 0; - return H264DEC_STATUS::BAD_STREAM; + *outputWidth = (psp.pic_width_in_mbs_minus1 + 1) * 16; + *outputHeight = (psp.pic_height_in_map_units_minus1 + 1) * 16; // affected by frame_mbs_only_flag? + return H264DEC_STATUS::SUCCESS; } - - return H264DEC_STATUS::SUCCESS; + return H264DEC_STATUS::BAD_STREAM; } uint32 H264DECInitParam(uint32 workMemorySize, void* workMemory) @@ -762,26 +239,28 @@ namespace H264 return 0; } - std::unordered_map sDecoderSessions; + std::unordered_map sDecoderSessions; std::mutex sDecoderSessionsMutex; std::atomic_uint32_t sCurrentSessionHandle{ 1 }; - static H264AVCDecoder* _CreateDecoderSession(uint32& handleOut) + H264DecoderBackend* CreateAVCDecoder(); + + static H264DecoderBackend* _CreateDecoderSession(uint32& handleOut) { std::unique_lock _lock(sDecoderSessionsMutex); handleOut = sCurrentSessionHandle.fetch_add(1); - H264AVCDecoder* session = new H264AVCDecoder(); + H264DecoderBackend* session = CreateAVCDecoder(); sDecoderSessions.try_emplace(handleOut, session); return session; } - static H264AVCDecoder* _AcquireDecoderSession(uint32 handle) + static H264DecoderBackend* _AcquireDecoderSession(uint32 handle) { std::unique_lock _lock(sDecoderSessionsMutex); auto it = sDecoderSessions.find(handle); if (it == sDecoderSessions.end()) return nullptr; - H264AVCDecoder* session = it->second; + H264DecoderBackend* session = it->second; if (sDecoderSessions.size() >= 5) { cemuLog_log(LogType::Force, "H264: Warning - more than 5 active sessions"); @@ -790,7 +269,7 @@ namespace H264 return session; } - static void _ReleaseDecoderSession(H264AVCDecoder* session) + static void _ReleaseDecoderSession(H264DecoderBackend* session) { std::unique_lock _lock(sDecoderSessionsMutex); @@ -802,7 +281,7 @@ namespace H264 auto it = sDecoderSessions.find(handle); if (it == sDecoderSessions.end()) return; - H264AVCDecoder* session = it->second; + H264DecoderBackend* session = it->second; session->Destroy(); delete session; sDecoderSessions.erase(it); @@ -830,45 +309,44 @@ namespace H264 uint32 H264DECBegin(void* workMemory) { H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECBegin(): Invalid session"); return 0; } session->Init(ctx->Param.outputPerFrame == 0); + ctx->decoderState.numFramesInFlight = 0; _ReleaseDecoderSession(session); return 0; } - void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult); - - void _async_H264DECEnd(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, std::vector* decodeResultsOut) - { - *decodeResultsOut = session->Flush(); - coreinit::OSSignalEvent(executeDoneEvent); - } + void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult); H264DEC_STATUS H264DECEnd(void* workMemory) { H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECEnd(): Invalid session"); return H264DEC_STATUS::SUCCESS; } - StackAllocator executeDoneEvent; - coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); - std::vector results; - auto asyncTask = std::async(std::launch::async, _async_H264DECEnd, executeDoneEvent.GetPointer(), session, ctx, &results); - coreinit::OSWaitEvent(&executeDoneEvent); - _ReleaseDecoderSession(session); - if (!results.empty()) + coreinit::OSEvent* flushEvt = &session->GetFlushEvent(); + coreinit::OSResetEvent(flushEvt); + session->QueueFlush(); + coreinit::OSWaitEvent(flushEvt); + while(true) { - for (auto& itr : results) - H264DoFrameOutputCallback(ctx, itr); + H264DecoderBackend::DecodeResult decodeResult; + if( !session->GetFrameOutputIfReady(decodeResult) ) + break; + // todo - output all frames in a single callback? + H264DoFrameOutputCallback(ctx, decodeResult); + ctx->decoderState.numFramesInFlight--; } + cemu_assert_debug(ctx->decoderState.numFramesInFlight == 0); // no frames should be in flight anymore. Exact behavior is not well understood but we may have to output dummy frames if necessary + _ReleaseDecoderSession(session); return H264DEC_STATUS::SUCCESS; } @@ -930,7 +408,6 @@ namespace H264 return 0; } - struct H264DECFrameOutput { /* +0x00 */ uint32be result; @@ -967,7 +444,7 @@ namespace H264 static_assert(sizeof(H264OutputCBStruct) == 12); - void H264DoFrameOutputCallback(H264Context* ctx, H264AVCDecoder::DecodeResult& decodeResult) + void H264DoFrameOutputCallback(H264Context* ctx, H264DecoderBackend::DecodeResult& decodeResult) { sint32 outputFrameCount = 1; @@ -984,14 +461,14 @@ namespace H264 frameOutput->imagePtr = (uint8*)decodeResult.imageOutput; frameOutput->result = 100; frameOutput->timestamp = decodeResult.timestamp; - frameOutput->frameWidth = decodeResult.decodeOutput.u4_pic_wd; - frameOutput->frameHeight = decodeResult.decodeOutput.u4_pic_ht; - frameOutput->bytesPerRow = (decodeResult.decodeOutput.u4_pic_wd + 0xFF) & ~0xFF; - frameOutput->cropEnable = decodeResult.decodeOutput.u1_frame_cropping_flag; - frameOutput->cropTop = decodeResult.decodeOutput.u1_frame_cropping_rect_top_ofst; - frameOutput->cropBottom = decodeResult.decodeOutput.u1_frame_cropping_rect_bottom_ofst; - frameOutput->cropLeft = decodeResult.decodeOutput.u1_frame_cropping_rect_left_ofst; - frameOutput->cropRight = decodeResult.decodeOutput.u1_frame_cropping_rect_right_ofst; + frameOutput->frameWidth = decodeResult.frameWidth; + frameOutput->frameHeight = decodeResult.frameHeight; + frameOutput->bytesPerRow = decodeResult.bytesPerRow; + frameOutput->cropEnable = decodeResult.cropEnable; + frameOutput->cropTop = decodeResult.cropTop; + frameOutput->cropBottom = decodeResult.cropBottom; + frameOutput->cropLeft = decodeResult.cropLeft; + frameOutput->cropRight = decodeResult.cropRight; StackAllocator stack_fptrOutputData; stack_fptrOutputData->frameCount = outputFrameCount; @@ -1006,29 +483,41 @@ namespace H264 } } - void _async_H264DECExecute(coreinit::OSEvent* executeDoneEvent, H264AVCDecoder* session, H264Context* ctx, void* imageOutput, H264AVCDecoder::DecodeResult* decodeResult) - { - session->Decode(ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput, *decodeResult); - coreinit::OSSignalEvent(executeDoneEvent); - } - uint32 H264DECExecute(void* workMemory, void* imageOutput) { + BenchmarkTimer bt; + bt.Start(); H264Context* ctx = (H264Context*)workMemory; - H264AVCDecoder* session = _AcquireDecoderSession(ctx->sessionHandle); + H264DecoderBackend* session = _AcquireDecoderSession(ctx->sessionHandle); if (!session) { cemuLog_log(LogType::Force, "H264DECExecute(): Invalid session"); return 0; } - StackAllocator executeDoneEvent; - coreinit::OSInitEvent(&executeDoneEvent, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_MANUAL); - H264AVCDecoder::DecodeResult decodeResult; - auto asyncTask = std::async(std::launch::async, _async_H264DECExecute, &executeDoneEvent, session, ctx, imageOutput , &decodeResult); - coreinit::OSWaitEvent(&executeDoneEvent); + // feed data to backend + session->QueueForDecode((uint8*)ctx->BitStream.ptr.GetPtr(), ctx->BitStream.length, ctx->BitStream.timestamp, imageOutput); + ctx->decoderState.numFramesInFlight++; + // H264DECExecute is synchronous and will return a frame after either every call (non-buffered) or after 6 calls (buffered) + // normally frame decoding happens only during H264DECExecute, but in order to hide the latency of our CPU decoder we will decode asynchronously in buffered mode + uint32 numFramesToBuffer = (ctx->Param.outputPerFrame == 0) ? 5 : 0; + if(ctx->decoderState.numFramesInFlight > numFramesToBuffer) + { + ctx->decoderState.numFramesInFlight--; + while(true) + { + coreinit::OSEvent& evt = session->GetFrameOutputEvent(); + coreinit::OSWaitEvent(&evt); + H264DecoderBackend::DecodeResult decodeResult; + if( !session->GetFrameOutputIfReady(decodeResult) ) + continue; + H264DoFrameOutputCallback(ctx, decodeResult); + break; + } + } _ReleaseDecoderSession(session); - if(decodeResult.frameReady) - H264DoFrameOutputCallback(ctx, decodeResult); + bt.Stop(); + double callTime = bt.GetElapsedMilliseconds(); + cemuLog_log(LogType::H264, "H264Bench | H264DECExecute took {}ms", callTime); return 0x80 | 100; } diff --git a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp new file mode 100644 index 000000000..3104b01d8 --- /dev/null +++ b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp @@ -0,0 +1,502 @@ +#include "H264DecInternal.h" +#include "util/highresolutiontimer/HighResolutionTimer.h" + +extern "C" +{ +#include "../dependencies/ih264d/common/ih264_typedefs.h" +#include "../dependencies/ih264d/decoder/ih264d.h" +}; + +namespace H264 +{ + bool H264_IsBotW(); + + class H264AVCDecoder : public H264DecoderBackend + { + static void* ivd_aligned_malloc(void* ctxt, WORD32 alignment, WORD32 size) + { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + // alignment is atleast sizeof(void*) + alignment = std::max(alignment, sizeof(void*)); + + //smallest multiple of 2 at least as large as alignment + alignment--; + alignment |= alignment << 1; + alignment |= alignment >> 1; + alignment |= alignment >> 2; + alignment |= alignment >> 4; + alignment |= alignment >> 8; + alignment |= alignment >> 16; + alignment ^= (alignment >> 1); + + void* temp; + posix_memalign(&temp, (size_t)alignment, (size_t)size); + return temp; +#endif + } + + static void ivd_aligned_free(void* ctxt, void* buf) + { +#ifdef _WIN32 + _aligned_free(buf); +#else + free(buf); +#endif + } + + public: + H264AVCDecoder() + { + m_decoderThread = std::thread(&H264AVCDecoder::DecoderThread, this); + } + + ~H264AVCDecoder() + { + m_decodeSem.increment(); + m_threadShouldExit = true; + if (m_decoderThread.joinable()) + m_decoderThread.join(); + } + + void Init(bool isBufferedMode) + { + ih264d_create_ip_t s_create_ip{ 0 }; + ih264d_create_op_t s_create_op{ 0 }; + + s_create_ip.s_ivd_create_ip_t.u4_size = sizeof(ih264d_create_ip_t); + s_create_ip.s_ivd_create_ip_t.e_cmd = IVD_CMD_CREATE; + s_create_ip.s_ivd_create_ip_t.u4_share_disp_buf = 1; // shared display buffer mode -> We give the decoder a list of buffers that it will use (?) + + s_create_op.s_ivd_create_op_t.u4_size = sizeof(ih264d_create_op_t); + s_create_ip.s_ivd_create_ip_t.e_output_format = IV_YUV_420SP_UV; + s_create_ip.s_ivd_create_ip_t.pf_aligned_alloc = ivd_aligned_malloc; + s_create_ip.s_ivd_create_ip_t.pf_aligned_free = ivd_aligned_free; + s_create_ip.s_ivd_create_ip_t.pv_mem_ctxt = NULL; + + WORD32 status = ih264d_api_function(m_codecCtx, &s_create_ip, &s_create_op); + cemu_assert(!status); + + m_codecCtx = (iv_obj_t*)s_create_op.s_ivd_create_op_t.pv_handle; + m_codecCtx->pv_fxns = (void*)&ih264d_api_function; + m_codecCtx->u4_size = sizeof(iv_obj_t); + + SetDecoderCoreCount(1); + + m_isBufferedMode = isBufferedMode; + + UpdateParameters(false); + + m_numDecodedFrames = 0; + m_hasBufferSizeInfo = false; + } + + void Destroy() + { + if (!m_codecCtx) + return; + ih264d_delete_ip_t s_delete_ip{ 0 }; + ih264d_delete_op_t s_delete_op{ 0 }; + s_delete_ip.s_ivd_delete_ip_t.u4_size = sizeof(ih264d_delete_ip_t); + s_delete_ip.s_ivd_delete_ip_t.e_cmd = IVD_CMD_DELETE; + s_delete_op.s_ivd_delete_op_t.u4_size = sizeof(ih264d_delete_op_t); + WORD32 status = ih264d_api_function(m_codecCtx, &s_delete_ip, &s_delete_op); + cemu_assert_debug(!status); + m_codecCtx = nullptr; + } + + void PushDecodedFrame(ivd_video_decode_op_t& s_dec_op) + { + // copy image data outside of lock since its an expensive operation + CopyImageToResultBuffer((uint8*)s_dec_op.s_disp_frm_buf.pv_y_buf, (uint8*)s_dec_op.s_disp_frm_buf.pv_u_buf, (uint8*)m_decodedSliceArray[s_dec_op.u4_ts].result.imageOutput, s_dec_op); + + std::unique_lock _l(m_decodeQueueMtx); + cemu_assert(s_dec_op.u4_ts < m_decodedSliceArray.size()); + auto& result = m_decodedSliceArray[s_dec_op.u4_ts]; + cemu_assert_debug(result.isUsed); + cemu_assert_debug(s_dec_op.u4_output_present != 0); + + result.result.isDecoded = true; + result.result.hasFrame = s_dec_op.u4_output_present != 0; + result.result.frameWidth = s_dec_op.u4_pic_wd; + result.result.frameHeight = s_dec_op.u4_pic_ht; + result.result.bytesPerRow = (s_dec_op.u4_pic_wd + 0xFF) & ~0xFF; + result.result.cropEnable = s_dec_op.u1_frame_cropping_flag; + result.result.cropTop = s_dec_op.u1_frame_cropping_rect_top_ofst; + result.result.cropBottom = s_dec_op.u1_frame_cropping_rect_bottom_ofst; + result.result.cropLeft = s_dec_op.u1_frame_cropping_rect_left_ofst; + result.result.cropRight = s_dec_op.u1_frame_cropping_rect_right_ofst; + + m_displayQueue.push_back(s_dec_op.u4_ts); + + _l.unlock(); + coreinit::OSSignalEvent(m_displayQueueEvt); + } + + // called from async worker thread + void Decode(DecodedSlice& decodedSlice) + { + if (!m_hasBufferSizeInfo) + { + uint32 numByteConsumed = 0; + if (!DetermineBufferSizes(decodedSlice.dataToDecode.m_data, decodedSlice.dataToDecode.m_length, numByteConsumed)) + { + cemuLog_log(LogType::Force, "H264AVC: Unable to determine picture size. Ignoring decode input"); + std::unique_lock _l(m_decodeQueueMtx); + decodedSlice.result.isDecoded = true; + decodedSlice.result.hasFrame = false; + coreinit::OSSignalEvent(m_displayQueueEvt); + return; + } + decodedSlice.dataToDecode.m_length -= numByteConsumed; + decodedSlice.dataToDecode.m_data = (uint8*)decodedSlice.dataToDecode.m_data + numByteConsumed; + m_hasBufferSizeInfo = true; + } + + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + + s_dec_ip.u4_ts = std::distance(m_decodedSliceArray.data(), &decodedSlice); + cemu_assert_debug(s_dec_ip.u4_ts < m_decodedSliceArray.size()); + + s_dec_ip.pv_stream_buffer = (uint8*)decodedSlice.dataToDecode.m_data; + s_dec_ip.u4_num_Bytes = decodedSlice.dataToDecode.m_length; + + s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + + BenchmarkTimer bt; + bt.Start(); + WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0 && (s_dec_op.u4_error_code&0xFF) == IVD_RES_CHANGED) + { + // resolution change + ResetDecoder(); + m_hasBufferSizeInfo = false; + Decode(decodedSlice); + return; + } + else if (status != 0) + { + cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status); + //decodeResult.frameReady = false; + cemu_assert_unimplemented(); + return; + } + + bt.Stop(); + double decodeTime = bt.GetElapsedMilliseconds(); + + cemu_assert(s_dec_op.u4_frame_decoded_flag); + cemu_assert_debug(s_dec_op.u4_num_bytes_consumed == decodedSlice.dataToDecode.m_length); + + cemu_assert_debug(m_isBufferedMode || s_dec_op.u4_output_present); // if buffered mode is disabled, then every input should output a frame (except for partial slices?) + + if (s_dec_op.u4_output_present) + { + cemu_assert(s_dec_op.e_output_format == IV_YUV_420SP_UV); + if (H264_IsBotW()) + { + if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) + s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; + } + bt.Start(); + PushDecodedFrame(s_dec_op); + bt.Stop(); + double copyTime = bt.GetElapsedMilliseconds(); + // release buffer + sint32 bufferId = -1; + for (size_t i = 0; i < m_displayBuf.size(); i++) + { + if (s_dec_op.s_disp_frm_buf.pv_y_buf >= m_displayBuf[i].data() && s_dec_op.s_disp_frm_buf.pv_y_buf < (m_displayBuf[i].data() + m_displayBuf[i].size())) + { + bufferId = (sint32)i; + break; + } + } + cemu_assert_debug(bufferId == s_dec_op.u4_disp_buf_id); + cemu_assert(bufferId >= 0); + ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; + ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); + s_video_rel_disp_ip.u4_disp_buf_id = bufferId; + status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); + cemu_assert(!status); + + cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms CopyTime {}ms", decodeTime, copyTime); + } + else + { + cemuLog_log(LogType::H264, "H264Bench | DecodeTime {}ms (no frame output)", decodeTime); + } + + if (s_dec_op.u4_frame_decoded_flag) + m_numDecodedFrames++; + // get VUI + //ih264d_ctl_get_vui_params_ip_t s_ctl_get_vui_params_ip; + //ih264d_ctl_get_vui_params_op_t s_ctl_get_vui_params_op; + + //s_ctl_get_vui_params_ip.e_cmd = IVD_CMD_VIDEO_CTL; + //s_ctl_get_vui_params_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_GET_VUI_PARAMS; + //s_ctl_get_vui_params_ip.u4_size = sizeof(ih264d_ctl_get_vui_params_ip_t); + //s_ctl_get_vui_params_op.u4_size = sizeof(ih264d_ctl_get_vui_params_op_t); + + //status = ih264d_api_function(mCodecCtx, &s_ctl_get_vui_params_ip, &s_ctl_get_vui_params_op); + //cemu_assert(status == 0); + } + + void Flush() + { + // set flush mode + ivd_ctl_flush_ip_t s_video_flush_ip{ 0 }; + ivd_ctl_flush_op_t s_video_flush_op{ 0 }; + s_video_flush_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_video_flush_ip.e_sub_cmd = IVD_CMD_CTL_FLUSH; + s_video_flush_ip.u4_size = sizeof(ivd_ctl_flush_ip_t); + s_video_flush_op.u4_size = sizeof(ivd_ctl_flush_op_t); + WORD32 status = ih264d_api_function(m_codecCtx, &s_video_flush_ip, &s_video_flush_op); + if (status != 0) + cemuLog_log(LogType::Force, "H264Dec: Unexpected error during flush ({})", status); + // get all frames from the decoder + while (true) + { + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_dec_ip.pv_stream_buffer = NULL; + s_dec_ip.u4_num_Bytes = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[0] = 0; + s_dec_ip.s_out_buffer.u4_min_out_buf_size[1] = 0; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0) + break; + cemu_assert_debug(s_dec_op.u4_output_present != 0); // should never be false? + if(s_dec_op.u4_output_present == 0) + continue; + if (H264_IsBotW()) + { + if (s_dec_op.s_disp_frm_buf.u4_y_wd == 1920 && s_dec_op.s_disp_frm_buf.u4_y_ht == 1088) + s_dec_op.s_disp_frm_buf.u4_y_ht = 1080; + } + PushDecodedFrame(s_dec_op); + } + } + + void CopyImageToResultBuffer(uint8* yIn, uint8* uvIn, uint8* bufOut, ivd_video_decode_op_t& decodeInfo) + { + uint32 imageWidth = decodeInfo.s_disp_frm_buf.u4_y_wd; + uint32 imageHeight = decodeInfo.s_disp_frm_buf.u4_y_ht; + + size_t inputStride = decodeInfo.s_disp_frm_buf.u4_y_strd; + size_t outputStride = (imageWidth + 0xFF) & ~0xFF; + + // copy Y + uint8* yOut = bufOut; + for (uint32 row = 0; row < imageHeight; row++) + { + memcpy(yOut, yIn, imageWidth); + yIn += inputStride; + yOut += outputStride; + } + + // copy UV + uint8* uvOut = bufOut + outputStride * imageHeight; + for (uint32 row = 0; row < imageHeight/2; row++) + { + memcpy(uvOut, uvIn, imageWidth); + uvIn += inputStride; + uvOut += outputStride; + } + } + private: + void SetDecoderCoreCount(uint32 coreCount) + { + ih264d_ctl_set_num_cores_ip_t s_set_cores_ip; + ih264d_ctl_set_num_cores_op_t s_set_cores_op; + s_set_cores_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_set_cores_ip.e_sub_cmd = (IVD_CONTROL_API_COMMAND_TYPE_T)IH264D_CMD_CTL_SET_NUM_CORES; + s_set_cores_ip.u4_num_cores = coreCount; // valid numbers are 1-4 + s_set_cores_ip.u4_size = sizeof(ih264d_ctl_set_num_cores_ip_t); + s_set_cores_op.u4_size = sizeof(ih264d_ctl_set_num_cores_op_t); + IV_API_CALL_STATUS_T status = ih264d_api_function(m_codecCtx, (void *)&s_set_cores_ip, (void *)&s_set_cores_op); + cemu_assert(status == IV_SUCCESS); + } + + bool DetermineBufferSizes(void* data, uint32 length, uint32& numByteConsumed) + { + numByteConsumed = 0; + UpdateParameters(true); + + ivd_video_decode_ip_t s_dec_ip{ 0 }; + ivd_video_decode_op_t s_dec_op{ 0 }; + s_dec_ip.u4_size = sizeof(ivd_video_decode_ip_t); + s_dec_op.u4_size = sizeof(ivd_video_decode_op_t); + + s_dec_ip.e_cmd = IVD_CMD_VIDEO_DECODE; + s_dec_ip.pv_stream_buffer = (uint8*)data; + s_dec_ip.u4_num_Bytes = length; + s_dec_ip.s_out_buffer.u4_num_bufs = 0; + WORD32 status = ih264d_api_function(m_codecCtx, &s_dec_ip, &s_dec_op); + if (status != 0) + { + cemuLog_log(LogType::Force, "H264: Unable to determine buffer sizes for stream"); + return false; + } + numByteConsumed = s_dec_op.u4_num_bytes_consumed; + cemu_assert(status == 0); + if (s_dec_op.u4_pic_wd == 0 || s_dec_op.u4_pic_ht == 0) + return false; + UpdateParameters(false); + ReinitBuffers(); + return true; + } + + void ReinitBuffers() + { + ivd_ctl_getbufinfo_ip_t s_ctl_ip{ 0 }; + ivd_ctl_getbufinfo_op_t s_ctl_op{ 0 }; + WORD32 outlen = 0; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_GETBUFINFO; + s_ctl_ip.u4_size = sizeof(ivd_ctl_getbufinfo_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_getbufinfo_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, &s_ctl_ip, &s_ctl_op); + cemu_assert(!status); + + // allocate + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + m_displayBuf.emplace_back().resize(s_ctl_op.u4_min_out_buf_size[0] + s_ctl_op.u4_min_out_buf_size[1]); + } + // set + ivd_set_display_frame_ip_t s_set_display_frame_ip{ 0 }; // make sure to zero-initialize this. The codec seems to check the first 3 pointers/sizes per frame, regardless of the value of u4_num_bufs + ivd_set_display_frame_op_t s_set_display_frame_op{ 0 }; + + s_set_display_frame_ip.e_cmd = IVD_CMD_SET_DISPLAY_FRAME; + s_set_display_frame_ip.u4_size = sizeof(ivd_set_display_frame_ip_t); + s_set_display_frame_op.u4_size = sizeof(ivd_set_display_frame_op_t); + + cemu_assert_debug(s_ctl_op.u4_min_num_out_bufs == 2); + cemu_assert_debug(s_ctl_op.u4_min_out_buf_size[0] != 0 && s_ctl_op.u4_min_out_buf_size[1] != 0); + + s_set_display_frame_ip.num_disp_bufs = s_ctl_op.u4_num_disp_bufs; + + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + s_set_display_frame_ip.s_disp_buffer[i].u4_num_bufs = 2; + s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[0] = s_ctl_op.u4_min_out_buf_size[0]; + s_set_display_frame_ip.s_disp_buffer[i].u4_min_out_buf_size[1] = s_ctl_op.u4_min_out_buf_size[1]; + s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[0] = m_displayBuf[i].data() + 0; + s_set_display_frame_ip.s_disp_buffer[i].pu1_bufs[1] = m_displayBuf[i].data() + s_ctl_op.u4_min_out_buf_size[0]; + } + + status = ih264d_api_function(m_codecCtx, &s_set_display_frame_ip, &s_set_display_frame_op); + cemu_assert(!status); + + + // mark all as released (available) + for (uint32 i = 0; i < s_ctl_op.u4_num_disp_bufs; i++) + { + ivd_rel_display_frame_ip_t s_video_rel_disp_ip{ 0 }; + ivd_rel_display_frame_op_t s_video_rel_disp_op{ 0 }; + + s_video_rel_disp_ip.e_cmd = IVD_CMD_REL_DISPLAY_FRAME; + s_video_rel_disp_ip.u4_size = sizeof(ivd_rel_display_frame_ip_t); + s_video_rel_disp_op.u4_size = sizeof(ivd_rel_display_frame_op_t); + s_video_rel_disp_ip.u4_disp_buf_id = i; + + status = ih264d_api_function(m_codecCtx, &s_video_rel_disp_ip, &s_video_rel_disp_op); + cemu_assert(!status); + } + } + + void ResetDecoder() + { + ivd_ctl_reset_ip_t s_ctl_ip; + ivd_ctl_reset_op_t s_ctl_op; + + s_ctl_ip.e_cmd = IVD_CMD_VIDEO_CTL; + s_ctl_ip.e_sub_cmd = IVD_CMD_CTL_RESET; + s_ctl_ip.u4_size = sizeof(ivd_ctl_reset_ip_t); + s_ctl_op.u4_size = sizeof(ivd_ctl_reset_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, (void*)&s_ctl_ip, (void*)&s_ctl_op); + cemu_assert_debug(status == 0); + } + + void UpdateParameters(bool headerDecodeOnly) + { + ih264d_ctl_set_config_ip_t s_h264d_ctl_ip{ 0 }; + ih264d_ctl_set_config_op_t s_h264d_ctl_op{ 0 }; + ivd_ctl_set_config_ip_t* ps_ctl_ip = &s_h264d_ctl_ip.s_ivd_ctl_set_config_ip_t; + ivd_ctl_set_config_op_t* ps_ctl_op = &s_h264d_ctl_op.s_ivd_ctl_set_config_op_t; + + ps_ctl_ip->u4_disp_wd = 0; + ps_ctl_ip->e_frm_skip_mode = IVD_SKIP_NONE; + ps_ctl_ip->e_frm_out_mode = m_isBufferedMode ? IVD_DISPLAY_FRAME_OUT : IVD_DECODE_FRAME_OUT; + ps_ctl_ip->e_vid_dec_mode = headerDecodeOnly ? IVD_DECODE_HEADER : IVD_DECODE_FRAME; + ps_ctl_ip->e_cmd = IVD_CMD_VIDEO_CTL; + ps_ctl_ip->e_sub_cmd = IVD_CMD_CTL_SETPARAMS; + ps_ctl_ip->u4_size = sizeof(ih264d_ctl_set_config_ip_t); + ps_ctl_op->u4_size = sizeof(ih264d_ctl_set_config_op_t); + + WORD32 status = ih264d_api_function(m_codecCtx, &s_h264d_ctl_ip, &s_h264d_ctl_op); + cemu_assert(status == 0); + } + + private: + void DecoderThread() + { + while(!m_threadShouldExit) + { + m_decodeSem.decrementWithWait(); + std::unique_lock _l(m_decodeQueueMtx); + if (m_decodeQueue.empty()) + continue; + uint32 decodeIndex = m_decodeQueue.front(); + m_decodeQueue.erase(m_decodeQueue.begin()); + _l.unlock(); + if(decodeIndex == CMD_FLUSH) + { + Flush(); + _l.lock(); + cemu_assert_debug(m_decodeQueue.empty()); // after flushing the queue should be empty since the sender is waiting for the flush to complete + _l.unlock(); + coreinit::OSSignalEvent(m_flushEvt); + } + else + { + auto& decodedSlice = m_decodedSliceArray[decodeIndex]; + Decode(decodedSlice); + } + } + } + + iv_obj_t* m_codecCtx{nullptr}; + bool m_hasBufferSizeInfo{ false }; + bool m_isBufferedMode{ false }; + uint32 m_numDecodedFrames{0}; + std::vector> m_displayBuf; + + std::thread m_decoderThread; + std::atomic_bool m_threadShouldExit{false}; + }; + + H264DecoderBackend* CreateAVCDecoder() + { + return new H264AVCDecoder(); + } +}; diff --git a/src/Cafe/OS/libs/h264_avc/H264DecInternal.h b/src/Cafe/OS/libs/h264_avc/H264DecInternal.h new file mode 100644 index 000000000..498cccfa8 --- /dev/null +++ b/src/Cafe/OS/libs/h264_avc/H264DecInternal.h @@ -0,0 +1,139 @@ +#pragma once + +#include "util/helpers/Semaphore.h" +#include "Cafe/OS/libs/coreinit/coreinit_Thread.h" +#include "Cafe/OS/libs/coreinit/coreinit_SysHeap.h" + +#include "Cafe/OS/libs/h264_avc/parser/H264Parser.h" + +namespace H264 +{ + class H264DecoderBackend + { + protected: + struct DataToDecode + { + uint8* m_data; + uint32 m_length; + std::vector m_buffer; + }; + + static constexpr uint32 CMD_FLUSH = 0xFFFFFFFF; + + public: + struct DecodeResult + { + bool isDecoded{false}; + bool hasFrame{false}; // set to true if a full frame was successfully decoded + double timestamp{}; + void* imageOutput{nullptr}; + sint32 frameWidth{0}; + sint32 frameHeight{0}; + uint32 bytesPerRow{0}; + bool cropEnable{false}; + sint32 cropTop{0}; + sint32 cropBottom{0}; + sint32 cropLeft{0}; + sint32 cropRight{0}; + }; + + struct DecodedSlice + { + bool isUsed{false}; + DecodeResult result; + DataToDecode dataToDecode; + }; + + H264DecoderBackend() + { + m_displayQueueEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4); + coreinit::OSInitEvent(m_displayQueueEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); + m_flushEvt = (coreinit::OSEvent*)coreinit::OSAllocFromSystem(sizeof(coreinit::OSEvent), 4); + coreinit::OSInitEvent(m_flushEvt, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); + }; + + virtual ~H264DecoderBackend() + { + coreinit::OSFreeToSystem(m_displayQueueEvt); + coreinit::OSFreeToSystem(m_flushEvt); + }; + + virtual void Init(bool isBufferedMode) = 0; + virtual void Destroy() = 0; + + void QueueForDecode(uint8* data, uint32 length, double timestamp, void* imagePtr) + { + std::unique_lock _l(m_decodeQueueMtx); + + DecodedSlice& ds = GetFreeDecodedSliceEntry(); + + ds.dataToDecode.m_buffer.assign(data, data + length); + ds.dataToDecode.m_data = ds.dataToDecode.m_buffer.data(); + ds.dataToDecode.m_length = length; + + ds.result.isDecoded = false; + ds.result.imageOutput = imagePtr; + ds.result.timestamp = timestamp; + + m_decodeQueue.push_back(std::distance(m_decodedSliceArray.data(), &ds)); + m_decodeSem.increment(); + } + + void QueueFlush() + { + std::unique_lock _l(m_decodeQueueMtx); + m_decodeQueue.push_back(CMD_FLUSH); + m_decodeSem.increment(); + } + + bool GetFrameOutputIfReady(DecodeResult& result) + { + std::unique_lock _l(m_decodeQueueMtx); + if(m_displayQueue.empty()) + return false; + uint32 sliceIndex = m_displayQueue.front(); + DecodedSlice& ds = m_decodedSliceArray[sliceIndex]; + cemu_assert_debug(ds.result.isDecoded); + std::swap(result, ds.result); + ds.isUsed = false; + m_displayQueue.erase(m_displayQueue.begin()); + return true; + } + + coreinit::OSEvent& GetFrameOutputEvent() + { + return *m_displayQueueEvt; + } + + coreinit::OSEvent& GetFlushEvent() + { + return *m_flushEvt; + } + + protected: + DecodedSlice& GetFreeDecodedSliceEntry() + { + for (auto& slice : m_decodedSliceArray) + { + if (!slice.isUsed) + { + slice.isUsed = true; + return slice; + } + } + cemu_assert_suspicious(); + return m_decodedSliceArray[0]; + } + + std::mutex m_decodeQueueMtx; + std::vector m_decodeQueue; // indices into m_decodedSliceArray, in order of decode input + CounterSemaphore m_decodeSem; + std::vector m_displayQueue; // indices into m_decodedSliceArray, in order of frame display output + coreinit::OSEvent* m_displayQueueEvt; // signalled when a new frame is ready for display + coreinit::OSEvent* m_flushEvt; // signalled after flush operation finished and all queued slices are decoded + + // frame output queue + std::mutex m_frameOutputMtx; + std::array m_decodedSliceArray; + }; +} \ No newline at end of file diff --git a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp index d77e551fa..36f70f814 100644 --- a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp +++ b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.cpp @@ -319,6 +319,17 @@ bool parseNAL_pic_parameter_set_rbsp(h264ParserState_t* h264ParserState, h264Par return true; } +bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps) +{ + h264ParserState_t parserState; + RBSPInputBitstream nalStream(data, length); + bool r = parseNAL_seq_parameter_set_rbsp(&parserState, nullptr, nalStream); + if(!r || !parserState.hasSPS) + return false; + sps = parserState.sps; + return true; +} + void parseNAL_ref_pic_list_modification(const h264State_seq_parameter_set_t& sps, const h264State_pic_parameter_set_t& pps, RBSPInputBitstream& nalStream, nal_slice_header_t* sliceHeader) { if (!sliceHeader->slice_type.isSliceTypeI() && !sliceHeader->slice_type.isSliceTypeSI()) @@ -688,9 +699,8 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se else if (sps.pic_order_cnt_type == 2) { // display order matches decode order - uint32 prevFrameNum = h264ParserState->picture_order.prevFrameNum; - ; + uint32 FrameNumOffset; if (sliceHeader->IdrPicFlag) { @@ -706,9 +716,6 @@ void _calculateFrameOrder(h264ParserState_t* h264ParserState, const h264State_se FrameNumOffset = prevFrameNumOffset + sps.getMaxFrameNum(); else FrameNumOffset = prevFrameNumOffset; - - - } uint32 tempPicOrderCnt; diff --git a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h index ee32ca8bb..6f2b3cf6d 100644 --- a/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h +++ b/src/Cafe/OS/libs/h264_avc/parser/H264Parser.h @@ -513,6 +513,8 @@ typedef struct void h264Parse(h264ParserState_t* h264ParserState, h264ParserOutput_t* output, uint8* data, uint32 length, bool parseSlices = true); sint32 h264GetUnitLength(h264ParserState_t* h264ParserState, uint8* data, uint32 length); +bool h264Parser_ParseSPS(uint8* data, uint32 length, h264State_seq_parameter_set_t& sps); + void h264Parser_getScalingMatrix4x4(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix4x4); void h264Parser_getScalingMatrix8x8(h264State_seq_parameter_set_t* sps, h264State_pic_parameter_set_t* pps, nal_slice_header_t* sliceHeader, sint32 index, uint8* matrix8x8); From f6e75fa4db9e43717fcb96fcc67e00a37bcc6813 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:40:56 +0200 Subject: [PATCH 2/3] h264: Fix race condition --- src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp index 3104b01d8..cba8fea26 100644 --- a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp +++ b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp @@ -54,8 +54,8 @@ namespace H264 ~H264AVCDecoder() { - m_decodeSem.increment(); m_threadShouldExit = true; + m_decodeSem.increment(); if (m_decoderThread.joinable()) m_decoderThread.join(); } From a6e9e5b4feb9d6bc783bd98b8fc79e844c90ae8d Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Thu, 18 Jul 2024 21:41:19 +0200 Subject: [PATCH 3/3] h264: Mark frame as missing on decode error --- src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp index cba8fea26..228f65a55 100644 --- a/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp +++ b/src/Cafe/OS/libs/h264_avc/H264DecBackendAVC.cpp @@ -185,7 +185,7 @@ namespace H264 else if (status != 0) { cemuLog_log(LogType::Force, "H264: Failed to decode frame (error 0x{:08x})", status); - //decodeResult.frameReady = false; + decodedSlice.result.hasFrame = false; cemu_assert_unimplemented(); return; }