From 9e0c2561f58eb2a207cc1fe4516d8ff7a253289b Mon Sep 17 00:00:00 2001 From: zhang_alex1 Date: Thu, 3 Aug 2023 00:19:37 +0800 Subject: [PATCH] [VP] add missing preferred SLM setting add missing preferred SLM setting on MTL --- media_common/agnostic/common/hw/mhw_render.h | 1 + .../Xe_HPG_Base/hw/mhw_render_hwcmd_xe_hpg.h | 38 ++++++++++++++++++- .../Xe_HPG_Base/hw/mhw_render_xe_hpg_impl.h | 11 +++++- .../renderhal/renderhal_xe_hpg_base.cpp | 1 + .../agnostic/common/hw/mhw_render_cmdpar.h | 1 + .../shared/packet/media_render_cmd_packet.cpp | 1 + .../shared/packet/media_render_cmd_packet.h | 1 + 7 files changed, 52 insertions(+), 2 deletions(-) diff --git a/media_common/agnostic/common/hw/mhw_render.h b/media_common/agnostic/common/hw/mhw_render.h index 4b3adf1b4c..91ff06b919 100644 --- a/media_common/agnostic/common/hw/mhw_render.h +++ b/media_common/agnostic/common/hw/mhw_render.h @@ -289,6 +289,7 @@ typedef struct _MHW_GPGPU_WALKER_PARAMS uint32_t IndirectDataLength; uint32_t IndirectDataStartAddress; uint32_t BindingTableID; + uint32_t ForcePreferredSLMZero; } MHW_GPGPU_WALKER_PARAMS, *PMHW_GPGPU_WALKER_PARAMS; typedef struct _MHW_MEDIA_OBJECT_PARAMS diff --git a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_hwcmd_xe_hpg.h b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_hwcmd_xe_hpg.h index 3686b2f259..f166513d99 100644 --- a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_hwcmd_xe_hpg.h +++ b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_hwcmd_xe_hpg.h @@ -1408,13 +1408,49 @@ namespace xe_hpg //!< DWORD 6_7 struct { - uint64_t Reserved192 : __CODEGEN_BITFIELD(0, 63); //!< Reserved + uint64_t PreferredSlmAllocationSizePerSubslice : __CODEGEN_BITFIELD(0, 3); //!< PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE (uint64_t so it shares one storage unit with Reserved192) + uint64_t Reserved192 : __CODEGEN_BITFIELD(4, 63); //!< Reserved }; uint64_t Value = 0; } DW6_7; //! \name Initializations + //! \brief SHARED_LOCAL_MEMORY_SIZE + //! \details + //! 
This field indicates how much Shared Local Memory the thread group + //! requires. + //! If the barriers are not enabled, HW will enable at least 1 barrier for + //! Mid thread preemption to work. + enum SHARED_LOCAL_MEMORY_SIZE + { + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES0K = 0, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES1K = 1, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES2K = 2, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES4K = 3, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES8K = 4, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES16K = 5, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES32K = 6, //!< No additional details + SHARED_LOCAL_MEMORY_SIZE_SLMENCODES64K = 7, //!< No additional details + }; + + //! \brief PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE + //! \details + //! For products where SLM and Subslice L1 cache share a common, + //! re-partitionable RAM, this field indicates the preferred SLM size per + //! Subslice for this dispatch. The SLM size programmed here should be >= + //! the per thread-group SLM size programmed in DW[5][20:16]. + enum PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE + { + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODESMAX = 0x0, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K = 0x8, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES16K = 0x9, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES32K = 0xa, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES64K = 0xb, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K = 0xc, //!< No additional details + PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES128K = 0xd, //!< No additional details + }; + //! 
\brief Explicit member initialization function INTERFACE_DESCRIPTOR_DATA_G12HP_CMD() { diff --git a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_xe_hpg_impl.h b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_xe_hpg_impl.h index e2b9fec5b4..5d4546f9cd 100644 --- a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_xe_hpg_impl.h +++ b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/hw/mhw_render_xe_hpg_impl.h @@ -201,7 +201,16 @@ class Impl : public render::Impl cmd.interface_descriptor_data.DW4.BindingTablePointer = MOS_ROUNDUP_SHIFT(params.dwBindingTableOffset, MHW_BINDING_TABLE_ID_SHIFT); cmd.interface_descriptor_data.DW5.NumberOfThreadsInGpgpuThreadGroup = params.dwNumberofThreadsInGPGPUGroup; cmd.interface_descriptor_data.DW5.SharedLocalMemorySize = params.dwSharedLocalMemorySize; - + if (params.dwSharedLocalMemorySize > 0) + { + cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K; + } + else // if (params.dwSharedLocalMemorySize == 0) + { + cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = params.forcePreferredSLMZero ? + mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODESMAX : + mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K; + } // when Barriers is not 0, the EU fusion will close. // Assigns barrier count. 
if (params.bBarrierEnable) diff --git a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/renderhal/renderhal_xe_hpg_base.cpp b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/renderhal/renderhal_xe_hpg_base.cpp index 72090afb9f..add74e6090 100644 --- a/media_softlet/agnostic/Xe_R/Xe_HPG_Base/renderhal/renderhal_xe_hpg_base.cpp +++ b/media_softlet/agnostic/Xe_R/Xe_HPG_Base/renderhal/renderhal_xe_hpg_base.cpp @@ -1138,6 +1138,7 @@ MHW_SETPAR_DECL_SRC(COMPUTE_WALKER, XRenderHal_Interface_Xe_Hpg_Base) params.dwNumberofThreadsInGPGPUGroup = m_interfaceDescriptorParams->dwNumberofThreadsInGPGPUGroup; params.dwSharedLocalMemorySize = m_interfaceDescriptorParams->dwSharedLocalMemorySize; params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress; + params.forcePreferredSLMZero = m_gpgpuWalkerParams->ForcePreferredSLMZero; if (m_gpgpuWalkerParams->ThreadDepth == 0) { diff --git a/media_softlet/agnostic/common/hw/mhw_render_cmdpar.h b/media_softlet/agnostic/common/hw/mhw_render_cmdpar.h index c54425626c..f5c5db0947 100644 --- a/media_softlet/agnostic/common/hw/mhw_render_cmdpar.h +++ b/media_softlet/agnostic/common/hw/mhw_render_cmdpar.h @@ -370,6 +370,7 @@ struct _MHW_PAR_T(COMPUTE_WALKER) bool bGlobalBarrierEnable = false; //! Enable Global Barrier (SKL+) uint32_t dwNumberofThreadsInGPGPUGroup = 0; //! Number of threads per group uint32_t dwSharedLocalMemorySize = 0; //! Size of SharedLocalMemory (SLM) + int32_t forcePreferredSLMZero = 0; //! force preferredSLM value as 0 int32_t iCrsThdConDataRdLn = 0; PMHW_STATE_HEAP pGeneralStateHeap = 0; //! General state heap in use MemoryBlock *memoryBlock = nullptr; //! 
Memory block associated with the state heap diff --git a/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.cpp b/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.cpp index adeefeb642..f1a0603a3e 100644 --- a/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.cpp +++ b/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.cpp @@ -919,6 +919,7 @@ MOS_STATUS RenderCmdPacket::PrepareComputeWalkerParams(KERNEL_WALKER_PARAMS para // Indirect Data Length is a multiple of 64 bytes (size of L3 cacheline). Bits [5:0] are zero. gpgpuWalker.IndirectDataLength = MOS_ALIGN_CEIL(params.iCurbeLength, 1 << MHW_COMPUTE_INDIRECT_SHIFT); gpgpuWalker.BindingTableID = params.iBindingTable; + gpgpuWalker.ForcePreferredSLMZero = params.forcePreferredSLMZero; return MOS_STATUS_SUCCESS; } diff --git a/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.h b/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.h index e8d6da703f..060ec64fed 100644 --- a/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.h +++ b/media_softlet/agnostic/common/shared/packet/media_render_cmd_packet.h @@ -84,6 +84,7 @@ typedef struct _KERNEL_WALKER_PARAMS bool bSyncFlag; bool isGroupStartInvolvedInGroupSize; // true if group start need be involved in the group size. bool calculateBlockXYByAlignedRect; // true if iBlocksX/iBlocksY is calculated by alignedRect in RenderCmdPacket instead of kernel object. + bool forcePreferredSLMZero; // true if preferredSLM needs to be forced to 0. }KERNEL_WALKER_PARAMS, * PKERNEL_WALKER_PARAMS; typedef struct _KERNEL_PACKET_RENDER_DATA