diff --git a/rpcs3/Emu/RSX/Common/surface_cache_dma.hpp b/rpcs3/Emu/RSX/Common/surface_cache_dma.hpp
new file mode 100644
index 000000000000..da6535e86b54
--- /dev/null
+++ b/rpcs3/Emu/RSX/Common/surface_cache_dma.hpp
@@ -0,0 +1,146 @@
+#pragma once
+
+#include <array>
+#include <tuple>
+#include <vector>
+#include "Utilities/address_range.h"
+
+namespace rsx
+{
+	/**
+	 * Tracks the buffer objects backing guest memory in units of BlockSize bytes.
+	 * Traits supplies the buffer object types, Traits::get and Traits::merge_bo_list.
+	 */
+	template <typename Traits, int BlockSize>
+	class surface_cache_dma
+	{
+	protected:
+		// Index of the block that contains 'address'
+		static inline u32 block_for(u32 address)
+		{
+			return address / BlockSize;
+		}
+
+		// First address covered by block 'block_id'
+		static inline u32 block_address(u32 block_id)
+		{
+			return block_id * BlockSize;
+		}
+
+		using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
+		using buffer_object_type = typename Traits::buffer_object_type;
+
+		// Bookkeeping for a single block
+		struct memory_buffer_entry_t
+		{
+			u32 id; // Index of this block in m_buffer_list, assigned by the constructor
+			buffer_object_storage_type bo;
+			u64 memory_tag = 0; // Stamp written by touch()
+			u32 base_address = 0; // Start address of the layer this block was merged into
+
+			inline buffer_object_type get() { return Traits::get(bo); }
+
+			// NOTE: An entry only tests true once with_range() gave it a non-zero base_address
+			inline operator bool () const { return base_address != 0; }
+
+			inline void release() { bo.release(); }
+			inline void acquire(buffer_object_type b) { bo = b; }
+		};
+
+		// One entry for every BlockSize-sized block addressable with a u32
+		using buffer_block_array = typename std::array<memory_buffer_entry_t, 0x100000000ull / BlockSize>;
+		buffer_block_array m_buffer_list;
+
+	public:
+		surface_cache_dma()
+		{
+			for (usz i = 0; i < m_buffer_list.size(); ++i)
+			{
+				m_buffer_list[i].id = static_cast<u32>(i);
+			}
+		}
+
+		/**
+		 * Makes sure the blocks covering 'range' are backed by a single buffer object.
+		 * Buffer objects already present are passed to Traits::merge_bo_list and released from their blocks.
+		 * Returns *this so that calls can be chained.
+		 */
+		surface_cache_dma& with_range(typename Traits::command_list_type cmd, const utils::address_range& range)
+		{
+			// Prepare underlying memory so that the range specified is provisioned and contiguous
+			// 1. Check if we have a pre-existing bo layer
+			// NOTE: Not a const reference, memory_buffer_entry_t::get() is not const-qualified
+			auto& this_entry = m_buffer_list[block_for(range.start)];
+			if (this_entry)
+			{
+				// Blocks released by an earlier merge keep their base_address, so the object has to be checked too
+				if (const auto bo = this_entry.get())
+				{
+					// The base address is stored on the block entry, not on the buffer object
+					const auto buffer_range = utils::address_range::start_length(this_entry.base_address, ::size32(*bo));
+
+					if (range.inside(buffer_range))
+					{
+						// All is well
+						return *this;
+					}
+				}
+			}
+
+			// Data does not exist or is not contiguous. Merge the layer
+			std::vector<buffer_object_type> bo_list;
+			const auto start_address = this_entry ? this_entry.base_address : block_address(this_entry.id);
+
+			for (u32 address = start_address; address <= range.end;)
+			{
+				auto& bo_storage = m_buffer_list[block_for(address)];
+				bo_storage.base_address = start_address;
+
+				if (auto bo = bo_storage.get())
+				{
+					bo_list.push_back(bo);
+					bo_storage.release();
+					address += ::size32(*bo);
+					continue;
+				}
+
+				// A null entry stands for a block that has no buffer object yet
+				bo_list.push_back(nullptr);
+				address += BlockSize;
+			}
+
+			// BlockSize cannot be deduced from the arguments, so it is passed explicitly
+			auto unified = Traits::template merge_bo_list<BlockSize>(cmd, bo_list);
+			ensure(unified);
+
+			m_buffer_list[block_for(start_address)].acquire(unified);
+			return *this;
+		}
+
+		// Expands 'range' so that it starts and ends on block boundaries
+		utils::address_range to_block_range(const utils::address_range& range)
+		{
+			// range.end is treated as inclusive (see touch), so the result ends on the last byte of its block
+			const u32 start = block_address(block_for(range.start));
+			const u32 end = block_address(block_for(range.end)) + (BlockSize - 1);
+			return utils::address_range::start_end(start, end);
+		}
+
+		// Returns { buffer object, offset of 'address' from the block's base address, memory tag } for the block containing 'address'
+		std::tuple<buffer_object_type, u32, u64> get(u32 address)
+		{
+			auto& block = m_buffer_list[block_for(address)];
+			return { block.get(), address - block.base_address, block.memory_tag };
+		}
+
+		// Stamps every block touched by 'range' with the current shared tag
+		void touch(const utils::address_range& range)
+		{
+			const u64 stamp = rsx::get_shared_tag();
+			for (usz i = block_for(range.start); i <= block_for(range.end); i++)
+			{
+				m_buffer_list[i].memory_tag = stamp;
+			}
+		}
+	};
+}
diff --git a/rpcs3/Emu/RSX/Common/surface_cache_storage.hpp b/rpcs3/Emu/RSX/Common/surface_cache_storage.hpp
deleted file mode 100644
index 9176709c462e..000000000000
--- a/rpcs3/Emu/RSX/Common/surface_cache_storage.hpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#pragma once
-#include "ranged_map.hpp"
-
-namespace rsx
-{
-	template <typename Traits, int BlockSize>
-	class surface_cache_data_map : public ranged_map<typename Traits::surface_storage_type, BlockSize>
-	{
-#ifdef _MSC_VER
-		using super = ranged_map<typename Traits::surface_storage_type, BlockSize>;
-#else
-		using super = class ranged_map<typename Traits::surface_storage_type, BlockSize>;
-#endif
-		using metadata_t = typename super::block_metadata_t;
-
-		const metadata_t& find_head_block(u32 address)
-		{
-			auto& meta = super::m_metadata[address];
-			if (meta.head_block != umax)
-			{
-				return find_head_block(meta.head_block * BlockSize);
-			}
-
-			return meta;
-		}
-
-	public:
-		using buffer_object_storage_type = typename Traits::buffer_object_storage_type;
-		using buffer_object_type = typename Traits::buffer_object_type;
-
-		struct buffer_object_t
-		{
-			buffer_object_storage_type bo;
-			u64 memory_tag = 0;
-
-			inline buffer_object_type get()
-			{
-				return Traits::get(bo);
-			}
-
-			inline void release()
-			{
-				bo.release();
-			}
-
-			inline void acquire(buffer_object_type obj)
-			{
-				ensure(!get());
-				bo = obj;
-			}
-		};
-
-	protected:
-		using buffer_block_array = typename std::array<buffer_object_t, 0x100000000ull / BlockSize>;
-		buffer_block_array m_buffer_list;
-
-	public:
-		surface_cache_data_map()
-			: super::ranged_map()
-		{}
-
-		surface_cache_data_map& with_range(const utils::address_range& range)
-		{
-			// Prepare underlying memory so that the range specified is provisioned and contiguous
-			const auto& head_block = find_head_block(range.start);
-			const auto start_address = block_address(head_block.id);
-
-			const auto& current = m_buffer_list[head_block.id];
-			if (auto bo = current.get())
-			{
-				if (::size32(*bo) >= (range.end - start_address))
-				{
-					return *this;
-				}
-			}
-
-			// Data does not exist or is not contiguous. Merge the layer
-			std::vector<buffer_object_type> bo_list;
-			for (u32 address = start_address; address <= range.end;)
-			{
-				auto& bo_storage = m_buffer_list[super::block_for(address)];
-				if (auto bo = bo_storage.get())
-				{
-					bo_list.push_back(bo);
-					bo_storage.release();
-					address += ::size32(*bo);
-					continue;
-				}
-
-				bo_list.push_back(nullptr);
-				address += BlockSize;
-			}
-
-			auto unified = Traits::merge_bo_list(bo_list);
-			ensure(unified);
-
-			current.acquire(unified);
-			return *this;
-		}
-
-		void spill(const utils::address_range& range)
-		{
-			// Move VRAM to system RAM
-			const auto& meta = with_range(range).find_head_block(range.start);
-			auto& storage = m_buffer_list[meta.id];
-			Traits::spill_buffer(storage.bo);
-		}
-
-		void unspill(const utils::address_range& range)
-		{
-			// Move system RAM to VRAM
-			const auto& meta = with_range(range).find_head_block(range.start);
-			auto& storage = m_buffer_list[meta.id];
-			Traits::unspill_buffer(storage.bo);
-		}
-	};
-}
diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h
index 67d8d5cb9d00..43c4ed52ea4e 100644
--- a/rpcs3/Emu/RSX/Common/surface_store.h
+++ b/rpcs3/Emu/RSX/Common/surface_store.h
@@ -2,7 +2,8 @@
 
 #include "surface_utils.h"
 #include "simple_array.hpp"
-#include "surface_cache_storage.hpp"
+#include "ranged_map.hpp"
+#include "surface_cache_dma.hpp"
 #include "../gcm_enums.h"
 #include "../rsx_utils.h"
 #include <list>
@@ -45,7 +46,8 @@ namespace rsx
 		using surface_type = typename Traits::surface_type;
 		using command_list_type = typename Traits::command_list_type;
 		using surface_overlap_info = surface_overlap_info_t<surface_type>;
-		using surface_ranged_map = surface_cache_data_map<Traits, 0x400000>;
+		using surface_ranged_map = ranged_map<surface_storage_type, 0x400000>;
+		using surface_cache_dma_map = surface_cache_dma<Traits, 0x400000>;
 
 	protected:
 		surface_ranged_map m_render_targets_storage = {};
@@ -54,6 +56,8 @@ namespace rsx
 		rsx::address_range m_render_targets_memory_range;
 		rsx::address_range m_depth_stencil_memory_range;
 
+		surface_cache_dma_map m_dma_block;
+
 		bool m_invalidate_on_write = false;
 
 		rsx::surface_raster_type m_active_raster_type = rsx::surface_raster_type::linear;
@@ -856,6 +860,104 @@ namespace rsx
 				std::forward<Args>(extra_params)...);
 		}
 
+		/**
+		 * Collects the color and depth-stencil surfaces whose memory range overlaps 'range'.
+		 * Returns { color surfaces, depth-stencil surfaces }.
+		 */
+		std::tuple<std::vector<surface_type>, std::vector<surface_type>>
+		find_overlapping_set(const utils::address_range& range) const
+		{
+			std::vector<surface_type> color_result, depth_result;
+
+			if (m_render_targets_memory_range.valid() &&
+				range.overlaps(m_render_targets_memory_range))
+			{
+				for (auto it = m_render_targets_storage.begin_range(range); it != m_render_targets_storage.end(); ++it)
+				{
+					auto surface = Traits::get(it->second);
+					const auto surface_range = surface->get_memory_range();
+					if (!range.overlaps(surface_range))
+						continue;
+
+					color_result.push_back(surface);
+				}
+			}
+
+			if (m_depth_stencil_memory_range.valid() &&
+				range.overlaps(m_depth_stencil_memory_range))
+			{
+				for (auto it = m_depth_stencil_storage.begin_range(range); it != m_depth_stencil_storage.end(); ++it)
+				{
+					auto surface = Traits::get(it->second);
+					const auto surface_range = surface->get_memory_range();
+					if (!range.overlaps(surface_range))
+						continue;
+
+					depth_result.push_back(surface);
+				}
+			}
+
+			// Only the two surface lists are part of the declared return type
+			return { color_result, depth_result };
+		}
+
+		/**
+		 * Writes the surfaces overlapping 'range' into the DMA buffer object that covers it.
+		 * Surfaces are processed in ascending last_use_tag order; surfaces not newer than the buffer's tag are skipped.
+		 */
+		void write_to_dma_buffers(
+			command_list_type command_list,
+			const utils::address_range& range)
+		{
+			auto block_range = m_dma_block.to_block_range(range);
+			auto [color_data, depth_stencil_data] = find_overlapping_set(block_range);
+			auto [bo, offset, bo_timestamp] = m_dma_block
+				.with_range(command_list, block_range)
+				.get(block_range.start);
+
+			u64 src_offset, dst_offset, write_length;
+			auto block_length = block_range.length();
+
+			auto all_data = std::move(color_data);
+			all_data.insert(all_data.end(), depth_stencil_data.begin(), depth_stencil_data.end());
+
+			if (all_data.size() > 1)
+			{
+				std::sort(all_data.begin(), all_data.end(), [](const auto& a, const auto& b)
+				{
+					return a->last_use_tag < b->last_use_tag;
+				});
+			}
+
+			for (const auto& surface : all_data)
+			{
+				if (surface->last_use_tag <= bo_timestamp)
+				{
+					// The buffer was stamped at or after the last use of this surface
+					continue;
+				}
+
+				const auto this_range = surface->get_memory_range();
+				const auto max_length = this_range.length();
+				if (this_range.start < block_range.start)
+				{
+					src_offset = block_range.start - this_range.start;
+					dst_offset = 0;
+				}
+				else
+				{
+					src_offset = 0;
+					dst_offset = this_range.start - block_range.start;
+				}
+
+				// Clamp to what is left of the surface after src_offset and to what is left of the block after dst_offset
+				write_length = std::min<u64>(max_length - src_offset, block_length - dst_offset);
+				Traits::write_render_target_to_memory(command_list, bo, surface, dst_offset, src_offset, write_length);
+			}
+
+			m_dma_block.touch(block_range);
+		}
+
 	public:
 		/**
 		 * Update bound color and depth surface.
diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h
index 7cf3a3ef29d1..81d3074386b3 100644
--- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h
+++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h
@@ -359,7 +359,18 @@ struct gl_render_target_traits
 	}
 
 	static
-	gl::buffer* merge_bo_list(const std::vector<gl::buffer*>& /*list*/)
+	void write_render_target_to_memory(
+		gl::command_context&,
+		gl::buffer*,
+		gl::render_target*,
+		u64, u64, u64)
+	{
+		// TODO
+	}
+
+	template <int BlockSize>
+	static
+	gl::buffer* merge_bo_list(gl::command_context&, const std::vector<gl::buffer*>& /*list*/)
 	{
 		// TODO
 		return nullptr;
diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp
index 7344f9a7c4ae..73465c9e43c2 100644
--- a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp
+++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp
@@ -3,6 +3,15 @@
 
 namespace vk
 {
+	namespace surface_cache_utils
+	{
+		void dispose(vk::buffer* buf)
+		{
+			auto obj = vk::disposable_t::make(buf);
+			vk::get_resource_manager()->dispose(obj);
+		}
+	}
+
 	void surface_cache::destroy()
 	{
 		invalidate_all();
diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
index 7d08eea2538e..a867587e70bf 100644
--- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h
+++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h
@@ -16,6 +16,11 @@
 
 namespace vk
 {
+	namespace surface_cache_utils
+	{
+		void dispose(vk::buffer* buf);
+	}
+
 	void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
 	void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src);
 
@@ -463,10 +468,116 @@ namespace vk
 			// TODO
 		}
 
-		static vk::buffer* merge_bo_list(const std::vector<vk::buffer*>& /*list*/)
+		// Copies the contents of 'surface' into 'bo'.
+		// dst_offset_in_buffer: byte offset into 'bo' where the data is placed
+		// src_offset_in_buffer: byte offset into the surface data to start copying from
+		// max_copy_length: maximum number of bytes to write into 'bo'
+		static void write_render_target_to_memory(
+			vk::command_buffer& cmd,
+			vk::buffer* bo,
+			vk::render_target* surface,
+			u64 dst_offset_in_buffer,
+			u64 src_offset_in_buffer,
+			u64 max_copy_length)
 		{
-			// TODO
-			return nullptr;
+			surface->read_barrier(cmd);
+			vk::image* source = surface->get_surface(rsx::surface_access::transfer_read);
+			const bool is_scaled = surface->width() != surface->surface_width;
+			if (is_scaled)
+			{
+				// Resample into a scratch image sized to the surface dimensions
+				const areai src_rect = { 0, 0, source->width(), source->height() };
+				const areai dst_rect = { 0, 0, surface->get_surface_width(), surface->get_surface_height() };
+
+				auto scratch = vk::get_typeless_helper(source->format(), source->format_class(), dst_rect.x2, dst_rect.y2);
+				vk::copy_scaled_image(cmd, source, scratch, src_rect, dst_rect, 1, true, VK_FILTER_NEAREST);
+
+				source = scratch;
+			}
+
+			// Go through a scratch buffer when the image cannot be copied into 'bo' directly
+			auto dest = bo;
+			const auto transfer_size = surface->get_memory_range().length();
+			if (transfer_size > max_copy_length || src_offset_in_buffer || surface->is_depth_surface())
+			{
+				auto scratch = vk::get_scratch_buffer(cmd, transfer_size * 4);
+				dest = scratch;
+			}
+
+			VkBufferImageCopy region =
+			{
+				.bufferOffset = (dest == bo) ? dst_offset_in_buffer : 0,
+				.bufferRowLength = surface->rsx_pitch / surface->get_bpp(),
+				.bufferImageHeight = 0,
+				.imageSubresource = { source->aspect(), 0, 0, 1 },
+				.imageOffset = {},
+				.imageExtent = {
+					.width = source->width(),
+					.height = source->height(),
+					.depth = 1
+				}
+			};
+
+			vk::copy_image_to_buffer(cmd, source, dest, region);
+
+			// Make the bytes written by the image copy visible; they start at region.bufferOffset in 'dest'
+			vk::insert_buffer_memory_barrier(cmd,
+				dest->value, region.bufferOffset, transfer_size,
+				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+
+			if (dest != bo)
+			{
+				VkBufferCopy copy = { src_offset_in_buffer, dst_offset_in_buffer, max_copy_length };
+				vkCmdCopyBuffer(cmd, dest->value, bo->value, 1, &copy);
+
+				vk::insert_buffer_memory_barrier(cmd,
+					bo->value, dst_offset_in_buffer, max_copy_length,
+					VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+					VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
+			}
+		}
+
+		// Creates one buffer large enough for every entry in 'list' (null entries count as BlockSize bytes),
+		// records copies of the existing buffers into it and disposes of them. Returns the new buffer.
+		template <int BlockSize>
+		static vk::buffer* merge_bo_list(vk::command_buffer& cmd, std::vector<vk::buffer*>& list)
+		{
+			u32 required_bo_size = 0;
+			for (auto& bo : list)
+			{
+				required_bo_size += (bo ? bo->size() : BlockSize);
+			}
+
+			// Create dst
+			auto pdev = cmd.get_command_pool().owner;
+			auto dst = new vk::buffer(*pdev,
+				required_bo_size,
+				pdev->get_memory_mapping().device_local, 0,
+				VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+				0, VMM_ALLOCATION_POOL_SURFACE_CACHE);
+
+			// TODO: Initialize the buffer with system RAM contents
+
+			// Copy all the data over from the sub-blocks
+			u32 offset = 0;
+			for (auto& bo : list)
+			{
+				if (!bo)
+				{
+					offset += BlockSize;
+					continue;
+				}
+
+				VkBufferCopy copy = { 0, offset, ::size32(*bo) };
+				offset += ::size32(*bo);
+				vkCmdCopyBuffer(cmd, bo->value, dst->value, 1, &copy);
+
+				// Cleanup
+				vk::surface_cache_utils::dispose(bo);
+			}
+
+			return dst;
 		}
 
 		template <typename T>
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index e59953eaa089..f76e66184f03 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -519 +519 @@
-    <ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp" />
+    <ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index 25d16be05ff1..85dabd03c98e 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -2144,5 +2144,5 @@
       <Filter>Emu\GPU\RSX\Common</Filter>
     </ClInclude>
-    <ClInclude Include="Emu\RSX\Common\surface_cache_storage.hpp">
+    <ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp">
       <Filter>Emu\GPU\RSX\Common</Filter>
     </ClInclude>