new PartBB test
diegolix29 committed Jan 3, 2025
1 parent 880262a commit fd707ce
Showing 4 changed files with 98 additions and 165 deletions.
222 changes: 70 additions & 152 deletions src/video_core/buffer_cache/buffer_cache.cpp
@@ -35,28 +35,19 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
// Ensure the first slot is used for the null buffer
const auto null_id =
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1);
slot_buffer_mutex_map[null_id] = std::make_unique<std::mutex>();
ASSERT(null_id.index == 0);
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");

const vk::BufferViewCreateInfo null_view_ci{
.buffer = null_buffer, // Buffer handle
.format = vk::Format::eR8Unorm, // Format of the buffer view
.offset = 0, // Start offset in the buffer
.range = VK_WHOLE_SIZE // Size of the view (entire buffer)
const vk::BufferViewCreateInfo null_view_ci = {
.buffer = null_buffer,
.format = vk::Format::eR8Unorm,
.offset = 0,
.range = VK_WHOLE_SIZE,
};

const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);

// Check if the buffer view creation succeeded
if (null_view_result != vk::Result::eSuccess) {
throw std::runtime_error("Failed to create null buffer view.");
}

ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
null_buffer_view = null_view;

// Set an object name for debugging purposes
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
}

@@ -113,13 +104,15 @@ bool BufferCache::BindVertexBuffers(
const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;

// Ensure proper cleanup of dynamic state
SCOPE_EXIT {
if (instance.IsVertexInputDynamicState()) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setVertexInputEXT(bindings, attributes);
} else if (bindings.empty()) {
// bindVertexBuffers2EXT must be called at least once in the current command
// buffer with non-null strides, without a non-dynamic-stride pipeline bound in
// between. Thus, even when nothing is bound, we still need to make a dummy call,
// and non-null strides in turn require a count greater than 0.
const auto cmdbuf = scheduler.CommandBuffer();
const std::array null_buffers = {GetBuffer(NULL_BUFFER_ID).buffer.buffer};
constexpr std::array null_offsets = {static_cast<vk::DeviceSize>(0)};
@@ -128,7 +121,7 @@ bool BufferCache::BindVertexBuffers(
};
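
Note: the dummy-call requirement in the comment above comes from VK_EXT_extended_dynamic_state. A command buffer that relies on dynamic vertex-input state must record at least one bindVertexBuffers2EXT with non-null strides, and non-null strides require a binding count greater than zero. A minimal sketch of such a call, assuming a vulkan.hpp command buffer and the 1-byte null buffer created in the constructor:

    // Sketch only: record one non-null-stride bind so later dynamic-stride
    // draws are valid even when no real vertex buffer is bound.
    const std::array null_buffers = {null_buffer};              // vk::Buffer
    constexpr std::array<vk::DeviceSize, 1> null_offsets = {0};
    constexpr std::array<vk::DeviceSize, 1> null_sizes = {1};   // the whole 1-byte buffer
    constexpr std::array<vk::DeviceSize, 1> null_strides = {1}; // must be non-null
    cmdbuf.bindVertexBuffers2EXT(0, null_buffers, null_offsets, null_sizes, null_strides);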

if (!fetch_shader || fetch_shader->attributes.empty()) {
return false; // No attributes to bind
return false;
}

std::array<vk::Buffer, NumVertexBuffers> host_buffers;
@@ -257,30 +250,29 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
memcpy(std::bit_cast<void*>(address), value, num_bytes);
return;
}

scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
const Buffer* buffer = [&] {
if (is_gds) {
return &gds_buffer;
}
const BufferId buffer_id = FindOrCreateBuffer(address, num_bytes);
const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id];
}();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
@@ -291,78 +283,66 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
const vk::BufferMemoryBarrier2 buf_barrier_after = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier_after,
.pBufferMemoryBarriers = &post_barrier,
});
}
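
The InlineData path above brackets a single updateBuffer with the same conservative barrier on both sides. As a standalone sketch (assuming VK_KHR_synchronization2, with placeholder names for the buffer handle, region, and data; not the emulator's exact code):

    // Sketch: reuse one coarse read+write barrier before and after an inline
    // buffer update. Note vkCmdUpdateBuffer caps the write at 65536 bytes and
    // requires a size that is a multiple of 4; larger writes need a staging copy.
    const vk::BufferMemoryBarrier2 barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
        .buffer = handle,
        .offset = region_offset,
        .size = region_size,
    };
    const vk::DependencyInfo dep{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &barrier,
    };
    cmdbuf.pipelineBarrier2(dep);                                  // flush prior access
    cmdbuf.updateBuffer(handle, region_offset, region_size, data); // inline write
    cmdbuf.pipelineBarrier2(dep);                                  // publish the write

The eAllCommands plus eMemoryRead|eMemoryWrite masks are deliberately coarse: always correct, at the cost of serializing more than a precise eTransfer barrier would.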

void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool is_dst_gds,
bool is_src_gds) {
// Check if the destination region is valid or registered.
if (!is_dst_gds && !IsRegionRegistered(dst, num_bytes)) {
if (is_src_gds || IsRegionRegistered(src, num_bytes)) {
LOG_CRITICAL(
Render_Vulkan,
"Readback or operations on unregistered destination regions are unsupported: "
"dst={}, src={}, num_bytes={}",
dst, src, num_bytes);
LOG_CRITICAL(Render_Vulkan, "Readback is not implemented for unregistered regions");
return;
}
// Perform direct memory copy for unregistered regions.
memcpy(reinterpret_cast<void*>(dst), reinterpret_cast<void*>(src), num_bytes);
return;
}

// Check if the source region is valid or registered.
if (!is_dst_gds && !IsRegionRegistered(dst, num_bytes)) {
if (!is_src_gds && !IsRegionRegistered(src, num_bytes)) {
// Direct copy for completely unregistered regions
memcpy(reinterpret_cast<void*>(dst), reinterpret_cast<void*>(src), num_bytes);
return;
}
if (!is_src_gds && !IsRegionRegistered(src, num_bytes)) {
// Inline data for unregistered source regions.
InlineData(dst, reinterpret_cast<void*>(src), num_bytes, is_dst_gds);
return;
}

// Retrieve source and destination buffers.
auto get_buffer = [&](VAddr addr, bool is_gds) -> const Buffer& {
return is_gds ? gds_buffer : slot_buffers[FindOrCreateBuffer(addr, num_bytes)];
};
auto& src_buffer = get_buffer(src, is_src_gds);
auto& dst_buffer = get_buffer(dst, is_dst_gds);

auto& src_buffer = [&]() -> const Buffer& {
if (is_src_gds) {
return gds_buffer; // Use the GDS buffer for source.
}
const BufferId buffer_id = FindBuffer(src, num_bytes);
return slot_buffers[buffer_id];
}();
auto& dst_buffer = [&]() -> const Buffer& {
if (is_dst_gds) {
return gds_buffer; // Use the GDS buffer for destination.
}
const BufferId buffer_id = FindBuffer(dst, num_bytes);
return slot_buffers[buffer_id];
}();
// Define Vulkan buffer copy region.
vk::BufferCopy region{
.srcOffset = src_buffer.Offset(src),
.dstOffset = dst_buffer.Offset(dst),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
const vk::BufferMemoryBarrier2 buf_barriers[2] = {
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = dst_buffer.Handle(),
.offset = dst_buffer.Offset(dst),
.size = num_bytes,
},
{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = src_buffer.Handle(),
.offset = src_buffer.Offset(src),
.size = num_bytes,
@@ -373,40 +353,17 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool is_dst_gd
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 2,
.pBufferMemoryBarriers = buf_barriers_before,
.pBufferMemoryBarriers = buf_barriers,
});
cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);

const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
.buffer = dst_buffer.Handle(),
.offset = dst_buffer.Offset(dst),
.size = num_bytes,
},
{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.buffer = src_buffer.Handle(),
.offset = src_buffer.Offset(src),
.size = num_bytes,
},
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 2,
.pBufferMemoryBarriers = buf_barriers_after,
.pBufferMemoryBarriers = buf_barriers,
});
}
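
Because both barriers now use eAllCommands with eMemoryRead|eMemoryWrite in each direction, the two-element array is symmetric and can be submitted unchanged before and after the copy, which is what allowed dropping the separate buf_barriers_before and buf_barriers_after arrays. Condensed, with already-resolved handles as placeholders:

    // Sketch: one symmetric barrier pair guards both sides of the copy.
    const vk::DependencyInfo dep{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 2,
        .pBufferMemoryBarriers = buf_barriers, // dst region + src region
    };
    cmdbuf.pipelineBarrier2(dep);
    cmdbuf.copyBuffer(src_handle, dst_handle, region);
    cmdbuf.pipelineBarrier2(dep);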

std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
ASSERT(data.size_bytes() <= StreamThreshold);
const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
@@ -425,30 +382,26 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
return {&stream_buffer, offset};
}

Buffer* buffer = nullptr;
{
if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
buffer_id = FindOrCreateBuffer(device_addr, size);
}
if (is_written) {
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
gpu_modified_ranges.Add(device_addr, size);
}
Buffer& buffer = slot_buffers[buffer_id];
std::mutex& buffer_mutex = *slot_buffer_mutex_map[buffer_id];
SynchronizeBuffer(buffer, buffer_mutex, device_addr, size, is_texel_buffer);
return {&buffer, buffer.Offset(device_addr)};
if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
buffer_id = FindBuffer(device_addr, size);
}
Buffer& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
if (is_written) {
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
gpu_modified_ranges.Add(device_addr, size);
}
return {&buffer, buffer.Offset(device_addr)};
}

std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
// Check if any buffer contains the full requested range.
const u64 page = gpu_addr >> CACHING_PAGEBITS;
const BufferId buffer_id = page_table[page];
if (buffer_id) {
Buffer& buffer = slot_buffers[buffer_id];
std::mutex& buffer_mutex = *slot_buffer_mutex_map[buffer_id];
if (buffer.IsInBounds(gpu_addr, size)) {
SynchronizeBuffer(buffer, buffer_mutex, gpu_addr, size, false);
SynchronizeBuffer(buffer, gpu_addr, size, false);
return {&buffer, buffer.Offset(gpu_addr)};
}
}
@@ -492,7 +445,7 @@ bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
return memory_tracker.IsRegionGpuModified(addr, size);
}

BufferId BufferCache::FindOrCreateBuffer(VAddr device_addr, u32 size) {
BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
if (device_addr == 0) {
return NULL_BUFFER_ID;
}
@@ -508,27 +461,6 @@ BufferId BufferCache::FindOrCreateBuffer(VAddr device_addr, u32 size) {
return CreateBuffer(device_addr, size);
}
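
For reference, the lookup FindBuffer performs can be restated in isolation. A simplified, hypothetical sketch (the hunk above elides the in-bounds check, and the real function folds overlap handling into CreateBuffer):

    // Sketch: resolve an address to a cached buffer via the page table.
    BufferId Lookup(VAddr device_addr, u32 size) {
        if (device_addr == 0) {
            return NULL_BUFFER_ID;                  // slot 0, the null buffer
        }
        const u64 page = device_addr >> CACHING_PAGEBITS;
        const BufferId id = page_table[page];
        if (id && slot_buffers[id].IsInBounds(device_addr, size)) {
            return id;                              // an existing buffer covers the range
        }
        return CreateBuffer(device_addr, size);     // otherwise create (and merge overlaps)
    }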

bool BufferCache::TryFindBuffer(VAddr device_addr, u32 size, BufferId& value) {
bool found = false;
value = NULL_BUFFER_ID;

if (device_addr != 0) {
const u64 page = device_addr >> CACHING_PAGEBITS;
auto* buffer_id = page_table.find(page);

if (buffer_id && *buffer_id) {
const Buffer& buffer = slot_buffers[*buffer_id];

if (buffer.IsInBounds(device_addr, size)) {
value = *buffer_id;
found = true;
}
}
}

return found;
}

BufferCache::OverlapResult BufferCache::ResolveOverlaps(VAddr device_addr, u32 wanted_size) {
static constexpr int STREAM_LEAP_THRESHOLD = 16;
boost::container::small_vector<BufferId, 16> overlap_ids;
@@ -735,38 +667,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
return;
}
vk::Buffer src_buffer = staging_buffer.Handle();

// Use staging buffer for synchronization
constexpr u64 ChunkSize = 256 * 1024; // 256 KB chunks
if (total_size_bytes < StagingBufferSize) {
const auto [staging, offset] = staging_buffer.Map(total_size_bytes);
for (auto& copy : copies) {
std::memcpy(staging + copy.srcOffset,
std::bit_cast<const u8*>(buffer.CpuAddr() + copy.dstOffset), copy.size);
u8* const src_pointer = staging + copy.srcOffset;
const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
// Apply the staging offset
copy.srcOffset += offset;
}
staging_buffer.Commit();
} else {
// Use temporary buffer for large transfers
// For large one-time transfers, use a temporary host buffer.
// RenderDoc can lag quite a bit if the stream buffer is too large.
Buffer temp_buffer{instance,
scheduler,
MemoryUsage::Upload,
0,
vk::BufferUsageFlagBits::eTransferSrc,
total_size_bytes};
src_buffer = temp_buffer.Handle();
auto* staging = temp_buffer.mapped_data.data();

// Process large data in chunks
for (size_t i = 0; i < total_size_bytes; i += ChunkSize) {
const u64 chunk_size = std::min(ChunkSize, total_size_bytes - i);
std::memcpy(staging + i, std::bit_cast<const u8*>(buffer.CpuAddr() + i), chunk_size);
u8* const staging = temp_buffer.mapped_data.data();
for (auto& copy : copies) {
u8* const src_pointer = staging + copy.srcOffset;
const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset;
std::memcpy(src_pointer, std::bit_cast<const u8*>(device_addr), copy.size);
}
scheduler.DeferOperation([temp_buffer = std::move(temp_buffer)] {});
scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable {});
}
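
Both branches fill the upload memory with the same loop shape. Note the vk::BufferCopy field roles here: dstOffset is the offset inside the cached GPU buffer (so CpuAddr() + dstOffset is the guest-memory source), while srcOffset addresses the upload memory the GPU will read from. Condensed:

    // Sketch: copy each dirty range from guest memory into the mapped upload
    // pointer; upload_base stands in for the staging map or temp_buffer data.
    for (vk::BufferCopy& copy : copies) {
        u8* const upload_dst = upload_base + copy.srcOffset;
        const VAddr guest_src = buffer.CpuAddr() + copy.dstOffset;
        std::memcpy(upload_dst, reinterpret_cast<const u8*>(guest_src), copy.size);
    }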
scheduler.EndRendering();

// Record copy commands
const auto cmdbuf = scheduler.CommandBuffer();
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
@@ -889,19 +818,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
void BufferCache::DeleteBuffer(BufferId buffer_id) {
Buffer& buffer = slot_buffers[buffer_id];
Unregister(buffer_id);
scheduler.DeferOperation([this, buffer_id] {
auto mutex_iter = slot_buffer_mutex_map.find(buffer_id);
bool found_mutex = mutex_iter != slot_buffer_mutex_map.end();
{
auto& buffer_mutex = found_mutex ? *mutex_iter->second : mutex;
std::scoped_lock lk{buffer_mutex};
slot_buffers.erase(buffer_id);
}
if (found_mutex) {
slot_buffer_mutex_map.erase(mutex_iter);
}
});
scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); });
buffer.is_deleted = true;
}
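
Both DeleteBuffer and the temporary upload buffer in SynchronizeBuffer lean on the same idiom: move the resource into a deferred callback so it outlives command recording and is destroyed only once the scheduler retires the GPU work. A generic sketch of the idea, with a hypothetical helper name:

    // Sketch: deferred destruction by move-capture; `res` dies when the
    // scheduler runs the callback after the GPU has finished the tick.
    template <typename Resource>
    void DeferDestroy(Vulkan::Scheduler& scheduler, Resource&& res) {
        scheduler.DeferOperation([r = std::forward<Resource>(res)]() mutable {});
    }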

} // namespace VideoCore