Skip to content

Commit

Permalink
MetalDevice: Avoid inline texture uploads when unnecessary
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Aug 24, 2023
1 parent 1b9e72e commit 8a0033d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 20 deletions.
13 changes: 10 additions & 3 deletions src/util/metal_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,19 @@ class MetalTexture final : public GPUTexture

void SetDebugName(const std::string_view& name) override;

// Records the fence counter under which this texture is being used.
// Call when the texture is bound to the pipeline, or read from in a copy.
// The stored value is later compared against the device's current fence counter
// when uploading to the texture, and the result is passed to GetBlitEncoder() as
// the is_inline flag.
ALWAYS_INLINE void SetUseFenceCounter(u64 counter) { m_use_fence_counter = counter; }

private:
MetalTexture(id<MTLTexture> texture, u16 width, u16 height, u8 layers, u8 levels, u8 samples, Type type,
Format format);

id<MTLTexture> m_texture;

// Contains the fence counter at the time the texture was last used.
// When this matches the device's current fence counter, the texture has been used
// in the current command buffer. Starts at 0 and is updated via SetUseFenceCounter().
u64 m_use_fence_counter = 0;

u16 m_map_x = 0;
u16 m_map_y = 0;
u16 m_map_width = 0;
Expand Down Expand Up @@ -179,9 +186,9 @@ class MetalDevice final : public GPUDevice
{
public:
ALWAYS_INLINE static MetalDevice& GetInstance() { return *static_cast<MetalDevice*>(g_gpu_device.get()); }
ALWAYS_INLINE static id<MTLDevice> GetMTLDevice() { return GetInstance().m_device; }
ALWAYS_INLINE static u64 GetCurrentFenceCounter() { return GetInstance().m_current_fence_counter; }
ALWAYS_INLINE static u64 GetCompletedFenceCounter() { return GetInstance().m_completed_fence_counter; }
ALWAYS_INLINE id<MTLDevice> GetMTLDevice() { return m_device; }
ALWAYS_INLINE u64 GetCurrentFenceCounter() { return m_current_fence_counter; }
ALWAYS_INLINE u64 GetCompletedFenceCounter() { return m_completed_fence_counter; }

MetalDevice();
~MetalDevice();
Expand Down
37 changes: 24 additions & 13 deletions src/util/metal_device.mm
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static void RunOnMainThread(F&& f)
id<MTLDevice> device = nil;
if (!adapter.empty())
{
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]);
for (u32 i = 0; i < count; i++)
{
Expand All @@ -140,11 +140,11 @@ static void RunOnMainThread(F&& f)
break;
}
}

if (device == nil)
Log_ErrorPrint(fmt::format("Failed to find device named '{}'. Trying default.", adapter).c_str());
}

if (device == nil)
{
device = [MTLCreateSystemDefaultDevice() autorelease];
Expand Down Expand Up @@ -416,14 +416,15 @@ static void RunOnMainThread(F&& f)
GPUDevice::AdapterAndModeList MetalDevice::StaticGetAdapterAndModeList()
{
AdapterAndModeList ret;
@autoreleasepool {
NSArray<id<MTLDevice>> *devices = [MTLCopyAllDevices() autorelease];
@autoreleasepool
{
NSArray<id<MTLDevice>>* devices = [MTLCopyAllDevices() autorelease];
const u32 count = static_cast<u32>([devices count]);
ret.adapter_names.reserve(count);
for (u32 i = 0; i < count; i++)
ret.adapter_names.emplace_back([devices[i].name UTF8String]);
}

return ret;
}

Expand Down Expand Up @@ -914,7 +915,7 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
Panic("Failed to allocate temporary buffer.");
return false;
}

dev.DeferRelease(actual_buffer);
}
else
Expand All @@ -928,19 +929,19 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
return false;
}
}

actual_offset = sb.GetCurrentOffset();
StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height);
sb.CommitMemory(req_size);
actual_buffer = sb.GetBuffer();
actual_pitch = aligned_pitch;
}

if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height))
dev.CommitClear(this);

// TODO: track this
const bool is_inline = true;
const bool is_inline = (m_use_fence_counter == dev.GetCurrentFenceCounter());

id<MTLBlitCommandEncoder> encoder = dev.GetBlitEncoder(is_inline);
[encoder copyFromBuffer:actual_buffer
sourceOffset:actual_offset
Expand Down Expand Up @@ -1423,6 +1424,9 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_

CommitClear(S);

S->SetUseFenceCounter(m_current_fence_counter);
D->SetUseFenceCounter(m_current_fence_counter);

@autoreleasepool
{
id<MTLBlitCommandEncoder> encoder = GetBlitEncoder(true);
Expand Down Expand Up @@ -1743,7 +1747,8 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
DebugAssert(slot < MAX_TEXTURE_SAMPLERS);

id<MTLTexture> T = texture ? static_cast<MetalTexture*>(texture)->GetMTLTexture() : nil;
id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
if (texture)
static_cast<MetalTexture*>(texture)->SetUseFenceCounter(m_current_fence_counter);

if (m_current_textures[slot] != T)
{
Expand All @@ -1752,6 +1757,7 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
[m_render_encoder setFragmentTexture:T atIndex:slot];
}

id<MTLSamplerState> S = sampler ? static_cast<MetalSampler*>(sampler)->GetSamplerState() : nil;
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
Expand Down Expand Up @@ -1829,7 +1835,8 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
m_inline_upload_encoder = nil;
}

@autoreleasepool {
@autoreleasepool
{
MTLRenderPassDescriptor* desc;
if (!m_current_framebuffer)
{
Expand All @@ -1841,6 +1848,10 @@ static void DumpShader(u32 n, const std::string_view& suffix, const std::string_
else
{
desc = m_current_framebuffer->GetDescriptor();
if (MetalTexture* RT = static_cast<MetalTexture*>(m_current_framebuffer->GetRT()))
RT->SetUseFenceCounter(m_current_fence_counter);
if (MetalTexture* DS = static_cast<MetalTexture*>(m_current_framebuffer->GetDS()))
DS->SetUseFenceCounter(m_current_fence_counter);
}

m_render_encoder = [[m_render_cmdbuf renderCommandEncoderWithDescriptor:desc] retain];
Expand Down
9 changes: 5 additions & 4 deletions src/util/metal_stream_buffer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@
void MetalStreamBuffer::UpdateCurrentFencePosition()
{
// Has the offset changed since the last fence?
const u64 counter = MetalDevice::GetCurrentFenceCounter();
const u64 counter = MetalDevice::GetInstance().GetCurrentFenceCounter();
if (!m_tracked_fences.empty() && m_tracked_fences.back().first == counter)
{
// Still haven't executed a command buffer, so just update the offset.
Expand All @@ -155,7 +155,7 @@
auto start = m_tracked_fences.begin();
auto end = start;

const u64 completed_counter = MetalDevice::GetCompletedFenceCounter();
const u64 completed_counter = MetalDevice::GetInstance().GetCompletedFenceCounter();
while (end != m_tracked_fences.end() && completed_counter >= end->first)
{
m_current_gpu_position = end->second;
Expand Down Expand Up @@ -242,11 +242,12 @@

// Did any fences satisfy this condition?
// Has the command buffer been executed yet? If not, the caller should execute it.
if (iter == m_tracked_fences.end() || iter->first == MetalDevice::GetCurrentFenceCounter())
MetalDevice& dev = MetalDevice::GetInstance();
if (iter == m_tracked_fences.end() || iter->first == dev.GetCurrentFenceCounter())
return false;

// Wait until this fence is signaled. This will fire the callback, updating the GPU position.
MetalDevice::GetInstance().WaitForFenceCounter(iter->first);
dev.WaitForFenceCounter(iter->first);
m_tracked_fences.erase(m_tracked_fences.begin(), m_current_offset == iter->second ? m_tracked_fences.end() : ++iter);
m_current_offset = new_offset;
m_current_space = new_space;
Expand Down

0 comments on commit 8a0033d

Please sign in to comment.