From 571106ac335f7b392c18fab10f84334ce17d5006 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 22 Sep 2024 22:23:27 -0700 Subject: [PATCH 1/9] Add a label argument to every pass creation --- blade-egui/src/lib.rs | 2 +- blade-graphics/README.md | 2 +- blade-graphics/src/gles/command.rs | 7 ++++--- blade-graphics/src/metal/command.rs | 21 +++++++++++++++------ blade-graphics/src/vulkan/command.rs | 23 +++++++++++++++-------- blade-render/src/model/mod.rs | 4 ++-- blade-render/src/render/debug.rs | 2 +- blade-render/src/render/dummy.rs | 2 +- blade-render/src/render/env_map.rs | 2 +- blade-render/src/render/mod.rs | 14 +++++++------- blade-render/src/texture/mod.rs | 2 +- docs/CHANGELOG.md | 1 + examples/bunnymark/main.rs | 21 ++++++++++++--------- examples/init/main.rs | 19 +++++++++++-------- examples/mini/main.rs | 6 +++--- examples/particle/main.rs | 19 +++++++++++-------- examples/particle/particle.rs | 6 +++--- examples/ray-query/main.rs | 25 ++++++++++++++----------- examples/scene/main.rs | 19 +++++++++++-------- src/lib.rs | 19 +++++++++++-------- 20 files changed, 126 insertions(+), 90 deletions(-) diff --git a/blade-egui/src/lib.rs b/blade-egui/src/lib.rs index 35c7c4f9..533a269b 100644 --- a/blade-egui/src/lib.rs +++ b/blade-egui/src/lib.rs @@ -265,7 +265,7 @@ impl GuiPainter { copies.push((src, dst, extent)); } - if let mut transfer = command_encoder.transfer() { + if let mut transfer = command_encoder.transfer("update egui textures") { for (src, dst, extent) in copies { transfer.copy_buffer_to_texture(src, 4 * extent.width, dst, extent); } diff --git a/blade-graphics/README.md b/blade-graphics/README.md index 41376b20..6a1c549a 100644 --- a/blade-graphics/README.md +++ b/blade-graphics/README.md @@ -42,7 +42,7 @@ All of these required extensions are supported in software by the driver on any GLES is also supported at a basic level. It's enabled for `wasm32-unknown-unknown` target, and can also be force-enabled on native: ```bash -RUSTFLAGS="--cfg gles" CARGO_TARGET_DIR=./target-gl cargo test +RUSTFLAGS="--cfg gles" CARGO_TARGET_DIR=./target-gl cargo run --example bunnymark ``` This path can be activated on all platforms via Angle library. diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs index 0e2d6470..b17b65a9 100644 --- a/blade-graphics/src/gles/command.rs +++ b/blade-graphics/src/gles/command.rs @@ -91,7 +91,7 @@ impl super::CommandEncoder { self.has_present = true; } - pub fn transfer(&mut self) -> super::PassEncoder<()> { + pub fn transfer(&mut self, _label: &str) -> super::PassEncoder<()> { super::PassEncoder { commands: &mut self.commands, plain_data: &mut self.plain_data, @@ -102,11 +102,11 @@ impl super::CommandEncoder { } } - pub fn acceleration_structure(&mut self) -> super::PassEncoder<()> { + pub fn acceleration_structure(&mut self, _label: &str) -> super::PassEncoder<()> { unimplemented!() } - pub fn compute(&mut self) -> super::PassEncoder { + pub fn compute(&mut self, _label: &str) -> super::PassEncoder { super::PassEncoder { commands: &mut self.commands, plain_data: &mut self.plain_data, @@ -119,6 +119,7 @@ impl super::CommandEncoder { pub fn render( &mut self, + _label: &str, targets: crate::RenderTargetSet, ) -> super::PassEncoder { let mut target_size = [0u16; 2]; diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index c9fa1cf7..3c048fd7 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -106,12 +106,13 @@ impl super::CommandEncoder { self.raw.as_mut().unwrap().present_drawable(&frame.drawable); } - pub fn transfer(&mut self) -> super::TransferCommandEncoder { + pub fn transfer(&mut self, _label: &str) -> super::TransferCommandEncoder { let raw = objc::rc::autoreleasepool(|| { + let descriptor = metal::BlitPassDescriptor::new(); self.raw .as_mut() .unwrap() - .new_blit_command_encoder() + .blit_command_encoder_with_descriptor(&descriptor) .to_owned() }); super::TransferCommandEncoder { @@ -120,7 +121,10 @@ impl super::CommandEncoder { } } - pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder { + pub fn acceleration_structure( + &mut self, + _label: &str, + ) -> super::AccelerationStructureCommandEncoder { let raw = objc::rc::autoreleasepool(|| { self.raw .as_mut() @@ -134,12 +138,13 @@ impl super::CommandEncoder { } } - pub fn compute(&mut self) -> super::ComputeCommandEncoder { + pub fn compute(&mut self, _label: &str) -> super::ComputeCommandEncoder { let raw = objc::rc::autoreleasepool(|| { + let descriptor = metal::ComputePassDescriptor::new(); self.raw .as_mut() .unwrap() - .new_compute_command_encoder() + .compute_command_encoder_with_descriptor(&descriptor) .to_owned() }); super::ComputeCommandEncoder { @@ -148,7 +153,11 @@ impl super::CommandEncoder { } } - pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder { + pub fn render( + &mut self, + _label: &str, + targets: crate::RenderTargetSet, + ) -> super::RenderCommandEncoder { let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::RenderPassDescriptor::new(); diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs index 1206d9b2..9a1e95d5 100644 --- a/blade-graphics/src/vulkan/command.rs +++ b/blade-graphics/src/vulkan/command.rs @@ -330,27 +330,30 @@ impl super::CommandEncoder { } } - pub fn transfer(&mut self) -> super::TransferCommandEncoder { + pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder { self.barrier(); - self.mark("pass/transfer"); + self.mark(label); super::TransferCommandEncoder { raw: self.buffers[0].raw, device: &self.device, } } - pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder { + pub fn acceleration_structure( + &mut self, + label: &str, + ) -> super::AccelerationStructureCommandEncoder { self.barrier(); - self.mark("pass/acc-struct"); + self.mark(label); super::AccelerationStructureCommandEncoder { raw: self.buffers[0].raw, device: &self.device, } } - pub fn compute(&mut self) -> super::ComputeCommandEncoder { + pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder { self.barrier(); - self.mark("pass/compute"); + self.mark(label); super::ComputeCommandEncoder { cmd_buf: self.buffers.first_mut().unwrap(), device: &self.device, @@ -358,9 +361,13 @@ impl super::CommandEncoder { } } - pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder { + pub fn render( + &mut self, + label: &str, + targets: crate::RenderTargetSet, + ) -> super::RenderCommandEncoder { self.barrier(); - self.mark("pass/render"); + self.mark(label); let mut target_size = [0u16; 2]; let mut color_attachments = Vec::with_capacity(targets.colors.len()); diff --git a/blade-render/src/model/mod.rs b/blade-render/src/model/mod.rs index 4f513860..7c9c0416 100644 --- a/blade-render/src/model/mod.rs +++ b/blade-render/src/model/mod.rs @@ -381,7 +381,7 @@ impl Baker { ) { let mut pending_ops = self.pending_operations.lock().unwrap(); if !pending_ops.transfers.is_empty() { - let mut pass = encoder.transfer(); + let mut pass = encoder.transfer("init models"); for transfer in pending_ops.transfers.drain(..) { pass.copy_buffer_to_buffer( transfer.stage.into(), @@ -392,7 +392,7 @@ impl Baker { } } if !pending_ops.blas_constructs.is_empty() { - let mut pass = encoder.acceleration_structure(); + let mut pass = encoder.acceleration_structure("BLAS"); for construct in pending_ops.blas_constructs.drain(..) { pass.build_bottom_level(construct.dst, &construct.meshes, construct.scratch.into()); temp_buffers.push(construct.scratch); diff --git a/blade-render/src/render/debug.rs b/blade-render/src/render/debug.rs index 290e9f40..06da0806 100644 --- a/blade-render/src/render/debug.rs +++ b/blade-render/src/render/debug.rs @@ -194,7 +194,7 @@ impl DebugRender { ); } - let mut transfers = encoder.transfer(); + let mut transfers = encoder.transfer("upload debug"); transfers.copy_buffer_to_buffer( this.entry_buffer.at(0), this.buffer.at(0), diff --git a/blade-render/src/render/dummy.rs b/blade-render/src/render/dummy.rs index d3daa3e7..f4c9fb44 100644 --- a/blade-render/src/render/dummy.rs +++ b/blade-render/src/render/dummy.rs @@ -79,7 +79,7 @@ impl DummyResources { command_encoder.init_texture(white_texture); command_encoder.init_texture(black_texture); command_encoder.init_texture(red_texture); - let mut transfers = command_encoder.transfer(); + let mut transfers = command_encoder.transfer("init dummy"); let staging_buf = gpu.create_buffer(blade_graphics::BufferDesc { name: "dummy/staging", size: 4 * 3, diff --git a/blade-render/src/render/env_map.rs b/blade-render/src/render/env_map.rs index b157285b..979dfaea 100644 --- a/blade-render/src/render/env_map.rs +++ b/blade-render/src/render/env_map.rs @@ -144,7 +144,7 @@ impl EnvironmentMap { let groups = self .prepare_pipeline .get_dispatch_for(weight_extent.at_mip_level(target_level)); - let mut compute = encoder.compute(); + let mut compute = encoder.compute("pre-process env map"); let mut pass = compute.with(&self.prepare_pipeline); pass.bind( 0, diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 751140a5..838f934c 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -904,7 +904,7 @@ impl Renderer { }); temp.buffers.push(hit_staging); { - let mut transfers = command_encoder.transfer(); + let mut transfers = command_encoder.transfer("build-scene"); transfers.copy_buffer_to_buffer(hit_staging.at(0), self.hit_buffer.at(0), hit_size); } @@ -1024,7 +1024,7 @@ impl Renderer { memory: blade_graphics::Memory::Device, }); - let mut tlas_encoder = command_encoder.acceleration_structure(); + let mut tlas_encoder = command_encoder.acceleration_structure("TLAS"); tlas_encoder.build_top_level( self.acceleration_structure, &blases, @@ -1076,7 +1076,7 @@ impl Renderer { camera: &crate::Camera, config: FrameConfig, ) { - let mut transfer = command_encoder.transfer(); + let mut transfer = command_encoder.transfer("prepare"); if config.debug_draw { self.debug.reset_lines(&mut transfer); @@ -1128,7 +1128,7 @@ impl Renderer { let (cur, prev) = self.work_indices(); assert_eq!(cur, self.post_proc_input_index); - if let mut pass = command_encoder.compute() { + if let mut pass = command_encoder.compute("fill-gbuf") { let mut pc = pass.with(&self.fill_pipeline); let groups = self.fill_pipeline.get_dispatch_for(self.surface_size); pc.bind( @@ -1155,7 +1155,7 @@ impl Renderer { pc.dispatch(groups); } - if let mut pass = command_encoder.compute() { + if let mut pass = command_encoder.compute("ray-trace") { let mut pc = pass.with(&self.main_pipeline); let groups = self.main_pipeline.get_dispatch_for(self.surface_size); pc.bind( @@ -1228,7 +1228,7 @@ impl Renderer { let temp = 2; if denoiser_config.temporal_weight < 1.0 { - let mut pass = command_encoder.compute(); + let mut pass = command_encoder.compute("temporal-accum"); let mut pc = pass.with(&self.blur.temporal_accum_pipeline); let groups = self .blur @@ -1258,7 +1258,7 @@ impl Renderer { assert_eq!(cur, self.post_proc_input_index); let mut ping_pong = [temp, if self.is_frozen { cur } else { prev }]; for _ in 0..denoiser_config.num_passes { - let mut pass = command_encoder.compute(); + let mut pass = command_encoder.compute("a-trous"); let mut pc = pass.with(&self.blur.a_trous_pipeline); let groups = self .blur diff --git a/blade-render/src/texture/mod.rs b/blade-render/src/texture/mod.rs index 5ef59c40..6e5192ce 100644 --- a/blade-render/src/texture/mod.rs +++ b/blade-render/src/texture/mod.rs @@ -81,7 +81,7 @@ impl Baker { encoder.init_texture(init.dst); } if !pending_ops.transfers.is_empty() { - let mut pass = encoder.transfer(); + let mut pass = encoder.transfer("init textures"); for transfer in pending_ops.transfers.drain(..) { let dst = blade_graphics::TexturePiece { texture: transfer.dst, diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2c9426ff..69f7f63a 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -4,6 +4,7 @@ Changelog for Blade - graphics: - API for destruction of pipelines + - every pass now takes a label - Metal: - support for workgroup memory diff --git a/examples/bunnymark/main.rs b/examples/bunnymark/main.rs index 58a17ed2..6100e8af 100644 --- a/examples/bunnymark/main.rs +++ b/examples/bunnymark/main.rs @@ -200,7 +200,7 @@ impl Example { }); command_encoder.start(); command_encoder.init_texture(texture); - if let mut transfer = command_encoder.transfer() { + if let mut transfer = command_encoder.transfer("init texture") { transfer.copy_buffer_to_texture(upload_buffer.into(), 4, texture.into(), extent); } let sync_point = context.submit(&mut command_encoder); @@ -278,14 +278,17 @@ impl Example { self.command_encoder.start(); self.command_encoder.init_texture(frame.texture()); - if let mut pass = self.command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: frame.texture_view(), - init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }) { + if let mut pass = self.command_encoder.render( + "main", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: frame.texture_view(), + init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ) { let mut rc = pass.with(&self.pipeline); rc.bind( 0, diff --git a/examples/init/main.rs b/examples/init/main.rs index d01f52fa..a192139b 100644 --- a/examples/init/main.rs +++ b/examples/init/main.rs @@ -92,14 +92,17 @@ impl EnvMapSampler { env_weights: gpu::TextureView, ) { command_encoder.init_texture(self.accum_texture); - let mut pass = command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: self.accum_view, - init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }); + let mut pass = command_encoder.render( + "accumulate", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: self.accum_view, + init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ); if let mut encoder = pass.with(&self.init_pipeline) { encoder.bind( 0, diff --git a/examples/mini/main.rs b/examples/mini/main.rs index d566b338..223df8dd 100644 --- a/examples/mini/main.rs +++ b/examples/mini/main.rs @@ -111,7 +111,7 @@ fn main() { command_encoder.start(); command_encoder.init_texture(texture); - if let mut transfer = command_encoder.transfer() { + if let mut transfer = command_encoder.transfer("gen-mips") { transfer.copy_buffer_to_texture( upload_buffer.into(), extent.width * 4, @@ -120,7 +120,7 @@ fn main() { ); } for i in 1..mip_level_count { - if let mut compute = command_encoder.compute() { + if let mut compute = command_encoder.compute("generate mips") { if let mut pc = compute.with(&pipeline) { let groups = pipeline.get_dispatch_for(extent.at_mip_level(i)); pc.bind( @@ -139,7 +139,7 @@ fn main() { } } } - if let mut tranfer = command_encoder.transfer() { + if let mut tranfer = command_encoder.transfer("init 1x2 texture") { tranfer.copy_texture_to_buffer( gpu::TexturePiece { texture, diff --git a/examples/particle/main.rs b/examples/particle/main.rs index 04fc5354..632e5aef 100644 --- a/examples/particle/main.rs +++ b/examples/particle/main.rs @@ -89,14 +89,17 @@ impl Example { self.particle_system.update(&mut self.command_encoder); - if let mut pass = self.command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: frame.texture_view(), - init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }) { + if let mut pass = self.command_encoder.render( + "draw", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: frame.texture_view(), + init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ) { self.particle_system.draw(&mut pass); self.gui_painter .paint(&mut pass, gui_primitives, screen_desc, &self.context); diff --git a/examples/particle/particle.rs b/examples/particle/particle.rs index 08882018..28f68b23 100644 --- a/examples/particle/particle.rs +++ b/examples/particle/particle.rs @@ -156,7 +156,7 @@ impl System { } pub fn reset(&self, encoder: &mut gpu::CommandEncoder) { - let mut pass = encoder.compute(); + let mut pass = encoder.compute("reset"); let mut pc = pass.with(&self.reset_pipeline); pc.bind(0, &self.main_data()); let group_size = self.reset_pipeline.get_workgroup_size(); @@ -166,7 +166,7 @@ impl System { pub fn update(&self, encoder: &mut gpu::CommandEncoder) { let main_data = self.main_data(); - if let mut pass = encoder.compute() { + if let mut pass = encoder.compute("update") { let mut pc = pass.with(&self.update_pipeline); pc.bind(0, &main_data); pc.bind( @@ -181,7 +181,7 @@ impl System { pc.dispatch([group_count, 1, 1]); } // new pass because both pipelines use the free list - if let mut pass = encoder.compute() { + if let mut pass = encoder.compute("emit") { let mut pc = pass.with(&self.emit_pipeline); pc.bind(0, &main_data); pc.bind( diff --git a/examples/ray-query/main.rs b/examples/ray-query/main.rs index 4fd8a608..d3283769 100644 --- a/examples/ray-query/main.rs +++ b/examples/ray-query/main.rs @@ -210,11 +210,11 @@ impl Example { }); command_encoder.start(); command_encoder.init_texture(target); - if let mut pass = command_encoder.acceleration_structure() { + if let mut pass = command_encoder.acceleration_structure("BLAS") { pass.build_bottom_level(blas, &meshes, scratch_buffer.at(0)); } //Note: separate pass in order to enforce synchronization - if let mut pass = command_encoder.acceleration_structure() { + if let mut pass = command_encoder.acceleration_structure("TLAS") { pass.build_top_level( tlas, &[blas], @@ -264,7 +264,7 @@ impl Example { fn render(&mut self) { self.command_encoder.start(); - if let mut pass = self.command_encoder.compute() { + if let mut pass = self.command_encoder.compute("ray-trace") { let groups = self.rt_pipeline.get_dispatch_for(self.screen_size); if let mut pc = pass.with(&self.rt_pipeline) { let fov_y = 0.3; @@ -293,14 +293,17 @@ impl Example { let frame = self.context.acquire_frame(); self.command_encoder.init_texture(frame.texture()); - if let mut pass = self.command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: frame.texture_view(), - init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }) { + if let mut pass = self.command_encoder.render( + "draw", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: frame.texture_view(), + init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ) { if let mut pc = pass.with(&self.draw_pipeline) { pc.bind( 0, diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 237546ef..5f8b9b99 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -476,14 +476,17 @@ impl Example { let frame = self.context.acquire_frame(); command_encoder.init_texture(frame.texture()); - if let mut pass = command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: frame.texture_view(), - init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }) { + if let mut pass = command_encoder.render( + "draw", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: frame.texture_view(), + init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ) { let screen_desc = blade_egui::ScreenDescriptor { physical_size: (physical_size.width, physical_size.height), scale_factor, diff --git a/src/lib.rs b/src/lib.rs index 01889de7..d9bdd5a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -679,14 +679,17 @@ impl Engine { let frame = self.gpu_context.acquire_frame(); command_encoder.init_texture(frame.texture()); - if let mut pass = command_encoder.render(gpu::RenderTargetSet { - colors: &[gpu::RenderTarget { - view: frame.texture_view(), - init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), - finish_op: gpu::FinishOp::Store, - }], - depth_stencil: None, - }) { + if let mut pass = command_encoder.render( + "draw", + gpu::RenderTargetSet { + colors: &[gpu::RenderTarget { + view: frame.texture_view(), + init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack), + finish_op: gpu::FinishOp::Store, + }], + depth_stencil: None, + }, + ) { let screen_desc = blade_egui::ScreenDescriptor { physical_size: (physical_size.width, physical_size.height), scale_factor, From 8bc0cd07f200cccca6179ca88cd39e25c5fbf3e2 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 22 Sep 2024 23:02:04 -0700 Subject: [PATCH 2/9] vk: annotate debug scopes --- blade-graphics/src/vulkan/command.rs | 67 +++++++++++++++++++++++----- blade-graphics/src/vulkan/init.rs | 3 ++ blade-graphics/src/vulkan/mod.rs | 8 ++++ 3 files changed, 66 insertions(+), 12 deletions(-) diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs index 9a1e95d5..82a40fd7 100644 --- a/blade-graphics/src/vulkan/command.rs +++ b/blade-graphics/src/vulkan/command.rs @@ -197,8 +197,16 @@ fn map_render_target(rt: &crate::RenderTarget) -> vk::RenderingAttachmentInfo<'s vk_info } +fn end_pass(device: &super::Device, cmd_buf: vk::CommandBuffer) { + if device.toggles.command_scopes { + unsafe { + device.debug_utils.cmd_end_debug_utils_label(cmd_buf); + } + } +} + impl super::CommandEncoder { - pub fn mark(&mut self, marker: &str) { + fn add_marker(&mut self, marker: &str) { if let Some(ref mut ch) = self.crash_handler { let id = ch.add_marker(marker); unsafe { @@ -217,10 +225,30 @@ impl super::CommandEncoder { } } + fn begin_pass(&mut self, label: &str) { + self.barrier(); + self.add_marker(label); + if self.device.toggles.command_scopes { + self.temp_label.clear(); + self.temp_label.extend_from_slice(label.as_bytes()); + self.temp_label.push(0); + unsafe { + self.device.debug_utils.cmd_begin_debug_utils_label( + self.buffers[0].raw, + &vk::DebugUtilsLabelEXT { + p_label_name: self.temp_label.as_ptr() as *const _, + ..Default::default() + }, + ) + } + } + } + pub fn start(&mut self) { self.buffers.rotate_left(1); + let cmd_buf = self.buffers.first_mut().unwrap(); self.device - .reset_descriptor_pool(&mut self.buffers[0].descriptor_pool); + .reset_descriptor_pool(&mut cmd_buf.descriptor_pool); let vk_info = vk::CommandBufferBeginInfo { flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT, @@ -229,14 +257,14 @@ impl super::CommandEncoder { unsafe { self.device .core - .begin_command_buffer(self.buffers[0].raw, &vk_info) + .begin_command_buffer(cmd_buf.raw, &vk_info) .unwrap(); } } pub(super) fn finish(&mut self) -> vk::CommandBuffer { self.barrier(); - self.mark("finish"); + self.add_marker("finish"); let raw = self.buffers[0].raw; unsafe { self.device.core.end_command_buffer(raw).unwrap() } raw @@ -331,8 +359,7 @@ impl super::CommandEncoder { } pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder { - self.barrier(); - self.mark(label); + self.begin_pass(label); super::TransferCommandEncoder { raw: self.buffers[0].raw, device: &self.device, @@ -343,8 +370,7 @@ impl super::CommandEncoder { &mut self, label: &str, ) -> super::AccelerationStructureCommandEncoder { - self.barrier(); - self.mark(label); + self.begin_pass(label); super::AccelerationStructureCommandEncoder { raw: self.buffers[0].raw, device: &self.device, @@ -352,8 +378,7 @@ impl super::CommandEncoder { } pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder { - self.barrier(); - self.mark(label); + self.begin_pass(label); super::ComputeCommandEncoder { cmd_buf: self.buffers.first_mut().unwrap(), device: &self.device, @@ -366,8 +391,7 @@ impl super::CommandEncoder { label: &str, targets: crate::RenderTargetSet, ) -> super::RenderCommandEncoder { - self.barrier(); - self.mark(label); + self.begin_pass(label); let mut target_size = [0u16; 2]; let mut color_attachments = Vec::with_capacity(targets.colors.len()); @@ -551,6 +575,12 @@ impl crate::traits::TransferEncoder for super::TransferCommandEncoder<'_> { } } +impl Drop for super::TransferCommandEncoder<'_> { + fn drop(&mut self) { + end_pass(self.device, self.raw); + } +} + #[hidden_trait::expose] impl crate::traits::AccelerationStructureEncoder for super::AccelerationStructureCommandEncoder<'_> @@ -635,6 +665,12 @@ impl crate::traits::AccelerationStructureEncoder } } +impl Drop for super::AccelerationStructureCommandEncoder<'_> { + fn drop(&mut self) { + end_pass(self.device, self.raw); + } +} + impl<'a> super::ComputeCommandEncoder<'a> { pub fn with<'b, 'p>( &'b mut self, @@ -651,6 +687,12 @@ impl<'a> super::ComputeCommandEncoder<'a> { } } +impl Drop for super::ComputeCommandEncoder<'_> { + fn drop(&mut self) { + end_pass(self.device, self.cmd_buf.raw); + } +} + impl<'a> super::RenderCommandEncoder<'a> { pub fn set_scissor_rect(&mut self, rect: &crate::ScissorRect) { let vk_scissor = vk::Rect2D { @@ -692,6 +734,7 @@ impl Drop for super::RenderCommandEncoder<'_> { .dynamic_rendering .cmd_end_rendering(self.cmd_buf.raw) }; + end_pass(self.device, self.cmd_buf.raw); } } diff --git a/blade-graphics/src/vulkan/init.rs b/blade-graphics/src/vulkan/init.rs index 4a0c95da..487cad90 100644 --- a/blade-graphics/src/vulkan/init.rs +++ b/blade-graphics/src/vulkan/init.rs @@ -564,6 +564,9 @@ impl super::Context { }, core: device_core, device_information: capabilities.device_information, + toggles: super::Toggles { + command_scopes: desc.capture, + }, //TODO: detect GPU family workarounds: super::Workarounds { extra_sync_src_access: vk::AccessFlags::TRANSFER_WRITE, diff --git a/blade-graphics/src/vulkan/mod.rs b/blade-graphics/src/vulkan/mod.rs index 4707fc8f..a0333070 100644 --- a/blade-graphics/src/vulkan/mod.rs +++ b/blade-graphics/src/vulkan/mod.rs @@ -20,6 +20,11 @@ struct RayTracingDevice { acceleration_structure: khr::acceleration_structure::Device, } +#[derive(Clone, Default)] +struct Toggles { + command_scopes: bool, +} + #[derive(Clone)] struct Workarounds { extra_sync_src_access: vk::AccessFlags, @@ -38,6 +43,7 @@ struct Device { buffer_marker: Option, shader_info: Option, full_screen_exclusive: Option, + toggles: Toggles, workarounds: Workarounds, } @@ -239,6 +245,7 @@ pub struct CommandEncoder { update_data: Vec, present: Option, crash_handler: Option, + temp_label: Vec, } pub struct TransferCommandEncoder<'a> { raw: vk::CommandBuffer, @@ -367,6 +374,7 @@ impl crate::traits::CommandDevice for Context { update_data: Vec::new(), present: None, crash_handler, + temp_label: Vec::new(), } } From 6f388365b70d79423900ec86c217dcd9d77a2dce Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 23 Sep 2024 22:36:04 -0700 Subject: [PATCH 3/9] vk: GPU timings --- blade-graphics/src/gles/command.rs | 6 ++ blade-graphics/src/lib.rs | 8 +++ blade-graphics/src/metal/command.rs | 6 +- blade-graphics/src/vulkan/command.rs | 85 ++++++++++++++++++++++++++-- blade-graphics/src/vulkan/init.rs | 22 ++++++- blade-graphics/src/vulkan/mod.rs | 39 +++++++++++-- docs/CHANGELOG.md | 2 + examples/bunnymark/main.rs | 1 + examples/particle/main.rs | 19 ++++++- examples/particle/particle.rs | 4 ++ examples/ray-query/main.rs | 3 +- examples/scene/main.rs | 3 +- src/lib.rs | 1 + 13 files changed, 179 insertions(+), 20 deletions(-) diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs index b17b65a9..25c1258b 100644 --- a/blade-graphics/src/gles/command.rs +++ b/blade-graphics/src/gles/command.rs @@ -1,3 +1,5 @@ +use std::time::Duration; + const COLOR_ATTACHMENTS: &[u32] = &[ glow::COLOR_ATTACHMENT0, glow::COLOR_ATTACHMENT1, @@ -201,6 +203,10 @@ impl super::CommandEncoder { limits: &self.limits, } } + + pub fn timings(&self) -> &[(String, Duration)] { + &[] + } } impl super::PassEncoder<'_, super::ComputePipeline> { diff --git a/blade-graphics/src/lib.rs b/blade-graphics/src/lib.rs index 02e973f2..d354aca1 100644 --- a/blade-graphics/src/lib.rs +++ b/blade-graphics/src/lib.rs @@ -72,9 +72,15 @@ mod shader; mod traits; pub mod util; pub mod limits { + /// Max number of passes inside a command encoder. + pub const PASS_COUNT: usize = 100; + /// Max plain data size for a pipeline. pub const PLAIN_DATA_SIZE: u32 = 256; + /// Max number of resources in a bind group. pub const RESOURCES_IN_GROUP: u32 = 8; + /// Min storage buffer alignment. pub const STORAGE_BUFFER_ALIGNMENT: u64 = 256; + /// Min acceleration structure scratch buffer alignment. pub const ACCELERATION_STRUCTURE_SCRATCH_ALIGNMENT: u64 = 256; } @@ -87,6 +93,8 @@ pub struct ContextDesc { /// Enable validation of the GAPI, shaders, /// and insert crash markers into command buffers. pub validation: bool, + /// Enable GPU timing of all passes. + pub timing: bool, /// Enable capture support with GAPI tools. pub capture: bool, /// Enable GAPI overlay. diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index 3c048fd7..174e4bc8 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -1,4 +1,4 @@ -use std::{marker::PhantomData, mem}; +use std::{marker::PhantomData, mem, time::Duration}; impl crate::ShaderBindable for T { fn bind_to(&self, ctx: &mut super::PipelineContext, index: u32) { @@ -226,6 +226,10 @@ impl super::CommandEncoder { phantom: PhantomData, } } + + pub fn timings(&self) -> &[(String, Duration)] { + &[] + } } #[hidden_trait::expose] diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs index 82a40fd7..9de98774 100644 --- a/blade-graphics/src/vulkan/command.rs +++ b/blade-graphics/src/vulkan/command.rs @@ -1,5 +1,5 @@ use ash::vk; -use std::str; +use std::{str, time::Duration}; impl super::CrashHandler { fn add_marker(&mut self, marker: &str) -> u32 { @@ -198,7 +198,7 @@ fn map_render_target(rt: &crate::RenderTarget) -> vk::RenderingAttachmentInfo<'s } fn end_pass(device: &super::Device, cmd_buf: vk::CommandBuffer) { - if device.toggles.command_scopes { + if device.command_scope.is_some() { unsafe { device.debug_utils.cmd_end_debug_utils_label(cmd_buf); } @@ -225,10 +225,32 @@ impl super::CommandEncoder { } } + fn add_timestamp(&mut self, label: &str) { + if let Some(_) = self.device.timing { + let cmd_buf = self.buffers.first_mut().unwrap(); + if cmd_buf.timed_pass_names.len() == crate::limits::PASS_COUNT { + log::warn!("Reached the maximum for `limits::PASS_COUNT`, skipping the timer"); + return; + } + let index = cmd_buf.timed_pass_names.len() as u32; + unsafe { + self.device.core.cmd_write_timestamp( + cmd_buf.raw, + vk::PipelineStageFlags::TOP_OF_PIPE, + cmd_buf.query_pool, + index, + ); + } + cmd_buf.timed_pass_names.push(label.to_string()); + } + } + fn begin_pass(&mut self, label: &str) { self.barrier(); self.add_marker(label); - if self.device.toggles.command_scopes { + self.add_timestamp(label); + + if let Some(_) = self.device.command_scope { self.temp_label.clear(); self.temp_label.extend_from_slice(label.as_bytes()); self.temp_label.push(0); @@ -260,14 +282,61 @@ impl super::CommandEncoder { .begin_command_buffer(cmd_buf.raw, &vk_info) .unwrap(); } + + if let Some(ref timing) = self.device.timing { + self.timings.clear(); + if !cmd_buf.timed_pass_names.is_empty() { + let mut timestamps = [0u64; super::QUERY_POOL_SIZE]; + unsafe { + self.device + .core + .get_query_pool_results( + cmd_buf.query_pool, + 0, + &mut timestamps[..cmd_buf.timed_pass_names.len() + 1], + vk::QueryResultFlags::TYPE_64, + ) + .unwrap(); + } + let mut prev = timestamps[0]; + for (name, &ts) in cmd_buf + .timed_pass_names + .drain(..) + .zip(timestamps[1..].iter()) + { + let diff = (ts - prev) as f32 * timing.period; + prev = ts; + self.timings.push((name, Duration::from_nanos(diff as _))); + } + } + unsafe { + self.device.core.cmd_reset_query_pool( + cmd_buf.raw, + cmd_buf.query_pool, + 0, + super::QUERY_POOL_SIZE as u32, + ); + } + } } pub(super) fn finish(&mut self) -> vk::CommandBuffer { self.barrier(); self.add_marker("finish"); - let raw = self.buffers[0].raw; - unsafe { self.device.core.end_command_buffer(raw).unwrap() } - raw + let cmd_buf = self.buffers.first_mut().unwrap(); + unsafe { + if self.device.timing.is_some() { + let index = cmd_buf.timed_pass_names.len() as u32; + self.device.core.cmd_write_timestamp( + cmd_buf.raw, + vk::PipelineStageFlags::TOP_OF_PIPE, + cmd_buf.query_pool, + index, + ); + } + self.device.core.end_command_buffer(cmd_buf.raw).unwrap(); + } + cmd_buf.raw } fn barrier(&mut self) { @@ -477,6 +546,10 @@ impl super::CommandEncoder { Err(other) => panic!("GPU error {}", other), } } + + pub fn timings(&self) -> &[(String, Duration)] { + &self.timings + } } #[hidden_trait::expose] diff --git a/blade-graphics/src/vulkan/init.rs b/blade-graphics/src/vulkan/init.rs index 487cad90..9645caa9 100644 --- a/blade-graphics/src/vulkan/init.rs +++ b/blade-graphics/src/vulkan/init.rs @@ -44,6 +44,7 @@ struct AdapterCapabilities { buffer_marker: bool, shader_info: bool, full_screen_exclusive: bool, + timing: bool, bugs: SystemBugs, } @@ -202,6 +203,13 @@ unsafe fn inspect_adapter( return None; } + let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE { + log::info!("No timing because of queue support"); + false + } else { + true + }; + let ray_tracing = if !supported_extensions.contains(&vk::KHR_ACCELERATION_STRUCTURE_NAME) || !supported_extensions.contains(&vk::KHR_RAY_QUERY_NAME) { @@ -269,6 +277,7 @@ unsafe fn inspect_adapter( buffer_marker, shader_info, full_screen_exclusive, + timing, bugs, }) } @@ -564,8 +573,17 @@ impl super::Context { }, core: device_core, device_information: capabilities.device_information, - toggles: super::Toggles { - command_scopes: desc.capture, + command_scope: if desc.capture { + Some(super::CommandScopeDevice {}) + } else { + None + }, + timing: if desc.timing && capabilities.timing { + Some(super::TimingDevice { + period: capabilities.properties.limits.timestamp_period, + }) + } else { + None }, //TODO: detect GPU family workarounds: super::Workarounds { diff --git a/blade-graphics/src/vulkan/mod.rs b/blade-graphics/src/vulkan/mod.rs index a0333070..136a8f5e 100644 --- a/blade-graphics/src/vulkan/mod.rs +++ b/blade-graphics/src/vulkan/mod.rs @@ -1,5 +1,5 @@ use ash::{khr, vk}; -use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex}; +use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex, time::Duration}; mod command; mod descriptor; @@ -7,6 +7,8 @@ mod init; mod pipeline; mod resource; +const QUERY_POOL_SIZE: usize = crate::limits::PASS_COUNT + 1; + struct Instance { core: ash::Instance, _debug_utils: ash::ext::debug_utils::Instance, @@ -21,8 +23,10 @@ struct RayTracingDevice { } #[derive(Clone, Default)] -struct Toggles { - command_scopes: bool, +struct CommandScopeDevice {} +#[derive(Clone, Default)] +struct TimingDevice { + period: f32, } #[derive(Clone)] @@ -43,7 +47,8 @@ struct Device { buffer_marker: Option, shader_info: Option, full_screen_exclusive: Option, - toggles: Toggles, + command_scope: Option, + timing: Option, workarounds: Workarounds, } @@ -223,6 +228,8 @@ pub struct RenderPipeline { struct CommandBuffer { raw: vk::CommandBuffer, descriptor_pool: descriptor::DescriptorPool, + query_pool: vk::QueryPool, + timed_pass_names: Vec, } #[derive(Debug, PartialEq)] @@ -246,6 +253,7 @@ pub struct CommandEncoder { present: Option, crash_handler: Option, temp_label: Vec, + timings: Vec<(String, Duration)>, } pub struct TransferCommandEncoder<'a> { raw: vk::CommandBuffer, @@ -345,9 +353,24 @@ impl crate::traits::CommandDevice for Context { self.set_object_name(raw, desc.name); }; let descriptor_pool = self.device.create_descriptor_pool(); + let query_pool = if self.device.timing.is_some() { + let query_pool_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::TIMESTAMP) + .query_count(QUERY_POOL_SIZE as u32); + unsafe { + self.device + .core + .create_query_pool(&query_pool_info, None) + .unwrap() + } + } else { + vk::QueryPool::null() + }; CommandBuffer { raw, descriptor_pool, + query_pool, + timed_pass_names: Vec::new(), } }) .collect(); @@ -375,6 +398,7 @@ impl crate::traits::CommandDevice for Context { present: None, crash_handler, temp_label: Vec::new(), + timings: Vec::new(), } } @@ -388,6 +412,13 @@ impl crate::traits::CommandDevice for Context { } self.device .destroy_descriptor_pool(&mut cmd_buf.descriptor_pool); + if self.device.timing.is_some() { + unsafe { + self.device + .core + .destroy_query_pool(cmd_buf.query_pool, None); + } + } } unsafe { self.device diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 69f7f63a..2f29f1f2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -5,6 +5,8 @@ Changelog for Blade - graphics: - API for destruction of pipelines - every pass now takes a label + - automatic GPU pass markers + - ability to capture pass GPU timings - Metal: - support for workgroup memory diff --git a/examples/bunnymark/main.rs b/examples/bunnymark/main.rs index 6100e8af..52b960ca 100644 --- a/examples/bunnymark/main.rs +++ b/examples/bunnymark/main.rs @@ -67,6 +67,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: false, capture: false, overlay: true, }, diff --git a/examples/particle/main.rs b/examples/particle/main.rs index 632e5aef..3e9b9590 100644 --- a/examples/particle/main.rs +++ b/examples/particle/main.rs @@ -20,6 +20,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: true, capture: false, overlay: false, }, @@ -113,6 +114,19 @@ impl Example { } self.prev_sync_point = Some(sync_point); } + + fn add_gui(&mut self, ui: &mut egui::Ui) { + ui.heading("Particle System"); + self.particle_system.add_gui(ui); + ui.heading("Timings"); + for &(ref name, time) in self.command_encoder.timings() { + let millis = time.as_secs_f32() * 1000.0; + ui.horizontal(|ui| { + ui.label(name); + ui.colored_label(egui::Color32::WHITE, format!("{:.2} ms", millis)); + }); + } + } } fn main() { @@ -167,9 +181,8 @@ fn main() { winit::event::WindowEvent::RedrawRequested => { let raw_input = egui_winit.take_egui_input(&window); let egui_output = egui_winit.egui_ctx().run(raw_input, |egui_ctx| { - egui::SidePanel::left("my_side_panel").show(egui_ctx, |ui| { - ui.heading("Particle System"); - example.particle_system.add_gui(ui); + egui::SidePanel::left("info").show(egui_ctx, |ui| { + example.add_gui(ui); if ui.button("Quit").clicked() { target.exit(); } diff --git a/examples/particle/particle.rs b/examples/particle/particle.rs index 28f68b23..73145148 100644 --- a/examples/particle/particle.rs +++ b/examples/particle/particle.rs @@ -146,6 +146,10 @@ impl System { pub fn destroy(&mut self, context: &gpu::Context) { context.destroy_buffer(self.particle_buf); context.destroy_buffer(self.free_list_buf); + context.destroy_compute_pipeline(&mut self.reset_pipeline); + context.destroy_compute_pipeline(&mut self.emit_pipeline); + context.destroy_compute_pipeline(&mut self.update_pipeline); + context.destroy_render_pipeline(&mut self.draw_pipeline); } fn main_data(&self) -> MainData { diff --git a/examples/ray-query/main.rs b/examples/ray-query/main.rs index d3283769..1e16092f 100644 --- a/examples/ray-query/main.rs +++ b/examples/ray-query/main.rs @@ -51,8 +51,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), - capture: false, - overlay: false, + ..Default::default() }, ) .unwrap() diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 5f8b9b99..11437619 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -191,8 +191,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), - capture: false, - overlay: false, + ..Default::default() }, ) .unwrap() diff --git a/src/lib.rs b/src/lib.rs index d9bdd5a0..ea293d2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -415,6 +415,7 @@ impl Engine { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: true, capture: false, overlay: false, }, From a50880310d65ab213283368b2cd2df8f04c5e04e Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 24 Sep 2024 21:20:43 -0700 Subject: [PATCH 4/9] mtl: enable concurrect dispatches --- blade-graphics/Cargo.toml | 3 ++- blade-graphics/src/metal/command.rs | 3 +++ blade-graphics/src/metal/mod.rs | 11 ++++++++--- docs/CHANGELOG.md | 1 + 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/blade-graphics/Cargo.toml b/blade-graphics/Cargo.toml index 226bd3ca..7b69456d 100644 --- a/blade-graphics/Cargo.toml +++ b/blade-graphics/Cargo.toml @@ -22,7 +22,8 @@ raw-window-handle = "0.6" [target.'cfg(any(target_os = "ios", target_os = "macos"))'.dependencies] block = "0.1" core-graphics-types = "0.1" -metal = "0.29" +#TODO: switch to crates once https://github.com/gfx-rs/metal-rs/pull/335 is published +metal = { git = "https://github.com/gfx-rs/metal-rs", rev = "9bbe74b1d3706e46ddf41bc8aad58ee74b0bf844" } objc = "0.2.5" naga = { workspace = true, features = ["msl-out"] } diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index 174e4bc8..a44d272c 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -141,6 +141,9 @@ impl super::CommandEncoder { pub fn compute(&mut self, _label: &str) -> super::ComputeCommandEncoder { let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); + if self.private_info.supports_dispatch_type { + descriptor.set_dispatch_type(metal::MTLDispatchType::Concurrent); + } self.raw .as_mut() .unwrap() diff --git a/blade-graphics/src/metal/mod.rs b/blade-graphics/src/metal/mod.rs index 727f2351..c604167a 100644 --- a/blade-graphics/src/metal/mod.rs +++ b/blade-graphics/src/metal/mod.rs @@ -39,8 +39,10 @@ impl Frame { } } -struct DeviceInfo { +#[derive(Debug, Clone)] +struct PrivateInfo { language_version: metal::MTLLanguageVersion, + supports_dispatch_type: bool, } pub struct Context { @@ -48,7 +50,7 @@ pub struct Context { queue: Arc>, surface: Option>, capture: Option, - info: DeviceInfo, + info: PrivateInfo, device_information: crate::DeviceInformation, } @@ -183,6 +185,7 @@ pub struct CommandEncoder { raw: Option, name: String, queue: Arc>, + private_info: PrivateInfo, } #[derive(Debug)] @@ -427,9 +430,10 @@ impl Context { queue: Arc::new(Mutex::new(queue)), surface: None, capture, - info: DeviceInfo { + info: PrivateInfo { //TODO: determine based on OS version language_version: metal::MTLLanguageVersion::V2_4, + supports_dispatch_type: true, }, device_information, }) @@ -503,6 +507,7 @@ impl crate::traits::CommandDevice for Context { raw: None, name: desc.name.to_string(), queue: Arc::clone(&self.queue), + private_info: self.info.clone(), } } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 2f29f1f2..a7135dab 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -9,6 +9,7 @@ Changelog for Blade - ability to capture pass GPU timings - Metal: - support for workgroup memory + - concurrent compute dispatches ## blade-graphics-0.5, blade-macros-0.3, blade-egui-0.4, blade-util-0.1 (27 Aug 2024) From 18e54422c655c30e3bff06b9e153edf7ffc4f000 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 24 Sep 2024 23:30:13 -0700 Subject: [PATCH 5/9] metal: timing support Also switches to my own fork of metal-rs for now --- blade-graphics/Cargo.toml | 5 +- blade-graphics/src/metal/command.rs | 75 ++++++++++++++++++++++++++--- blade-graphics/src/metal/mod.rs | 58 +++++++++++++++++++++- 3 files changed, 129 insertions(+), 9 deletions(-) diff --git a/blade-graphics/Cargo.toml b/blade-graphics/Cargo.toml index 7b69456d..f6b8fc0a 100644 --- a/blade-graphics/Cargo.toml +++ b/blade-graphics/Cargo.toml @@ -23,7 +23,10 @@ raw-window-handle = "0.6" block = "0.1" core-graphics-types = "0.1" #TODO: switch to crates once https://github.com/gfx-rs/metal-rs/pull/335 is published -metal = { git = "https://github.com/gfx-rs/metal-rs", rev = "9bbe74b1d3706e46ddf41bc8aad58ee74b0bf844" } +#TODO: switch to upstream once these are merged: +# - https://github.com/gfx-rs/metal-rs/pull/336 +# - https://github.com/gfx-rs/metal-rs/pull/337 +metal = { git = "https://github.com/kvark/metal-rs", branch = "blade" } objc = "0.2.5" naga = { workspace = true, features = ["msl-out"] } diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index a44d272c..5391678a 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -88,8 +88,31 @@ impl crate::ShaderBindable for crate::AccelerationStructure { } } +impl super::TimingData { + fn add(&mut self, label: &str) -> u64 { + let counter_index = self.pass_names.len() as u64 * 2; + self.pass_names.push(label.to_string()); + counter_index + } +} + impl super::CommandEncoder { pub fn start(&mut self) { + if let Some(ref mut td_array) = self.timing_datas { + self.timings.clear(); + td_array.rotate_left(1); + let td = td_array.first_mut().unwrap(); + if !td.pass_names.is_empty() { + let counters = td + .sample_buffer + .resolve_counter_range(metal::NSRange::new(0, td.pass_names.len() as u64 * 2)); + for (name, chunk) in td.pass_names.drain(..).zip(counters.chunks(2)) { + let duration = Duration::from_nanos(chunk[1] - chunk[0]); + self.timings.push((name, duration)); + } + } + } + let queue = self.queue.lock().unwrap(); self.raw = Some(objc::rc::autoreleasepool(|| { let cmd_buf = queue.new_command_buffer_with_unretained_references(); @@ -106,9 +129,19 @@ impl super::CommandEncoder { self.raw.as_mut().unwrap().present_drawable(&frame.drawable); } - pub fn transfer(&mut self, _label: &str) -> super::TransferCommandEncoder { + pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder { let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::BlitPassDescriptor::new(); + + if let Some(ref mut td_array) = self.timing_datas { + let td = td_array.first_mut().unwrap(); + let counter_index = td.add(label); + let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap(); + sba.set_sample_buffer(&td.sample_buffer); + sba.set_start_of_encoder_sample_index(counter_index); + sba.set_end_of_encoder_sample_index(counter_index + 1); + } + self.raw .as_mut() .unwrap() @@ -123,9 +156,20 @@ impl super::CommandEncoder { pub fn acceleration_structure( &mut self, - _label: &str, + label: &str, ) -> super::AccelerationStructureCommandEncoder { let raw = objc::rc::autoreleasepool(|| { + let descriptor = metal::AccelerationStructurePassDescriptor::new(); + + if let Some(ref mut td_array) = self.timing_datas { + let td = td_array.first_mut().unwrap(); + let counter_index = td.add(label); + let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap(); + sba.set_sample_buffer(&td.sample_buffer); + sba.set_start_of_encoder_sample_index(counter_index); + sba.set_end_of_encoder_sample_index(counter_index + 1); + } + self.raw .as_mut() .unwrap() @@ -138,12 +182,22 @@ impl super::CommandEncoder { } } - pub fn compute(&mut self, _label: &str) -> super::ComputeCommandEncoder { + pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder { let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); - if self.private_info.supports_dispatch_type { + if self.supports_dispatch_type { descriptor.set_dispatch_type(metal::MTLDispatchType::Concurrent); } + + if let Some(ref mut td_array) = self.timing_datas { + let td = td_array.first_mut().unwrap(); + let counter_index = td.add(label); + let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap(); + sba.set_sample_buffer(&td.sample_buffer); + sba.set_start_of_encoder_sample_index(counter_index); + sba.set_end_of_encoder_sample_index(counter_index + 1); + } + self.raw .as_mut() .unwrap() @@ -158,7 +212,7 @@ impl super::CommandEncoder { pub fn render( &mut self, - _label: &str, + label: &str, targets: crate::RenderTargetSet, ) -> super::RenderCommandEncoder { let raw = objc::rc::autoreleasepool(|| { @@ -217,6 +271,15 @@ impl super::CommandEncoder { at_descriptor.set_store_action(store_action); } + if let Some(ref mut td_array) = self.timing_datas { + let td = td_array.first_mut().unwrap(); + let counter_index = td.add(label); + let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap(); + sba.set_sample_buffer(&td.sample_buffer); + sba.set_start_of_vertex_sample_index(counter_index); + sba.set_end_of_fragment_sample_index(counter_index + 1); + } + self.raw .as_mut() .unwrap() @@ -231,7 +294,7 @@ impl super::CommandEncoder { } pub fn timings(&self) -> &[(String, Duration)] { - &[] + &self.timings } } diff --git a/blade-graphics/src/metal/mod.rs b/blade-graphics/src/metal/mod.rs index c604167a..a15f0e67 100644 --- a/blade-graphics/src/metal/mod.rs +++ b/blade-graphics/src/metal/mod.rs @@ -12,6 +12,8 @@ mod pipeline; mod resource; mod surface; +const MAX_TIMESTAMPS: u64 = crate::limits::PASS_COUNT as u64 * 2; + struct Surface { view: *mut objc::runtime::Object, render_layer: metal::MetalLayer, @@ -43,6 +45,7 @@ impl Frame { struct PrivateInfo { language_version: metal::MTLLanguageVersion, supports_dispatch_type: bool, + timestamp_counter_set: Option, } pub struct Context { @@ -180,12 +183,20 @@ pub struct SyncPoint { cmd_buf: metal::CommandBuffer, } +#[derive(Debug)] +struct TimingData { + pass_names: Vec, + sample_buffer: metal::CounterSampleBuffer, +} + #[derive(Debug)] pub struct CommandEncoder { raw: Option, name: String, queue: Arc>, - private_info: PrivateInfo, + supports_dispatch_type: bool, + timing_datas: Option>, + timings: Vec<(String, time::Duration)>, } #[derive(Debug)] @@ -425,6 +436,23 @@ impl Context { driver_info: "".to_string(), }; + let mut timestamp_counter_set = None; + if desc.timing { + for counter_set in device.counter_sets() { + if counter_set.name() == "timestamp" { + timestamp_counter_set = Some(counter_set); + } + } + if timestamp_counter_set.is_none() { + log::warn!("Timing counters are not supported by the device"); + } else if !device + .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary) + { + log::warn!("Timing counters do not support stage boundary"); + timestamp_counter_set = None; + } + } + Ok(Context { device: Mutex::new(device), queue: Arc::new(Mutex::new(queue)), @@ -434,6 +462,7 @@ impl Context { //TODO: determine based on OS version language_version: metal::MTLLanguageVersion::V2_4, supports_dispatch_type: true, + timestamp_counter_set, }, device_information, }) @@ -503,11 +532,36 @@ impl crate::traits::CommandDevice for Context { type SyncPoint = SyncPoint; fn create_command_encoder(&self, desc: super::CommandEncoderDesc) -> CommandEncoder { + let timing_datas = if let Some(ref counter_set) = self.info.timestamp_counter_set { + let mut array = Vec::with_capacity(desc.buffer_count as usize); + let csb_desc = metal::CounterSampleBufferDescriptor::new(); + csb_desc.set_counter_set(counter_set); + csb_desc.set_storage_mode(metal::MTLStorageMode::Shared); + csb_desc.set_sample_count(MAX_TIMESTAMPS); + for i in 0..desc.buffer_count { + csb_desc.set_label(&format!("{}/counter{}", desc.name, i)); + let sample_buffer = self + .device + .lock() + .unwrap() + .new_counter_sample_buffer_with_descriptor(&csb_desc) + .unwrap(); + array.push(TimingData { + sample_buffer, + pass_names: Vec::new(), + }); + } + Some(array.into_boxed_slice()) + } else { + None + }; CommandEncoder { raw: None, name: desc.name.to_string(), queue: Arc::clone(&self.queue), - private_info: self.info.clone(), + supports_dispatch_type: self.info.supports_dispatch_type, + timing_datas, + timings: Vec::new(), } } From 9a28a52c07c8171a4ebfb8f4a1744b98464f1539 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 24 Sep 2024 23:55:07 -0700 Subject: [PATCH 6/9] metal: profiling scopes, disable auto-capture --- blade-graphics/src/metal/command.rs | 23 ++++++++++++++++++++++- blade-graphics/src/metal/mod.rs | 19 +++++++++++++------ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index 5391678a..b2d0f279 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -97,6 +97,25 @@ impl super::TimingData { } impl super::CommandEncoder { + fn begin_pass(&mut self, label: &str) { + if self.enable_debug_groups { + //HACK: close the previous group + if self.has_open_debug_group { + self.raw.as_mut().unwrap().pop_debug_group(); + } else { + self.has_open_debug_group = true; + } + self.raw.as_mut().unwrap().push_debug_group(label); + } + } + + pub(super) fn finish(&mut self) -> metal::CommandBuffer { + if self.has_open_debug_group { + self.raw.as_mut().unwrap().pop_debug_group(); + } + self.raw.take().unwrap() + } + pub fn start(&mut self) { if let Some(ref mut td_array) = self.timing_datas { self.timings.clear(); @@ -121,6 +140,7 @@ impl super::CommandEncoder { } cmd_buf.to_owned() })); + self.has_open_debug_group = false; } pub fn init_texture(&mut self, _texture: super::Texture) {} @@ -130,6 +150,7 @@ impl super::CommandEncoder { } pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder { + self.begin_pass(label); let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::BlitPassDescriptor::new(); @@ -185,7 +206,7 @@ impl super::CommandEncoder { pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder { let raw = objc::rc::autoreleasepool(|| { let descriptor = metal::ComputePassDescriptor::new(); - if self.supports_dispatch_type { + if self.enable_dispatch_type { descriptor.set_dispatch_type(metal::MTLDispatchType::Concurrent); } diff --git a/blade-graphics/src/metal/mod.rs b/blade-graphics/src/metal/mod.rs index a15f0e67..b508f01f 100644 --- a/blade-graphics/src/metal/mod.rs +++ b/blade-graphics/src/metal/mod.rs @@ -44,7 +44,8 @@ impl Frame { #[derive(Debug, Clone)] struct PrivateInfo { language_version: metal::MTLLanguageVersion, - supports_dispatch_type: bool, + enable_debug_groups: bool, + enable_dispatch_type: bool, timestamp_counter_set: Option, } @@ -194,7 +195,9 @@ pub struct CommandEncoder { raw: Option, name: String, queue: Arc>, - supports_dispatch_type: bool, + enable_debug_groups: bool, + enable_dispatch_type: bool, + has_open_debug_group: bool, timing_datas: Option>, timings: Vec<(String, time::Duration)>, } @@ -417,7 +420,8 @@ impl Context { .ok_or(super::NotSupportedError::NoSupportedDeviceFound)?; let queue = device.new_command_queue(); - let capture = if desc.capture { + let auto_capture_everything = false; + let capture = if desc.capture && auto_capture_everything { objc::rc::autoreleasepool(|| { let capture_manager = metal::CaptureManager::shared(); let default_capture_scope = capture_manager.new_capture_scope_with_device(&device); @@ -461,7 +465,8 @@ impl Context { info: PrivateInfo { //TODO: determine based on OS version language_version: metal::MTLLanguageVersion::V2_4, - supports_dispatch_type: true, + enable_debug_groups: desc.capture, + enable_dispatch_type: true, timestamp_counter_set, }, device_information, @@ -559,7 +564,9 @@ impl crate::traits::CommandDevice for Context { raw: None, name: desc.name.to_string(), queue: Arc::clone(&self.queue), - supports_dispatch_type: self.info.supports_dispatch_type, + enable_debug_groups: self.info.enable_debug_groups, + enable_dispatch_type: self.info.enable_dispatch_type, + has_open_debug_group: false, timing_datas, timings: Vec::new(), } @@ -568,7 +575,7 @@ impl crate::traits::CommandDevice for Context { fn destroy_command_encoder(&self, _command_encoder: &mut CommandEncoder) {} fn submit(&self, encoder: &mut CommandEncoder) -> SyncPoint { - let cmd_buf = encoder.raw.take().unwrap(); + let cmd_buf = encoder.finish(); cmd_buf.commit(); SyncPoint { cmd_buf } } From 492f727fea41937f252bddd79597b841add642b1 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 10 Sep 2024 23:26:58 -0700 Subject: [PATCH 7/9] gles: merge Context definition between egl and wasm --- blade-graphics/src/gles/egl.rs | 78 ++++++++++++++++++++-------------- blade-graphics/src/gles/mod.rs | 20 +++++++-- blade-graphics/src/gles/web.rs | 28 ++++++------ examples/bunnymark/main.rs | 8 +++- 4 files changed, 82 insertions(+), 52 deletions(-) diff --git a/blade-graphics/src/gles/egl.rs b/blade-graphics/src/gles/egl.rs index f387c075..fc3fbb58 100644 --- a/blade-graphics/src/gles/egl.rs +++ b/blade-graphics/src/gles/egl.rs @@ -119,12 +119,9 @@ struct ContextInner { glow: glow::Context, } -pub struct Context { +pub struct PlatformContext { wsi: Option, inner: Mutex, - pub(super) capabilities: super::Capabilities, - pub(super) limits: super::Limits, - pub(super) device_information: crate::DeviceInformation, } pub struct ContextLock<'a> { @@ -186,7 +183,7 @@ fn init_egl(desc: &crate::ContextDesc) -> Result<(EglInstance, String), crate::N Ok((egl, client_ext_str)) } -impl Context { +impl super::Context { pub unsafe fn init(desc: crate::ContextDesc) -> Result { let (egl, client_extensions) = init_egl(&desc)?; @@ -209,17 +206,21 @@ impl Context { let egl_context = EglContext::init(&desc, egl, display)?; egl_context.make_current(); - let (glow, capabilities, device_information, limits) = egl_context.load_functions(&desc); + let (glow, capabilities, toggles, device_information, limits) = + egl_context.load_functions(&desc); egl_context.unmake_current(); Ok(Self { - wsi: None, - inner: Mutex::new(ContextInner { - egl: egl_context, - swapchain: None, - glow, - }), + platform: PlatformContext { + wsi: None, + inner: Mutex::new(ContextInner { + egl: egl_context, + swapchain: None, + glow, + }), + }, capabilities, + toggles, limits, device_information, }) @@ -324,24 +325,28 @@ impl Context { let egl_context = EglContext::init(&desc, egl, display)?; egl_context.make_current(); - let (glow, capabilities, device_information, limits) = egl_context.load_functions(&desc); + let (glow, capabilities, toggles, device_information, limits) = + egl_context.load_functions(&desc); let renderbuf = glow.create_renderbuffer().unwrap(); let framebuf = glow.create_framebuffer().unwrap(); egl_context.unmake_current(); Ok(Self { - wsi: Some(WindowSystemInterface { - library: wsi_library.map(Arc::new), - window_handle: window.window_handle().unwrap().as_raw(), - renderbuf, - framebuf, - }), - inner: Mutex::new(ContextInner { - egl: egl_context, - swapchain: None, - glow, - }), + platform: PlatformContext { + wsi: Some(WindowSystemInterface { + library: wsi_library.map(Arc::new), + window_handle: window.window_handle().unwrap().as_raw(), + renderbuf, + framebuf, + }), + inner: Mutex::new(ContextInner { + egl: egl_context, + swapchain: None, + glow, + }), + }, capabilities, + toggles, limits, device_information, }) @@ -350,7 +355,7 @@ impl Context { pub fn resize(&self, config: crate::SurfaceConfig) -> crate::SurfaceInfo { use raw_window_handle::RawWindowHandle as Rwh; - let wsi = self.wsi.as_ref().unwrap(); + let wsi = self.platform.wsi.as_ref().unwrap(); let (mut temp_xlib_handle, mut temp_xcb_handle); #[allow(trivial_casts)] let native_window_ptr = match wsi.window_handle { @@ -399,7 +404,7 @@ impl Context { log::warn!("Unable to forbid exclusive full screen"); } - let mut inner = self.inner.lock().unwrap(); + let mut inner = self.platform.inner.lock().unwrap(); let mut attributes = vec![ egl::RENDER_BUFFER, @@ -501,8 +506,8 @@ impl Context { } pub fn acquire_frame(&self) -> super::Frame { - let wsi = self.wsi.as_ref().unwrap(); - let inner = self.inner.lock().unwrap(); + let wsi = self.platform.wsi.as_ref().unwrap(); + let inner = self.platform.inner.lock().unwrap(); let sc = inner.swapchain.as_ref().unwrap(); super::Frame { texture: super::Texture { @@ -514,14 +519,14 @@ impl Context { } pub(super) fn lock(&self) -> ContextLock { - let inner = self.inner.lock().unwrap(); + let inner = self.platform.inner.lock().unwrap(); inner.egl.make_current(); ContextLock { guard: inner } } pub(super) fn present(&self) { - let inner = self.inner.lock().unwrap(); - let wsi = self.wsi.as_ref().unwrap(); + let inner = self.platform.inner.lock().unwrap(); + let wsi = self.platform.wsi.as_ref().unwrap(); inner.present(wsi); } } @@ -761,6 +766,7 @@ impl EglContext { ) -> ( glow::Context, super::Capabilities, + super::Toggles, crate::DeviceInformation, super::Limits, ) { @@ -815,11 +821,19 @@ impl EglContext { // Therefore, GL_EXT_draw_buffers_indexed is not sufficient. ); + let toggles = super::Toggles { + timing: desc.timing + && (extensions.contains("GL_EXT_disjoint_timer_query") || { + log::warn!("Timing is not supported"); + false + }), + }; + let limits = super::Limits { uniform_buffer_alignment: gl.get_parameter_i32(glow::UNIFORM_BUFFER_OFFSET_ALIGNMENT) as u32, }; - (gl, capabilities, device_information, limits) + (gl, capabilities, toggles, device_information, limits) } } diff --git a/blade-graphics/src/gles/mod.rs b/blade-graphics/src/gles/mod.rs index 40162aec..51262e05 100644 --- a/blade-graphics/src/gles/mod.rs +++ b/blade-graphics/src/gles/mod.rs @@ -5,12 +5,11 @@ mod pipeline; mod platform; mod resource; -type BindTarget = u32; - -pub use platform::Context; use std::{marker::PhantomData, ops::Range}; +type BindTarget = u32; const DEBUG_ID: u32 = 0; +const MAX_TIMEOUT: u64 = 1_000_000_000; // MAX_CLIENT_WAIT_TIMEOUT_WEBGL; bitflags::bitflags! { struct Capabilities: u32 { @@ -24,6 +23,19 @@ struct Limits { uniform_buffer_alignment: u32, } +#[derive(Debug, Default)] +struct Toggles { + timing: bool, +} + +pub struct Context { + platform: platform::PlatformContext, + capabilities: Capabilities, + toggles: Toggles, + limits: Limits, + device_information: crate::DeviceInformation, +} + #[derive(Clone, Copy, Debug, Hash, PartialEq)] pub struct Buffer { raw: glow::Buffer, @@ -483,7 +495,7 @@ impl crate::traits::CommandDevice for Context { timeout_ms as u64 * 1_000_000 }; //TODO: https://github.com/grovesNL/glow/issues/287 - let timeout_ns_i32 = timeout_ns.min(std::i32::MAX as u64) as i32; + let timeout_ns_i32 = timeout_ns.min(MAX_TIMEOUT) as i32; let status = unsafe { gl.client_wait_sync(sp.fence, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns_i32) }; diff --git a/blade-graphics/src/gles/web.rs b/blade-graphics/src/gles/web.rs index 975d2e9f..3d913495 100644 --- a/blade-graphics/src/gles/web.rs +++ b/blade-graphics/src/gles/web.rs @@ -10,17 +10,14 @@ struct Swapchain { extent: Cell, } -pub struct Context { +pub struct PlatformContext { #[allow(unused)] webgl2: web_sys::WebGl2RenderingContext, glow: glow::Context, swapchain: Swapchain, - pub(super) capabilities: super::Capabilities, - pub(super) limits: super::Limits, - pub(super) device_information: crate::DeviceInformation, } -impl Context { +impl super::Context { pub unsafe fn init(_desc: crate::ContextDesc) -> Result { Err(crate::NotSupportedError::PlatformNotSupported) } @@ -82,10 +79,13 @@ impl Context { }; Ok(Self { - webgl2, - glow, - swapchain, + platform: PlatformContext { + webgl2, + glow, + swapchain, + }, capabilities, + toggles: super::Toggles::default(), limits, device_information, }) @@ -93,9 +93,9 @@ impl Context { pub fn resize(&self, config: crate::SurfaceConfig) -> crate::SurfaceInfo { //TODO: create WebGL context here - let sc = &self.swapchain; + let sc = &self.platform.swapchain; let format_desc = super::describe_texture_format(sc.format); - let gl = &self.glow; + let gl = &self.platform.glow; //Note: this code can be shared with EGL unsafe { gl.bind_renderbuffer(glow::RENDERBUFFER, Some(sc.renderbuf)); @@ -123,7 +123,7 @@ impl Context { } pub fn acquire_frame(&self) -> super::Frame { - let sc = &self.swapchain; + let sc = &self.platform.swapchain; let size = sc.extent.get(); super::Frame { texture: super::Texture { @@ -137,13 +137,13 @@ impl Context { /// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to /// do rendering. pub(super) fn lock(&self) -> &glow::Context { - &self.glow + &self.platform.glow } pub(super) fn present(&self) { - let sc = &self.swapchain; + let sc = &self.platform.swapchain; unsafe { - super::present_blit(&self.glow, sc.framebuf, sc.extent.get()); + super::present_blit(&self.platform.glow, sc.framebuf, sc.extent.get()); } } } diff --git a/examples/bunnymark/main.rs b/examples/bunnymark/main.rs index 52b960ca..0f07d6cd 100644 --- a/examples/bunnymark/main.rs +++ b/examples/bunnymark/main.rs @@ -61,7 +61,6 @@ struct Example { impl Example { fn new(window: &winit::window::Window) -> Self { - let window_size = window.inner_size(); let context = unsafe { gpu::Context::init_windowed( window, @@ -75,6 +74,8 @@ impl Example { .unwrap() }; println!("{:?}", context.device_information()); + let window_size = window.inner_size(); + log::info!("Initial size: {:?}", window_size); let surface_info = context.resize(gpu::SurfaceConfig { size: gpu::Extent { @@ -274,6 +275,9 @@ impl Example { } fn render(&mut self) { + if self.window_size == Default::default() { + return; + } let frame = self.context.acquire_frame(); self.command_encoder.start(); @@ -354,7 +358,7 @@ fn main() { { use winit::platform::web::WindowExtWebSys as _; - std::panic::set_hook(Box::new(console_error_panic_hook::hook)); + console_error_panic_hook::set_once(); console_log::init().expect("could not initialize logger"); // On wasm, append the canvas to the document body let canvas = window.canvas().unwrap(); From 3d8081f5081630e3cac6797171c5a7f174dd3523 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 25 Sep 2024 23:51:13 -0700 Subject: [PATCH 8/9] gles: GPU timing support --- blade-graphics/src/gles/command.rs | 74 +++++++++++++++++++++++++++--- blade-graphics/src/gles/egl.rs | 28 ++++++----- blade-graphics/src/gles/mod.rs | 50 +++++++++++++++++++- examples/particle/main.rs | 2 +- 4 files changed, 132 insertions(+), 22 deletions(-) diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs index 25c1258b..6453c72a 100644 --- a/blade-graphics/src/gles/command.rs +++ b/blade-graphics/src/gles/command.rs @@ -81,19 +81,72 @@ impl crate::ShaderBindable for super::AccelerationStructure { } impl super::CommandEncoder { + fn begin_pass(&mut self, label: &str) { + if let Some(ref mut timing_datas) = self.timing_datas { + let td = timing_datas.first_mut().unwrap(); + let id = td.pass_names.len(); + self.commands.push(super::Command::QueryCounter { + query: td.queries[id], + }); + td.pass_names.push(label.to_string()); + } + } + pub fn start(&mut self) { self.commands.clear(); self.plain_data.clear(); self.has_present = false; } + pub(super) fn finish(&mut self, gl: &glow::Context) { + use glow::HasContext as _; + #[allow(trivial_casts)] + if let Some(ref mut timing_datas) = self.timing_datas { + { + let td = timing_datas.first_mut().unwrap(); + let id = td.pass_names.len(); + self.commands.push(super::Command::QueryCounter { + query: td.queries[id], + }); + } + + timing_datas.rotate_left(1); + self.timings.clear(); + let td = timing_datas.first_mut().unwrap(); + if !td.pass_names.is_empty() { + let mut prev = 0; + unsafe { + gl.get_query_parameter_u64_with_offset( + td.queries[0], + glow::QUERY_RESULT, + &mut prev as *mut _ as usize, + ); + } + for (pass_name, &query) in td.pass_names.drain(..).zip(td.queries[1..].iter()) { + let mut result: u64 = 0; + unsafe { + gl.get_query_parameter_u64_with_offset( + query, + glow::QUERY_RESULT, + &mut result as *mut _ as usize, + ); + } + let time = Duration::from_nanos(result - prev); + self.timings.push((pass_name, time)); + prev = result + } + } + } + } + pub fn init_texture(&mut self, _texture: super::Texture) {} pub fn present(&mut self, _frame: super::Frame) { self.has_present = true; } - pub fn transfer(&mut self, _label: &str) -> super::PassEncoder<()> { + pub fn transfer(&mut self, label: &str) -> super::PassEncoder<()> { + self.begin_pass(label); super::PassEncoder { commands: &mut self.commands, plain_data: &mut self.plain_data, @@ -108,7 +161,8 @@ impl super::CommandEncoder { unimplemented!() } - pub fn compute(&mut self, _label: &str) -> super::PassEncoder { + pub fn compute(&mut self, label: &str) -> super::PassEncoder { + self.begin_pass(label); super::PassEncoder { commands: &mut self.commands, plain_data: &mut self.plain_data, @@ -121,9 +175,11 @@ impl super::CommandEncoder { pub fn render( &mut self, - _label: &str, + label: &str, targets: crate::RenderTargetSet, ) -> super::PassEncoder { + self.begin_pass(label); + let mut target_size = [0u16; 2]; let mut invalidate_attachments = Vec::new(); for (i, rt) in targets.colors.iter().enumerate() { @@ -205,7 +261,7 @@ impl super::CommandEncoder { } pub fn timings(&self) -> &[(String, Duration)] { - &[] + &self.timings } } @@ -400,6 +456,7 @@ impl crate::traits::RenderPipelineEncoder for super::PipelineEncoder<'_> { } fn bind_vertex(&mut self, index: u32, vertex_buf: crate::BufferPiece) { + assert_eq!(index, 0); self.commands.push(super::Command::BindVertex { buffer: vertex_buf.buffer.raw, }); @@ -612,9 +669,9 @@ impl super::Command { gl.dispatch_compute_indirect(indirect_buf.offset as i32); } Self::FillBuffer { - ref dst, - size, - value, + dst: ref _dst, + size: _size, + value: _value, } => unimplemented!(), Self::CopyBufferToBuffer { ref src, @@ -988,6 +1045,9 @@ impl super::Command { gl.bind_sampler(slot, None); } } + Self::QueryCounter { query } => { + gl.query_counter(query, glow::TIMESTAMP); + } } } } diff --git a/blade-graphics/src/gles/egl.rs b/blade-graphics/src/gles/egl.rs index fc3fbb58..d7f76c42 100644 --- a/blade-graphics/src/gles/egl.rs +++ b/blade-graphics/src/gles/egl.rs @@ -775,18 +775,22 @@ impl EglContext { .get_proc_address(name) .map_or(ptr::null(), |p| p as *const _) }); - if desc.validation && gl.supports_debug() { - log::info!("Enabling GLES debug output"); - gl.enable(glow::DEBUG_OUTPUT); - gl.debug_message_callback(gl_debug_message_callback); - for &(level, severity) in LOG_LEVEL_SEVERITY.iter() { - gl.debug_message_control( - glow::DONT_CARE, - glow::DONT_CARE, - severity, - &[], - level <= log::max_level(), - ); + if desc.validation { + if gl.supports_debug() { + log::info!("Enabling GLES debug output"); + gl.enable(glow::DEBUG_OUTPUT); + gl.debug_message_callback(gl_debug_message_callback); + for &(level, severity) in LOG_LEVEL_SEVERITY.iter() { + gl.debug_message_control( + glow::DONT_CARE, + glow::DONT_CARE, + severity, + &[], + level <= log::max_level(), + ); + } + } else { + log::warn!("Can't enable validation"); } } diff --git a/blade-graphics/src/gles/mod.rs b/blade-graphics/src/gles/mod.rs index 51262e05..a3cc95da 100644 --- a/blade-graphics/src/gles/mod.rs +++ b/blade-graphics/src/gles/mod.rs @@ -5,16 +5,18 @@ mod pipeline; mod platform; mod resource; -use std::{marker::PhantomData, ops::Range}; +use std::{marker::PhantomData, ops::Range, time::Duration}; type BindTarget = u32; const DEBUG_ID: u32 = 0; const MAX_TIMEOUT: u64 = 1_000_000_000; // MAX_CLIENT_WAIT_TIMEOUT_WEBGL; +const MAX_QUERIES: usize = crate::limits::PASS_COUNT + 1; bitflags::bitflags! { struct Capabilities: u32 { const BUFFER_STORAGE = 1 << 0; const DRAW_BUFFERS_INDEXED = 1 << 1; + const DISJOINT_TIMER_QUERY = 1 << 2; } } @@ -344,6 +346,14 @@ enum Command { binding: ImageBinding, }, ResetAllSamplers, + QueryCounter { + query: glow::Query, + }, +} + +struct TimingData { + pass_names: Vec, + queries: Box<[glow::Query]>, } pub struct CommandEncoder { @@ -352,6 +362,8 @@ pub struct CommandEncoder { plain_data: Vec, has_present: bool, limits: Limits, + timing_datas: Option>, + timings: Vec<(String, Duration)>, } enum PassKind { @@ -428,22 +440,56 @@ impl crate::traits::CommandDevice for Context { type SyncPoint = SyncPoint; fn create_command_encoder(&self, desc: super::CommandEncoderDesc) -> CommandEncoder { + use glow::HasContext as _; + + let timing_datas = if self.toggles.timing { + let gl = self.lock(); + let mut array = Vec::new(); + // Allocating one extra set of timers because we are resolving them + // in submit() as opposed to start(). + for _ in 0..desc.buffer_count + 1 { + array.push(TimingData { + pass_names: Vec::new(), + queries: (0..MAX_QUERIES) + .map(|_| unsafe { gl.create_query().unwrap() }) + .collect(), + }); + } + Some(array.into_boxed_slice()) + } else { + None + }; CommandEncoder { name: desc.name.to_string(), commands: Vec::new(), plain_data: Vec::new(), has_present: false, limits: self.limits.clone(), + timing_datas, + timings: Vec::new(), } } - fn destroy_command_encoder(&self, _command_encoder: &mut CommandEncoder) {} + fn destroy_command_encoder(&self, encoder: &mut CommandEncoder) { + use glow::HasContext as _; + + if let Some(timing_datas) = encoder.timing_datas.take() { + let gl = self.lock(); + for td in timing_datas { + for query in td.queries { + unsafe { gl.delete_query(query) }; + } + } + } + } fn submit(&self, encoder: &mut CommandEncoder) -> SyncPoint { use glow::HasContext as _; let fence = { let gl = self.lock(); + encoder.finish(&gl); + let push_group = !encoder.name.is_empty() && gl.supports_debug(); let ec = unsafe { if push_group { diff --git a/examples/particle/main.rs b/examples/particle/main.rs index 3e9b9590..8ae86d8d 100644 --- a/examples/particle/main.rs +++ b/examples/particle/main.rs @@ -21,7 +21,7 @@ impl Example { gpu::ContextDesc { validation: cfg!(debug_assertions), timing: true, - capture: false, + capture: true, overlay: false, }, ) From 7ae4569368a85980384cfe06ebde97250886be49 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 26 Sep 2024 23:08:14 -0700 Subject: [PATCH 9/9] gles: GPU scopes support --- blade-graphics/src/gles/command.rs | 88 ++++++++++++------------------ blade-graphics/src/gles/egl.rs | 5 ++ blade-graphics/src/gles/mod.rs | 14 ++++- 3 files changed, 54 insertions(+), 53 deletions(-) diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs index 6453c72a..0e9a7097 100644 --- a/blade-graphics/src/gles/command.rs +++ b/blade-graphics/src/gles/command.rs @@ -1,4 +1,4 @@ -use std::time::Duration; +use std::{str, time::Duration}; const COLOR_ATTACHMENTS: &[u32] = &[ glow::COLOR_ATTACHMENT0, @@ -82,6 +82,13 @@ impl crate::ShaderBindable for super::AccelerationStructure { impl super::CommandEncoder { fn begin_pass(&mut self, label: &str) { + if self.needs_scopes { + let start = self.string_data.len(); + self.string_data.extend_from_slice(label.as_bytes()); + self.commands.push(super::Command::PushScope { + name_range: start..self.string_data.len(), + }); + } if let Some(ref mut timing_datas) = self.timing_datas { let td = timing_datas.first_mut().unwrap(); let id = td.pass_names.len(); @@ -92,9 +99,22 @@ impl super::CommandEncoder { } } + fn pass

(&mut self, kind: super::PassKind) -> super::PassEncoder

{ + super::PassEncoder { + commands: &mut self.commands, + plain_data: &mut self.plain_data, + kind, + invalidate_attachments: Vec::new(), + pipeline: Default::default(), + limits: &self.limits, + has_scope: self.needs_scopes, + } + } + pub fn start(&mut self) { self.commands.clear(); self.plain_data.clear(); + self.string_data.clear(); self.has_present = false; } @@ -147,14 +167,7 @@ impl super::CommandEncoder { pub fn transfer(&mut self, label: &str) -> super::PassEncoder<()> { self.begin_pass(label); - super::PassEncoder { - commands: &mut self.commands, - plain_data: &mut self.plain_data, - kind: super::PassKind::Transfer, - invalidate_attachments: Vec::new(), - pipeline: Default::default(), - limits: &self.limits, - } + self.pass(super::PassKind::Transfer) } pub fn acceleration_structure(&mut self, _label: &str) -> super::PassEncoder<()> { @@ -163,14 +176,7 @@ impl super::CommandEncoder { pub fn compute(&mut self, label: &str) -> super::PassEncoder { self.begin_pass(label); - super::PassEncoder { - commands: &mut self.commands, - plain_data: &mut self.plain_data, - kind: super::PassKind::Compute, - invalidate_attachments: Vec::new(), - pipeline: Default::default(), - limits: &self.limits, - } + self.pass(super::PassKind::Compute) } pub fn render( @@ -250,14 +256,9 @@ impl super::CommandEncoder { } } - super::PassEncoder { - commands: &mut self.commands, - plain_data: &mut self.plain_data, - kind: super::PassKind::Render, - invalidate_attachments, - pipeline: Default::default(), - limits: &self.limits, - } + let mut pass = self.pass(super::PassKind::Render); + pass.invalidate_attachments = invalidate_attachments; + pass } pub fn timings(&self) -> &[(String, Duration)] { @@ -329,6 +330,9 @@ impl Drop for super::PassEncoder<'_, T> { self.commands.push(super::Command::ResetFramebuffer); } } + if self.has_scope { + self.commands.push(super::Command::PopScope); + } } } @@ -401,33 +405,6 @@ impl crate::traits::TransferEncoder for super::PassEncoder<'_, ()> { } } -#[hidden_trait::expose] -impl crate::traits::AccelerationStructureEncoder for super::PassEncoder<'_, ()> { - type AccelerationStructure = crate::AccelerationStructure; - type AccelerationStructureMesh = crate::AccelerationStructureMesh; - type BufferPiece = crate::BufferPiece; - - fn build_bottom_level( - &mut self, - _acceleration_structure: super::AccelerationStructure, - _meshes: &[crate::AccelerationStructureMesh], - _scratch_data: crate::BufferPiece, - ) { - unimplemented!() - } - - fn build_top_level( - &mut self, - _acceleration_structure: super::AccelerationStructure, - _bottom_level: &[super::AccelerationStructure], - _instance_count: u32, - _instance_data: crate::BufferPiece, - _scratch_data: crate::BufferPiece, - ) { - unimplemented!() - } -} - #[hidden_trait::expose] impl crate::traits::PipelineEncoder for super::PipelineEncoder<'_> { fn bind(&mut self, group: u32, data: &D) { @@ -1048,6 +1025,13 @@ impl super::Command { Self::QueryCounter { query } => { gl.query_counter(query, glow::TIMESTAMP); } + Self::PushScope { ref name_range } => { + let name = str::from_utf8(&ec.string_data[name_range.clone()]).unwrap(); + gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, super::DEBUG_ID, name); + } + Self::PopScope => { + gl.pop_debug_group(); + } } } } diff --git a/blade-graphics/src/gles/egl.rs b/blade-graphics/src/gles/egl.rs index d7f76c42..3bf90178 100644 --- a/blade-graphics/src/gles/egl.rs +++ b/blade-graphics/src/gles/egl.rs @@ -826,6 +826,11 @@ impl EglContext { ); let toggles = super::Toggles { + scoping: desc.capture + && (gl.supports_debug() || { + log::warn!("Scoping is not supported"); + false + }), timing: desc.timing && (extensions.contains("GL_EXT_disjoint_timer_query") || { log::warn!("Timing is not supported"); diff --git a/blade-graphics/src/gles/mod.rs b/blade-graphics/src/gles/mod.rs index a3cc95da..d3f6dca6 100644 --- a/blade-graphics/src/gles/mod.rs +++ b/blade-graphics/src/gles/mod.rs @@ -5,7 +5,7 @@ mod pipeline; mod platform; mod resource; -use std::{marker::PhantomData, ops::Range, time::Duration}; +use std::{marker::PhantomData, mem, ops::Range, time::Duration}; type BindTarget = u32; const DEBUG_ID: u32 = 0; @@ -27,6 +27,7 @@ struct Limits { #[derive(Debug, Default)] struct Toggles { + scoping: bool, timing: bool, } @@ -349,6 +350,10 @@ enum Command { QueryCounter { query: glow::Query, }, + PushScope { + name_range: Range, + }, + PopScope, } struct TimingData { @@ -360,6 +365,8 @@ pub struct CommandEncoder { name: String, commands: Vec, plain_data: Vec, + string_data: Vec, + needs_scopes: bool, has_present: bool, limits: Limits, timing_datas: Option>, @@ -380,6 +387,7 @@ pub struct PassEncoder<'a, P> { invalidate_attachments: Vec, pipeline: PhantomData

, limits: &'a Limits, + has_scope: bool, } pub type ComputeCommandEncoder<'a> = PassEncoder<'a, ComputePipeline>; @@ -420,6 +428,7 @@ pub struct SyncPoint { struct ExecutionContext { framebuf: glow::Framebuffer, plain_buffer: glow::Buffer, + string_data: Box<[u8]>, } impl Context { @@ -463,6 +472,8 @@ impl crate::traits::CommandDevice for Context { name: desc.name.to_string(), commands: Vec::new(), plain_data: Vec::new(), + string_data: Vec::new(), + needs_scopes: self.toggles.scoping, has_present: false, limits: self.limits.clone(), timing_datas, @@ -510,6 +521,7 @@ impl crate::traits::CommandDevice for Context { ExecutionContext { framebuf, plain_buffer, + string_data: mem::take(&mut encoder.string_data).into_boxed_slice(), } }; for command in encoder.commands.iter() {