diff --git a/blade-egui/src/lib.rs b/blade-egui/src/lib.rs
index 35c7c4f9..533a269b 100644
--- a/blade-egui/src/lib.rs
+++ b/blade-egui/src/lib.rs
@@ -265,7 +265,7 @@ impl GuiPainter {
copies.push((src, dst, extent));
}
- if let mut transfer = command_encoder.transfer() {
+ if let mut transfer = command_encoder.transfer("update egui textures") {
for (src, dst, extent) in copies {
transfer.copy_buffer_to_texture(src, 4 * extent.width, dst, extent);
}
diff --git a/blade-graphics/Cargo.toml b/blade-graphics/Cargo.toml
index 226bd3ca..f6b8fc0a 100644
--- a/blade-graphics/Cargo.toml
+++ b/blade-graphics/Cargo.toml
@@ -22,7 +22,11 @@ raw-window-handle = "0.6"
[target.'cfg(any(target_os = "ios", target_os = "macos"))'.dependencies]
block = "0.1"
core-graphics-types = "0.1"
-metal = "0.29"
+#TODO: switch to crates once https://github.com/gfx-rs/metal-rs/pull/335 is published
+#TODO: switch to upstream once these are merged:
+# - https://github.com/gfx-rs/metal-rs/pull/336
+# - https://github.com/gfx-rs/metal-rs/pull/337
+metal = { git = "https://github.com/kvark/metal-rs", branch = "blade" }
objc = "0.2.5"
naga = { workspace = true, features = ["msl-out"] }
diff --git a/blade-graphics/README.md b/blade-graphics/README.md
index 41376b20..6a1c549a 100644
--- a/blade-graphics/README.md
+++ b/blade-graphics/README.md
@@ -42,7 +42,7 @@ All of these required extensions are supported in software by the driver on any
GLES is also supported at a basic level. It's enabled for `wasm32-unknown-unknown` target, and can also be force-enabled on native:
```bash
-RUSTFLAGS="--cfg gles" CARGO_TARGET_DIR=./target-gl cargo test
+RUSTFLAGS="--cfg gles" CARGO_TARGET_DIR=./target-gl cargo run --example bunnymark
```
This path can be activated on all platforms via Angle library.
diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs
index 0e2d6470..0e9a7097 100644
--- a/blade-graphics/src/gles/command.rs
+++ b/blade-graphics/src/gles/command.rs
@@ -1,3 +1,5 @@
+use std::{str, time::Duration};
+
const COLOR_ATTACHMENTS: &[u32] = &[
glow::COLOR_ATTACHMENT0,
glow::COLOR_ATTACHMENT1,
@@ -79,48 +81,111 @@ impl crate::ShaderBindable for super::AccelerationStructure {
}
impl super::CommandEncoder {
+ fn begin_pass(&mut self, label: &str) {
+ if self.needs_scopes {
+ let start = self.string_data.len();
+ self.string_data.extend_from_slice(label.as_bytes());
+ self.commands.push(super::Command::PushScope {
+ name_range: start..self.string_data.len(),
+ });
+ }
+ if let Some(ref mut timing_datas) = self.timing_datas {
+ let td = timing_datas.first_mut().unwrap();
+ let id = td.pass_names.len();
+ self.commands.push(super::Command::QueryCounter {
+ query: td.queries[id],
+ });
+ td.pass_names.push(label.to_string());
+ }
+ }
+
+ fn pass
(&mut self, kind: super::PassKind) -> super::PassEncoder
{
+ super::PassEncoder {
+ commands: &mut self.commands,
+ plain_data: &mut self.plain_data,
+ kind,
+ invalidate_attachments: Vec::new(),
+ pipeline: Default::default(),
+ limits: &self.limits,
+ has_scope: self.needs_scopes,
+ }
+ }
+
pub fn start(&mut self) {
self.commands.clear();
self.plain_data.clear();
+ self.string_data.clear();
self.has_present = false;
}
+ pub(super) fn finish(&mut self, gl: &glow::Context) {
+ use glow::HasContext as _;
+ #[allow(trivial_casts)]
+ if let Some(ref mut timing_datas) = self.timing_datas {
+ {
+ let td = timing_datas.first_mut().unwrap();
+ let id = td.pass_names.len();
+ self.commands.push(super::Command::QueryCounter {
+ query: td.queries[id],
+ });
+ }
+
+ timing_datas.rotate_left(1);
+ self.timings.clear();
+ let td = timing_datas.first_mut().unwrap();
+ if !td.pass_names.is_empty() {
+ let mut prev = 0;
+ unsafe {
+ gl.get_query_parameter_u64_with_offset(
+ td.queries[0],
+ glow::QUERY_RESULT,
+ &mut prev as *mut _ as usize,
+ );
+ }
+ for (pass_name, &query) in td.pass_names.drain(..).zip(td.queries[1..].iter()) {
+ let mut result: u64 = 0;
+ unsafe {
+ gl.get_query_parameter_u64_with_offset(
+ query,
+ glow::QUERY_RESULT,
+ &mut result as *mut _ as usize,
+ );
+ }
+ let time = Duration::from_nanos(result - prev);
+ self.timings.push((pass_name, time));
+ prev = result
+ }
+ }
+ }
+ }
+
pub fn init_texture(&mut self, _texture: super::Texture) {}
pub fn present(&mut self, _frame: super::Frame) {
self.has_present = true;
}
- pub fn transfer(&mut self) -> super::PassEncoder<()> {
- super::PassEncoder {
- commands: &mut self.commands,
- plain_data: &mut self.plain_data,
- kind: super::PassKind::Transfer,
- invalidate_attachments: Vec::new(),
- pipeline: Default::default(),
- limits: &self.limits,
- }
+ pub fn transfer(&mut self, label: &str) -> super::PassEncoder<()> {
+ self.begin_pass(label);
+ self.pass(super::PassKind::Transfer)
}
- pub fn acceleration_structure(&mut self) -> super::PassEncoder<()> {
+ pub fn acceleration_structure(&mut self, _label: &str) -> super::PassEncoder<()> {
unimplemented!()
}
- pub fn compute(&mut self) -> super::PassEncoder {
- super::PassEncoder {
- commands: &mut self.commands,
- plain_data: &mut self.plain_data,
- kind: super::PassKind::Compute,
- invalidate_attachments: Vec::new(),
- pipeline: Default::default(),
- limits: &self.limits,
- }
+ pub fn compute(&mut self, label: &str) -> super::PassEncoder {
+ self.begin_pass(label);
+ self.pass(super::PassKind::Compute)
}
pub fn render(
&mut self,
+ label: &str,
targets: crate::RenderTargetSet,
) -> super::PassEncoder {
+ self.begin_pass(label);
+
let mut target_size = [0u16; 2];
let mut invalidate_attachments = Vec::new();
for (i, rt) in targets.colors.iter().enumerate() {
@@ -191,14 +256,13 @@ impl super::CommandEncoder {
}
}
- super::PassEncoder {
- commands: &mut self.commands,
- plain_data: &mut self.plain_data,
- kind: super::PassKind::Render,
- invalidate_attachments,
- pipeline: Default::default(),
- limits: &self.limits,
- }
+ let mut pass = self.pass(super::PassKind::Render);
+ pass.invalidate_attachments = invalidate_attachments;
+ pass
+ }
+
+ pub fn timings(&self) -> &[(String, Duration)] {
+ &self.timings
}
}
@@ -266,6 +330,9 @@ impl Drop for super::PassEncoder<'_, T> {
self.commands.push(super::Command::ResetFramebuffer);
}
}
+ if self.has_scope {
+ self.commands.push(super::Command::PopScope);
+ }
}
}
@@ -338,33 +405,6 @@ impl crate::traits::TransferEncoder for super::PassEncoder<'_, ()> {
}
}
-#[hidden_trait::expose]
-impl crate::traits::AccelerationStructureEncoder for super::PassEncoder<'_, ()> {
- type AccelerationStructure = crate::AccelerationStructure;
- type AccelerationStructureMesh = crate::AccelerationStructureMesh;
- type BufferPiece = crate::BufferPiece;
-
- fn build_bottom_level(
- &mut self,
- _acceleration_structure: super::AccelerationStructure,
- _meshes: &[crate::AccelerationStructureMesh],
- _scratch_data: crate::BufferPiece,
- ) {
- unimplemented!()
- }
-
- fn build_top_level(
- &mut self,
- _acceleration_structure: super::AccelerationStructure,
- _bottom_level: &[super::AccelerationStructure],
- _instance_count: u32,
- _instance_data: crate::BufferPiece,
- _scratch_data: crate::BufferPiece,
- ) {
- unimplemented!()
- }
-}
-
#[hidden_trait::expose]
impl crate::traits::PipelineEncoder for super::PipelineEncoder<'_> {
fn bind(&mut self, group: u32, data: &D) {
@@ -393,6 +433,7 @@ impl crate::traits::RenderPipelineEncoder for super::PipelineEncoder<'_> {
}
fn bind_vertex(&mut self, index: u32, vertex_buf: crate::BufferPiece) {
+ assert_eq!(index, 0);
self.commands.push(super::Command::BindVertex {
buffer: vertex_buf.buffer.raw,
});
@@ -605,9 +646,9 @@ impl super::Command {
gl.dispatch_compute_indirect(indirect_buf.offset as i32);
}
Self::FillBuffer {
- ref dst,
- size,
- value,
+ dst: ref _dst,
+ size: _size,
+ value: _value,
} => unimplemented!(),
Self::CopyBufferToBuffer {
ref src,
@@ -981,6 +1022,16 @@ impl super::Command {
gl.bind_sampler(slot, None);
}
}
+ Self::QueryCounter { query } => {
+ gl.query_counter(query, glow::TIMESTAMP);
+ }
+ Self::PushScope { ref name_range } => {
+ let name = str::from_utf8(&ec.string_data[name_range.clone()]).unwrap();
+ gl.push_debug_group(glow::DEBUG_SOURCE_APPLICATION, super::DEBUG_ID, name);
+ }
+ Self::PopScope => {
+ gl.pop_debug_group();
+ }
}
}
}
diff --git a/blade-graphics/src/gles/egl.rs b/blade-graphics/src/gles/egl.rs
index f387c075..3bf90178 100644
--- a/blade-graphics/src/gles/egl.rs
+++ b/blade-graphics/src/gles/egl.rs
@@ -119,12 +119,9 @@ struct ContextInner {
glow: glow::Context,
}
-pub struct Context {
+pub struct PlatformContext {
wsi: Option,
inner: Mutex,
- pub(super) capabilities: super::Capabilities,
- pub(super) limits: super::Limits,
- pub(super) device_information: crate::DeviceInformation,
}
pub struct ContextLock<'a> {
@@ -186,7 +183,7 @@ fn init_egl(desc: &crate::ContextDesc) -> Result<(EglInstance, String), crate::N
Ok((egl, client_ext_str))
}
-impl Context {
+impl super::Context {
pub unsafe fn init(desc: crate::ContextDesc) -> Result {
let (egl, client_extensions) = init_egl(&desc)?;
@@ -209,17 +206,21 @@ impl Context {
let egl_context = EglContext::init(&desc, egl, display)?;
egl_context.make_current();
- let (glow, capabilities, device_information, limits) = egl_context.load_functions(&desc);
+ let (glow, capabilities, toggles, device_information, limits) =
+ egl_context.load_functions(&desc);
egl_context.unmake_current();
Ok(Self {
- wsi: None,
- inner: Mutex::new(ContextInner {
- egl: egl_context,
- swapchain: None,
- glow,
- }),
+ platform: PlatformContext {
+ wsi: None,
+ inner: Mutex::new(ContextInner {
+ egl: egl_context,
+ swapchain: None,
+ glow,
+ }),
+ },
capabilities,
+ toggles,
limits,
device_information,
})
@@ -324,24 +325,28 @@ impl Context {
let egl_context = EglContext::init(&desc, egl, display)?;
egl_context.make_current();
- let (glow, capabilities, device_information, limits) = egl_context.load_functions(&desc);
+ let (glow, capabilities, toggles, device_information, limits) =
+ egl_context.load_functions(&desc);
let renderbuf = glow.create_renderbuffer().unwrap();
let framebuf = glow.create_framebuffer().unwrap();
egl_context.unmake_current();
Ok(Self {
- wsi: Some(WindowSystemInterface {
- library: wsi_library.map(Arc::new),
- window_handle: window.window_handle().unwrap().as_raw(),
- renderbuf,
- framebuf,
- }),
- inner: Mutex::new(ContextInner {
- egl: egl_context,
- swapchain: None,
- glow,
- }),
+ platform: PlatformContext {
+ wsi: Some(WindowSystemInterface {
+ library: wsi_library.map(Arc::new),
+ window_handle: window.window_handle().unwrap().as_raw(),
+ renderbuf,
+ framebuf,
+ }),
+ inner: Mutex::new(ContextInner {
+ egl: egl_context,
+ swapchain: None,
+ glow,
+ }),
+ },
capabilities,
+ toggles,
limits,
device_information,
})
@@ -350,7 +355,7 @@ impl Context {
pub fn resize(&self, config: crate::SurfaceConfig) -> crate::SurfaceInfo {
use raw_window_handle::RawWindowHandle as Rwh;
- let wsi = self.wsi.as_ref().unwrap();
+ let wsi = self.platform.wsi.as_ref().unwrap();
let (mut temp_xlib_handle, mut temp_xcb_handle);
#[allow(trivial_casts)]
let native_window_ptr = match wsi.window_handle {
@@ -399,7 +404,7 @@ impl Context {
log::warn!("Unable to forbid exclusive full screen");
}
- let mut inner = self.inner.lock().unwrap();
+ let mut inner = self.platform.inner.lock().unwrap();
let mut attributes = vec![
egl::RENDER_BUFFER,
@@ -501,8 +506,8 @@ impl Context {
}
pub fn acquire_frame(&self) -> super::Frame {
- let wsi = self.wsi.as_ref().unwrap();
- let inner = self.inner.lock().unwrap();
+ let wsi = self.platform.wsi.as_ref().unwrap();
+ let inner = self.platform.inner.lock().unwrap();
let sc = inner.swapchain.as_ref().unwrap();
super::Frame {
texture: super::Texture {
@@ -514,14 +519,14 @@ impl Context {
}
pub(super) fn lock(&self) -> ContextLock {
- let inner = self.inner.lock().unwrap();
+ let inner = self.platform.inner.lock().unwrap();
inner.egl.make_current();
ContextLock { guard: inner }
}
pub(super) fn present(&self) {
- let inner = self.inner.lock().unwrap();
- let wsi = self.wsi.as_ref().unwrap();
+ let inner = self.platform.inner.lock().unwrap();
+ let wsi = self.platform.wsi.as_ref().unwrap();
inner.present(wsi);
}
}
@@ -761,6 +766,7 @@ impl EglContext {
) -> (
glow::Context,
super::Capabilities,
+ super::Toggles,
crate::DeviceInformation,
super::Limits,
) {
@@ -769,18 +775,22 @@ impl EglContext {
.get_proc_address(name)
.map_or(ptr::null(), |p| p as *const _)
});
- if desc.validation && gl.supports_debug() {
- log::info!("Enabling GLES debug output");
- gl.enable(glow::DEBUG_OUTPUT);
- gl.debug_message_callback(gl_debug_message_callback);
- for &(level, severity) in LOG_LEVEL_SEVERITY.iter() {
- gl.debug_message_control(
- glow::DONT_CARE,
- glow::DONT_CARE,
- severity,
- &[],
- level <= log::max_level(),
- );
+ if desc.validation {
+ if gl.supports_debug() {
+ log::info!("Enabling GLES debug output");
+ gl.enable(glow::DEBUG_OUTPUT);
+ gl.debug_message_callback(gl_debug_message_callback);
+ for &(level, severity) in LOG_LEVEL_SEVERITY.iter() {
+ gl.debug_message_control(
+ glow::DONT_CARE,
+ glow::DONT_CARE,
+ severity,
+ &[],
+ level <= log::max_level(),
+ );
+ }
+ } else {
+ log::warn!("Can't enable validation");
}
}
@@ -815,11 +825,24 @@ impl EglContext {
// Therefore, GL_EXT_draw_buffers_indexed is not sufficient.
);
+ let toggles = super::Toggles {
+ scoping: desc.capture
+ && (gl.supports_debug() || {
+ log::warn!("Scoping is not supported");
+ false
+ }),
+ timing: desc.timing
+ && (extensions.contains("GL_EXT_disjoint_timer_query") || {
+ log::warn!("Timing is not supported");
+ false
+ }),
+ };
+
let limits = super::Limits {
uniform_buffer_alignment: gl.get_parameter_i32(glow::UNIFORM_BUFFER_OFFSET_ALIGNMENT)
as u32,
};
- (gl, capabilities, device_information, limits)
+ (gl, capabilities, toggles, device_information, limits)
}
}
diff --git a/blade-graphics/src/gles/mod.rs b/blade-graphics/src/gles/mod.rs
index 40162aec..d3f6dca6 100644
--- a/blade-graphics/src/gles/mod.rs
+++ b/blade-graphics/src/gles/mod.rs
@@ -5,17 +5,18 @@ mod pipeline;
mod platform;
mod resource;
-type BindTarget = u32;
-
-pub use platform::Context;
-use std::{marker::PhantomData, ops::Range};
+use std::{marker::PhantomData, mem, ops::Range, time::Duration};
+type BindTarget = u32;
const DEBUG_ID: u32 = 0;
+const MAX_TIMEOUT: u64 = 1_000_000_000; // MAX_CLIENT_WAIT_TIMEOUT_WEBGL;
+const MAX_QUERIES: usize = crate::limits::PASS_COUNT + 1;
bitflags::bitflags! {
struct Capabilities: u32 {
const BUFFER_STORAGE = 1 << 0;
const DRAW_BUFFERS_INDEXED = 1 << 1;
+ const DISJOINT_TIMER_QUERY = 1 << 2;
}
}
@@ -24,6 +25,20 @@ struct Limits {
uniform_buffer_alignment: u32,
}
+#[derive(Debug, Default)]
+struct Toggles {
+ scoping: bool,
+ timing: bool,
+}
+
+pub struct Context {
+ platform: platform::PlatformContext,
+ capabilities: Capabilities,
+ toggles: Toggles,
+ limits: Limits,
+ device_information: crate::DeviceInformation,
+}
+
#[derive(Clone, Copy, Debug, Hash, PartialEq)]
pub struct Buffer {
raw: glow::Buffer,
@@ -332,14 +347,30 @@ enum Command {
binding: ImageBinding,
},
ResetAllSamplers,
+ QueryCounter {
+ query: glow::Query,
+ },
+ PushScope {
+ name_range: Range,
+ },
+ PopScope,
+}
+
+struct TimingData {
+ pass_names: Vec,
+ queries: Box<[glow::Query]>,
}
pub struct CommandEncoder {
name: String,
commands: Vec,
plain_data: Vec,
+ string_data: Vec,
+ needs_scopes: bool,
has_present: bool,
limits: Limits,
+ timing_datas: Option>,
+ timings: Vec<(String, Duration)>,
}
enum PassKind {
@@ -356,6 +387,7 @@ pub struct PassEncoder<'a, P> {
invalidate_attachments: Vec,
pipeline: PhantomData,
limits: &'a Limits,
+ has_scope: bool,
}
pub type ComputeCommandEncoder<'a> = PassEncoder<'a, ComputePipeline>;
@@ -396,6 +428,7 @@ pub struct SyncPoint {
struct ExecutionContext {
framebuf: glow::Framebuffer,
plain_buffer: glow::Buffer,
+ string_data: Box<[u8]>,
}
impl Context {
@@ -416,22 +449,58 @@ impl crate::traits::CommandDevice for Context {
type SyncPoint = SyncPoint;
fn create_command_encoder(&self, desc: super::CommandEncoderDesc) -> CommandEncoder {
+ use glow::HasContext as _;
+
+ let timing_datas = if self.toggles.timing {
+ let gl = self.lock();
+ let mut array = Vec::new();
+ // Allocating one extra set of timers because we are resolving them
+ // in submit() as opposed to start().
+ for _ in 0..desc.buffer_count + 1 {
+ array.push(TimingData {
+ pass_names: Vec::new(),
+ queries: (0..MAX_QUERIES)
+ .map(|_| unsafe { gl.create_query().unwrap() })
+ .collect(),
+ });
+ }
+ Some(array.into_boxed_slice())
+ } else {
+ None
+ };
CommandEncoder {
name: desc.name.to_string(),
commands: Vec::new(),
plain_data: Vec::new(),
+ string_data: Vec::new(),
+ needs_scopes: self.toggles.scoping,
has_present: false,
limits: self.limits.clone(),
+ timing_datas,
+ timings: Vec::new(),
}
}
- fn destroy_command_encoder(&self, _command_encoder: &mut CommandEncoder) {}
+ fn destroy_command_encoder(&self, encoder: &mut CommandEncoder) {
+ use glow::HasContext as _;
+
+ if let Some(timing_datas) = encoder.timing_datas.take() {
+ let gl = self.lock();
+ for td in timing_datas {
+ for query in td.queries {
+ unsafe { gl.delete_query(query) };
+ }
+ }
+ }
+ }
fn submit(&self, encoder: &mut CommandEncoder) -> SyncPoint {
use glow::HasContext as _;
let fence = {
let gl = self.lock();
+ encoder.finish(&gl);
+
let push_group = !encoder.name.is_empty() && gl.supports_debug();
let ec = unsafe {
if push_group {
@@ -452,6 +521,7 @@ impl crate::traits::CommandDevice for Context {
ExecutionContext {
framebuf,
plain_buffer,
+ string_data: mem::take(&mut encoder.string_data).into_boxed_slice(),
}
};
for command in encoder.commands.iter() {
@@ -483,7 +553,7 @@ impl crate::traits::CommandDevice for Context {
timeout_ms as u64 * 1_000_000
};
//TODO: https://github.com/grovesNL/glow/issues/287
- let timeout_ns_i32 = timeout_ns.min(std::i32::MAX as u64) as i32;
+ let timeout_ns_i32 = timeout_ns.min(MAX_TIMEOUT) as i32;
let status =
unsafe { gl.client_wait_sync(sp.fence, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns_i32) };
diff --git a/blade-graphics/src/gles/web.rs b/blade-graphics/src/gles/web.rs
index 975d2e9f..3d913495 100644
--- a/blade-graphics/src/gles/web.rs
+++ b/blade-graphics/src/gles/web.rs
@@ -10,17 +10,14 @@ struct Swapchain {
extent: Cell,
}
-pub struct Context {
+pub struct PlatformContext {
#[allow(unused)]
webgl2: web_sys::WebGl2RenderingContext,
glow: glow::Context,
swapchain: Swapchain,
- pub(super) capabilities: super::Capabilities,
- pub(super) limits: super::Limits,
- pub(super) device_information: crate::DeviceInformation,
}
-impl Context {
+impl super::Context {
pub unsafe fn init(_desc: crate::ContextDesc) -> Result {
Err(crate::NotSupportedError::PlatformNotSupported)
}
@@ -82,10 +79,13 @@ impl Context {
};
Ok(Self {
- webgl2,
- glow,
- swapchain,
+ platform: PlatformContext {
+ webgl2,
+ glow,
+ swapchain,
+ },
capabilities,
+ toggles: super::Toggles::default(),
limits,
device_information,
})
@@ -93,9 +93,9 @@ impl Context {
pub fn resize(&self, config: crate::SurfaceConfig) -> crate::SurfaceInfo {
//TODO: create WebGL context here
- let sc = &self.swapchain;
+ let sc = &self.platform.swapchain;
let format_desc = super::describe_texture_format(sc.format);
- let gl = &self.glow;
+ let gl = &self.platform.glow;
//Note: this code can be shared with EGL
unsafe {
gl.bind_renderbuffer(glow::RENDERBUFFER, Some(sc.renderbuf));
@@ -123,7 +123,7 @@ impl Context {
}
pub fn acquire_frame(&self) -> super::Frame {
- let sc = &self.swapchain;
+ let sc = &self.platform.swapchain;
let size = sc.extent.get();
super::Frame {
texture: super::Texture {
@@ -137,13 +137,13 @@ impl Context {
/// Obtain a lock to the EGL context and get handle to the [`glow::Context`] that can be used to
/// do rendering.
pub(super) fn lock(&self) -> &glow::Context {
- &self.glow
+ &self.platform.glow
}
pub(super) fn present(&self) {
- let sc = &self.swapchain;
+ let sc = &self.platform.swapchain;
unsafe {
- super::present_blit(&self.glow, sc.framebuf, sc.extent.get());
+ super::present_blit(&self.platform.glow, sc.framebuf, sc.extent.get());
}
}
}
diff --git a/blade-graphics/src/lib.rs b/blade-graphics/src/lib.rs
index 02e973f2..d354aca1 100644
--- a/blade-graphics/src/lib.rs
+++ b/blade-graphics/src/lib.rs
@@ -72,9 +72,15 @@ mod shader;
mod traits;
pub mod util;
pub mod limits {
+ /// Max number of passes inside a command encoder.
+ pub const PASS_COUNT: usize = 100;
+ /// Max plain data size for a pipeline.
pub const PLAIN_DATA_SIZE: u32 = 256;
+ /// Max number of resources in a bind group.
pub const RESOURCES_IN_GROUP: u32 = 8;
+ /// Min storage buffer alignment.
pub const STORAGE_BUFFER_ALIGNMENT: u64 = 256;
+ /// Min acceleration structure scratch buffer alignment.
pub const ACCELERATION_STRUCTURE_SCRATCH_ALIGNMENT: u64 = 256;
}
@@ -87,6 +93,8 @@ pub struct ContextDesc {
/// Enable validation of the GAPI, shaders,
/// and insert crash markers into command buffers.
pub validation: bool,
+ /// Enable GPU timing of all passes.
+ pub timing: bool,
/// Enable capture support with GAPI tools.
pub capture: bool,
/// Enable GAPI overlay.
diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs
index c9fa1cf7..b2d0f279 100644
--- a/blade-graphics/src/metal/command.rs
+++ b/blade-graphics/src/metal/command.rs
@@ -1,4 +1,4 @@
-use std::{marker::PhantomData, mem};
+use std::{marker::PhantomData, mem, time::Duration};
impl crate::ShaderBindable for T {
fn bind_to(&self, ctx: &mut super::PipelineContext, index: u32) {
@@ -88,8 +88,50 @@ impl crate::ShaderBindable for crate::AccelerationStructure {
}
}
+impl super::TimingData {
+ fn add(&mut self, label: &str) -> u64 {
+ let counter_index = self.pass_names.len() as u64 * 2;
+ self.pass_names.push(label.to_string());
+ counter_index
+ }
+}
+
impl super::CommandEncoder {
+ fn begin_pass(&mut self, label: &str) {
+ if self.enable_debug_groups {
+ //HACK: close the previous group
+ if self.has_open_debug_group {
+ self.raw.as_mut().unwrap().pop_debug_group();
+ } else {
+ self.has_open_debug_group = true;
+ }
+ self.raw.as_mut().unwrap().push_debug_group(label);
+ }
+ }
+
+ pub(super) fn finish(&mut self) -> metal::CommandBuffer {
+ if self.has_open_debug_group {
+ self.raw.as_mut().unwrap().pop_debug_group();
+ }
+ self.raw.take().unwrap()
+ }
+
pub fn start(&mut self) {
+ if let Some(ref mut td_array) = self.timing_datas {
+ self.timings.clear();
+ td_array.rotate_left(1);
+ let td = td_array.first_mut().unwrap();
+ if !td.pass_names.is_empty() {
+ let counters = td
+ .sample_buffer
+ .resolve_counter_range(metal::NSRange::new(0, td.pass_names.len() as u64 * 2));
+ for (name, chunk) in td.pass_names.drain(..).zip(counters.chunks(2)) {
+ let duration = Duration::from_nanos(chunk[1] - chunk[0]);
+ self.timings.push((name, duration));
+ }
+ }
+ }
+
let queue = self.queue.lock().unwrap();
self.raw = Some(objc::rc::autoreleasepool(|| {
let cmd_buf = queue.new_command_buffer_with_unretained_references();
@@ -98,6 +140,7 @@ impl super::CommandEncoder {
}
cmd_buf.to_owned()
}));
+ self.has_open_debug_group = false;
}
pub fn init_texture(&mut self, _texture: super::Texture) {}
@@ -106,12 +149,24 @@ impl super::CommandEncoder {
self.raw.as_mut().unwrap().present_drawable(&frame.drawable);
}
- pub fn transfer(&mut self) -> super::TransferCommandEncoder {
+ pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder {
+ self.begin_pass(label);
let raw = objc::rc::autoreleasepool(|| {
+ let descriptor = metal::BlitPassDescriptor::new();
+
+ if let Some(ref mut td_array) = self.timing_datas {
+ let td = td_array.first_mut().unwrap();
+ let counter_index = td.add(label);
+ let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap();
+ sba.set_sample_buffer(&td.sample_buffer);
+ sba.set_start_of_encoder_sample_index(counter_index);
+ sba.set_end_of_encoder_sample_index(counter_index + 1);
+ }
+
self.raw
.as_mut()
.unwrap()
- .new_blit_command_encoder()
+ .blit_command_encoder_with_descriptor(&descriptor)
.to_owned()
});
super::TransferCommandEncoder {
@@ -120,8 +175,22 @@ impl super::CommandEncoder {
}
}
- pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder {
+ pub fn acceleration_structure(
+ &mut self,
+ label: &str,
+ ) -> super::AccelerationStructureCommandEncoder {
let raw = objc::rc::autoreleasepool(|| {
+ let descriptor = metal::AccelerationStructurePassDescriptor::new();
+
+ if let Some(ref mut td_array) = self.timing_datas {
+ let td = td_array.first_mut().unwrap();
+ let counter_index = td.add(label);
+ let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap();
+ sba.set_sample_buffer(&td.sample_buffer);
+ sba.set_start_of_encoder_sample_index(counter_index);
+ sba.set_end_of_encoder_sample_index(counter_index + 1);
+ }
+
self.raw
.as_mut()
.unwrap()
@@ -134,12 +203,26 @@ impl super::CommandEncoder {
}
}
- pub fn compute(&mut self) -> super::ComputeCommandEncoder {
+ pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder {
let raw = objc::rc::autoreleasepool(|| {
+ let descriptor = metal::ComputePassDescriptor::new();
+ if self.enable_dispatch_type {
+ descriptor.set_dispatch_type(metal::MTLDispatchType::Concurrent);
+ }
+
+ if let Some(ref mut td_array) = self.timing_datas {
+ let td = td_array.first_mut().unwrap();
+ let counter_index = td.add(label);
+ let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap();
+ sba.set_sample_buffer(&td.sample_buffer);
+ sba.set_start_of_encoder_sample_index(counter_index);
+ sba.set_end_of_encoder_sample_index(counter_index + 1);
+ }
+
self.raw
.as_mut()
.unwrap()
- .new_compute_command_encoder()
+ .compute_command_encoder_with_descriptor(&descriptor)
.to_owned()
});
super::ComputeCommandEncoder {
@@ -148,7 +231,11 @@ impl super::CommandEncoder {
}
}
- pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder {
+ pub fn render(
+ &mut self,
+ label: &str,
+ targets: crate::RenderTargetSet,
+ ) -> super::RenderCommandEncoder {
let raw = objc::rc::autoreleasepool(|| {
let descriptor = metal::RenderPassDescriptor::new();
@@ -205,6 +292,15 @@ impl super::CommandEncoder {
at_descriptor.set_store_action(store_action);
}
+ if let Some(ref mut td_array) = self.timing_datas {
+ let td = td_array.first_mut().unwrap();
+ let counter_index = td.add(label);
+ let sba = descriptor.sample_buffer_attachments().object_at(0).unwrap();
+ sba.set_sample_buffer(&td.sample_buffer);
+ sba.set_start_of_vertex_sample_index(counter_index);
+ sba.set_end_of_fragment_sample_index(counter_index + 1);
+ }
+
self.raw
.as_mut()
.unwrap()
@@ -217,6 +313,10 @@ impl super::CommandEncoder {
phantom: PhantomData,
}
}
+
+ pub fn timings(&self) -> &[(String, Duration)] {
+ &self.timings
+ }
}
#[hidden_trait::expose]
diff --git a/blade-graphics/src/metal/mod.rs b/blade-graphics/src/metal/mod.rs
index 727f2351..b508f01f 100644
--- a/blade-graphics/src/metal/mod.rs
+++ b/blade-graphics/src/metal/mod.rs
@@ -12,6 +12,8 @@ mod pipeline;
mod resource;
mod surface;
+const MAX_TIMESTAMPS: u64 = crate::limits::PASS_COUNT as u64 * 2;
+
struct Surface {
view: *mut objc::runtime::Object,
render_layer: metal::MetalLayer,
@@ -39,8 +41,12 @@ impl Frame {
}
}
-struct DeviceInfo {
+#[derive(Debug, Clone)]
+struct PrivateInfo {
language_version: metal::MTLLanguageVersion,
+ enable_debug_groups: bool,
+ enable_dispatch_type: bool,
+ timestamp_counter_set: Option,
}
pub struct Context {
@@ -48,7 +54,7 @@ pub struct Context {
queue: Arc>,
surface: Option>,
capture: Option,
- info: DeviceInfo,
+ info: PrivateInfo,
device_information: crate::DeviceInformation,
}
@@ -178,11 +184,22 @@ pub struct SyncPoint {
cmd_buf: metal::CommandBuffer,
}
+#[derive(Debug)]
+struct TimingData {
+ pass_names: Vec,
+ sample_buffer: metal::CounterSampleBuffer,
+}
+
#[derive(Debug)]
pub struct CommandEncoder {
raw: Option,
name: String,
queue: Arc>,
+ enable_debug_groups: bool,
+ enable_dispatch_type: bool,
+ has_open_debug_group: bool,
+ timing_datas: Option>,
+ timings: Vec<(String, time::Duration)>,
}
#[derive(Debug)]
@@ -403,7 +420,8 @@ impl Context {
.ok_or(super::NotSupportedError::NoSupportedDeviceFound)?;
let queue = device.new_command_queue();
- let capture = if desc.capture {
+ let auto_capture_everything = false;
+ let capture = if desc.capture && auto_capture_everything {
objc::rc::autoreleasepool(|| {
let capture_manager = metal::CaptureManager::shared();
let default_capture_scope = capture_manager.new_capture_scope_with_device(&device);
@@ -422,14 +440,34 @@ impl Context {
driver_info: "".to_string(),
};
+ let mut timestamp_counter_set = None;
+ if desc.timing {
+ for counter_set in device.counter_sets() {
+ if counter_set.name() == "timestamp" {
+ timestamp_counter_set = Some(counter_set);
+ }
+ }
+ if timestamp_counter_set.is_none() {
+ log::warn!("Timing counters are not supported by the device");
+ } else if !device
+ .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary)
+ {
+ log::warn!("Timing counters do not support stage boundary");
+ timestamp_counter_set = None;
+ }
+ }
+
Ok(Context {
device: Mutex::new(device),
queue: Arc::new(Mutex::new(queue)),
surface: None,
capture,
- info: DeviceInfo {
+ info: PrivateInfo {
//TODO: determine based on OS version
language_version: metal::MTLLanguageVersion::V2_4,
+ enable_debug_groups: desc.capture,
+ enable_dispatch_type: true,
+ timestamp_counter_set,
},
device_information,
})
@@ -499,17 +537,45 @@ impl crate::traits::CommandDevice for Context {
type SyncPoint = SyncPoint;
fn create_command_encoder(&self, desc: super::CommandEncoderDesc) -> CommandEncoder {
+ let timing_datas = if let Some(ref counter_set) = self.info.timestamp_counter_set {
+ let mut array = Vec::with_capacity(desc.buffer_count as usize);
+ let csb_desc = metal::CounterSampleBufferDescriptor::new();
+ csb_desc.set_counter_set(counter_set);
+ csb_desc.set_storage_mode(metal::MTLStorageMode::Shared);
+ csb_desc.set_sample_count(MAX_TIMESTAMPS);
+ for i in 0..desc.buffer_count {
+ csb_desc.set_label(&format!("{}/counter{}", desc.name, i));
+ let sample_buffer = self
+ .device
+ .lock()
+ .unwrap()
+ .new_counter_sample_buffer_with_descriptor(&csb_desc)
+ .unwrap();
+ array.push(TimingData {
+ sample_buffer,
+ pass_names: Vec::new(),
+ });
+ }
+ Some(array.into_boxed_slice())
+ } else {
+ None
+ };
CommandEncoder {
raw: None,
name: desc.name.to_string(),
queue: Arc::clone(&self.queue),
+ enable_debug_groups: self.info.enable_debug_groups,
+ enable_dispatch_type: self.info.enable_dispatch_type,
+ has_open_debug_group: false,
+ timing_datas,
+ timings: Vec::new(),
}
}
fn destroy_command_encoder(&self, _command_encoder: &mut CommandEncoder) {}
fn submit(&self, encoder: &mut CommandEncoder) -> SyncPoint {
- let cmd_buf = encoder.raw.take().unwrap();
+ let cmd_buf = encoder.finish();
cmd_buf.commit();
SyncPoint { cmd_buf }
}
diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs
index 1206d9b2..9de98774 100644
--- a/blade-graphics/src/vulkan/command.rs
+++ b/blade-graphics/src/vulkan/command.rs
@@ -1,5 +1,5 @@
use ash::vk;
-use std::str;
+use std::{str, time::Duration};
impl super::CrashHandler {
fn add_marker(&mut self, marker: &str) -> u32 {
@@ -197,8 +197,16 @@ fn map_render_target(rt: &crate::RenderTarget) -> vk::RenderingAttachmentInfo<'s
vk_info
}
+fn end_pass(device: &super::Device, cmd_buf: vk::CommandBuffer) {
+ if device.command_scope.is_some() {
+ unsafe {
+ device.debug_utils.cmd_end_debug_utils_label(cmd_buf);
+ }
+ }
+}
+
impl super::CommandEncoder {
- pub fn mark(&mut self, marker: &str) {
+ fn add_marker(&mut self, marker: &str) {
if let Some(ref mut ch) = self.crash_handler {
let id = ch.add_marker(marker);
unsafe {
@@ -217,10 +225,52 @@ impl super::CommandEncoder {
}
}
+ fn add_timestamp(&mut self, label: &str) {
+ if let Some(_) = self.device.timing {
+ let cmd_buf = self.buffers.first_mut().unwrap();
+ if cmd_buf.timed_pass_names.len() == crate::limits::PASS_COUNT {
+ log::warn!("Reached the maximum for `limits::PASS_COUNT`, skipping the timer");
+ return;
+ }
+ let index = cmd_buf.timed_pass_names.len() as u32;
+ unsafe {
+ self.device.core.cmd_write_timestamp(
+ cmd_buf.raw,
+ vk::PipelineStageFlags::TOP_OF_PIPE,
+ cmd_buf.query_pool,
+ index,
+ );
+ }
+ cmd_buf.timed_pass_names.push(label.to_string());
+ }
+ }
+
+ fn begin_pass(&mut self, label: &str) {
+ self.barrier();
+ self.add_marker(label);
+ self.add_timestamp(label);
+
+ if let Some(_) = self.device.command_scope {
+ self.temp_label.clear();
+ self.temp_label.extend_from_slice(label.as_bytes());
+ self.temp_label.push(0);
+ unsafe {
+ self.device.debug_utils.cmd_begin_debug_utils_label(
+ self.buffers[0].raw,
+ &vk::DebugUtilsLabelEXT {
+ p_label_name: self.temp_label.as_ptr() as *const _,
+ ..Default::default()
+ },
+ )
+ }
+ }
+ }
+
pub fn start(&mut self) {
self.buffers.rotate_left(1);
+ let cmd_buf = self.buffers.first_mut().unwrap();
self.device
- .reset_descriptor_pool(&mut self.buffers[0].descriptor_pool);
+ .reset_descriptor_pool(&mut cmd_buf.descriptor_pool);
let vk_info = vk::CommandBufferBeginInfo {
flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT,
@@ -229,17 +279,64 @@ impl super::CommandEncoder {
unsafe {
self.device
.core
- .begin_command_buffer(self.buffers[0].raw, &vk_info)
+ .begin_command_buffer(cmd_buf.raw, &vk_info)
.unwrap();
}
+
+ if let Some(ref timing) = self.device.timing {
+ self.timings.clear();
+ if !cmd_buf.timed_pass_names.is_empty() {
+ let mut timestamps = [0u64; super::QUERY_POOL_SIZE];
+ unsafe {
+ self.device
+ .core
+ .get_query_pool_results(
+ cmd_buf.query_pool,
+ 0,
+ &mut timestamps[..cmd_buf.timed_pass_names.len() + 1],
+ vk::QueryResultFlags::TYPE_64,
+ )
+ .unwrap();
+ }
+ let mut prev = timestamps[0];
+ for (name, &ts) in cmd_buf
+ .timed_pass_names
+ .drain(..)
+ .zip(timestamps[1..].iter())
+ {
+ let diff = (ts - prev) as f32 * timing.period;
+ prev = ts;
+ self.timings.push((name, Duration::from_nanos(diff as _)));
+ }
+ }
+ unsafe {
+ self.device.core.cmd_reset_query_pool(
+ cmd_buf.raw,
+ cmd_buf.query_pool,
+ 0,
+ super::QUERY_POOL_SIZE as u32,
+ );
+ }
+ }
}
pub(super) fn finish(&mut self) -> vk::CommandBuffer {
self.barrier();
- self.mark("finish");
- let raw = self.buffers[0].raw;
- unsafe { self.device.core.end_command_buffer(raw).unwrap() }
- raw
+ self.add_marker("finish");
+ let cmd_buf = self.buffers.first_mut().unwrap();
+ unsafe {
+ if self.device.timing.is_some() {
+ let index = cmd_buf.timed_pass_names.len() as u32;
+ self.device.core.cmd_write_timestamp(
+ cmd_buf.raw,
+ vk::PipelineStageFlags::TOP_OF_PIPE,
+ cmd_buf.query_pool,
+ index,
+ );
+ }
+ self.device.core.end_command_buffer(cmd_buf.raw).unwrap();
+ }
+ cmd_buf.raw
}
fn barrier(&mut self) {
@@ -330,27 +427,27 @@ impl super::CommandEncoder {
}
}
- pub fn transfer(&mut self) -> super::TransferCommandEncoder {
- self.barrier();
- self.mark("pass/transfer");
+ pub fn transfer(&mut self, label: &str) -> super::TransferCommandEncoder {
+ self.begin_pass(label);
super::TransferCommandEncoder {
raw: self.buffers[0].raw,
device: &self.device,
}
}
- pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder {
- self.barrier();
- self.mark("pass/acc-struct");
+ pub fn acceleration_structure(
+ &mut self,
+ label: &str,
+ ) -> super::AccelerationStructureCommandEncoder {
+ self.begin_pass(label);
super::AccelerationStructureCommandEncoder {
raw: self.buffers[0].raw,
device: &self.device,
}
}
- pub fn compute(&mut self) -> super::ComputeCommandEncoder {
- self.barrier();
- self.mark("pass/compute");
+ pub fn compute(&mut self, label: &str) -> super::ComputeCommandEncoder {
+ self.begin_pass(label);
super::ComputeCommandEncoder {
cmd_buf: self.buffers.first_mut().unwrap(),
device: &self.device,
@@ -358,9 +455,12 @@ impl super::CommandEncoder {
}
}
- pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder {
- self.barrier();
- self.mark("pass/render");
+ pub fn render(
+ &mut self,
+ label: &str,
+ targets: crate::RenderTargetSet,
+ ) -> super::RenderCommandEncoder {
+ self.begin_pass(label);
let mut target_size = [0u16; 2];
let mut color_attachments = Vec::with_capacity(targets.colors.len());
@@ -446,6 +546,10 @@ impl super::CommandEncoder {
Err(other) => panic!("GPU error {}", other),
}
}
+
+ pub fn timings(&self) -> &[(String, Duration)] {
+ &self.timings
+ }
}
#[hidden_trait::expose]
@@ -544,6 +648,12 @@ impl crate::traits::TransferEncoder for super::TransferCommandEncoder<'_> {
}
}
+impl Drop for super::TransferCommandEncoder<'_> {
+ fn drop(&mut self) {
+ end_pass(self.device, self.raw);
+ }
+}
+
#[hidden_trait::expose]
impl crate::traits::AccelerationStructureEncoder
for super::AccelerationStructureCommandEncoder<'_>
@@ -628,6 +738,12 @@ impl crate::traits::AccelerationStructureEncoder
}
}
+impl Drop for super::AccelerationStructureCommandEncoder<'_> {
+ fn drop(&mut self) {
+ end_pass(self.device, self.raw);
+ }
+}
+
impl<'a> super::ComputeCommandEncoder<'a> {
pub fn with<'b, 'p>(
&'b mut self,
@@ -644,6 +760,12 @@ impl<'a> super::ComputeCommandEncoder<'a> {
}
}
+impl Drop for super::ComputeCommandEncoder<'_> {
+ fn drop(&mut self) {
+ end_pass(self.device, self.cmd_buf.raw);
+ }
+}
+
impl<'a> super::RenderCommandEncoder<'a> {
pub fn set_scissor_rect(&mut self, rect: &crate::ScissorRect) {
let vk_scissor = vk::Rect2D {
@@ -685,6 +807,7 @@ impl Drop for super::RenderCommandEncoder<'_> {
.dynamic_rendering
.cmd_end_rendering(self.cmd_buf.raw)
};
+ end_pass(self.device, self.cmd_buf.raw);
}
}
diff --git a/blade-graphics/src/vulkan/init.rs b/blade-graphics/src/vulkan/init.rs
index 4a0c95da..9645caa9 100644
--- a/blade-graphics/src/vulkan/init.rs
+++ b/blade-graphics/src/vulkan/init.rs
@@ -44,6 +44,7 @@ struct AdapterCapabilities {
buffer_marker: bool,
shader_info: bool,
full_screen_exclusive: bool,
+ timing: bool,
bugs: SystemBugs,
}
@@ -202,6 +203,13 @@ unsafe fn inspect_adapter(
return None;
}
+ let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE {
+ log::info!("No timing because of queue support");
+ false
+ } else {
+ true
+ };
+
let ray_tracing = if !supported_extensions.contains(&vk::KHR_ACCELERATION_STRUCTURE_NAME)
|| !supported_extensions.contains(&vk::KHR_RAY_QUERY_NAME)
{
@@ -269,6 +277,7 @@ unsafe fn inspect_adapter(
buffer_marker,
shader_info,
full_screen_exclusive,
+ timing,
bugs,
})
}
@@ -564,6 +573,18 @@ impl super::Context {
},
core: device_core,
device_information: capabilities.device_information,
+ command_scope: if desc.capture {
+ Some(super::CommandScopeDevice {})
+ } else {
+ None
+ },
+ timing: if desc.timing && capabilities.timing {
+ Some(super::TimingDevice {
+ period: capabilities.properties.limits.timestamp_period,
+ })
+ } else {
+ None
+ },
//TODO: detect GPU family
workarounds: super::Workarounds {
extra_sync_src_access: vk::AccessFlags::TRANSFER_WRITE,
diff --git a/blade-graphics/src/vulkan/mod.rs b/blade-graphics/src/vulkan/mod.rs
index 4707fc8f..136a8f5e 100644
--- a/blade-graphics/src/vulkan/mod.rs
+++ b/blade-graphics/src/vulkan/mod.rs
@@ -1,5 +1,5 @@
use ash::{khr, vk};
-use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex};
+use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex, time::Duration};
mod command;
mod descriptor;
@@ -7,6 +7,8 @@ mod init;
mod pipeline;
mod resource;
+const QUERY_POOL_SIZE: usize = crate::limits::PASS_COUNT + 1;
+
struct Instance {
core: ash::Instance,
_debug_utils: ash::ext::debug_utils::Instance,
@@ -20,6 +22,13 @@ struct RayTracingDevice {
acceleration_structure: khr::acceleration_structure::Device,
}
+#[derive(Clone, Default)]
+struct CommandScopeDevice {}
+#[derive(Clone, Default)]
+struct TimingDevice {
+ period: f32,
+}
+
#[derive(Clone)]
struct Workarounds {
extra_sync_src_access: vk::AccessFlags,
@@ -38,6 +47,8 @@ struct Device {
buffer_marker: Option,
shader_info: Option,
full_screen_exclusive: Option,
+ command_scope: Option,
+ timing: Option,
workarounds: Workarounds,
}
@@ -217,6 +228,8 @@ pub struct RenderPipeline {
struct CommandBuffer {
raw: vk::CommandBuffer,
descriptor_pool: descriptor::DescriptorPool,
+ query_pool: vk::QueryPool,
+ timed_pass_names: Vec,
}
#[derive(Debug, PartialEq)]
@@ -239,6 +252,8 @@ pub struct CommandEncoder {
update_data: Vec,
present: Option,
crash_handler: Option,
+ temp_label: Vec,
+ timings: Vec<(String, Duration)>,
}
pub struct TransferCommandEncoder<'a> {
raw: vk::CommandBuffer,
@@ -338,9 +353,24 @@ impl crate::traits::CommandDevice for Context {
self.set_object_name(raw, desc.name);
};
let descriptor_pool = self.device.create_descriptor_pool();
+ let query_pool = if self.device.timing.is_some() {
+ let query_pool_info = vk::QueryPoolCreateInfo::default()
+ .query_type(vk::QueryType::TIMESTAMP)
+ .query_count(QUERY_POOL_SIZE as u32);
+ unsafe {
+ self.device
+ .core
+ .create_query_pool(&query_pool_info, None)
+ .unwrap()
+ }
+ } else {
+ vk::QueryPool::null()
+ };
CommandBuffer {
raw,
descriptor_pool,
+ query_pool,
+ timed_pass_names: Vec::new(),
}
})
.collect();
@@ -367,6 +397,8 @@ impl crate::traits::CommandDevice for Context {
update_data: Vec::new(),
present: None,
crash_handler,
+ temp_label: Vec::new(),
+ timings: Vec::new(),
}
}
@@ -380,6 +412,13 @@ impl crate::traits::CommandDevice for Context {
}
self.device
.destroy_descriptor_pool(&mut cmd_buf.descriptor_pool);
+ if self.device.timing.is_some() {
+ unsafe {
+ self.device
+ .core
+ .destroy_query_pool(cmd_buf.query_pool, None);
+ }
+ }
}
unsafe {
self.device
diff --git a/blade-render/src/model/mod.rs b/blade-render/src/model/mod.rs
index 4f513860..7c9c0416 100644
--- a/blade-render/src/model/mod.rs
+++ b/blade-render/src/model/mod.rs
@@ -381,7 +381,7 @@ impl Baker {
) {
let mut pending_ops = self.pending_operations.lock().unwrap();
if !pending_ops.transfers.is_empty() {
- let mut pass = encoder.transfer();
+ let mut pass = encoder.transfer("init models");
for transfer in pending_ops.transfers.drain(..) {
pass.copy_buffer_to_buffer(
transfer.stage.into(),
@@ -392,7 +392,7 @@ impl Baker {
}
}
if !pending_ops.blas_constructs.is_empty() {
- let mut pass = encoder.acceleration_structure();
+ let mut pass = encoder.acceleration_structure("BLAS");
for construct in pending_ops.blas_constructs.drain(..) {
pass.build_bottom_level(construct.dst, &construct.meshes, construct.scratch.into());
temp_buffers.push(construct.scratch);
diff --git a/blade-render/src/render/debug.rs b/blade-render/src/render/debug.rs
index 290e9f40..06da0806 100644
--- a/blade-render/src/render/debug.rs
+++ b/blade-render/src/render/debug.rs
@@ -194,7 +194,7 @@ impl DebugRender {
);
}
- let mut transfers = encoder.transfer();
+ let mut transfers = encoder.transfer("upload debug");
transfers.copy_buffer_to_buffer(
this.entry_buffer.at(0),
this.buffer.at(0),
diff --git a/blade-render/src/render/dummy.rs b/blade-render/src/render/dummy.rs
index d3daa3e7..f4c9fb44 100644
--- a/blade-render/src/render/dummy.rs
+++ b/blade-render/src/render/dummy.rs
@@ -79,7 +79,7 @@ impl DummyResources {
command_encoder.init_texture(white_texture);
command_encoder.init_texture(black_texture);
command_encoder.init_texture(red_texture);
- let mut transfers = command_encoder.transfer();
+ let mut transfers = command_encoder.transfer("init dummy");
let staging_buf = gpu.create_buffer(blade_graphics::BufferDesc {
name: "dummy/staging",
size: 4 * 3,
diff --git a/blade-render/src/render/env_map.rs b/blade-render/src/render/env_map.rs
index b157285b..979dfaea 100644
--- a/blade-render/src/render/env_map.rs
+++ b/blade-render/src/render/env_map.rs
@@ -144,7 +144,7 @@ impl EnvironmentMap {
let groups = self
.prepare_pipeline
.get_dispatch_for(weight_extent.at_mip_level(target_level));
- let mut compute = encoder.compute();
+ let mut compute = encoder.compute("pre-process env map");
let mut pass = compute.with(&self.prepare_pipeline);
pass.bind(
0,
diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs
index 751140a5..838f934c 100644
--- a/blade-render/src/render/mod.rs
+++ b/blade-render/src/render/mod.rs
@@ -904,7 +904,7 @@ impl Renderer {
});
temp.buffers.push(hit_staging);
{
- let mut transfers = command_encoder.transfer();
+ let mut transfers = command_encoder.transfer("build-scene");
transfers.copy_buffer_to_buffer(hit_staging.at(0), self.hit_buffer.at(0), hit_size);
}
@@ -1024,7 +1024,7 @@ impl Renderer {
memory: blade_graphics::Memory::Device,
});
- let mut tlas_encoder = command_encoder.acceleration_structure();
+ let mut tlas_encoder = command_encoder.acceleration_structure("TLAS");
tlas_encoder.build_top_level(
self.acceleration_structure,
&blases,
@@ -1076,7 +1076,7 @@ impl Renderer {
camera: &crate::Camera,
config: FrameConfig,
) {
- let mut transfer = command_encoder.transfer();
+ let mut transfer = command_encoder.transfer("prepare");
if config.debug_draw {
self.debug.reset_lines(&mut transfer);
@@ -1128,7 +1128,7 @@ impl Renderer {
let (cur, prev) = self.work_indices();
assert_eq!(cur, self.post_proc_input_index);
- if let mut pass = command_encoder.compute() {
+ if let mut pass = command_encoder.compute("fill-gbuf") {
let mut pc = pass.with(&self.fill_pipeline);
let groups = self.fill_pipeline.get_dispatch_for(self.surface_size);
pc.bind(
@@ -1155,7 +1155,7 @@ impl Renderer {
pc.dispatch(groups);
}
- if let mut pass = command_encoder.compute() {
+ if let mut pass = command_encoder.compute("ray-trace") {
let mut pc = pass.with(&self.main_pipeline);
let groups = self.main_pipeline.get_dispatch_for(self.surface_size);
pc.bind(
@@ -1228,7 +1228,7 @@ impl Renderer {
let temp = 2;
if denoiser_config.temporal_weight < 1.0 {
- let mut pass = command_encoder.compute();
+ let mut pass = command_encoder.compute("temporal-accum");
let mut pc = pass.with(&self.blur.temporal_accum_pipeline);
let groups = self
.blur
@@ -1258,7 +1258,7 @@ impl Renderer {
assert_eq!(cur, self.post_proc_input_index);
let mut ping_pong = [temp, if self.is_frozen { cur } else { prev }];
for _ in 0..denoiser_config.num_passes {
- let mut pass = command_encoder.compute();
+ let mut pass = command_encoder.compute("a-trous");
let mut pc = pass.with(&self.blur.a_trous_pipeline);
let groups = self
.blur
diff --git a/blade-render/src/texture/mod.rs b/blade-render/src/texture/mod.rs
index 5ef59c40..6e5192ce 100644
--- a/blade-render/src/texture/mod.rs
+++ b/blade-render/src/texture/mod.rs
@@ -81,7 +81,7 @@ impl Baker {
encoder.init_texture(init.dst);
}
if !pending_ops.transfers.is_empty() {
- let mut pass = encoder.transfer();
+ let mut pass = encoder.transfer("init textures");
for transfer in pending_ops.transfers.drain(..) {
let dst = blade_graphics::TexturePiece {
texture: transfer.dst,
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 2c9426ff..a7135dab 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -4,8 +4,12 @@ Changelog for Blade
- graphics:
- API for destruction of pipelines
+ - every pass now takes a label
+ - automatic GPU pass markers
+ - ability to capture pass GPU timings
- Metal:
- support for workgroup memory
+ - concurrent compute dispatches
## blade-graphics-0.5, blade-macros-0.3, blade-egui-0.4, blade-util-0.1 (27 Aug 2024)
diff --git a/examples/bunnymark/main.rs b/examples/bunnymark/main.rs
index 58a17ed2..0f07d6cd 100644
--- a/examples/bunnymark/main.rs
+++ b/examples/bunnymark/main.rs
@@ -61,12 +61,12 @@ struct Example {
impl Example {
fn new(window: &winit::window::Window) -> Self {
- let window_size = window.inner_size();
let context = unsafe {
gpu::Context::init_windowed(
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
+ timing: false,
capture: false,
overlay: true,
},
@@ -74,6 +74,8 @@ impl Example {
.unwrap()
};
println!("{:?}", context.device_information());
+ let window_size = window.inner_size();
+ log::info!("Initial size: {:?}", window_size);
let surface_info = context.resize(gpu::SurfaceConfig {
size: gpu::Extent {
@@ -200,7 +202,7 @@ impl Example {
});
command_encoder.start();
command_encoder.init_texture(texture);
- if let mut transfer = command_encoder.transfer() {
+ if let mut transfer = command_encoder.transfer("init texture") {
transfer.copy_buffer_to_texture(upload_buffer.into(), 4, texture.into(), extent);
}
let sync_point = context.submit(&mut command_encoder);
@@ -273,19 +275,25 @@ impl Example {
}
fn render(&mut self) {
+ if self.window_size == Default::default() {
+ return;
+ }
let frame = self.context.acquire_frame();
self.command_encoder.start();
self.command_encoder.init_texture(frame.texture());
- if let mut pass = self.command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: frame.texture_view(),
- init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- }) {
+ if let mut pass = self.command_encoder.render(
+ "main",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: frame.texture_view(),
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ ) {
let mut rc = pass.with(&self.pipeline);
rc.bind(
0,
@@ -350,7 +358,7 @@ fn main() {
{
use winit::platform::web::WindowExtWebSys as _;
- std::panic::set_hook(Box::new(console_error_panic_hook::hook));
+ console_error_panic_hook::set_once();
console_log::init().expect("could not initialize logger");
// On wasm, append the canvas to the document body
let canvas = window.canvas().unwrap();
diff --git a/examples/init/main.rs b/examples/init/main.rs
index d01f52fa..a192139b 100644
--- a/examples/init/main.rs
+++ b/examples/init/main.rs
@@ -92,14 +92,17 @@ impl EnvMapSampler {
env_weights: gpu::TextureView,
) {
command_encoder.init_texture(self.accum_texture);
- let mut pass = command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: self.accum_view,
- init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- });
+ let mut pass = command_encoder.render(
+ "accumulate",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: self.accum_view,
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ );
if let mut encoder = pass.with(&self.init_pipeline) {
encoder.bind(
0,
diff --git a/examples/mini/main.rs b/examples/mini/main.rs
index d566b338..223df8dd 100644
--- a/examples/mini/main.rs
+++ b/examples/mini/main.rs
@@ -111,7 +111,7 @@ fn main() {
command_encoder.start();
command_encoder.init_texture(texture);
- if let mut transfer = command_encoder.transfer() {
+ if let mut transfer = command_encoder.transfer("gen-mips") {
transfer.copy_buffer_to_texture(
upload_buffer.into(),
extent.width * 4,
@@ -120,7 +120,7 @@ fn main() {
);
}
for i in 1..mip_level_count {
- if let mut compute = command_encoder.compute() {
+ if let mut compute = command_encoder.compute("generate mips") {
if let mut pc = compute.with(&pipeline) {
let groups = pipeline.get_dispatch_for(extent.at_mip_level(i));
pc.bind(
@@ -139,7 +139,7 @@ fn main() {
}
}
}
- if let mut tranfer = command_encoder.transfer() {
+ if let mut tranfer = command_encoder.transfer("init 1x2 texture") {
tranfer.copy_texture_to_buffer(
gpu::TexturePiece {
texture,
diff --git a/examples/particle/main.rs b/examples/particle/main.rs
index 04fc5354..8ae86d8d 100644
--- a/examples/particle/main.rs
+++ b/examples/particle/main.rs
@@ -20,7 +20,8 @@ impl Example {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
- capture: false,
+ timing: true,
+ capture: true,
overlay: false,
},
)
@@ -89,14 +90,17 @@ impl Example {
self.particle_system.update(&mut self.command_encoder);
- if let mut pass = self.command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: frame.texture_view(),
- init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- }) {
+ if let mut pass = self.command_encoder.render(
+ "draw",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: frame.texture_view(),
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::OpaqueBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ ) {
self.particle_system.draw(&mut pass);
self.gui_painter
.paint(&mut pass, gui_primitives, screen_desc, &self.context);
@@ -110,6 +114,19 @@ impl Example {
}
self.prev_sync_point = Some(sync_point);
}
+
+ fn add_gui(&mut self, ui: &mut egui::Ui) {
+ ui.heading("Particle System");
+ self.particle_system.add_gui(ui);
+ ui.heading("Timings");
+ for &(ref name, time) in self.command_encoder.timings() {
+ let millis = time.as_secs_f32() * 1000.0;
+ ui.horizontal(|ui| {
+ ui.label(name);
+ ui.colored_label(egui::Color32::WHITE, format!("{:.2} ms", millis));
+ });
+ }
+ }
}
fn main() {
@@ -164,9 +181,8 @@ fn main() {
winit::event::WindowEvent::RedrawRequested => {
let raw_input = egui_winit.take_egui_input(&window);
let egui_output = egui_winit.egui_ctx().run(raw_input, |egui_ctx| {
- egui::SidePanel::left("my_side_panel").show(egui_ctx, |ui| {
- ui.heading("Particle System");
- example.particle_system.add_gui(ui);
+ egui::SidePanel::left("info").show(egui_ctx, |ui| {
+ example.add_gui(ui);
if ui.button("Quit").clicked() {
target.exit();
}
diff --git a/examples/particle/particle.rs b/examples/particle/particle.rs
index 08882018..73145148 100644
--- a/examples/particle/particle.rs
+++ b/examples/particle/particle.rs
@@ -146,6 +146,10 @@ impl System {
pub fn destroy(&mut self, context: &gpu::Context) {
context.destroy_buffer(self.particle_buf);
context.destroy_buffer(self.free_list_buf);
+ context.destroy_compute_pipeline(&mut self.reset_pipeline);
+ context.destroy_compute_pipeline(&mut self.emit_pipeline);
+ context.destroy_compute_pipeline(&mut self.update_pipeline);
+ context.destroy_render_pipeline(&mut self.draw_pipeline);
}
fn main_data(&self) -> MainData {
@@ -156,7 +160,7 @@ impl System {
}
pub fn reset(&self, encoder: &mut gpu::CommandEncoder) {
- let mut pass = encoder.compute();
+ let mut pass = encoder.compute("reset");
let mut pc = pass.with(&self.reset_pipeline);
pc.bind(0, &self.main_data());
let group_size = self.reset_pipeline.get_workgroup_size();
@@ -166,7 +170,7 @@ impl System {
pub fn update(&self, encoder: &mut gpu::CommandEncoder) {
let main_data = self.main_data();
- if let mut pass = encoder.compute() {
+ if let mut pass = encoder.compute("update") {
let mut pc = pass.with(&self.update_pipeline);
pc.bind(0, &main_data);
pc.bind(
@@ -181,7 +185,7 @@ impl System {
pc.dispatch([group_count, 1, 1]);
}
// new pass because both pipelines use the free list
- if let mut pass = encoder.compute() {
+ if let mut pass = encoder.compute("emit") {
let mut pc = pass.with(&self.emit_pipeline);
pc.bind(0, &main_data);
pc.bind(
diff --git a/examples/ray-query/main.rs b/examples/ray-query/main.rs
index 4fd8a608..1e16092f 100644
--- a/examples/ray-query/main.rs
+++ b/examples/ray-query/main.rs
@@ -51,8 +51,7 @@ impl Example {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
- capture: false,
- overlay: false,
+ ..Default::default()
},
)
.unwrap()
@@ -210,11 +209,11 @@ impl Example {
});
command_encoder.start();
command_encoder.init_texture(target);
- if let mut pass = command_encoder.acceleration_structure() {
+ if let mut pass = command_encoder.acceleration_structure("BLAS") {
pass.build_bottom_level(blas, &meshes, scratch_buffer.at(0));
}
//Note: separate pass in order to enforce synchronization
- if let mut pass = command_encoder.acceleration_structure() {
+ if let mut pass = command_encoder.acceleration_structure("TLAS") {
pass.build_top_level(
tlas,
&[blas],
@@ -264,7 +263,7 @@ impl Example {
fn render(&mut self) {
self.command_encoder.start();
- if let mut pass = self.command_encoder.compute() {
+ if let mut pass = self.command_encoder.compute("ray-trace") {
let groups = self.rt_pipeline.get_dispatch_for(self.screen_size);
if let mut pc = pass.with(&self.rt_pipeline) {
let fov_y = 0.3;
@@ -293,14 +292,17 @@ impl Example {
let frame = self.context.acquire_frame();
self.command_encoder.init_texture(frame.texture());
- if let mut pass = self.command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: frame.texture_view(),
- init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- }) {
+ if let mut pass = self.command_encoder.render(
+ "draw",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: frame.texture_view(),
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ ) {
if let mut pc = pass.with(&self.draw_pipeline) {
pc.bind(
0,
diff --git a/examples/scene/main.rs b/examples/scene/main.rs
index 237546ef..11437619 100644
--- a/examples/scene/main.rs
+++ b/examples/scene/main.rs
@@ -191,8 +191,7 @@ impl Example {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
- capture: false,
- overlay: false,
+ ..Default::default()
},
)
.unwrap()
@@ -476,14 +475,17 @@ impl Example {
let frame = self.context.acquire_frame();
command_encoder.init_texture(frame.texture());
- if let mut pass = command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: frame.texture_view(),
- init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- }) {
+ if let mut pass = command_encoder.render(
+ "draw",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: frame.texture_view(),
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ ) {
let screen_desc = blade_egui::ScreenDescriptor {
physical_size: (physical_size.width, physical_size.height),
scale_factor,
diff --git a/src/lib.rs b/src/lib.rs
index 01889de7..ea293d2b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -415,6 +415,7 @@ impl Engine {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
+ timing: true,
capture: false,
overlay: false,
},
@@ -679,14 +680,17 @@ impl Engine {
let frame = self.gpu_context.acquire_frame();
command_encoder.init_texture(frame.texture());
- if let mut pass = command_encoder.render(gpu::RenderTargetSet {
- colors: &[gpu::RenderTarget {
- view: frame.texture_view(),
- init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
- finish_op: gpu::FinishOp::Store,
- }],
- depth_stencil: None,
- }) {
+ if let mut pass = command_encoder.render(
+ "draw",
+ gpu::RenderTargetSet {
+ colors: &[gpu::RenderTarget {
+ view: frame.texture_view(),
+ init_op: gpu::InitOp::Clear(gpu::TextureColor::TransparentBlack),
+ finish_op: gpu::FinishOp::Store,
+ }],
+ depth_stencil: None,
+ },
+ ) {
let screen_desc = blade_egui::ScreenDescriptor {
physical_size: (physical_size.width, physical_size.height),
scale_factor,