diff --git a/blade-graphics/src/lib.rs b/blade-graphics/src/lib.rs index 02e973f2..a20da598 100644 --- a/blade-graphics/src/lib.rs +++ b/blade-graphics/src/lib.rs @@ -82,11 +82,14 @@ pub use hal::*; use std::{fmt, num::NonZeroU32}; +//TODO: switch to bitflags? #[derive(Clone, Debug, Default)] pub struct ContextDesc { /// Enable validation of the GAPI, shaders, /// and insert crash markers into command buffers. pub validation: bool, + /// Enable GPU timing of all passes. + pub timing: bool, /// Enable capture support with GAPI tools. pub capture: bool, /// Enable GAPI overlay. diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs index 1206d9b2..37242d85 100644 --- a/blade-graphics/src/vulkan/command.rs +++ b/blade-graphics/src/vulkan/command.rs @@ -1,5 +1,5 @@ use ash::vk; -use std::str; +use std::{str, time::Duration}; impl super::CrashHandler { fn add_marker(&mut self, marker: &str) -> u32 { @@ -217,10 +217,44 @@ impl super::CommandEncoder { } } + fn timestamp(&mut self) { + if let Some(_) = self.device.timing { + let (vk_pool, index) = self.device.allocate_query(&mut self.buffers[0].query_pool); + unsafe { + self.device.core.cmd_write_timestamp( + self.buffers[0].raw, + vk::PipelineStageFlags::TOP_OF_PIPE, + vk_pool, + index, + ); + } + } + } + + fn checkpoint(&mut self, name: &str) { + self.barrier(); + self.timestamp(); + self.mark(name); + } + pub fn start(&mut self) { self.buffers.rotate_left(1); + let cmd_buf = self.buffers.first_mut().unwrap(); self.device - .reset_descriptor_pool(&mut self.buffers[0].descriptor_pool); + .reset_descriptor_pool(&mut cmd_buf.descriptor_pool); + if let Some(ref timing) = self.device.timing { + let timestamps = self.device.get_query_pool_results(&cmd_buf.query_pool); + self.timings.clear(); + if !timestamps.is_empty() { + let mut prev = timestamps[0]; + for &ts in timestamps[1..].iter() { + let diff = (ts - prev) as f32 * timing.period; + prev = ts; + 
self.timings.push(Duration::from_nanos(diff as _)); + } + } + self.device.reset_query_pool(&mut cmd_buf.query_pool); + } let vk_info = vk::CommandBufferBeginInfo { flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT, @@ -229,14 +263,14 @@ impl super::CommandEncoder { unsafe { self.device .core - .begin_command_buffer(self.buffers[0].raw, &vk_info) + .begin_command_buffer(cmd_buf.raw, &vk_info) .unwrap(); } + self.timestamp(); } pub(super) fn finish(&mut self) -> vk::CommandBuffer { - self.barrier(); - self.mark("finish"); + self.checkpoint("finish"); let raw = self.buffers[0].raw; unsafe { self.device.core.end_command_buffer(raw).unwrap() } raw @@ -331,8 +365,7 @@ impl super::CommandEncoder { } pub fn transfer(&mut self) -> super::TransferCommandEncoder { - self.barrier(); - self.mark("pass/transfer"); + self.checkpoint("pass/transfer"); super::TransferCommandEncoder { raw: self.buffers[0].raw, device: &self.device, @@ -340,8 +373,7 @@ impl super::CommandEncoder { } pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder { - self.barrier(); - self.mark("pass/acc-struct"); + self.checkpoint("pass/acc-struct"); super::AccelerationStructureCommandEncoder { raw: self.buffers[0].raw, device: &self.device, @@ -349,8 +381,7 @@ impl super::CommandEncoder { } pub fn compute(&mut self) -> super::ComputeCommandEncoder { - self.barrier(); - self.mark("pass/compute"); + self.checkpoint("pass/compute"); super::ComputeCommandEncoder { cmd_buf: self.buffers.first_mut().unwrap(), device: &self.device, @@ -359,8 +390,7 @@ impl super::CommandEncoder { } pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder { - self.barrier(); - self.mark("pass/render"); + self.checkpoint("pass/render"); let mut target_size = [0u16; 2]; let mut color_attachments = Vec::with_capacity(targets.colors.len()); @@ -446,6 +476,10 @@ impl super::CommandEncoder { Err(other) => panic!("GPU error {}", other), } } + + pub fn timings(&self) -> 
&[Duration] { + &self.timings + } } #[hidden_trait::expose] diff --git a/blade-graphics/src/vulkan/descriptor.rs b/blade-graphics/src/vulkan/descriptor.rs index 2acb4374..31e5f4a9 100644 --- a/blade-graphics/src/vulkan/descriptor.rs +++ b/blade-graphics/src/vulkan/descriptor.rs @@ -5,7 +5,7 @@ use ash::vk; const COUNT_BASE: u32 = 16; #[derive(Debug)] -pub struct DescriptorPool { +pub(super) struct DescriptorPool { sub_pools: Vec, growth_iter: usize, } diff --git a/blade-graphics/src/vulkan/init.rs b/blade-graphics/src/vulkan/init.rs index 4a0c95da..ff4d1040 100644 --- a/blade-graphics/src/vulkan/init.rs +++ b/blade-graphics/src/vulkan/init.rs @@ -44,6 +44,7 @@ struct AdapterCapabilities { buffer_marker: bool, shader_info: bool, full_screen_exclusive: bool, + timing: bool, bugs: SystemBugs, } @@ -153,6 +154,7 @@ unsafe fn inspect_adapter( vk::PhysicalDeviceInlineUniformBlockFeaturesEXT::default(); let mut timeline_semaphore_features = vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::default(); let mut dynamic_rendering_features = vk::PhysicalDeviceDynamicRenderingFeaturesKHR::default(); + let mut host_query_reset_features = vk::PhysicalDeviceHostQueryResetFeatures::default(); let mut descriptor_indexing_features = vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::default(); let mut buffer_device_address_features = @@ -163,6 +165,7 @@ unsafe fn inspect_adapter( let mut features2_khr = vk::PhysicalDeviceFeatures2::default() .push_next(&mut inline_uniform_block_features) .push_next(&mut timeline_semaphore_features) + .push_next(&mut host_query_reset_features) .push_next(&mut dynamic_rendering_features) .push_next(&mut descriptor_indexing_features) .push_next(&mut buffer_device_address_features) @@ -202,6 +205,19 @@ unsafe fn inspect_adapter( return None; } + let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE { + log::info!("No timing because of queue support"); + false + } else if host_query_reset_features.host_query_reset == vk::FALSE { + 
/// Timing state stored on the device when GPU timing was requested in the
/// context descriptor and the adapter supports it; the device holds `None`
/// otherwise.
#[derive(Clone)]
struct TimingDevice {
    /// Copied from the adapter's `limits.timestamp_period` at context
    /// creation. `CommandEncoder::start` multiplies timestamp deltas by this
    /// value and feeds the product to `Duration::from_nanos`.
    period: f32,
}
Workarounds { extra_sync_src_access: vk::AccessFlags, @@ -38,6 +44,7 @@ struct Device { buffer_marker: Option, shader_info: Option, full_screen_exclusive: Option, + timing: Option, workarounds: Workarounds, } @@ -217,6 +224,7 @@ pub struct RenderPipeline { struct CommandBuffer { raw: vk::CommandBuffer, descriptor_pool: descriptor::DescriptorPool, + query_pool: query::QueryPool, } #[derive(Debug, PartialEq)] @@ -237,6 +245,7 @@ pub struct CommandEncoder { buffers: Box<[CommandBuffer]>, device: Device, update_data: Vec, + timings: Vec, present: Option, crash_handler: Option, } @@ -338,9 +347,11 @@ impl crate::traits::CommandDevice for Context { self.set_object_name(raw, desc.name); }; let descriptor_pool = self.device.create_descriptor_pool(); + let query_pool = self.device.create_query_pool(); CommandBuffer { raw, descriptor_pool, + query_pool, } }) .collect(); @@ -365,6 +376,7 @@ impl crate::traits::CommandDevice for Context { buffers, device: self.device.clone(), update_data: Vec::new(), + timings: Vec::new(), present: None, crash_handler, } @@ -380,6 +392,7 @@ impl crate::traits::CommandDevice for Context { } self.device .destroy_descriptor_pool(&mut cmd_buf.descriptor_pool); + self.device.destroy_query_pool(&mut cmd_buf.query_pool); } unsafe { self.device diff --git a/blade-graphics/src/vulkan/query.rs b/blade-graphics/src/vulkan/query.rs new file mode 100644 index 00000000..aa29b25b --- /dev/null +++ b/blade-graphics/src/vulkan/query.rs @@ -0,0 +1,100 @@ +use ash::vk; + +const fn pool_size(i: usize) -> u32 { + const COUNT_BASE: u32 = 4; + COUNT_BASE.pow(i as u32 + 1) +} + +#[derive(Debug)] +pub(super) struct QueryPool { + sub_pools: Vec, + count: u32, +} + +impl super::Device { + fn create_query_sub_pool(&self, max_queries: u32) -> vk::QueryPool { + log::info!("Creating a query set for at most {} queries", max_queries); + + let query_pool_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::TIMESTAMP) + .query_count(max_queries); + + unsafe { 
+ let vk_pool = self.core.create_query_pool(&query_pool_info, None).unwrap(); + self.core.reset_query_pool(vk_pool, 0, max_queries); + vk_pool + } + } + + pub(super) fn create_query_pool(&self) -> QueryPool { + QueryPool { + sub_pools: match self.timing { + Some(_) => { + let pool_size = pool_size(0); + let vk_pool = self.create_query_sub_pool(pool_size); + vec![vk_pool] + } + None => Vec::new(), + }, + count: 0, + } + } + + pub(super) fn destroy_query_pool(&self, pool: &mut QueryPool) { + for sub_pool in pool.sub_pools.drain(..) { + unsafe { self.core.destroy_query_pool(sub_pool, None) }; + } + } + + pub(super) fn allocate_query(&self, pool: &mut QueryPool) -> (vk::QueryPool, u32) { + let mut remaining = pool.count; + pool.count += 1; + for (i, &vk_pool) in pool.sub_pools.iter().enumerate() { + let pool_size = pool_size(i); + if remaining < pool_size { + return (vk_pool, remaining); + } + remaining -= pool_size; + } + let next_max_queries = pool_size(pool.sub_pools.len()); + let vk_pool = self.create_query_sub_pool(next_max_queries); + pool.sub_pools.push(vk_pool); + (vk_pool, 0) + } + + pub(super) fn reset_query_pool(&self, pool: &mut QueryPool) { + for (i, &vk_pool) in pool.sub_pools.iter().enumerate() { + let pool_size = pool_size(i); + unsafe { + self.core.reset_query_pool(vk_pool, 0, pool_size); + } + } + pool.count = 0; + } + + pub(super) fn get_query_pool_results(&self, pool: &QueryPool) -> Vec { + let mut timestamps = Vec::new(); + let mut remaining = pool.count; + for (i, &vk_pool) in pool.sub_pools.iter().enumerate() { + if remaining == 0 { + break; + } + let pool_size = pool_size(i); + let count = remaining.min(pool_size); + remaining -= count; + let base = timestamps.len(); + timestamps.resize(base + count as usize, 0); + unsafe { + self.core + .get_query_pool_results( + vk_pool, + 0, + &mut timestamps[base..], + vk::QueryResultFlags::TYPE_64, + ) + .unwrap(); + } + } + timestamps + } +} diff --git a/examples/bunnymark/main.rs 
b/examples/bunnymark/main.rs index 4829f0c6..1edf4685 100644 --- a/examples/bunnymark/main.rs +++ b/examples/bunnymark/main.rs @@ -67,6 +67,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: false, capture: false, overlay: true, }, diff --git a/examples/particle/main.rs b/examples/particle/main.rs index b808d7d2..a86ccb5e 100644 --- a/examples/particle/main.rs +++ b/examples/particle/main.rs @@ -20,6 +20,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: true, capture: false, overlay: false, }, diff --git a/examples/ray-query/main.rs b/examples/ray-query/main.rs index 4fd8a608..a218b6ed 100644 --- a/examples/ray-query/main.rs +++ b/examples/ray-query/main.rs @@ -46,17 +46,8 @@ struct Example { impl Example { fn new(window: &winit::window::Window) -> Self { let window_size = window.inner_size(); - let context = unsafe { - gpu::Context::init_windowed( - window, - gpu::ContextDesc { - validation: cfg!(debug_assertions), - capture: false, - overlay: false, - }, - ) - .unwrap() - }; + let context = + unsafe { gpu::Context::init_windowed(window, gpu::ContextDesc::default()).unwrap() }; let capabilities = context.capabilities(); assert!(capabilities .ray_query diff --git a/examples/scene/main.rs b/examples/scene/main.rs index f09de24a..f6566bb8 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -191,6 +191,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: false, capture: false, overlay: false, }, diff --git a/src/lib.rs b/src/lib.rs index 5c8320b6..f6bfbf6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -415,6 +415,7 @@ impl Engine { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: cfg!(debug_assertions), capture: false, overlay: false, },