Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU timing support #174

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions blade-graphics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,14 @@ pub use hal::*;

use std::{fmt, num::NonZeroU32};

//TODO: switch to bitflags?
#[derive(Clone, Debug, Default)]
pub struct ContextDesc {
/// Enable validation of the GAPI, shaders,
/// and insert crash markers into command buffers.
pub validation: bool,
/// Enable GPU timing of all passes.
pub timing: bool,
/// Enable capture support with GAPI tools.
pub capture: bool,
/// Enable GAPI overlay.
Expand Down
60 changes: 47 additions & 13 deletions blade-graphics/src/vulkan/command.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use ash::vk;
use std::str;
use std::{str, time::Duration};

impl super::CrashHandler {
fn add_marker(&mut self, marker: &str) -> u32 {
Expand Down Expand Up @@ -217,10 +217,44 @@ impl super::CommandEncoder {
}
}

/// Encodes a GPU timestamp write into the current command buffer.
///
/// This is a no-op when `self.device.timing` is `None`. Otherwise a query
/// slot is allocated from the query pool owned by `self.buffers[0]`, and a
/// timestamp write targeting that slot is recorded into `self.buffers[0].raw`.
fn timestamp(&mut self) {
    // Only the presence of timing support matters here; the payload is unused.
    if self.device.timing.is_none() {
        return;
    }

    let (vk_pool, index) = self.device.allocate_query(&mut self.buffers[0].query_pool);
    // NOTE(review): with `TOP_OF_PIPE` the timer value is latched without waiting
    // for later pipeline stages of preceding work — confirm this is the intended
    // stage for delimiting passes.
    // SAFETY: the visible callers (`start`, after `begin_command_buffer`, and
    // `checkpoint`, before `end_command_buffer` in `finish`) invoke this while the
    // command buffer is being recorded; `vk_pool`/`index` come straight from
    // `allocate_query` — presumably a valid slot, confirm in `query.rs`.
    unsafe {
        self.device.core.cmd_write_timestamp(
            self.buffers[0].raw,
            vk::PipelineStageFlags::TOP_OF_PIPE,
            vk_pool,
            index,
        );
    }
}

/// Marks a boundary in the command stream under the given `name`.
///
/// Runs, in this exact order, `barrier()`, `timestamp()` and `mark(name)`;
/// `name` is forwarded to `mark` unchanged.
fn checkpoint(&mut self, name: &str) {
self.barrier();
// Only records a GPU timestamp when `self.device.timing` is set.
self.timestamp();
self.mark(name);
}

pub fn start(&mut self) {
self.buffers.rotate_left(1);
let cmd_buf = self.buffers.first_mut().unwrap();
self.device
.reset_descriptor_pool(&mut self.buffers[0].descriptor_pool);
.reset_descriptor_pool(&mut cmd_buf.descriptor_pool);
if let Some(ref timing) = self.device.timing {
let timestamps = self.device.get_query_pool_results(&cmd_buf.query_pool);
self.timings.clear();
if !timestamps.is_empty() {
let mut prev = timestamps[0];
for &ts in timestamps[1..].iter() {
let diff = (ts - prev) as f32 * timing.period;
prev = ts;
self.timings.push(Duration::from_nanos(diff as _));
}
}
self.device.reset_query_pool(&mut cmd_buf.query_pool);
}

let vk_info = vk::CommandBufferBeginInfo {
flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT,
Expand All @@ -229,14 +263,14 @@ impl super::CommandEncoder {
unsafe {
self.device
.core
.begin_command_buffer(self.buffers[0].raw, &vk_info)
.begin_command_buffer(cmd_buf.raw, &vk_info)
.unwrap();
}
self.timestamp();
}

pub(super) fn finish(&mut self) -> vk::CommandBuffer {
self.barrier();
self.mark("finish");
self.checkpoint("finish");
let raw = self.buffers[0].raw;
unsafe { self.device.core.end_command_buffer(raw).unwrap() }
raw
Expand Down Expand Up @@ -331,26 +365,23 @@ impl super::CommandEncoder {
}

/// Begins a transfer pass and returns an encoder for it.
///
/// The returned `TransferCommandEncoder` carries the raw handle of
/// `self.buffers[0]` and a shared borrow of `self.device`.
pub fn transfer(&mut self) -> super::TransferCommandEncoder {
// NOTE(review): this listing comes from a diff view; the next two lines look like
// the pre-change code that `checkpoint("pass/transfer")` replaces — confirm that
// only one of the two forms remains in the real file.
self.barrier();
self.mark("pass/transfer");
self.checkpoint("pass/transfer");
super::TransferCommandEncoder {
raw: self.buffers[0].raw,
device: &self.device,
}
}

/// Begins an acceleration-structure pass and returns an encoder for it.
///
/// The returned `AccelerationStructureCommandEncoder` carries the raw handle of
/// `self.buffers[0]` and a shared borrow of `self.device`.
pub fn acceleration_structure(&mut self) -> super::AccelerationStructureCommandEncoder {
// NOTE(review): this listing comes from a diff view; the next two lines look like
// the pre-change code that `checkpoint("pass/acc-struct")` replaces — confirm that
// only one of the two forms remains in the real file.
self.barrier();
self.mark("pass/acc-struct");
self.checkpoint("pass/acc-struct");
super::AccelerationStructureCommandEncoder {
raw: self.buffers[0].raw,
device: &self.device,
}
}

pub fn compute(&mut self) -> super::ComputeCommandEncoder {
self.barrier();
self.mark("pass/compute");
self.checkpoint("pass/compute");
super::ComputeCommandEncoder {
cmd_buf: self.buffers.first_mut().unwrap(),
device: &self.device,
Expand All @@ -359,8 +390,7 @@ impl super::CommandEncoder {
}

pub fn render(&mut self, targets: crate::RenderTargetSet) -> super::RenderCommandEncoder {
self.barrier();
self.mark("pass/render");
self.checkpoint("pass/render");

let mut target_size = [0u16; 2];
let mut color_attachments = Vec::with_capacity(targets.colors.len());
Expand Down Expand Up @@ -446,6 +476,10 @@ impl super::CommandEncoder {
Err(other) => panic!("GPU error {}", other),
}
}

/// Returns the durations gathered by the most recent call to `start`.
///
/// `start` clears the list and, when device timing is enabled, refills it with
/// one entry per pair of consecutive timestamps read back from the query pool
/// of the command buffer being reused. The slice is empty when timing is
/// disabled or no timestamps were available.
pub fn timings(&self) -> &[Duration] {
    self.timings.as_slice()
}
}

#[hidden_trait::expose]
Expand Down
2 changes: 1 addition & 1 deletion blade-graphics/src/vulkan/descriptor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use ash::vk;
const COUNT_BASE: u32 = 16;

#[derive(Debug)]
pub struct DescriptorPool {
pub(super) struct DescriptorPool {
sub_pools: Vec<vk::DescriptorPool>,
growth_iter: usize,
}
Expand Down
35 changes: 35 additions & 0 deletions blade-graphics/src/vulkan/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct AdapterCapabilities {
buffer_marker: bool,
shader_info: bool,
full_screen_exclusive: bool,
timing: bool,
bugs: SystemBugs,
}

Expand Down Expand Up @@ -153,6 +154,7 @@ unsafe fn inspect_adapter(
vk::PhysicalDeviceInlineUniformBlockFeaturesEXT::default();
let mut timeline_semaphore_features = vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR::default();
let mut dynamic_rendering_features = vk::PhysicalDeviceDynamicRenderingFeaturesKHR::default();
let mut host_query_reset_features = vk::PhysicalDeviceHostQueryResetFeatures::default();
let mut descriptor_indexing_features =
vk::PhysicalDeviceDescriptorIndexingFeaturesEXT::default();
let mut buffer_device_address_features =
Expand All @@ -163,6 +165,7 @@ unsafe fn inspect_adapter(
let mut features2_khr = vk::PhysicalDeviceFeatures2::default()
.push_next(&mut inline_uniform_block_features)
.push_next(&mut timeline_semaphore_features)
.push_next(&mut host_query_reset_features)
.push_next(&mut dynamic_rendering_features)
.push_next(&mut descriptor_indexing_features)
.push_next(&mut buffer_device_address_features)
Expand Down Expand Up @@ -202,6 +205,19 @@ unsafe fn inspect_adapter(
return None;
}

let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE {
log::info!("No timing because of queue support");
false
} else if host_query_reset_features.host_query_reset == vk::FALSE {
log::info!(
"No timing because of the host query reset. Features = {:?}",
host_query_reset_features
);
false
} else {
true
};

let ray_tracing = if !supported_extensions.contains(&vk::KHR_ACCELERATION_STRUCTURE_NAME)
|| !supported_extensions.contains(&vk::KHR_RAY_QUERY_NAME)
{
Expand Down Expand Up @@ -269,6 +285,7 @@ unsafe fn inspect_adapter(
buffer_marker,
shader_info,
full_screen_exclusive,
timing,
bugs,
})
}
Expand Down Expand Up @@ -491,6 +508,17 @@ impl super::Context {
.push_next(&mut khr_timeline_semaphore)
.push_next(&mut khr_dynamic_rendering);

let mut khr_host_query_reset;
if desc.timing && capabilities.timing {
khr_host_query_reset = vk::PhysicalDeviceHostQueryResetFeatures {
host_query_reset: vk::TRUE,
..Default::default()
};
device_create_info = device_create_info.push_next(&mut khr_host_query_reset);
} else if desc.timing {
log::warn!("Unable to gather timestamp information");
}

let mut ext_descriptor_indexing;
let mut khr_buffer_device_address;
let mut khr_acceleration_structure;
Expand Down Expand Up @@ -562,6 +590,13 @@ impl super::Context {
} else {
None
},
timing: if desc.timing && capabilities.timing {
Some(super::TimingDevice {
period: capabilities.properties.limits.timestamp_period,
})
} else {
None
},
core: device_core,
device_information: capabilities.device_information,
//TODO: detect GPU family
Expand Down
15 changes: 14 additions & 1 deletion blade-graphics/src/vulkan/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use ash::{khr, vk};
use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex};
use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex, time::Duration};

mod command;
mod descriptor;
mod init;
mod pipeline;
mod query;
mod resource;

struct Instance {
Expand All @@ -20,6 +21,11 @@ struct RayTracingDevice {
acceleration_structure: khr::acceleration_structure::Device,
}

/// Device-side timing state; present on `Device` only when timing was both
/// requested in the context description and supported by the adapter.
#[derive(Clone)]
struct TimingDevice {
/// Copied from the adapter's `limits.timestamp_period` at context creation.
/// Timestamp differences are multiplied by this value and the product is
/// interpreted as nanoseconds when building the reported durations.
period: f32,
}

#[derive(Clone)]
struct Workarounds {
extra_sync_src_access: vk::AccessFlags,
Expand All @@ -38,6 +44,7 @@ struct Device {
buffer_marker: Option<ash::amd::buffer_marker::Device>,
shader_info: Option<ash::amd::shader_info::Device>,
full_screen_exclusive: Option<ash::ext::full_screen_exclusive::Device>,
timing: Option<TimingDevice>,
workarounds: Workarounds,
}

Expand Down Expand Up @@ -217,6 +224,7 @@ pub struct RenderPipeline {
/// A raw Vulkan command buffer bundled with the pools that are created and
/// destroyed alongside it.
struct CommandBuffer {
// Raw Vulkan command buffer handle that commands are recorded into.
raw: vk::CommandBuffer,
// Descriptor pool paired with this command buffer.
descriptor_pool: descriptor::DescriptorPool,
// Query pool paired with this command buffer; timestamp queries are allocated from it.
query_pool: query::QueryPool,
}

#[derive(Debug, PartialEq)]
Expand All @@ -237,6 +245,7 @@ pub struct CommandEncoder {
buffers: Box<[CommandBuffer]>,
device: Device,
update_data: Vec<u8>,
timings: Vec<Duration>,
present: Option<Presentation>,
crash_handler: Option<CrashHandler>,
}
Expand Down Expand Up @@ -338,9 +347,11 @@ impl crate::traits::CommandDevice for Context {
self.set_object_name(raw, desc.name);
};
let descriptor_pool = self.device.create_descriptor_pool();
let query_pool = self.device.create_query_pool();
CommandBuffer {
raw,
descriptor_pool,
query_pool,
}
})
.collect();
Expand All @@ -365,6 +376,7 @@ impl crate::traits::CommandDevice for Context {
buffers,
device: self.device.clone(),
update_data: Vec::new(),
timings: Vec::new(),
present: None,
crash_handler,
}
Expand All @@ -380,6 +392,7 @@ impl crate::traits::CommandDevice for Context {
}
self.device
.destroy_descriptor_pool(&mut cmd_buf.descriptor_pool);
self.device.destroy_query_pool(&mut cmd_buf.query_pool);
}
unsafe {
self.device
Expand Down
Loading
Loading