diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 242e9a7a00..f0002eeeb7 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -16,6 +16,8 @@ use crate::bindings::ddog_php_prof_get_active_fiber_test as ddog_php_prof_get_ac use crate::bindings::{datadog_php_profiling_get_profiling_context, zend_execute_data}; use crate::config::SystemSettings; +use crate::thin_str::ThinString; +use crate::timeline::IncludeType; use crate::{CLOCKS, TAGS}; use chrono::Utc; use core::{ptr, str}; @@ -391,7 +393,10 @@ impl TimeCollector { for frame in &message.value.frames { let location = Location { function: Function { - name: frame.function.as_ref(), + name: frame + .function + .as_deref() + .unwrap_or_else(|| frame.extra.as_ref()), system_name: "", filename: frame.file.as_deref().unwrap_or(""), start_line: 0, @@ -515,9 +520,6 @@ pub enum UploadMessage { Upload(Box), } -#[cfg(feature = "timeline")] -const COW_EVAL: Cow = Cow::Borrowed("[eval]"); - impl Profiler { pub fn new(system_settings: &SystemSettings) -> Self { let fork_barrier = Arc::new(Barrier::new(3)); @@ -833,9 +835,10 @@ impl Profiler { match self.prepare_and_send_message( vec![ZendFrame { - function: COW_EVAL, - file: Some(filename), + function: None, + file: Some(ThinString::from(&filename)), line, + extra: StaticNames::Eval, }], SampleValues { timeline: duration, @@ -861,7 +864,7 @@ impl Profiler { now: i64, duration: i64, filename: String, - include_type: &str, + include_type: Result, ) { let mut labels = Profiler::common_labels(Self::TIMELINE_COMPILE_FILE_LABELS.len() + 1); labels.extend_from_slice(Self::TIMELINE_COMPILE_FILE_LABELS); @@ -874,9 +877,14 @@ impl Profiler { match self.prepare_and_send_message( vec![ZendFrame { - function: format!("[{include_type}]").into(), + function: None, file: None, line: 0, + extra: match include_type { + Ok(IncludeType::Include) => StaticNames::Include, + Ok(IncludeType::Require) => StaticNames::Require, + Err(_type) => StaticNames::UnknownIncludeType, + }, }], SampleValues { timeline: duration, @@ -910,9 +918,10 @@ impl Profiler { match self.prepare_and_send_message( vec![ZendFrame { - function: "[idle]".into(), + function: None, file: None, line: 0, + extra: StaticNames::Idle, }], SampleValues { timeline: duration, @@ -966,9 +975,10 @@ impl Profiler { match self.prepare_and_send_message( vec![ZendFrame { - function: "[gc]".into(), + function: None, file: None, line: 0, + extra: StaticNames::Gc, }], SampleValues { timeline: duration, @@ -1085,14 +1095,6 @@ mod tests { use datadog_profiling::exporter::Uri; use log::LevelFilter; - fn get_frames() -> Vec { - vec![ZendFrame { - function: "foobar()".into(), - file: Some("foobar.php".into()), - line: 42, - }] - } - pub fn get_system_settings() -> SystemSettings { SystemSettings { profiling_enabled: true, @@ -1126,7 +1128,12 @@ mod tests { #[test] #[cfg(all(feature = "timeline", not(miri)))] fn profiler_prepare_sample_message_works_cpu_time_and_timeline() { - let frames = get_frames(); + let frames = vec![ZendFrame { + function: Some("foobar()".into()), + file: Some("foobar.php".into()), + line: 42, + extra: StaticNames::Empty, + }]; let samples = get_samples(); let labels = Profiler::common_labels(0); let mut settings = get_system_settings(); diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 339dedb899..5f94eaad12 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -2,16 +2,72 @@ use crate::bindings::{zai_str_from_zstr, zend_execute_data, zend_function, ZEND_ use std::borrow::Cow; use std::str::Utf8Error; -const COW_PHP_OPEN_TAG: Cow = Cow::Borrowed(" = Cow::Borrowed("[truncated]"); +#[derive(Clone, Copy, Debug)] +pub enum StaticNames { + Empty, + Eval, + Gc, + Idle, + Include, + PhpOpenTag, + Require, + Truncated, + UnknownIncludeType, +} + +impl AsRef for StaticNames { + fn as_ref(&self) -> &str { + match self { + StaticNames::Empty => "", + StaticNames::Eval => "[eval]", + StaticNames::Gc => "[gc]", + StaticNames::Idle => "[idle]", + StaticNames::Include => "[include]", + StaticNames::PhpOpenTag => " "[require]", + StaticNames::Truncated => "[truncated]", + StaticNames::UnknownIncludeType => "[unknown include type]", + } + } +} + +impl Default for StaticNames { + fn default() -> Self { + Self::Empty + } +} #[derive(Default, Debug)] pub struct ZendFrame { - // Most tools don't like frames that don't have function names, so use a - // fake name if you need to like ", - pub file: Option, + // This field is logically required. If it's None, look at extra. + pub function: Option, + pub file: Option, pub line: u32, // use 0 for no line info + pub extra: StaticNames, +} + +impl ZendFrame { + fn new_from_stack_collection( + function: Option, + file: Option, + line: u32, + ) -> Option { + if function.is_some() || file.is_some() { + let extra = if function.is_none() { + StaticNames::PhpOpenTag + } else { + StaticNames::Empty + }; + Some(Self { + function, + file, + line, + extra, + }) + } else { + None + } + } } /// Extract the "function name" component for the frame. This is a string which @@ -55,12 +111,12 @@ pub fn extract_function_name(func: &zend_function) -> Option { Some(String::from_utf8_lossy(buffer.as_slice()).into_owned()) } -unsafe fn extract_file_and_line(execute_data: &zend_execute_data) -> (Option, u32) { +unsafe fn extract_file_and_line(execute_data: &zend_execute_data) -> (Option>, u32) { // This should be Some, just being cautious. match execute_data.func.as_ref() { Some(func) if func.type_ == ZEND_USER_FUNCTION as u8 => { // Safety: zai_str_from_zstr will return a valid ZaiStr. - let file = zai_str_from_zstr(func.op_array.filename.as_mut()).into_string(); + let file = zai_str_from_zstr(func.op_array.filename.as_mut()).into_string_lossy(); let lineno = match execute_data.opline.as_ref() { Some(opline) => opline.lineno, None => 0, @@ -81,7 +137,10 @@ mod detail { use std::ptr::NonNull; struct StringCache<'a> { - /// Refers to a function's run time cache reserved by this extension. + /// The frame in question. + frame: &'a zend_execute_data, + + /// Frame's run time cache reserved by this extension. cache_slots: &'a mut [usize; 2], /// Refers to the string set in the thread-local storage. @@ -93,9 +152,9 @@ mod detail { /// string in the slot currently, then create one by calling the /// provided function, store it in the string cache and cache slot, /// and return it. - fn get_or_insert(&mut self, slot: usize, f: F) -> Option + fn get_or_insert(&mut self, slot: usize, f: F) -> Option> where - F: FnOnce() -> Option, + F: FnOnce(&'a zend_execute_data) -> Option>, { debug_assert!(slot < self.cache_slots.len()); let cached = unsafe { self.cache_slots.get_unchecked_mut(slot) }; @@ -104,15 +163,12 @@ mod detail { match NonNull::new(ptr) { Some(non_null) => { // SAFETY: transmuting ThinStr from its repr. - let thin_str: ThinStr = unsafe { core::mem::transmute(non_null) }; - // SAFETY: the string set is only reset between requests, - // so this ThinStr points into the same string set that - // created it. - let str = unsafe { self.string_set.get_thin_str(thin_str) }; - Some(str.to_string()) + let thin_str: ThinStr<'a> = unsafe { core::mem::transmute(non_null) }; + let str: &'a str = thin_str.into(); + Some(Cow::Borrowed(str)) } None => { - let string = f()?; + let string = f(self.frame)?; let thin_str = self.string_set.insert(&string); // SAFETY: transmuting ThinStr into its repr. let non_null: NonNull = unsafe { core::mem::transmute(thin_str) }; @@ -209,9 +265,10 @@ mod detail { */ if samples.len() == max_depth - 1 { samples.push(ZendFrame { - function: COW_TRUNCATED, + function: None, file: None, line: 0, + extra: StaticNames::Truncated, }); break; } @@ -234,14 +291,14 @@ mod detail { let func = execute_data.func.as_ref()?; let (function, file, line) = match ddog_php_prof_function_run_time_cache(func) { - Some(slots) => { + Some(cache_slots) => { let mut string_cache = StringCache { - cache_slots: slots, + frame: execute_data, + cache_slots, string_set, }; - let function = handle_function_cache_slot(func, &mut string_cache); - let (file, line) = handle_file_cache_slot(execute_data, &mut string_cache); - + let function = handle_function_cache_slot(&mut string_cache); + let (file, line) = handle_file_cache_slot(&mut string_cache); let cache_slots = string_cache.cache_slots; FUNCTION_CACHE_STATS.with(|cell| { let mut stats = cell.borrow_mut(); @@ -260,57 +317,49 @@ mod detail { let mut stats = cell.borrow_mut(); stats.not_applicable += 1; }); - let function = extract_function_name(func).map(Cow::Owned); + let function = extract_function_name(func).as_deref().map(ThinString::from); let (file, line) = extract_file_and_line(execute_data); + let file = file.as_deref().map(ThinString::from); (function, file, line) } }; - if function.is_some() || file.is_some() { - Some(ZendFrame { - function: function.unwrap_or(COW_PHP_OPEN_TAG), - file, - line, - }) - } else { - None - } + ZendFrame::new_from_stack_collection(function, file, line) } - fn handle_function_cache_slot( - func: &zend_function, - string_cache: &mut StringCache, - ) -> Option> { - let fname = string_cache.get_or_insert(0, || extract_function_name(func))?; - Some(Cow::Owned(fname)) + fn handle_function_cache_slot(string_cache: &mut StringCache) -> Option { + string_cache + .get_or_insert(0, |frame| { + let name = unsafe { extract_function_name(&*frame.func) }; + name.map(Cow::Owned) + }) + .as_deref() + .map(ThinString::from) } - unsafe fn handle_file_cache_slot( - execute_data: &zend_execute_data, - string_cache: &mut StringCache, - ) -> (Option, u32) { - let option = string_cache.get_or_insert(1, || -> Option { + unsafe fn handle_file_cache_slot(string_cache: &mut StringCache) -> (Option, u32) { + let option = string_cache.get_or_insert(1, |frame| -> Option> { unsafe { // Safety: if we have cache slots, we definitely have a func. - let func = &*execute_data.func; + let func = &*frame.func; // Safety: this union member is always valid. if func.type_ != ZEND_USER_FUNCTION as u8 { return None; }; // SAFETY: calling C function with correct args. - let file = zai_str_from_zstr(func.op_array.filename.as_mut()).into_string(); + let file = zai_str_from_zstr(func.op_array.filename.as_mut()).into_string_lossy(); Some(file) } }); match option { Some(filename) => { // SAFETY: if there's a file, then there should be an opline. - let lineno = match execute_data.opline.as_ref() { + let lineno = match string_cache.frame.opline.as_ref() { Some(opline) => opline.lineno, None => 0, }; - (Some(filename), lineno) + (Some(ThinString::from(filename.as_ref())), lineno) } None => (None, 0), } @@ -349,9 +398,10 @@ mod detail { */ if samples.len() == max_depth - 1 { samples.push(ZendFrame { - function: COW_TRUNCATED, + function: None, file: None, line: 0, + extra: StaticNames::Truncated, }); break; } @@ -364,28 +414,33 @@ mod detail { unsafe fn collect_call_frame(execute_data: &zend_execute_data) -> Option { if let Some(func) = execute_data.func.as_ref() { - let function = extract_function_name(func); + let function = extract_function_name(func).as_deref().map(ThinString::from); let (file, line) = extract_file_and_line(execute_data); - - // Only create a new frame if there's file or function info. - if file.is_some() || function.is_some() { - // If there's no function name, use a fake name. - let function = function.map(Cow::Owned).unwrap_or(COW_PHP_OPEN_TAG); - return Some(ZendFrame { - function, - file, - line, - }); - } + let file = file.as_deref().map(ThinString::from); + ZendFrame::new_from_stack_collection(function, file, line) + } else { + None } - None } } +use crate::thin_str::ThinString; pub use detail::*; -#[cfg(all(test, stack_walking_tests))] +#[cfg(test)] mod tests { + use super::*; + use core::mem::size_of; + + #[test] + fn test_frame_size() { + let word_size = size_of::<*mut ()>(); + assert_eq!(size_of::(), word_size * 3); + } +} + +#[cfg(all(test, stack_walking_tests))] +mod stack_walking_tests { use super::*; use crate::bindings as zend; diff --git a/profiling/src/string_set.rs b/profiling/src/string_set.rs index 5a8a89fa01..1d33940f8a 100644 --- a/profiling/src/string_set.rs +++ b/profiling/src/string_set.rs @@ -1,6 +1,5 @@ use crate::thin_str::ThinStr; use core::hash; -use core::ops::Deref; use datadog_alloc::{ChainAllocator, VirtualAllocator}; type Hasher = hash::BuildHasherDefault; @@ -101,16 +100,4 @@ impl StringSet { pub fn arena_used_bytes(&self) -> usize { self.arena.used_bytes() } - - /// Creates a `&str` from the `thin_str`, binding it to the lifetime of - /// the set. - /// - /// # Safety - /// The `thin_str` must live in this string set. - #[inline] - pub unsafe fn get_thin_str(&self, thin_str: ThinStr) -> &str { - // todo: debug_assert it exists in the memory region? - // SAFETY: see function's safety conditions. - unsafe { core::mem::transmute(thin_str.deref()) } - } } diff --git a/profiling/src/thin_str.rs b/profiling/src/thin_str.rs index 01fd8a2f8c..d84e2f5f6a 100644 --- a/profiling/src/thin_str.rs +++ b/profiling/src/thin_str.rs @@ -1,6 +1,7 @@ -use datadog_alloc::{AllocError, Allocator, ChainAllocator}; +use datadog_alloc::{AllocError, Allocator, ChainAllocator, Global}; use std::alloc::Layout; use std::borrow::Borrow; +use std::fmt::{Debug, Formatter}; use std::hash; use std::marker::PhantomData; use std::ops::Deref; @@ -18,10 +19,106 @@ pub struct ThinStr<'a> { _marker: PhantomData<&'a str>, } +pub struct ThinString { + thin_ptr: ThinPtr, + allocator: A, +} + pub trait ArenaAllocator: Allocator {} impl ArenaAllocator for ChainAllocator {} +impl Clone for ThinString { + fn clone(&self) -> Self { + Self::from_str_in(self.deref(), self.allocator.clone()) + } +} + +impl Drop for ThinString { + fn drop(&mut self) { + let ptr = self.thin_ptr.size_ptr; + // Don't drop the empty string. + let empty_str = core::ptr::addr_of!(EMPTY_INLINE_STRING).cast::(); + if ptr.as_ptr() == empty_str.cast_mut() { + return; + } + let layout = self.thin_ptr.layout(); + unsafe { self.allocator.deallocate(ptr, layout) } + } +} + +impl ThinString { + pub fn try_from_str_in(str: &str, allocator: A) -> Result { + let thin_ptr = ThinPtr::try_from_str_in(str, &allocator)?; + Ok(Self { + thin_ptr, + allocator, + }) + } + + pub fn from_str_in(str: &str, allocator: A) -> Self { + Self::try_from_str_in(str, allocator).unwrap() + } + + pub fn new_in(allocator: A) -> Self { + let thin_ptr = EMPTY_INLINE_STRING.as_thin_ptr(); + Self { + thin_ptr, + allocator, + } + } +} + +impl ThinString { + pub fn new() -> Self { + let thin_ptr = EMPTY_INLINE_STRING.as_thin_ptr(); + let allocator = Global; + Self { + thin_ptr, + allocator, + } + } +} + +impl From<&str> for ThinString { + fn from(value: &str) -> Self { + Self::try_from_str_in(value, Global).unwrap() + } +} + +impl From<&String> for ThinString { + fn from(value: &String) -> Self { + Self::try_from_str_in(value, Global).unwrap() + } +} + +impl From for ThinString { + fn from(value: String) -> Self { + Self::try_from_str_in(&value, Global).unwrap() + } +} + +impl<'a> From> for ThinString { + fn from(value: ThinStr<'a>) -> Self { + Self::try_from_str_in(value.deref(), Global).unwrap() + } +} + +impl<'a> From> for &'a str { + fn from(value: ThinStr<'a>) -> Self { + // SAFETY: ThinStr<'a> into &'a str is sound (lifetime has no change). + unsafe { value.thin_ptr.into_str() } + } +} + +unsafe impl Send for ThinString {} + +impl Default for ThinString { + fn default() -> Self { + Self::new() + } +} + impl ThinStr<'static> { pub fn new() -> ThinStr<'static> { Self { @@ -55,23 +152,37 @@ impl<'a> ThinStr<'a> { } } -impl<'a> Deref for ThinStr<'a> { +impl Debug for ThinString { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.deref().fmt(f) + } +} + +impl<'a> Debug for ThinStr<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.deref().fmt(f) + } +} + +impl Deref for ThinString { type Target = str; fn deref(&self) -> &Self::Target { - let slice = { - let len = self.thin_ptr.len(); - let data = self.thin_ptr.data().as_ptr(); + self.thin_ptr.deref() + } +} - // SAFETY: bytes are never handed out as mut, so const slices are - // not going to break aliasing rules, and this is the correct - // lifetime for the data. - unsafe { core::slice::from_raw_parts(data, len) } - }; +impl<'a> Deref for ThinStr<'a> { + type Target = str; - // SAFETY: since this is a copy of a valid utf-8 string, then it must - // also be valid utf-8. - unsafe { core::str::from_utf8_unchecked(slice) } + fn deref(&self) -> &Self::Target { + self.thin_ptr.deref() + } +} + +impl hash::Hash for ThinString { + fn hash(&self, state: &mut H) { + self.deref().hash(state) } } @@ -81,14 +192,27 @@ impl<'a> hash::Hash for ThinStr<'a> { } } +impl PartialEq for ThinString { + fn eq(&self, other: &Self) -> bool { + self.deref().eq(other.deref()) + } +} + impl<'a> PartialEq for ThinStr<'a> { fn eq(&self, other: &Self) -> bool { self.deref().eq(other.deref()) } } +impl Eq for ThinString {} impl<'a> Eq for ThinStr<'a> {} +impl Borrow for ThinString { + fn borrow(&self) -> &str { + self.deref() + } +} + impl<'a> Borrow for ThinStr<'a> { fn borrow(&self) -> &str { self.deref() @@ -136,14 +260,14 @@ impl ThinPtr { unsafe { Layout::from_size_align_unchecked(len + USIZE_WIDTH, 1) } } - fn try_from_str_in(str: &str, arena: &impl Allocator) -> Result { + fn try_from_str_in(str: &str, alloc: &impl Allocator) -> Result { let inline_size = str.len() + USIZE_WIDTH; let layout = match Layout::from_size_align(inline_size, 1) { Ok(l) => l, Err(_) => return Err(AllocError), }; - let allocation = arena.allocate(layout)?.cast::().as_ptr(); + let allocation = alloc.allocate(layout)?.cast::().as_ptr(); let size = allocation.cast::<[u8; USIZE_WIDTH]>(); // SAFETY: writing into uninitialized new allocation at correct place. @@ -161,6 +285,33 @@ impl ThinPtr { let size_ptr = unsafe { NonNull::new_unchecked(allocation) }; Ok(ThinPtr { size_ptr }) } + + /// # Safety + /// The caller must ensure the lifetime is correctly bound. + unsafe fn into_str<'a>(self) -> &'a str { + let slice = { + let len = self.len(); + let data = self.data().as_ptr(); + + // SAFETY: bytes are never handed out as mut, so const slices are + // not going to break aliasing rules. Lifetime enforcement must + // be taken care of by the caller. + core::slice::from_raw_parts(data, len) + }; + + // SAFETY: since this is a copy of a valid utf-8 string, then it must + // also be valid utf-8. + core::str::from_utf8_unchecked(slice) + } +} + +impl Deref for ThinPtr { + type Target = str; + + fn deref(&self) -> &Self::Target { + // SAFETY: the self lifetime is correct. + unsafe { self.into_str() } + } } #[repr(C)] @@ -190,34 +341,42 @@ static EMPTY_INLINE_STRING: StaticInlineString<0> = StaticInlineString::<0> { #[cfg(test)] mod tests { use super::*; - use datadog_alloc::Global; + use core::mem::size_of; use datadog_profiling::collections::string_table::wordpress_test_data; - // Not really, please manually de-allocate strings when done with them. - impl ArenaAllocator for Global {} + #[test] + fn test_sizes() { + let word = size_of::>(); + assert_eq!(word, size_of::()); + assert_eq!(word, size_of::()); + + // niche optimization should apply too + assert_eq!(word, size_of::>()); + assert_eq!(word, size_of::>()); + } + + #[test] + fn test_deallocation_of_empty_since_it_is_special_cased() { + let thin_string = ThinString::new(); + drop(thin_string); + } #[test] fn test_allocation_and_deallocation() { - let alloc = Global; - - let mut thin_strs: Vec = wordpress_test_data::WORDPRESS_STRINGS + let thin_strs: Vec = wordpress_test_data::WORDPRESS_STRINGS .iter() .map(|str| { - let thin_str = ThinStr::try_from_str_in(str, &alloc).unwrap(); - let actual = thin_str.deref(); + let thin_string = ThinString::from(*str); + let actual = thin_string.deref(); assert_eq!(*str, actual); - thin_str + thin_string }) .collect(); // This could detect out-of-bounds writes. - for (thin_str, str) in thin_strs.iter().zip(wordpress_test_data::WORDPRESS_STRINGS) { - let actual = thin_str.deref(); + for (thin_string, str) in thin_strs.iter().zip(wordpress_test_data::WORDPRESS_STRINGS) { + let actual = thin_string.deref(); assert_eq!(str, actual); } - - for thin_str in thin_strs.drain(..) { - unsafe { alloc.deallocate(thin_str.thin_ptr.size_ptr, thin_str.layout()) }; - } } } diff --git a/profiling/src/timeline.rs b/profiling/src/timeline.rs index f0e8928526..75d5dccfca 100644 --- a/profiling/src/timeline.rs +++ b/profiling/src/timeline.rs @@ -12,6 +12,11 @@ use std::time::Instant; use std::time::SystemTime; use std::time::UNIX_EPOCH; +pub enum IncludeType { + Include, + Require, +} + /// The engine's original (or neighbouring extensions) `gc_collect_cycles()` function static mut PREV_GC_COLLECT_CYCLES: Option = None; @@ -405,9 +410,13 @@ unsafe extern "C" fn ddog_php_prof_compile_file( } let include_type = match r#type as u32 { - zend::ZEND_INCLUDE => "include", // `include_once()` and `include_once()` - zend::ZEND_REQUIRE => "require", // `require()` and `require_once()` - _default => "", + // `include_once()` and `include_once()` + zend::ZEND_INCLUDE => Ok(IncludeType::Include), + + // `require()` and `require_once()` + zend::ZEND_REQUIRE => Ok(IncludeType::Require), + + include_type => Err(include_type), }; // Extract the filename from the returned op_array. @@ -419,7 +428,16 @@ unsafe extern "C" fn ddog_php_prof_compile_file( let filename = zai_str_from_zstr((*op_array).filename.as_mut()).into_string(); trace!( - "Compile file \"{filename}\" with include type \"{include_type}\" took {} nanoseconds", + "Compile file \"{filename}\" with include type \"{}{}\" took {} nanoseconds", + match include_type { + Ok(IncludeType::Include) => "include", + Ok(IncludeType::Require) => "require", + Err(_) => "unknown include type", + }, + match include_type { + Err(include_type) => format!(" {include_type}"), + _ => String::new(), + }, duration.as_nanos(), );