From 9ecd4364570917cec504cd1a390b374e8697bde0 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Fri, 17 May 2024 14:32:10 +1000 Subject: [PATCH] Replace `Context::new` with `Loader::new` The Loader encapsulates the process of loading the DWARF data and keeping it alive so that the Context can refer to it. It also handles tasks such as loading dSYM or DWO files. This allows us to remove the `object` crate from the public API. --- .github/workflows/rust.yml | 7 +- Cargo.toml | 23 +- README.md | 34 +-- benches/bench.rs | 145 +------------ src/bin/addr2line.rs | 86 +------- src/builtin_split_dwarf_loader.rs | 164 --------------- src/lib.rs | 131 +++--------- src/loader.rs | 334 ++++++++++++++++++++++++++++++ src/lookup.rs | 4 +- tests/correctness.rs | 86 +------- tests/parse.rs | 35 +--- 11 files changed, 433 insertions(+), 616 deletions(-) delete mode 100644 src/builtin_split_dwarf_loader.rs create mode 100644 src/loader.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 3bd53b7..cb1b159 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -36,11 +36,11 @@ jobs: - name: Test debug run: | cargo test --verbose - cargo test --verbose --features bin + cargo test --verbose --features all - name: Test release run: | cargo test --verbose --release - cargo test --verbose --release --features bin + cargo test --verbose --release --features all features: runs-on: ubuntu-latest @@ -50,10 +50,11 @@ jobs: - run: cargo build --no-default-features --features std - run: cargo build --no-default-features --features std,cpp_demangle - run: cargo build --no-default-features --features std,rustc-demangle - - run: cargo build --no-default-features --features std-object + - run: cargo build --no-default-features --features loader - run: cargo build --no-default-features --features fallible-iterator - run: cargo build --no-default-features --features smallvec - run: cargo build --no-default-features --features bin + - run: cargo build --no-default-features --features all bench: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 9e79d21..9378d2d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,15 +15,17 @@ rust-version = "1.65" [dependencies] gimli = { version = "0.29.0", default-features = false, features = ["read"] } fallible-iterator = { version = "0.3.0", default-features = false, optional = true } -memmap2 = { version = "0.9.4", optional = true } -object = { version = "0.35.0", default-features = false, features = ["read"], optional = true } smallvec = { version = "1", default-features = false, optional = true } rustc-demangle = { version = "0.1", optional = true } cpp_demangle = { version = "0.4", default-features = false, features = ["alloc"], optional = true } +# loader dependencies +object = { version = "0.35.0", default-features = false, features = ["read", "compression"], optional = true } +memmap2 = { version = "0.9.4", optional = true } +typed-arena = { version = "2", optional = true } + # bin dependencies clap = { version = "4.3.21", features = ["wrap_help"], optional = true } -typed-arena = { version = "2", optional = true } # Internal feature, only used when building as part of libstd, not part of the # stable interface of this crate. @@ -45,10 +47,15 @@ debug = true codegen-units = 1 [features] -default = ["rustc-demangle", "cpp_demangle", "std-object", "fallible-iterator", "smallvec", "memmap2"] +default = ["rustc-demangle", "cpp_demangle", "loader", "fallible-iterator", "smallvec"] std = ["gimli/std"] -std-object = ["std", "object", "object/std", "object/compression", "gimli/endian-reader"] -bin = ["default", "dep:clap", "dep:typed-arena"] +loader = ["std", "dep:object", "dep:memmap2", "dep:typed-arena"] +bin = ["loader", "rustc-demangle", "cpp_demangle", "smallvec", "dep:clap"] +all = ["bin"] + +# Use of --all-features is not supported. +# This is a dummy feature to detect when --all-features is used. +cargo-all = [] # Internal feature, only used when building as part of libstd, not part of the # stable interface of this crate. @@ -61,11 +68,11 @@ required-features = ["bin"] [[test]] name = "correctness" -required-features = ["default"] +required-features = ["loader", "fallible-iterator"] [[test]] name = "parse" -required-features = ["std-object"] +required-features = ["loader"] [[bin]] name = "addr2line" diff --git a/README.md b/README.md index dc6cb93..88823b6 100644 --- a/README.md +++ b/README.md @@ -4,25 +4,25 @@ [![](https://img.shields.io/docsrs/addr2line.svg)](https://docs.rs/addr2line) [![Coverage Status](https://coveralls.io/repos/github/gimli-rs/addr2line/badge.svg?branch=master)](https://coveralls.io/github/gimli-rs/addr2line?branch=master) -A cross-platform library for retrieving per-address debug information -from files with DWARF debug information. - -`addr2line` uses [`gimli`](https://github.com/gimli-rs/gimli) to parse -the debug information, and exposes an interface for finding -the source file, line number, and wrapping function for instruction -addresses within the target program. These lookups can either be -performed programmatically through `Context::find_location` and -`Context::find_frames`, or via the included example binary, -`addr2line` (named and modelled after the equivalent utility from -[GNU binutils](https://sourceware.org/binutils/docs/binutils/addr2line.html)). +`addr2line` provides a cross-platform library for retrieving per-address debug information +from files with DWARF debug information. Given an address, it can return the file name, +line number, and function name associated with that address, as well as the inline call +stack leading to that address. + +The crate has a CLI wrapper around the library which provides some of +the functionality of the `addr2line` command line tool distributed with +[GNU binutils](https://sourceware.org/binutils/docs/binutils/addr2line.html). # Quickstart - - Add the [`addr2line` crate](https://crates.io/crates/addr2line) to your `Cargo.toml` - - Load the file and parse it with [`addr2line::object::read::File::parse`](https://docs.rs/object/*/object/read/struct.File.html#method.parse) - - Pass the parsed file to [`addr2line::Context::new` ](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.new) - - Use [`addr2line::Context::find_location`](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.find_location) - or [`addr2line::Context::find_frames`](https://docs.rs/addr2line/*/addr2line/struct.Context.html#method.find_frames) - to look up debug information for an address + - Add the [`addr2line` crate](https://crates.io/crates/addr2line) to your `Cargo.toml`. + - Call [`addr2line::Loader::new`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.new) with the file path. + - Use [`addr2line::Loader::find_location`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.find_location) + or [`addr2line::Loader::find_frames`](https://docs.rs/addr2line/*/addr2line/struct.Loader.html#method.find_frames) + to look up debug information for an address. + +If you want to provide your own file loading and memory management, use +[`addr2line::Context`](https://docs.rs/addr2line/*/addr2line/struct.Context.html) +instead of `addr2line::Loader`. # Performance diff --git a/benches/bench.rs b/benches/bench.rs index 24ff271..6c74961 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -66,18 +66,7 @@ fn get_test_addresses(target: &object::File<'_>) -> Vec { } #[bench] -fn context_new_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - b.iter(|| { - addr2line::Context::new(file).unwrap(); - }); - }); -} - -#[bench] -fn context_new_slice(b: &mut test::Bencher) { +fn context_new(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -90,19 +79,7 @@ fn context_new_slice(b: &mut test::Bencher) { } #[bench] -fn context_new_parse_lines_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - b.iter(|| { - let context = addr2line::Context::new(file).unwrap(); - context.parse_lines().unwrap(); - }); - }); -} - -#[bench] -fn context_new_parse_lines_slice(b: &mut test::Bencher) { +fn context_new_parse_lines(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -116,19 +93,7 @@ fn context_new_parse_lines_slice(b: &mut test::Bencher) { } #[bench] -fn context_new_parse_functions_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - b.iter(|| { - let context = addr2line::Context::new(file).unwrap(); - context.parse_functions().unwrap(); - }); - }); -} - -#[bench] -fn context_new_parse_functions_slice(b: &mut test::Bencher) { +fn context_new_parse_functions(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -142,19 +107,7 @@ fn context_new_parse_functions_slice(b: &mut test::Bencher) { } #[bench] -fn context_new_parse_inlined_functions_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - b.iter(|| { - let context = addr2line::Context::new(file).unwrap(); - context.parse_inlined_functions().unwrap(); - }); - }); -} - -#[bench] -fn context_new_parse_inlined_functions_slice(b: &mut test::Bencher) { +fn context_new_parse_inlined_functions(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -168,28 +121,7 @@ fn context_new_parse_inlined_functions_slice(b: &mut test::Bencher) { } #[bench] -fn context_query_location_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - let addresses = get_test_addresses(file); - - let ctx = addr2line::Context::new(file).unwrap(); - // Ensure nothing is lazily loaded. - for addr in &addresses { - test::black_box(ctx.find_location(*addr)).ok(); - } - - b.iter(|| { - for addr in &addresses { - test::black_box(ctx.find_location(*addr)).ok(); - } - }); - }); -} - -#[bench] -fn context_query_location_slice(b: &mut test::Bencher) { +fn context_query_location(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -212,34 +144,7 @@ fn context_query_location_slice(b: &mut test::Bencher) { } #[bench] -fn context_query_with_functions_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - let addresses = get_test_addresses(file); - - let ctx = addr2line::Context::new(file).unwrap(); - // Ensure nothing is lazily loaded. - for addr in &addresses { - let mut frames = ctx.find_frames(*addr).skip_all_loads().unwrap(); - while let Ok(Some(ref frame)) = frames.next() { - test::black_box(frame); - } - } - - b.iter(|| { - for addr in &addresses { - let mut frames = ctx.find_frames(*addr).skip_all_loads().unwrap(); - while let Ok(Some(ref frame)) = frames.next() { - test::black_box(frame); - } - } - }); - }); -} - -#[bench] -fn context_query_with_functions_slice(b: &mut test::Bencher) { +fn context_query_with_functions(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -268,23 +173,7 @@ fn context_query_with_functions_slice(b: &mut test::Bencher) { } #[bench] -fn context_new_and_query_location_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - let addresses = get_test_addresses(file); - - b.iter(|| { - let ctx = addr2line::Context::new(file).unwrap(); - for addr in addresses.iter().take(100) { - test::black_box(ctx.find_location(*addr)).ok(); - } - }); - }); -} - -#[bench] -fn context_new_and_query_location_slice(b: &mut test::Bencher) { +fn context_new_and_query_location(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { @@ -302,25 +191,7 @@ fn context_new_and_query_location_slice(b: &mut test::Bencher) { } #[bench] -fn context_new_and_query_with_functions_rc(b: &mut test::Bencher) { - let target = release_fixture_path(); - - with_file(&target, |file| { - let addresses = get_test_addresses(file); - - b.iter(|| { - let ctx = addr2line::Context::new(file).unwrap(); - for addr in addresses.iter().take(100) { - let mut frames = ctx.find_frames(*addr).skip_all_loads().unwrap(); - while let Ok(Some(ref frame)) = frames.next() { - test::black_box(frame); - } - } - }); - }); -} -#[bench] -fn context_new_and_query_with_functions_slice(b: &mut test::Bencher) { +fn context_new_and_query_with_functions(b: &mut test::Bencher) { let target = release_fixture_path(); with_file(&target, |file| { diff --git a/src/bin/addr2line.rs b/src/bin/addr2line.rs index 97fc802..1d3b937 100644 --- a/src/bin/addr2line.rs +++ b/src/bin/addr2line.rs @@ -1,14 +1,10 @@ use std::borrow::Cow; -use std::fs::File; use std::io::{BufRead, Lines, StdinLock, Write}; use std::path::{Path, PathBuf}; use clap::{Arg, ArgAction, Command}; -use fallible_iterator::FallibleIterator; -use object::{Object, ObjectSection, SymbolMap, SymbolMapName}; -use typed_arena::Arena; -use addr2line::{Context, Location}; +use addr2line::{Loader, Location}; fn parse_uint_from_hex_string(string: &str) -> Option { if string.len() > 2 && string.starts_with("0x") { @@ -76,30 +72,6 @@ fn print_function(name: Option<&str>, language: Option, demangle: } } -fn load_file_section<'input, 'arena, Endian: gimli::Endianity>( - id: gimli::SectionId, - file: &object::File<'input>, - endian: Endian, - arena_data: &'arena Arena>, -) -> Result, ()> { - // TODO: Unify with dwarfdump.rs in gimli. - let name = id.name(); - match file.section_by_name(name) { - Some(section) => match section.uncompressed_data().unwrap() { - Cow::Borrowed(b) => Ok(gimli::EndianSlice::new(b, endian)), - Cow::Owned(b) => Ok(gimli::EndianSlice::new(arena_data.alloc(b.into()), endian)), - }, - None => Ok(gimli::EndianSlice::new(&[][..], endian)), - } -} - -fn find_name_from_symbols<'a>( - symbols: &'a SymbolMap>, - probe: u64, -) -> Option<&'a str> { - symbols.get(probe).map(|x| x.name()) -} - struct Options<'a> { do_functions: bool, do_inlines: bool, @@ -175,8 +147,6 @@ fn main() { ]) .get_matches(); - let arena_data = Arena::new(); - let opts = Options { do_functions: matches.get_flag("functions"), do_inlines: matches.get_flag("inlines"), @@ -189,45 +159,7 @@ fn main() { sup: matches.get_one::("sup"), }; - let file = File::open(opts.exe).unwrap(); - let map = unsafe { memmap2::Mmap::map(&file).unwrap() }; - let object = &object::File::parse(&*map).unwrap(); - - let endian = if object.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - let mut load_section = |id: gimli::SectionId| -> Result<_, _> { - load_file_section(id, object, endian, &arena_data) - }; - - let sup_map; - let sup_object = if let Some(sup_path) = opts.sup { - let sup_file = File::open(sup_path).unwrap(); - sup_map = unsafe { memmap2::Mmap::map(&sup_file).unwrap() }; - Some(object::File::parse(&*sup_map).unwrap()) - } else { - None - }; - - let symbols = object.symbol_map(); - let mut dwarf = gimli::Dwarf::load(&mut load_section).unwrap(); - if let Some(ref sup_object) = sup_object { - let mut load_sup_section = |id: gimli::SectionId| -> Result<_, _> { - load_file_section(id, sup_object, endian, &arena_data) - }; - dwarf.load_sup(&mut load_sup_section).unwrap(); - } - - let mut split_dwarf_loader = addr2line::builtin_split_dwarf_loader::SplitDwarfLoader::new( - |data, endian| { - gimli::EndianSlice::new(arena_data.alloc(Cow::Owned(data.into_owned())), endian) - }, - Some(opts.exe.clone()), - ); - let ctx = Context::from_dwarf(dwarf).unwrap(); + let ctx = Loader::new_with_sup(opts.exe, opts.sup).unwrap(); let stdin = std::io::stdin(); let addrs = matches @@ -253,13 +185,13 @@ fn main() { if opts.do_functions || opts.do_inlines { let mut printed_anything = false; if let Some(probe) = probe { - let frames = ctx.find_frames(probe); - let frames = split_dwarf_loader.run(frames).unwrap(); - let mut frames = frames.enumerate(); - while let Some((i, frame)) = frames.next().unwrap() { - if opts.pretty && i != 0 { + let mut frames = ctx.find_frames(probe).unwrap(); + let mut first = true; + while let Some(frame) = frames.next().unwrap() { + if opts.pretty && !first { print!(" (inlined by) "); } + first = false; if opts.do_functions { if let Some(func) = frame.function { @@ -269,7 +201,7 @@ fn main() { opts.demangle, ); } else { - let name = find_name_from_symbols(&symbols, probe); + let name = ctx.find_symbol(probe); print_function(name, None, opts.demangle); } @@ -292,7 +224,7 @@ fn main() { if !printed_anything { if opts.do_functions { - let name = probe.and_then(|probe| find_name_from_symbols(&symbols, probe)); + let name = probe.and_then(|probe| ctx.find_symbol(probe)); print_function(name, None, opts.demangle); if opts.pretty { diff --git a/src/builtin_split_dwarf_loader.rs b/src/builtin_split_dwarf_loader.rs deleted file mode 100644 index 7133b22..0000000 --- a/src/builtin_split_dwarf_loader.rs +++ /dev/null @@ -1,164 +0,0 @@ -use alloc::borrow::Cow; -use alloc::sync::Arc; -use std::fs::File; -use std::path::PathBuf; - -use object::Object; - -use crate::{LookupContinuation, LookupResult}; - -#[cfg(unix)] -fn convert_path>( - r: &R, -) -> Result { - use std::ffi::OsStr; - use std::os::unix::ffi::OsStrExt; - let bytes = r.to_slice()?; - let s = OsStr::from_bytes(&bytes); - Ok(PathBuf::from(s)) -} - -#[cfg(not(unix))] -fn convert_path>( - r: &R, -) -> Result { - let bytes = r.to_slice()?; - let s = std::str::from_utf8(&bytes).map_err(|_| gimli::Error::BadUtf8)?; - Ok(PathBuf::from(s)) -} - -fn load_section<'data, O, R, F>( - id: gimli::SectionId, - file: &O, - endian: R::Endian, - loader: &mut F, -) -> Result -where - O: object::Object<'data>, - R: gimli::Reader, - F: FnMut(Cow<'data, [u8]>, R::Endian) -> R, -{ - use object::ObjectSection; - - let data = id - .dwo_name() - .and_then(|dwo_name| { - file.section_by_name(dwo_name) - .and_then(|section| section.uncompressed_data().ok()) - }) - .unwrap_or(Cow::Borrowed(&[])); - Ok(loader(data, endian)) -} - -/// A simple builtin split DWARF loader. -pub struct SplitDwarfLoader -where - R: gimli::Reader, - F: FnMut(Cow<'_, [u8]>, R::Endian) -> R, -{ - loader: F, - dwarf_package: Option>, -} - -impl SplitDwarfLoader -where - R: gimli::Reader, - F: FnMut(Cow<'_, [u8]>, R::Endian) -> R, -{ - fn load_dwarf_package(loader: &mut F, path: Option) -> Option> { - let mut path = path.map(Ok).unwrap_or_else(std::env::current_exe).ok()?; - let dwp_extension = path - .extension() - .map(|previous_extension| { - let mut previous_extension = previous_extension.to_os_string(); - previous_extension.push(".dwp"); - previous_extension - }) - .unwrap_or_else(|| "dwp".into()); - path.set_extension(dwp_extension); - let file = File::open(&path).ok()?; - let map = unsafe { memmap2::Mmap::map(&file).ok()? }; - let dwp = object::File::parse(&*map).ok()?; - - let endian = if dwp.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - let empty = loader(Cow::Borrowed(&[]), endian); - gimli::DwarfPackage::load( - |section_id| load_section(section_id, &dwp, endian, loader), - empty, - ) - .ok() - } - - /// Create a new split DWARF loader. - pub fn new(mut loader: F, path: Option) -> SplitDwarfLoader { - let dwarf_package = SplitDwarfLoader::load_dwarf_package(&mut loader, path); - SplitDwarfLoader { - loader, - dwarf_package, - } - } - - /// Run the provided `LookupResult` to completion, loading any necessary - /// split DWARF along the way. - pub fn run(&mut self, mut l: LookupResult) -> L::Output - where - L: LookupContinuation, - { - loop { - let (load, continuation) = match l { - LookupResult::Output(output) => break output, - LookupResult::Load { load, continuation } => (load, continuation), - }; - - let mut r: Option>> = None; - if let Some(dwp) = self.dwarf_package.as_ref() { - if let Ok(Some(cu)) = dwp.find_cu(load.dwo_id, &load.parent) { - r = Some(Arc::new(cu)); - } - } - - if r.is_none() { - let mut path = PathBuf::new(); - if let Some(p) = load.comp_dir.as_ref() { - if let Ok(p) = convert_path(p) { - path.push(p); - } - } - - if let Some(p) = load.path.as_ref() { - if let Ok(p) = convert_path(p) { - path.push(p); - } - } - - if let Ok(file) = File::open(&path) { - if let Ok(map) = unsafe { memmap2::Mmap::map(&file) } { - if let Ok(file) = object::File::parse(&*map) { - let endian = if file.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - r = gimli::Dwarf::load(|id| { - load_section(id, &file, endian, &mut self.loader) - }) - .ok() - .map(|mut dwo_dwarf| { - dwo_dwarf.make_dwo(&load.parent); - Arc::new(dwo_dwarf) - }); - } - } - } - } - - l = continuation.resume(r); - } - } -} diff --git a/src/lib.rs b/src/lib.rs index ecba672..3d64dfb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,30 +1,35 @@ -//! This crate provides a cross-platform library and binary for translating addresses into -//! function names, file names and line numbers. Given an address in an executable or an -//! offset in a section of a relocatable object, it uses the debugging information to -//! figure out which file name and line number are associated with it. +//! `addr2line` provides a cross-platform library for retrieving per-address debug information +//! from files with DWARF debug information. Given an address, it can return the file name, +//! line number, and function name associated with that address, as well as the inline call +//! stack leading to that address. //! -//! When used as a library, files must first be loaded using the -//! [`object`](https://github.com/gimli-rs/object) crate. -//! A context can then be created with [`Context::new`](./struct.Context.html#method.new). -//! The context caches some of the parsed information so that multiple lookups are -//! efficient. -//! Location information is obtained with -//! [`Context::find_location`](./struct.Context.html#method.find_location) or -//! [`Context::find_location_range`](./struct.Context.html#method.find_location_range). -//! Function information is obtained with -//! [`Context::find_frames`](./struct.Context.html#method.find_frames), which returns -//! a frame for each inline function. Each frame contains both name and location. +//! At the lowest level, the library uses a [`Context`] to cache parsed information so that +//! multiple lookups are efficient. To create a `Context`, you first need to open and parse the +//! file using an object file parser such as [`object`](https://github.com/gimli-rs/object), +//! create a [`gimli::Dwarf`], and finally call [`Context::from_dwarf`]. //! -//! The crate has an example CLI wrapper around the library which provides some of -//! the functionality of the `addr2line` command line tool distributed with [GNU -//! binutils](https://www.gnu.org/software/binutils/). +//! Location information is obtained with [`Context::find_location`] or +//! [`Context::find_location_range`]. Function information is obtained with +//! [`Context::find_frames`], which returns a frame for each inline function. Each frame +//! contains both name and location. //! -//! Currently this library only provides information from the DWARF debugging information, -//! which is parsed using [`gimli`](https://github.com/gimli-rs/gimli). The example CLI -//! wrapper also uses symbol table information provided by the `object` crate. +//! The library also provides a [`Loader`] which internally memory maps the files, +//! uses the `object` crate to do the parsing, and creates a `Context`. +//! The `Context` is not exposed, but the `Loader` provides the same functionality +//! via [`Loader::find_location`], [`Loader::find_location_range`], and +//! [`Loader::find_frames`]. The `Loader` also provides [`Loader::find_symbol`] +//! to use the symbol table instead of DWARF debugging information. +//! The `Loader` will load Mach-O dSYM files and split DWARF files as needed. +//! +//! The crate has a CLI wrapper around the library which provides some of +//! the functionality of the `addr2line` command line tool distributed with +//! [GNU binutils](https://sourceware.org/binutils/docs/binutils/addr2line.html). #![deny(missing_docs)] #![no_std] +#[cfg(feature = "cargo-all")] +compile_error!("'--all-features' is not supported; use '--features all' instead"); + #[cfg(feature = "std")] extern crate std; @@ -35,14 +40,8 @@ extern crate alloc; #[cfg(feature = "fallible-iterator")] pub extern crate fallible_iterator; pub extern crate gimli; -#[cfg(feature = "object")] -pub extern crate object; -use alloc::borrow::Cow; -#[cfg(feature = "object")] -use alloc::rc::Rc; use alloc::sync::Arc; - use core::ops::ControlFlow; use core::u64; @@ -62,10 +61,6 @@ mod maybe_small { pub type IntoIter = alloc::vec::IntoIter; } -#[cfg(all(feature = "std", feature = "object", feature = "memmap2"))] -/// A simple builtin split DWARF loader. -pub mod builtin_split_dwarf_loader; - mod frame; pub use frame::{demangle, demangle_auto, Frame, FrameIter, FunctionName, Location}; @@ -73,6 +68,11 @@ mod function; mod lazy; mod line; +#[cfg(feature = "loader")] +mod loader; +#[cfg(feature = "loader")] +pub use loader::Loader; + mod lookup; pub use lookup::{LookupContinuation, LookupResult, SplitDwarfLoad}; @@ -98,71 +98,6 @@ pub struct Context { sup_units: SupUnits, } -/// The type of `Context` that supports the `new` method. -#[cfg(feature = "std-object")] -pub type ObjectContext = Context>; - -#[cfg(feature = "std-object")] -impl Context> { - /// Construct a new `Context`. - /// - /// The resulting `Context` uses `gimli::EndianRcSlice`. - /// This means it is not thread safe, has no lifetime constraints (since it copies - /// the input data), and works for any endianity. - /// - /// Performance sensitive applications may want to use `Context::from_dwarf` - /// with a more specialised `gimli::Reader` implementation. - #[inline] - pub fn new<'data, O: object::Object<'data>>(file: &O) -> Result { - Self::new_with_sup(file, None) - } - - /// Construct a new `Context`. - /// - /// Optionally also use a supplementary object file. - /// - /// The resulting `Context` uses `gimli::EndianRcSlice`. - /// This means it is not thread safe, has no lifetime constraints (since it copies - /// the input data), and works for any endianity. - /// - /// Performance sensitive applications may want to use `Context::from_dwarf` - /// with a more specialised `gimli::Reader` implementation. - pub fn new_with_sup<'data, O: object::Object<'data>>( - file: &O, - sup_file: Option<&O>, - ) -> Result { - let endian = if file.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - fn load_section<'data, O, Endian>( - id: gimli::SectionId, - file: &O, - endian: Endian, - ) -> Result, Error> - where - O: object::Object<'data>, - Endian: gimli::Endianity, - { - use object::ObjectSection; - - let data = file - .section_by_name(id.name()) - .and_then(|section| section.uncompressed_data().ok()) - .unwrap_or(Cow::Borrowed(&[])); - Ok(gimli::EndianRcSlice::new(Rc::from(&*data), endian)) - } - - let mut dwarf = gimli::Dwarf::load(|id| load_section(id, file, endian))?; - if let Some(sup_file) = sup_file { - dwarf.load_sup(|id| load_section(id, sup_file, endian))?; - } - Context::from_dwarf(dwarf) - } -} - impl Context { /// Construct a new `Context` from DWARF sections. /// @@ -337,8 +272,8 @@ impl Context { /// ```no_run /// # use addr2line::*; /// # use std::sync::Arc; - /// # let ctx: Context> = todo!(); - /// # let do_split_dwarf_load = |load: SplitDwarfLoad>| -> Option>>> { None }; + /// # let ctx: Context> = todo!(); + /// # let do_split_dwarf_load = |load: SplitDwarfLoad>| -> Option>>> { None }; /// const ADDRESS: u64 = 0xdeadbeef; /// ctx.preload_units(ADDRESS).for_each(|(load, callback)| { /// let dwo = do_split_dwarf_load(load); diff --git a/src/loader.rs b/src/loader.rs new file mode 100644 index 0000000..3f69791 --- /dev/null +++ b/src/loader.rs @@ -0,0 +1,334 @@ +use alloc::borrow::Cow; +use alloc::boxed::Box; +use alloc::sync::Arc; +use alloc::vec::Vec; +use std::ffi::OsStr; +use std::fs::File; +use std::path::{Path, PathBuf}; + +use memmap2::Mmap; +use object::{Object, ObjectSection, SymbolMap, SymbolMapName}; +use typed_arena::Arena; + +use crate::{ + Context, FrameIter, Location, LocationRangeIter, LookupContinuation, LookupResult, + SplitDwarfLoad, +}; + +type Reader<'a> = gimli::EndianSlice<'a, gimli::RunTimeEndian>; +type Error = Box; +type Result = std::result::Result; + +/// A loader for the DWARF data required for a `Context`. +/// +/// For performance reasons, a [`Context`] normally borrows the input data. +/// However, that means the input data must outlive the `Context`, which +/// is inconvenient for long-lived `Context`s. +/// This loader uses an arena to store the input data, together with the +/// `Context` itself. This ensures that the input data lives as long as +/// the `Context`. +/// +/// The loader performs some additional tasks: +/// - Loads the symbol table from the executable file (see [`Self::find_symbol`]). +/// - Loads Mach-O dSYM files that are located next to the executable file. +/// - Locates and loads split DWARF files (DWO and DWP). +pub struct Loader { + internal: LoaderInternal<'static>, + arena_data: Arena>, + arena_mmap: Arena, +} + +impl Loader { + /// Load the DWARF data for an executable file and create a `Context`. + #[inline] + pub fn new(path: impl AsRef) -> Result { + Self::new_with_sup(path, None::<&Path>) + } + + /// Load the DWARF data for an executable file and create a `Context`. + /// + /// Optionally also use a supplementary object file. + pub fn new_with_sup( + path: impl AsRef, + sup_path: Option>, + ) -> Result { + let arena_data = Arena::new(); + let arena_mmap = Arena::new(); + + let internal = LoaderInternal::new( + path.as_ref(), + sup_path.as_ref().map(AsRef::as_ref), + &arena_data, + &arena_mmap, + )?; + Ok(Loader { + // Convert to static lifetime to allow self-reference by `internal`. + // `internal` is only accessed through `borrow_internal`, which ensures + // that the static lifetime does not leak. + internal: unsafe { + core::mem::transmute::, LoaderInternal<'static>>(internal) + }, + arena_data, + arena_mmap, + }) + } + + fn borrow_internal<'a, F, T>(&'a self, f: F) -> T + where + F: FnOnce(&'a LoaderInternal<'a>, &'a Arena>, &'a Arena) -> T, + { + // Do not leak the static lifetime. + let internal = unsafe { + core::mem::transmute::<&LoaderInternal<'static>, &'a LoaderInternal<'a>>(&self.internal) + }; + f(internal, &self.arena_data, &self.arena_mmap) + } + + /// Get the base address used for relative virtual addresses. + /// + /// Currently this is only non-zero for PE. + pub fn relative_address_base(&self) -> u64 { + self.borrow_internal(|i, _data, _mmap| i.relative_address_base) + } + + /// Find the source file and line corresponding to the given virtual memory address. + /// + /// This calls [`Context::find_location`] with the given address. + pub fn find_location(&self, probe: u64) -> Result>> { + self.borrow_internal(|i, _data, _mmap| Ok(i.ctx.find_location(probe)?)) + } + + /// Return source file and lines for a range of addresses. + /// + /// This calls [`Context::find_location_range`] with the given range. + pub fn find_location_range( + &self, + probe_low: u64, + probe_high: u64, + ) -> Result> { + self.borrow_internal(|i, _data, _mmap| { + Ok(i.ctx.find_location_range(probe_low, probe_high)?) + }) + } + + /// Return an iterator for the function frames corresponding to the given virtual + /// memory address. + /// + /// This calls [`Context::find_frames`] with the given address. + pub fn find_frames(&self, probe: u64) -> Result>> { + self.borrow_internal(|i, data, mmap| i.find_frames(probe, data, mmap)) + } + + /// Find the symbol table entry corresponding to the given virtual memory address. + pub fn find_symbol(&self, probe: u64) -> Option<&str> { + self.borrow_internal(|i, _data, _mmap| i.find_symbol(probe)) + } +} + +struct LoaderInternal<'a> { + ctx: Context>, + relative_address_base: u64, + symbols: SymbolMap>, + dwarf_package: Option>>, +} + +impl<'a> LoaderInternal<'a> { + fn new( + path: &Path, + sup_path: Option<&Path>, + arena_data: &'a Arena>, + arena_mmap: &'a Arena, + ) -> Result { + let file = File::open(path)?; + let map = arena_mmap.alloc(unsafe { Mmap::map(&file)? }); + let mut object = object::File::parse(&**map)?; + + let relative_address_base = object.relative_address_base(); + let symbols = object.symbol_map(); + + // Load supplementary object file. + // TODO: use debuglink and debugaltlink + let sup_map; + let sup_object = if let Some(sup_path) = sup_path { + let sup_file = File::open(sup_path)?; + sup_map = arena_mmap.alloc(unsafe { Mmap::map(&sup_file)? }); + Some(object::File::parse(&**sup_map)?) + } else { + None + }; + + // Load Mach-O dSYM file, ignoring errors. + if let Some(map) = (|| { + let uuid = object.mach_uuid().ok()??; + path.parent()?.read_dir().ok()?.find_map(|candidate| { + let candidate = candidate.ok()?; + let path = candidate.path(); + if path.extension().and_then(OsStr::to_str) != Some("dSYM") { + return None; + } + let path = path.join("Contents/Resources/DWARF"); + path.read_dir().ok()?.find_map(|candidate| { + let candidate = candidate.ok()?; + let path = candidate.path(); + let file = File::open(path).ok()?; + let map = unsafe { Mmap::map(&file) }.ok()?; + let object = object::File::parse(&*map).ok()?; + if object.mach_uuid() == Ok(Some(uuid)) { + Some(map) + } else { + None + } + }) + }) + })() { + let map = arena_mmap.alloc(map); + object = object::File::parse(&**map)?; + } + + // Load the DWARF sections. + let endian = if object.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let mut dwarf = + gimli::Dwarf::load(|id| load_section(Some(id.name()), &object, endian, arena_data))?; + if let Some(sup_object) = &sup_object { + dwarf.load_sup(|id| load_section(Some(id.name()), sup_object, endian, arena_data))?; + } + + let ctx = Context::from_dwarf(dwarf)?; + + // Load the DWP file, ignoring errors. + let dwarf_package = (|| { + let mut dwp_path = path.to_path_buf(); + let dwp_extension = path + .extension() + .map(|previous_extension| { + let mut previous_extension = previous_extension.to_os_string(); + previous_extension.push(".dwp"); + previous_extension + }) + .unwrap_or_else(|| "dwp".into()); + dwp_path.set_extension(dwp_extension); + let dwp_file = File::open(&dwp_path).ok()?; + let map = arena_mmap.alloc(unsafe { Mmap::map(&dwp_file) }.ok()?); + let dwp_object = object::File::parse(&**map).ok()?; + + let endian = if dwp_object.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let empty = gimli::EndianSlice::new(&[][..], endian); + gimli::DwarfPackage::load( + |id| load_section(id.dwo_name(), &dwp_object, endian, arena_data), + empty, + ) + .ok() + })(); + + Ok(LoaderInternal { + ctx, + relative_address_base, + symbols, + dwarf_package, + }) + } + + fn find_symbol(&self, probe: u64) -> Option<&str> { + self.symbols.get(probe).map(|x| x.name()) + } + + fn find_frames( + &self, + probe: u64, + arena_data: &'a Arena>, + arena_mmap: &'a Arena, + ) -> Result> { + let mut frames = self.ctx.find_frames(probe); + loop { + let (load, continuation) = match frames { + LookupResult::Output(output) => return Ok(output?), + LookupResult::Load { load, continuation } => (load, continuation), + }; + + let r = self.load_dwo(load, arena_data, arena_mmap)?; + frames = continuation.resume(r); + } + } + + fn load_dwo( + &self, + load: SplitDwarfLoad>, + arena_data: &'a Arena>, + arena_mmap: &'a Arena, + ) -> Result>>>> { + // Load the DWO file from the DWARF package, if available. + if let Some(dwp) = self.dwarf_package.as_ref() { + if let Some(cu) = dwp.find_cu(load.dwo_id, &load.parent)? { + return Ok(Some(Arc::new(cu))); + } + } + + // Determine the path to the DWO file. + let mut path = PathBuf::new(); + if let Some(p) = load.comp_dir.as_ref() { + path.push(convert_path(p)?); + } + let Some(p) = load.path.as_ref() else { + return Ok(None); + }; + path.push(convert_path(p)?); + + // Load the DWO file, ignoring errors. + let dwo = (|| { + let file = File::open(&path).ok()?; + let map = arena_mmap.alloc(unsafe { Mmap::map(&file) }.ok()?); + let object = object::File::parse(&**map).ok()?; + let endian = if object.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let mut dwo_dwarf = + gimli::Dwarf::load(|id| load_section(id.dwo_name(), &object, endian, arena_data)) + .ok()?; + // TODO: verify dwo_id + dwo_dwarf.make_dwo(&load.parent); + Some(Arc::new(dwo_dwarf)) + })(); + Ok(dwo) + } +} + +fn load_section<'input, Endian: gimli::Endianity>( + name: Option<&'static str>, + file: &object::File<'input>, + endian: Endian, + arena_data: &'input Arena>, +) -> Result> { + let data = match name.and_then(|name| file.section_by_name(name)) { + Some(section) => match section.uncompressed_data()? { + Cow::Borrowed(b) => b, + Cow::Owned(b) => arena_data.alloc(b), + }, + None => &[], + }; + Ok(gimli::EndianSlice::new(data, endian)) +} + +#[cfg(unix)] +fn convert_path>(r: &R) -> Result { + use std::os::unix::ffi::OsStrExt; + let bytes = r.to_slice()?; + let s = OsStr::from_bytes(&bytes); + Ok(PathBuf::from(s)) +} + +#[cfg(not(unix))] +fn convert_path>(r: &R) -> Result { + let bytes = r.to_slice()?; + let s = std::str::from_utf8(&bytes)?; + Ok(PathBuf::from(s)) +} diff --git a/src/lookup.rs b/src/lookup.rs index 1efe21d..cc33dd5 100644 --- a/src/lookup.rs +++ b/src/lookup.rs @@ -28,8 +28,8 @@ pub struct SplitDwarfLoad { /// ```no_run /// # use addr2line::*; /// # use std::sync::Arc; -/// # let ctx: Context> = todo!(); -/// # let do_split_dwarf_load = |load: SplitDwarfLoad>| -> Option>>> { None }; +/// # let ctx: Context> = todo!(); +/// # let do_split_dwarf_load = |load: SplitDwarfLoad>| -> Option>>> { None }; /// const ADDRESS: u64 = 0xdeadbeef; /// let mut r = ctx.find_frames(ADDRESS); /// let result = loop { diff --git a/tests/correctness.rs b/tests/correctness.rs index 7cb6c87..e44ba69 100644 --- a/tests/correctness.rs +++ b/tests/correctness.rs @@ -1,74 +1,13 @@ -use addr2line::Context; +use addr2line::Loader; use fallible_iterator::FallibleIterator; use findshlibs::{IterationControl, SharedLibrary, TargetSharedLibrary}; -use object::Object; -use std::borrow::Cow; -use std::fs::File; -use std::sync::Arc; - -fn find_debuginfo() -> memmap2::Mmap { - let path = std::env::current_exe().unwrap(); - let file = File::open(&path).unwrap(); - let map = unsafe { memmap2::Mmap::map(&file).unwrap() }; - let file = &object::File::parse(&*map).unwrap(); - if let Ok(uuid) = file.mach_uuid() { - for candidate in path.parent().unwrap().read_dir().unwrap() { - let path = candidate.unwrap().path(); - if !path.to_str().unwrap().ends_with(".dSYM") { - continue; - } - for candidate in path.join("Contents/Resources/DWARF").read_dir().unwrap() { - let path = candidate.unwrap().path(); - let file = File::open(&path).unwrap(); - let map = unsafe { memmap2::Mmap::map(&file).unwrap() }; - let file = &object::File::parse(&*map).unwrap(); - if file.mach_uuid().unwrap() == uuid { - return map; - } - } - } - } - - map -} #[test] #[allow(clippy::fn_to_numeric_cast)] fn correctness() { - let map = find_debuginfo(); - let file = &object::File::parse(&*map).unwrap(); - let module_base = file.relative_address_base(); - - let endian = if file.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - fn load_section<'data, O, Endian>( - id: gimli::SectionId, - file: &O, - endian: Endian, - ) -> Result, gimli::Error> - where - O: object::Object<'data>, - Endian: gimli::Endianity, - { - use object::ObjectSection; - - let data = file - .section_by_name(id.name()) - .and_then(|section| section.uncompressed_data().ok()) - .unwrap_or(Cow::Borrowed(&[])); - Ok(gimli::EndianArcSlice::new(Arc::from(&*data), endian)) - } - - let dwarf = gimli::Dwarf::load(|id| load_section(id, file, endian)).unwrap(); - let ctx = Context::from_dwarf(dwarf).unwrap(); - let mut split_dwarf_loader = addr2line::builtin_split_dwarf_loader::SplitDwarfLoader::new( - |data, endian| gimli::EndianArcSlice::new(Arc::from(&*data), endian), - None, - ); + let path = std::env::current_exe().unwrap(); + let ctx = Loader::new(&path).unwrap(); + let module_base = ctx.relative_address_base(); let mut bias = None; TargetSharedLibrary::each(|lib| { @@ -80,8 +19,7 @@ fn correctness() { let mut test = |sym: u64, expected_prefix: &str| { let ip = sym.wrapping_sub(bias.unwrap()); - let frames = ctx.find_frames(ip); - let frames = split_dwarf_loader.run(frames).unwrap(); + let frames = ctx.find_frames(ip).unwrap(); let frame = frames.last().unwrap().unwrap(); let name = frame.function.as_ref().unwrap().demangle().unwrap(); // Old rust versions generate DWARF with wrong linkage name, @@ -111,17 +49,9 @@ fn test_function() { #[test] fn zero_function() { - let map = find_debuginfo(); - let file = &object::File::parse(&*map).unwrap(); - let ctx = Context::new(file).unwrap(); + let path = std::env::current_exe().unwrap(); + let ctx = Loader::new(&path).unwrap(); for probe in 0..10 { - assert!( - ctx.find_frames(probe) - .skip_all_loads() - .unwrap() - .count() - .unwrap() - < 10 - ); + assert!(ctx.find_frames(probe).unwrap().count().unwrap() < 10); } } diff --git a/tests/parse.rs b/tests/parse.rs index 2e0c328..b699ad9 100644 --- a/tests/parse.rs +++ b/tests/parse.rs @@ -50,16 +50,7 @@ fn dwarf_borrow<'a>( } #[test] -fn parse_base_rc() { - let target = release_fixture_path(); - - with_file(&target, |file| { - addr2line::ObjectContext::new(file).unwrap(); - }); -} - -#[test] -fn parse_base_slice() { +fn parse_base() { let target = release_fixture_path(); with_file(&target, |file| { @@ -70,17 +61,7 @@ fn parse_base_slice() { } #[test] -fn parse_lines_rc() { - let target = release_fixture_path(); - - with_file(&target, |file| { - let context = addr2line::ObjectContext::new(file).unwrap(); - context.parse_lines().unwrap(); - }); -} - -#[test] -fn parse_lines_slice() { +fn parse_lines() { let target = release_fixture_path(); with_file(&target, |file| { @@ -92,17 +73,7 @@ fn parse_lines_slice() { } #[test] -fn parse_functions_rc() { - let target = release_fixture_path(); - - with_file(&target, |file| { - let context = addr2line::ObjectContext::new(file).unwrap(); - context.parse_functions().unwrap(); - }); -} - -#[test] -fn parse_functions_slice() { +fn parse_functions() { let target = release_fixture_path(); with_file(&target, |file| {