Skip to content

Commit

Permalink
Ghidra symbol importer upgrades (#86)
Browse files Browse the repository at this point in the history
* Only output demangled name on success

* Clean up parsing of symbol info

* Split symbol namespaces and names

* Don't treat operators with <> as templates

* Handle function pointer types correctly when splitting demangled functions

* Fix namespace handling in import script

* Slight bit of script cleanup
- Tidy up imports
- Use SourceType explicitly in code
- Add a couple comments about what's happening

* Convert namespaces to classes where possible

* Remove duplicated symbols.txt from batch-demangle folder

* Clean up label creation

* Remove spaces in template parameters
  • Loading branch information
TheNathannator authored Jan 21, 2024
1 parent ce67a24 commit 8e2f5a4
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 85,919 deletions.
7 changes: 7 additions & 0 deletions tools/batch-demangle/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tools/batch-demangle/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ edition = "2021"
[dependencies]
clap = { version = "4.4.18", features = ["derive"] }
cwdemangle = { git = "https://github.com/encounter/cwdemangle", version = "0.1.7" }
lazy_static = "1.4.0"
140 changes: 132 additions & 8 deletions tools/batch-demangle/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use clap::Parser;
use cwdemangle::*;
use std::fs::read_to_string;
use std::collections::hash_set::HashSet;

#[macro_use]
extern crate lazy_static;

#[derive(Parser)]
struct Args {
Expand All @@ -12,18 +16,138 @@ fn main() {
let f = read_to_string(args.input).unwrap();

for line in f.lines() {
let parts = line.split(" ").collect::<Vec<_>>();
let sym = parts[0];
let demangled = match demangle(parts[0], &DemangleOptions {
// Symbol info: symbol = section:0x<address>; // type:<type> [flags...]
let (sym, remaining) = line.split_once(" = ").unwrap();
let (_section, remaining) = remaining.split_once(':').unwrap();
let (addr, _remaining) = remaining.split_once("; // ").unwrap();

match demangle(sym, &DemangleOptions {
omit_empty_parameters: false
}) {
Some(x) => x,
None => "@@@@@".into()
Some(demangled) => {
if !demangled.contains('(') {
// Variable: [namespace::]name
let (namespace, name) = split_namespace(demangled.as_str());
println!("{addr}|||{sym}|||{demangled}|||{namespace}|||{name}");
} else {
// Function: [return-type] [namespace::]name([parameter_types]) [cv-qualifier]
let (prolog, _parameters, _epilog) = split_function(demangled.as_str());

let (_return_type, qualified_name) = split_return(prolog);
let (namespace, name) = split_namespace(qualified_name);

println!("{addr}|||{sym}|||{demangled}|||{namespace}|||{name}");
}
},
None => {
println!("{addr}|||{sym}");
}
};
}
}

/// Splits a demangled function signature into `(prolog, parameters, epilog)`.
///
/// * `prolog` is everything before the parameter list (return type, if any,
///   and the qualified function name).
/// * `parameters` is the parenthesised parameter list, including both the
///   opening and the closing parenthesis.
/// * `epilog` is whatever follows the closing parenthesis (for example a
///   trailing cv-qualifier), possibly empty.
///
/// # Panics
///
/// Panics if `demangled_fn` contains no `(` at all, or if its parentheses
/// never balance when scanned from the end of the string.
fn split_function<'a>(demangled_fn: &'a str) -> (&'a str, &'a str, &'a str) {
    // Search from the end by depth, to handle function pointer types correctly:
    // the parameter list is the last balanced parenthesis group in the string.
    let mut parentheses_depth = 0;
    for (index, value) in demangled_fn.rmatch_indices(&['(', ')']) {
        parentheses_depth += match value {
            // We're searching in reverse, so closing parentheses increase our depth
            "(" => -1,
            ")" => 1,
            _ => 0
        };

        if parentheses_depth == 0 {
            // `index` is the '(' that balances the last ')' in the string
            let parameter_start = index;
            let parameter_end = demangled_fn.rfind(')').unwrap();
            let (prolog, remaining) = demangled_fn.split_at(parameter_start);
            // +1 so the closing parenthesis stays with the parameter list
            let (parameters, epilog) = remaining.split_at(parameter_end - parameter_start + 1);
            return (prolog, parameters, epilog);
        }
    }

    if !demangled_fn.contains('(') {
        panic!("Non-function symbol {demangled_fn} passed to split_function!");
    } else {
        unreachable!("This branch indicates mismatched parentheses, which would be a demangler bug");
    }
}

/// Separates an optional leading return type from a function prolog.
///
/// Returns `(return_type, qualified_name)`. The split happens at the first
/// space in `prolog`, unless that space comes after the first `<`, in which
/// case it is assumed to belong to template arguments and the whole prolog is
/// returned as the qualified name with an empty return type. A prolog without
/// any space is likewise returned unchanged with an empty return type.
fn split_return<'a>(prolog: &'a str) -> (&'a str, &'a str) {
    const PATTERN: &str = " ";

    // No separator at all: there is no return type to split off
    let separator = match prolog.find(PATTERN) {
        Some(position) => position,
        None => return ("", prolog),
    };

    // A space located after the first '<' comes from template arguments
    let inside_template = prolog
        .find('<')
        .map_or(false, |template_start| separator >= template_start);
    if inside_template {
        return ("", prolog);
    }

    (&prolog[..separator], &prolog[separator + PATTERN.len()..])
}

// Operator names that contain `<` or `>` characters without being templates.
// `split_namespace` compares each `::`-separated segment against this set and
// adds exact matches as-is, so their angle brackets are not counted towards
// template nesting depth.
lazy_static! {
static ref NON_TEMPLATE_NAMES: HashSet<&'static str> = HashSet::from_iter([
"operator<",
"operator>",
"operator<<",
"operator>>",
"operator>>=",
"operator<<=",
"operator<=",
"operator>=",
"operator->*",
"operator->",
]);
}

fn split_namespace(prolog: &str) -> (String, String) {
const PATTERN: &str = "::";

println!("{sym}|||{demangled}|||{addr}");
let mut namespaces = Vec::<String>::new();

// Ensure template arguments aren't split up
let mut template_depth = 0;
let mut template_buffer = String::new();
for split in prolog.split(PATTERN) {
// Ensure special names that contain <> but aren't templates are added as-is
if NON_TEMPLATE_NAMES.contains(split) {
namespaces.push(split.to_string());
continue;
}

template_depth += split.matches('<').count();

if template_depth > 0 {
template_buffer += split;

template_depth -= split.matches('>').count();
if template_depth < 1 {
namespaces.push(template_buffer);
template_buffer = String::new();
} else {
// Keep namespace delimiter in the final namespace
template_buffer += PATTERN;
}
} else {
namespaces.push(split.to_string());
}
}

assert!(template_buffer.is_empty());

// Split off name, and re-join namespaces with a padded separator to make later splitting easier
let name = namespaces.pop().unwrap();
let namespaces = namespaces.join(" :: ");
return (namespaces, name);
}
Loading

0 comments on commit 8e2f5a4

Please sign in to comment.