Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ghidra symbol importer upgrades #86

Merged
merged 11 commits into from
Jan 21, 2024
7 changes: 7 additions & 0 deletions tools/batch-demangle/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tools/batch-demangle/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ edition = "2021"
[dependencies]
clap = { version = "4.4.18", features = ["derive"] }
cwdemangle = { git = "https://github.com/encounter/cwdemangle", version = "0.1.7" }
lazy_static = "1.4.0"
140 changes: 132 additions & 8 deletions tools/batch-demangle/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use clap::Parser;
use cwdemangle::*;
use std::fs::read_to_string;
use std::collections::hash_set::HashSet;

#[macro_use]
extern crate lazy_static;

#[derive(Parser)]
struct Args {
Expand All @@ -12,18 +16,138 @@ fn main() {
let f = read_to_string(args.input).unwrap();

for line in f.lines() {
let parts = line.split(" ").collect::<Vec<_>>();
let sym = parts[0];
let demangled = match demangle(parts[0], &DemangleOptions {
// Symbol info: symbol = section:0x<address>; // type:<type> [flags...]
let (sym, remaining) = line.split_once(" = ").unwrap();
let (_section, remaining) = remaining.split_once(':').unwrap();
let (addr, _remaining) = remaining.split_once("; // ").unwrap();

match demangle(sym, &DemangleOptions {
omit_empty_parameters: false
}) {
Some(x) => x,
None => "@@@@@".into()
Some(demangled) => {
if !demangled.contains('(') {
// Variable: [namespace::]name
let (namespace, name) = split_namespace(demangled.as_str());
println!("{addr}|||{sym}|||{demangled}|||{namespace}|||{name}");
} else {
// Function: [return-type] [namespace::]name([parameter_types]) [cv-qualifier]
let (prolog, _parameters, _epilog) = split_function(demangled.as_str());

let (_return_type, qualified_name) = split_return(prolog);
let (namespace, name) = split_namespace(qualified_name);

println!("{addr}|||{sym}|||{demangled}|||{namespace}|||{name}");
}
},
None => {
println!("{addr}|||{sym}");
}
};
}
}

/// Splits a demangled function signature into `(prolog, parameters, epilog)`.
///
/// * `prolog` is everything before the function's own parameter list
///   (optional return type plus the possibly qualified name).
/// * `parameters` is the parameter list, including its enclosing parentheses.
/// * `epilog` is everything after the closing parenthesis (e.g. ` const`).
///
/// The parameter list is located by walking the parentheses from the end of
/// the string and stopping at the `(` that balances the last `)`, so nested
/// parentheses inside the list (function pointer types) stay in `parameters`.
///
/// # Panics
///
/// Panics if `demangled_fn` contains no `(`, or if its parentheses never
/// balance when scanned from the end.
fn split_function<'a>(demangled_fn: &'a str) -> (&'a str, &'a str, &'a str) {
    // Search from the end by depth, to handle function pointer types correctly
    let mut parentheses_depth = 0;
    for (index, value) in demangled_fn.rmatch_indices(&['(', ')']) {
        parentheses_depth += match value {
            // We're searching in reverse, so closing parentheses increase our depth
            "(" => -1,
            ")" => 1,
            _ => 0,
        };

        if parentheses_depth == 0 {
            // `index` is the opening parenthesis matching the last closing one.
            let parameter_start = index;
            let parameter_end = demangled_fn.rfind(')').unwrap();
            let (prolog, remaining) = demangled_fn.split_at(parameter_start);
            // `remaining` starts at `parameter_start`, so rebase the end offset
            // and add one to keep the closing parenthesis in `parameters`.
            let (parameters, epilog) = remaining.split_at(parameter_end - parameter_start + 1);
            return (prolog, parameters, epilog);
        }
    }

    if !demangled_fn.contains('(') {
        panic!("Non-function symbol {demangled_fn} passed to split_function!");
    } else {
        unreachable!("This branch indicates mismatched parentheses, which would be a demangler bug");
    }
}

/// Splits the part of a demangled function that precedes its parameter list
/// into `(return_type, qualified_name)`.
///
/// The first space in `prolog` is treated as the separator between the return
/// type and the qualified name. The return type is reported as `""` (and the
/// whole `prolog` is returned as the qualified name) when:
///
/// * `prolog` contains no space at all,
/// * a `<` appears before the first space, i.e. the space may belong to a
///   template argument list rather than separate a return type, or
/// * the first space directly follows the `operator` keyword, i.e. it is part
///   of a name such as `operator new`, `operator delete[]` or a conversion
///   operator like `operator bool`.
fn split_return<'a>(prolog: &'a str) -> (&'a str, &'a str) {
    const PATTERN: &str = " ";
    const OPERATOR_KEYWORD: &str = "operator";
    const SCOPED_OPERATOR_KEYWORD: &str = "::operator";

    let no_return_type = ("", prolog);

    let space_index = match prolog.find(PATTERN) {
        Some(index) => index,
        None => return no_return_type,
    };

    // Template args, make sure space doesn't come from there
    if matches!(prolog.find('<'), Some(template_index) if template_index < space_index) {
        return no_return_type;
    }

    let candidate = &prolog[..space_index];

    // `operator` is a reserved word, so a type can never be spelled `operator`
    // or end in `::operator`; the space therefore belongs to the function name.
    if candidate == OPERATOR_KEYWORD || candidate.ends_with(SCOPED_OPERATOR_KEYWORD) {
        return no_return_type;
    }

    (candidate, &prolog[space_index + PATTERN.len()..])
}

// Lazily initialised set of operator names that contain `<` or `>` characters
// without being templates. `split_namespace` looks path segments up in this
// set so that these names are added as-is instead of being counted as
// template brackets.
lazy_static! {
static ref NON_TEMPLATE_NAMES: HashSet<&'static str> = HashSet::from_iter([
// Comparison operators
"operator<",
"operator>",
// Shift operators and their compound-assignment forms
"operator<<",
"operator>>",
"operator>>=",
"operator<<=",
// Remaining comparison operators
"operator<=",
"operator>=",
// Member access operators
"operator->*",
"operator->",
]);
}

fn split_namespace(prolog: &str) -> (String, String) {
const PATTERN: &str = "::";

println!("{sym}|||{demangled}|||{addr}");
let mut namespaces = Vec::<String>::new();

// Ensure template arguments aren't split up
let mut template_depth = 0;
let mut template_buffer = String::new();
for split in prolog.split(PATTERN) {
// Ensure special names that contain <> but aren't templates are added as-is
if NON_TEMPLATE_NAMES.contains(split) {
namespaces.push(split.to_string());
continue;
}

template_depth += split.matches('<').count();

if template_depth > 0 {
template_buffer += split;

template_depth -= split.matches('>').count();
if template_depth < 1 {
namespaces.push(template_buffer);
template_buffer = String::new();
} else {
// Keep namespace delimiter in the final namespace
template_buffer += PATTERN;
}
} else {
namespaces.push(split.to_string());
}
}

assert!(template_buffer.is_empty());

// Split off name, and re-join namespaces with a padded separator to make later splitting easier
let name = namespaces.pop().unwrap();
let namespaces = namespaces.join(" :: ");
return (namespaces, name);
}
Loading