Skip to content

Commit

Permalink
Initializes parser for Itanium symbol
Browse files Browse the repository at this point in the history
  • Loading branch information
ultimaweapon committed Oct 13, 2024
1 parent d854e41 commit 329ab4f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 6 deletions.
2 changes: 1 addition & 1 deletion macros/src/cpp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub fn render(items: Declarations) -> syn::Result<TokenStream> {
fn render_class(item: Class) -> syn::Result<TokenStream> {
// Get metadata.
let class = item.name;
let meta = match META.get_type("_ZN7cppbind9type_infoI6class1E4sizeE") {
let meta = match META.get_type(class.to_string()) {
Some(v) => v,
None => return Err(Error::new_spanned(class, "type_info not found")),
};
Expand Down
27 changes: 25 additions & 2 deletions macros/src/meta.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub use self::ty::*;

use crate::symbol::{Segment, Symbol, TemplateArg};
use memmap2::Mmap;
use object::read::archive::ArchiveFile;
use std::collections::HashMap;
Expand Down Expand Up @@ -32,12 +33,34 @@ impl Metadata {
// Parse symbols.
for sym in symbols {
let sym = sym.map_err(MetadataError::ReadSymbolFailed)?;
let sym = match std::str::from_utf8(sym.name()) {
let sym = match Symbol::parse(sym.name()) {
Ok(v) => v,
Err(_) => continue, // Ignore unknown symbol.
};

types.insert(sym.to_owned(), TypeInfo::new());
// Check namespace.
let mut iter = sym.name().iter();
let ns = iter.next();

if !ns.is_some_and(|s| *s == Segment::Ident("cppbind".into())) {
continue;
}

// Check metadata type.
let ty = iter.next();

if !ty.is_some_and(|s| *s == Segment::Ident("type_info".into())) {
continue;
}

// Get class name.
let ty = iter.next().expect("invalid cppbind::type_info definition");
let class = match ty {
Segment::Ident(_) => panic!("invalid argument for cppbind::type_info"),
Segment::TemplateArg(TemplateArg::Ident(v)) => v,
};

types.insert(class.clone().into_owned(), TypeInfo::new());
}

Ok(Self { types })
Expand Down
37 changes: 34 additions & 3 deletions macros/src/symbol.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,45 @@
use std::borrow::Cow;
use thiserror::Error;

mod itanium;

/// C++ symbol.
pub struct Symbol {}
pub struct Symbol {
name: Vec<Segment<'static>>,
}

impl Symbol {
pub fn parse(mangled: impl AsRef<[u8]>) -> Result<Self, SymbolError> {
Ok(Self {})
let mangled = mangled.as_ref();

if mangled.starts_with(b"_Z") {
self::itanium::parse(&mangled[2..])
} else {
todo!()
}
}

pub fn name(&self) -> &[Segment] {
&self.name
}
}

/// Segment of a C++ name.
///
/// A parsed symbol name is a flat list of these segments in the order they
/// appear in the mangled name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Segment<'a> {
    /// A plain identifier such as a namespace, class or member name.
    Ident(Cow<'a, str>),
    /// One argument of a template instantiation.
    TemplateArg(TemplateArg<'a>),
}

/// Argument of a template instantiation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TemplateArg<'a> {
    /// An argument that is itself a plain identifier (e.g. a class name).
    Ident(Cow<'a, str>),
}

/// Represents an error when [`Symbol`] fails to parse from a mangled name.
#[derive(Debug, Error)]
pub enum SymbolError {}
pub enum SymbolError {
#[error("unknown symbol")]
UnknownSymbol,
}
82 changes: 82 additions & 0 deletions macros/src/symbol/itanium.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use super::{Segment, Symbol, SymbolError, TemplateArg};
use std::cmp::min;
use std::iter::Peekable;
use std::slice::Iter;

/// Parses an Itanium-mangled name whose leading `_Z` has already been removed.
///
/// Only nested names (`N ... E`) are recognized. Any bytes following the
/// closing `E` of the nested name are left unread.
///
/// # Errors
///
/// Returns [`SymbolError::UnknownSymbol`] when the input is empty, does not
/// start with `N`, or the nested name itself cannot be parsed.
pub fn parse(mangled: &[u8]) -> Result<Symbol, SymbolError> {
    let mut cursor = mangled.iter().peekable();

    // The first byte selects the kind of encoding; nested name is the only one handled.
    if cursor.next().copied() != Some(b'N') {
        return Err(SymbolError::UnknownSymbol);
    }

    let mut segments = Vec::new();

    parse_nested_name(&mut segments, &mut cursor)?;

    Ok(Symbol { name: segments })
}

/// Parses the body of a nested name, i.e. everything after the opening `N` up to and including
/// the terminating `E`, appending each component to `segments`.
///
/// # Errors
///
/// Returns [`SymbolError::UnknownSymbol`] when the input ends before the terminating `E`, when
/// an unsupported component is encountered, or when a component fails to parse.
fn parse_nested_name(
    segments: &mut Vec<Segment>,
    iter: &mut Peekable<Iter<u8>>,
) -> Result<(), SymbolError> {
    while let Some(&tag) = iter.next() {
        match tag {
            // End of the nested name.
            b'E' => return Ok(()),
            // Start of a template argument list.
            b'I' => parse_template_args(segments, iter)?,
            // A non-zero digit starts a length-prefixed identifier.
            b'1'..=b'9' => {
                let ident = parse_source_name(iter, tag)?;

                segments.push(Segment::Ident(ident.into()));
            }
            // Everything else, including a zero-length identifier, is not supported.
            _ => return Err(SymbolError::UnknownSymbol),
        }
    }

    // Ran out of input without seeing the terminator.
    Err(SymbolError::UnknownSymbol)
}

/// Parses a length-prefixed identifier (`<decimal length><bytes>`).
///
/// `first` is the first digit of the length, which the caller has already consumed from `iter`.
/// Any further digits are read from `iter`, followed by exactly that many bytes of identifier.
///
/// # Errors
///
/// Returns [`SymbolError::UnknownSymbol`] when `first` is not an ASCII digit, the length
/// overflows `usize`, the input ends before the whole identifier has been read, or the
/// identifier is not valid UTF-8.
fn parse_source_name(iter: &mut Peekable<Iter<u8>>, first: u8) -> Result<String, SymbolError> {
    // Guard the subtraction below against a caller passing a non-digit.
    if !first.is_ascii_digit() {
        return Err(SymbolError::UnknownSymbol);
    }

    // Get length.
    let mut len = usize::from(first - b'0');

    while let Some(&b) = iter.next_if(|b| b.is_ascii_digit()) {
        len = len
            .checked_mul(10)
            .and_then(|v| v.checked_add(usize::from(b - b'0')))
            .ok_or(SymbolError::UnknownSymbol)?;
    }

    // The length comes from untrusted input so cap the up-front allocation.
    let mut bytes = Vec::with_capacity(min(len, 128));

    bytes.extend(iter.by_ref().take(len).copied());

    if bytes.len() != len {
        // Input ended before the whole identifier was read.
        return Err(SymbolError::UnknownSymbol);
    }

    // The ABI does not yet specify a mangling for identifiers containing characters outside of
    // _A-Za-z0-9, but compilers emit such identifiers as raw UTF-8 with the length counted in
    // bytes. Decode as UTF-8 rather than mapping each byte to a separate character, which would
    // corrupt any non-ASCII identifier.
    String::from_utf8(bytes).map_err(|_| SymbolError::UnknownSymbol)
}

/// Parses a template argument list, i.e. everything after the opening `I` up to and including
/// the terminating `E`, appending each argument to `segments` as a [`Segment::TemplateArg`].
///
/// Only arguments that are plain length-prefixed identifiers are supported at the moment.
///
/// # Errors
///
/// Returns [`SymbolError::UnknownSymbol`] when the input ends before the terminating `E`, when
/// an identifier fails to parse, or when an argument of an unsupported kind is encountered.
/// Unsupported kinds are reported as an error instead of panicking so a caller scanning
/// arbitrary symbols can skip the ones this parser does not understand.
fn parse_template_args(
    segments: &mut Vec<Segment>,
    iter: &mut Peekable<Iter<u8>>,
) -> Result<(), SymbolError> {
    loop {
        let b = *iter.next().ok_or(SymbolError::UnknownSymbol)?;
        let a = match b {
            b'E' => break,
            b'1'..=b'9' => TemplateArg::Ident(parse_source_name(iter, b)?.into()),
            // Not supported yet:
            //   '0' - identifier with zero length
            //   'X' - expression
            //   'L' - simple expressions
            //   'J' - argument pack
            //   anything else (builtin types, substitutions, ...)
            _ => return Err(SymbolError::UnknownSymbol),
        };

        segments.push(Segment::TemplateArg(a));
    }

    Ok(())
}

0 comments on commit 329ab4f

Please sign in to comment.