From efe52e3da2038a8b9e9cb07ce74b8db8b0089990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joel=20H=C3=B6ner?= Date: Tue, 23 May 2023 01:31:32 +0200 Subject: [PATCH] wip --- src/encoder.rs | 602 +++++++++++++++++++++++++++++++++++++++++++++ src/enums.rs | 18 ++ src/ffi/encoder.rs | 14 +- src/lib.rs | 4 + src/status.rs | 1 + 5 files changed, 632 insertions(+), 7 deletions(-) create mode 100644 src/encoder.rs diff --git a/src/encoder.rs b/src/encoder.rs new file mode 100644 index 0000000..f3f888c --- /dev/null +++ b/src/encoder.rs @@ -0,0 +1,602 @@ +use crate::{ffi, *}; +use core::{ + mem::{self, ManuallyDrop, MaybeUninit}, + ops::Deref, +}; + +/// Workaround for missing `const fn` in `core::mem::zeroed`. +/// +/// Concept borrowed from the `const_zero` crate. +macro_rules! zeroed { + ($ty:ty) => {{ + union TypeOrArray { + raw: [u8; mem::size_of::<$ty>()], + struc: mem::ManuallyDrop<$ty>, + } + ManuallyDrop::<$ty>::into_inner( + TypeOrArray { + raw: [0; mem::size_of::<$ty>()], + } + .struc, + ) + }}; +} + +#[repr(transparent)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct EncoderRequest(ffi::EncoderRequest); + +impl Deref for EncoderRequest { + type Target = ffi::EncoderRequest; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl EncoderRequest { + pub const fn new32(mnemonic: Mnemonic) -> Self { + Self::new(MachineMode::LONG_COMPAT_32, mnemonic) + } + + pub const fn new64(mnemonic: Mnemonic) -> Self { + Self::new(MachineMode::LONG_64, mnemonic) + } + + /// Creates a new, otherwise zero-initialized encoder request for the given mode and mnemonic. + pub const fn new(machine_mode: MachineMode, mnemonic: Mnemonic) -> Self { + let mut request = unsafe { zeroed!(ffi::EncoderRequest) }; + request.machine_mode = machine_mode; + request.mnemonic = mnemonic; + Self(request) + } + + /// Sets the prefixes. + /// + /// Prefixes are simply represented using the corresponding instruction + /// attributes. So e.g. if you wish to add a `GS` segment prefix, specify + /// [`InstructionAttributes::HAS_SEGMENT_GS`]. 
+ pub const fn set_prefixes(mut self, prefixes: InstructionAttributes) -> Self { + self.0.prefixes = prefixes; + self + } + + /// Sets the branch type. + /// + /// Required for branching instructions only. The default of + /// `ZYDIS_BRANCH_TYPE_NONE` lets the encoder pick the size-optimal branch type + /// automatically (`short` and `near` are prioritized over `far`). + pub const fn set_branch_type(mut self, branch_type: BranchType) -> Self { + self.0.branch_type = branch_type; + self + } + + /// Sets the branch width. + /// + /// Specifies the physical size for relative immediate operands. Use + /// `ZYDIS_BRANCH_WIDTH_NONE` to let the encoder pick the size-optimal branch width + /// automatically. For segment:offset `far` branches this field applies to the + /// physical size of the offset part. For branching instructions without + /// relative operands this field affects the effective operand size attribute. + pub const fn set_branch_width(mut self, branch_width: BranchWidth) -> Self { + self.0.branch_width = branch_width; + self + } + + /// Sets the address size hint. + pub const fn set_address_size_hint(mut self, address_size_hint: AddressSizeHint) -> Self { + self.0.address_size_hint = address_size_hint; + self + } + + /// Sets the operand size hint. + pub const fn set_operand_size_hint(mut self, operand_size_hint: OperandSizeHint) -> Self { + self.0.operand_size_hint = operand_size_hint; + self + } + + /// Gets a slice of the operands (the first `operand_count` entries of the request). + pub const fn operands(&self) -> &[EncoderOperand] { + unsafe { + core::slice::from_raw_parts( + self.0.operands.as_ptr() as *const EncoderOperand, + self.0.operand_count as usize, + ) + } + } + + /// Gets a mutable slice of the operands (the first `operand_count` entries of the request). + pub fn operands_mut(&mut self) -> &mut [EncoderOperand] { + unsafe { + core::slice::from_raw_parts_mut( + self.0.operands.as_mut_ptr() as *mut EncoderOperand, + self.0.operand_count as usize, + ) + } + } + + /// Adds an operand to the request. 
+ /// + /// # Panics + /// + /// If the request already holds [`ENCODER_MAX_OPERANDS`] operands. + pub fn add_operand(mut self, op: impl Into<EncoderOperand>) -> Self { + assert!( + self.0.operand_count < ENCODER_MAX_OPERANDS as _, + "too many operands" + ); + self.0.operands[self.0.operand_count as usize] = op.into().0; + self.0.operand_count += 1; + self + } + + /// Clears the operand list by resetting the operand count to zero. + pub const fn clear_operands(mut self) -> Self { + self.0.operand_count = 0; + self + } + + /// Replaces the operand at the given index. + /// + /// # Panics + /// + /// If the index was not previously populated. + pub fn replace_operand(mut self, idx: usize, new: impl Into<EncoderOperand>) -> Self { + assert!( + idx < self.0.operand_count as _, + "operand index out of bounds" + ); + self.0.operands[idx] = new.into().0; + self + } + + /// Encodes the instruction into the given buffer and returns the encoded length. + pub fn encode_into(&self, buf: &mut [u8]) -> Result<usize> { + unsafe { + let mut length = buf.len(); + ffi::ZydisEncoderEncodeInstruction(&self.0, buf.as_mut_ptr() as _, &mut length) + .as_result()?; + Ok(length) + } + } + + /// Encodes the instruction into a new buffer. 
+ pub fn encode(&self) -> Result<Vec<u8>> { + let mut out = vec![0; MAX_INSTRUCTION_LENGTH]; + let length = self.encode_into(&mut out[..])?; + out.resize(length, 0); + Ok(out) + } +} + +impl From>> for EncoderRequest { + fn from(instr: Instruction>) -> Self { + unsafe { + let mut request = MaybeUninit::uninit(); + ffi::ZydisEncoderDecodedInstructionToEncoderRequest( + &*instr, + instr.operands().as_ptr(), + instr.operands().len() as _, + request.as_mut_ptr(), + ) + .as_result() + .expect( + "our rust wrapper for instructions is immutable and unchanged decoded \ + instructions should always be convertible", + ); + Self(request.assume_init()) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct EncoderOperand(ffi::EncoderOperand); + +impl EncoderOperand { + #[doc(hidden)] // needed in `mem!` macro + pub const ZERO_MEM: ffi::OperandMemory = unsafe { zeroed!(ffi::OperandMemory) }; + const ZERO_PTR: ffi::OperandPointer = unsafe { zeroed!(ffi::OperandPointer) }; + const ZERO_REG: ffi::OperandRegister = unsafe { zeroed!(ffi::OperandRegister) }; + + /// Creates a new register operand. + pub const fn reg(reg: Register) -> Self { + Self(ffi::EncoderOperand { + ty: OperandType::REGISTER, + reg: ffi::OperandRegister { + value: reg as _, + is4: false, + }, + mem: Self::ZERO_MEM, + ptr: Self::ZERO_PTR, + imm: 0, + }) + } + + /// Creates a new `[disp]` memory operand. + /// + /// Note that only very few instructions actually accept a full 64-bit + /// displacement. You'll typically only be able to use the lower 32 bits + /// or encoding will fail. + pub const fn mem_abs(size_bytes: u16, abs_disp: u64) -> Self { + Self::mem_custom(ffi::OperandMemory { + displacement: abs_disp as i64, + size: size_bytes, + ..Self::ZERO_MEM + }) + } + + /// Creates a new `[reg + disp]` memory operand. 
+ pub const fn mem_base_disp(size_bytes: u16, base: Register, disp: i32) -> Self { + Self::mem_custom(ffi::OperandMemory { + base, + displacement: disp as _, + size: size_bytes, + ..Self::ZERO_MEM + }) + } + + /// Creates a new `[scale * index]` memory operand. + /// + /// Scale can only be 1, 2, 4, or 8; the value is not validated here. + pub const fn mem_index_scale(size_bytes: u16, index: Register, scale: u8) -> Self { + Self::mem_custom(ffi::OperandMemory { + index, + scale, + size: size_bytes, + ..Self::ZERO_MEM + }) + } + + /// Creates a new memory operand from a caller-supplied [`ffi::OperandMemory`]. + pub const fn mem_custom(mem: ffi::OperandMemory) -> Self { + Self(ffi::EncoderOperand { + ty: OperandType::MEMORY, + reg: Self::ZERO_REG, + mem, + ptr: Self::ZERO_PTR, + imm: 0, + }) + } + + /// Creates a new pointer operand from a segment and an offset. + pub const fn ptr(segment: u16, offset: u32) -> Self { + Self(ffi::EncoderOperand { + ty: OperandType::POINTER, + reg: Self::ZERO_REG, + mem: Self::ZERO_MEM, + ptr: ffi::OperandPointer { segment, offset }, + imm: 0, + }) + } + + /// Creates a new immediate operand (unsigned). + pub const fn imm(imm: u64) -> Self { + Self(ffi::EncoderOperand { + ty: OperandType::IMMEDIATE, + reg: Self::ZERO_REG, + mem: Self::ZERO_MEM, + ptr: Self::ZERO_PTR, + imm, + }) + } + + /// Creates a new immediate operand (signed). 
+ pub const fn imm_signed(imm: i64) -> Self { + Self::imm(imm as _) + } +} + +impl Deref for EncoderOperand { + type Target = ffi::EncoderOperand; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl From<Register> for EncoderOperand { + fn from(reg: Register) -> Self { + Self::reg(reg) + } +} + +impl From<u64> for EncoderOperand { + fn from(imm: u64) -> Self { + Self::imm(imm) + } +} + +impl From<i64> for EncoderOperand { + fn from(imm: i64) -> Self { + Self::imm_signed(imm) + } +} + +#[doc(hidden)] +pub mod mem_macro_plumbing { + pub enum DispOrBase { + Disp(i64), + Base(crate::Register), + } + + impl From<i64> for DispOrBase { + fn from(disp: i64) -> DispOrBase { + DispOrBase::Disp(disp) + } + } + + impl From<crate::Register> for DispOrBase { + fn from(base: crate::Register) -> DispOrBase { + DispOrBase::Base(base) + } + } +} + +/// Macro for creating memory operands. +/// +/// # Example +/// +/// ```rust +/// # use zydis::*; +/// // Literal (constant) operands. +/// mem!(qword ptr [0x1234]); +/// mem!(dword ptr [RAX + 0x1234]); +/// mem!(dword ptr [RSI * 8]); +/// mem!(dword ptr [RDX + RSI * 8]); +/// mem!(dword ptr [RDX + RSI * 8 + 0x1234]); +/// +/// // Expression (dynamic) operands (must be wrapped in parentheses!). +/// let my_dyn_disp = 0x1234 + 837434; +/// let my_dyn_reg = Register::RBX; +/// mem!(qword ptr [(my_dyn_disp)]); +/// mem!(qword ptr [(my_dyn_reg)]); +/// mem!(qword ptr [(my_dyn_reg) * 4]); +/// mem!(qword ptr [(my_dyn_reg) * 4 + (my_dyn_disp)]); +/// ``` +#[macro_export] +macro_rules! 
mem { + ($size:tt ptr [ $($tail:tt)* ]) => {{ + let mut x = $crate::EncoderOperand::ZERO_MEM.clone(); + x.size = mem!(@size $size); + mem!(@base_or_disp x $($tail)*); + $crate::EncoderOperand::mem_custom(x) + }}; + + (@size byte) => { 8/8 }; + (@size word) => { 16/8 }; + (@size dword) => { 32/8 }; + (@size fword) => { 48/8 }; + (@size qword) => { 64/8 }; + (@size tbyte) => { 80/8 }; + (@size xmmword) => { 128/8 }; + (@size ymmword) => { 256/8 }; + (@size zmmword) => { 512/8 }; + (@size $x:tt) => { compile_error!(concat!("bad operand size: ", stringify!($x))) }; + + (@base_or_disp $x:ident $disp:literal) => { + $x.displacement = $disp; + }; + (@base_or_disp $x:ident ($base_or_disp:expr)) => { + let y: $crate::mem_macro_plumbing::DispOrBase = $base_or_disp.into(); + match y { + $crate::mem_macro_plumbing::DispOrBase::Disp(disp) => { + $x.displacement = disp; + } + $crate::mem_macro_plumbing::DispOrBase::Base(base) => { + $x.base = base; + } + } + }; + (@base_or_disp $x:ident $base:ident $($tail:tt)*) => { + $x.base = $crate::Register::$base; + mem!(@index_or_disp_or_scale $x $($tail)*); + }; + (@base_or_disp $x:ident ($base:expr) $($tail:tt)*) => { + $x.base = $base; + mem!(@index_or_disp_or_scale $x $($tail)*); + }; + + (@index_or_disp_or_scale $x:ident) => {}; + (@index_or_disp_or_scale $x:ident + $disp:literal) => { + $x.displacement = $disp; + }; + (@index_or_disp_or_scale $x:ident + ($disp:expr)) => { + $x.displacement = $disp; + }; + (@index_or_disp_or_scale $x:ident + $index:ident $($tail:tt)*) => { + $x.index = $crate::Register::$index; + mem!(@scale_or_disp $x $($tail)*); + }; + (@index_or_disp_or_scale $x:ident + ($index:expr) $($tail:tt)*) => { + $x.index = $index; + mem!(@scale_or_disp $x $($tail)*); + }; + (@index_or_disp_or_scale $x:ident * $scale:literal $($tail:tt)*) => { + $x.index = $x.base; + $x.base = $crate::Register::NONE; + $x.scale = $scale; + mem!(@scale_or_disp $x $($tail)*); + }; + (@index_or_disp_or_scale $x:ident * ($scale:expr) 
$($tail:tt)*) => { + $x.index = $x.base; + $x.base = $crate::Register::NONE; + $x.scale = $scale; + mem!(@scale_or_disp $x $($tail)*); + }; + + (@scale_or_disp $x:ident) => {}; + (@scale_or_disp $x:ident + $disp:literal) => { + $x.displacement = $disp; + }; + (@scale_or_disp $x:ident + ($disp:expr)) => { + $x.displacement = $disp; + }; + (@scale_or_disp $x:ident * $scale:literal $($tail:tt)*) => { + $x.scale = $scale; + mem!(@disp $x $($tail)*); + }; + (@scale_or_disp $x:ident * ($scale:expr) $($tail:tt)*) => { + $x.scale = $scale; + mem!(@disp $x $($tail)*); + }; + + (@disp $x:ident) => {}; + (@disp $x:ident + $disp:literal) => { + $x.displacement = $disp; + }; + (@disp $x:ident + ($disp:expr)) => { + $x.displacement = $disp; + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn mem_macro() { + type EO = EncoderOperand; + type R = Register; + + assert_eq!(mem!(dword ptr [0x1337]), EO::mem_abs(4, 0x1337)); + assert_eq!(mem!(qword ptr [RAX]), EO::mem_base_disp(8, R::RAX, 0)); + assert_eq!( + mem!(qword ptr [RDX + 0x1234]), + EO::mem_base_disp(8, R::RDX, 0x1234) + ); + assert_eq!( + mem!(qword ptr [RAX + RDX]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + base: R::RAX, + index: R::RDX, + ..EncoderOperand::ZERO_MEM + }) + ); + assert_eq!( + mem!(qword ptr [RAX + RDX * 2]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + base: R::RAX, + index: R::RDX, + scale: 2, + ..EncoderOperand::ZERO_MEM + }) + ); + assert_eq!( + mem!(qword ptr [RAX + RDX * 2 + 0x8282]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + base: R::RAX, + index: R::RDX, + scale: 2, + displacement: 0x8282, + }) + ); + assert_eq!(mem!(qword ptr [RAX * 4]), EO::mem_index_scale(8, R::RAX, 4)); + assert_eq!( + mem!(qword ptr [RAX * 4 + 0x234]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + index: R::RAX, + scale: 4, + displacement: 0x234, + ..EncoderOperand::ZERO_MEM + }) + ); + assert_eq!( + mem!(dword ptr [(Register::RDI)]), + EO::mem_base_disp(4, R::RDI, 0) + ); + 
assert_eq!(mem!(dword ptr [(0x1337 + 8)]), EO::mem_abs(4, 0x1337 + 8)); + assert_eq!( + mem!(qword ptr [RAX * (2 + 2) + 0x234]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + index: R::RAX, + scale: 4, + displacement: 0x234, + ..EncoderOperand::ZERO_MEM + }) + ); + assert_eq!( + mem!(qword ptr [RAX * 2 + (0x234 + 22)]), + EO::mem_custom(ffi::OperandMemory { + size: 8, + index: R::RAX, + scale: 2, + displacement: 0x234 + 22, + ..EncoderOperand::ZERO_MEM + }) + ); + assert_eq!( + mem!(dword ptr [RAX + (Register::RCX) * 8 + 888]), + EO::mem_custom(ffi::OperandMemory { + size: 4, + base: R::RAX, + index: R::RCX, + scale: 8, + displacement: 888 + }) + ); + assert_eq!( + mem!(xmmword ptr [ RAX + (Register::RDX) * (1 + 1) + (0x123 + 0x33) ]), + EO::mem_custom(ffi::OperandMemory { + size: 128 / 8, + base: R::RAX, + index: R::RDX, + scale: 2, + displacement: 0x123 + 0x33, + }) + ); + assert_eq!( + mem!(byte ptr [ (Register::RSI) + (Register::RDX) * (1 + 1) + (0x123 + 0x33) ]), + EO::mem_custom(ffi::OperandMemory { + size: 1, + base: R::RSI, + index: R::RDX, + scale: 2, + displacement: 0x123 + 0x33, + }) + ); + } + + #[test] + fn encode_int3() { + let req = EncoderRequest::new64(Mnemonic::INT3); + let enc = req.encode().unwrap(); + assert_eq!(enc, vec![0xCC]); + } + + #[test] + fn encode_mov() { + let mov = EncoderRequest::new64(Mnemonic::MOV) + .add_operand(Register::RAX) + .add_operand(0x1337u64) + .encode() + .unwrap(); + + assert_eq!(mov, b"\x48\xC7\xC0\x37\x13\x00\x00"); + } + + #[test] + fn reencode() { + let cmp = b"\x48\x81\x78\x7B\x41\x01\x00\x00"; + let dec = Decoder::new64(); + + let insn = dec.decode_first::(cmp).unwrap().unwrap(); + assert_eq!(insn.to_string(), "cmp qword ptr [rax+0x7B], 0x141"); + + let enc = EncoderRequest::from(insn) + .set_prefixes(InstructionAttributes::HAS_SEGMENT_FS) + .replace_operand(0, mem!(qword ptr [RDX + 0xB7])) + .replace_operand(1, 0x1337u64) + .encode() + .unwrap(); + assert_eq!(enc, 
b"\x64\x48\x81\xBA\xB7\x00\x00\x00\x37\x13\x00\x00"); + + let redec = dec.decode_first::(&enc).unwrap().unwrap(); + assert_eq!(redec.to_string(), "cmp qword ptr fs:[rdx+0xB7], 0x1337"); + } +} diff --git a/src/enums.rs b/src/enums.rs index 3be4171..64cb4a0 100644 --- a/src/enums.rs +++ b/src/enums.rs @@ -25,6 +25,24 @@ pub(crate) const MAX_INSTRUCTION_SEGMENT_COUNT: usize = 9; /// Maximum number of encoder operands. pub const ENCODER_MAX_OPERANDS: usize = 5; +/// Combination of all user-encodable prefixes. +pub const ENCODABLE_PREFIXES: u64 = InstructionAttributes::HAS_LOCK.bits() + | InstructionAttributes::HAS_REP.bits() + | InstructionAttributes::HAS_REPE.bits() + | InstructionAttributes::HAS_REPNE.bits() + | InstructionAttributes::HAS_BND.bits() + | InstructionAttributes::HAS_XACQUIRE.bits() + | InstructionAttributes::HAS_XRELEASE.bits() + | InstructionAttributes::HAS_BRANCH_NOT_TAKEN.bits() + | InstructionAttributes::HAS_BRANCH_TAKEN.bits() + | InstructionAttributes::HAS_NOTRACK.bits() + | InstructionAttributes::HAS_SEGMENT_CS.bits() + | InstructionAttributes::HAS_SEGMENT_SS.bits() + | InstructionAttributes::HAS_SEGMENT_DS.bits() + | InstructionAttributes::HAS_SEGMENT_ES.bits() + | InstructionAttributes::HAS_SEGMENT_FS.bits() + | InstructionAttributes::HAS_SEGMENT_GS.bits(); + impl Mnemonic { /// Returns the static string corresponding to this mnemonic. 
/// diff --git a/src/ffi/encoder.rs b/src/ffi/encoder.rs index ad4171d..88895a6 100644 --- a/src/ffi/encoder.rs +++ b/src/ffi/encoder.rs @@ -33,7 +33,7 @@ pub struct OperandPointer { #[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))] #[derive(Clone, Debug, Eq, PartialEq, Hash)] #[repr(C)] -pub struct Operand { +pub struct EncoderOperand { pub ty: OperandType, pub reg: OperandRegister, pub mem: OperandMemory, @@ -68,7 +68,7 @@ pub struct MvexFeatures { #[cfg_attr(feature = "serialization", derive(Deserialize, Serialize))] #[derive(Clone, Debug, Eq, PartialEq, Hash)] #[repr(C)] -pub struct Request { +pub struct EncoderRequest { pub machine_mode: MachineMode, pub allowed_encodings: EncodableEncoding, pub mnemonic: Mnemonic, @@ -77,22 +77,22 @@ pub struct Request { pub branch_width: BranchWidth, pub address_size_hint: AddressSizeHint, pub operand_size_hint: OperandSizeHint, - pub operand_count: u8, - pub operands: [Operand; ENCODER_MAX_OPERANDS], + pub(crate) operand_count: u8, + pub(crate) operands: [EncoderOperand; ENCODER_MAX_OPERANDS], pub evex: EvexFeatures, pub mvex: MvexFeatures, } extern "C" { pub fn ZydisEncoderEncodeInstructionAbsolute( - request: *const Request, + request: *const EncoderRequest, buffer: *mut c_void, length: *mut usize, runtime_address: u64, ) -> Status; pub fn ZydisEncoderEncodeInstruction( - request: *const Request, + request: *const EncoderRequest, buffer: *mut c_void, length: *mut usize, ) -> Status; @@ -101,7 +101,7 @@ extern "C" { instruction: *const DecodedInstruction, operands: *const DecodedOperand, operand_count: u8, - request: *mut Request, + request: *mut EncoderRequest, ) -> Status; pub fn ZydisEncoderNopFill(buffer: *mut c_void, length: usize) -> Status; diff --git a/src/lib.rs b/src/lib.rs index 1515d2f..1698037 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,12 +5,16 @@ #[macro_use] mod status; mod decoder; +#[cfg(feature = "encoder")] +mod encoder; mod enums; pub mod ffi; #[cfg(feature = "formatter")] mod 
formatter; pub use decoder::*; +#[cfg(feature = "encoder")] +pub use encoder::*; pub use enums::*; #[cfg(feature = "formatter")] pub use formatter::*; diff --git a/src/status.rs b/src/status.rs index 0136dc7..f7968fa 100644 --- a/src/status.rs +++ b/src/status.rs @@ -116,6 +116,7 @@ impl Status { Status::SkipToken => "skip this token", Status::User => "user error", Status::NotUTF8 => "invalid utf8 data was passed to rust", + Status::ImpossibleInstruction => "requested impossible instruction", _ => "unknown error", } }