From 668d20f5c2dd6a96620a0ec0fbd66f4672087b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Kostrubiec?= Date: Thu, 15 Aug 2024 22:52:55 +0200 Subject: [PATCH] Added some very basic optimizations --- Cargo.toml | 2 +- autotest.rs => bin/autotest.rs | 14 +- cargo_tests/build_std/src/main.rs | 214 ++++++++++++++---------------- cilly/src/asm.rs | 6 +- cilly/src/bin/linker/main.rs | 3 +- cilly/src/libc_fns.rs | 17 +++ cilly/src/v2/asm.rs | 54 ++++++++ cilly/src/v2/bimap.rs | 8 ++ cilly/src/v2/builtins/atomics.rs | 27 +++- cilly/src/v2/cache.rs | 116 ++++++++++++++++ cilly/src/v2/class.rs | 4 + cilly/src/v2/mod.rs | 1 + cilly/src/v2/opt/mod.rs | 101 ++++++++++---- dotnet_aot/Cargo.toml | 6 + dotnet_aot/src/lib.rs | 14 ++ src/builtin/mod.rs | 99 +++++++------- src/terminator/intrinsics/mod.rs | 9 ++ test/intrinsics/atomics.rs | 3 +- test/intrinsics/malloc.rs | 2 + 19 files changed, 497 insertions(+), 203 deletions(-) rename autotest.rs => bin/autotest.rs (89%) create mode 100644 cilly/src/v2/cache.rs create mode 100644 dotnet_aot/Cargo.toml create mode 100644 dotnet_aot/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 37fd5ce8..43ba4cf3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ opt-level = 3 crate-type = ["dylib"] [workspace] -members = [ "cilly", +members = [ "cilly", "dotnet_aot", "mycorrhiza", ] exclude = ["rust/src/bootstrap"] diff --git a/autotest.rs b/bin/autotest.rs similarity index 89% rename from autotest.rs rename to bin/autotest.rs index 4f0d69d5..f6d6d351 100755 --- a/autotest.rs +++ b/bin/autotest.rs @@ -8,6 +8,7 @@ fn main() { let mut failures: HashSet = HashSet::default(); let mut broken: Vec = Vec::default(); let timeout = std::env::args().nth(2).unwrap_or("20".to_owned()); + let mut shuffles = 4; loop { let mut cmd = std::process::Command::new("timeout"); cmd.arg("-k"); @@ -15,6 +16,11 @@ fn main() { cmd.arg(&timeout); cmd.arg("dotnet"); cmd.arg(exec_path.clone()); + if shuffles > 0 { + cmd.arg("--shuffle"); + 
cmd.arg("-Z"); + cmd.arg("unstable-options"); + } cmd.arg("--test-threads=1"); cmd.args(broken.iter().flat_map(|arg| ["--skip", arg])); println!("\n{cmd:?}"); @@ -56,8 +62,12 @@ fn main() { | stderr.contains("finished") | stdout.contains("finished") { - println!("search done."); - break; + if shuffles > 0 { + shuffles -= 1; + } else { + println!("search done."); + break; + } } } println!(); diff --git a/cargo_tests/build_std/src/main.rs b/cargo_tests/build_std/src/main.rs index 4d1a4ded..8becb982 100644 --- a/cargo_tests/build_std/src/main.rs +++ b/cargo_tests/build_std/src/main.rs @@ -1,3 +1,4 @@ +#![feature(float_next_up_down)] #[test] fn should_pass() {} #[test] @@ -5,135 +6,114 @@ fn should_pass() {} fn should_panic() { panic!(); } +#[allow(unused_macros)] +macro_rules! assert_f64_biteq { + ($left : expr, $right : expr) => { + let l: &f64 = &$left; + let r: &f64 = &$right; + let lb = l.to_bits(); + let rb = r.to_bits(); + assert_eq!( + lb, rb, + "float {l} ({lb:#018x}) is not bitequal to {r} ({rb:#018x})" + ); + }; +} fn main() { - //select_nth_unstable(); + let nan0 = f64::NAN; + let nan1 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK2); + assert_f64_biteq!(next_up(nan0), nan0); + assert_f64_biteq!(next_up(nan1), nan1); + assert_f64_biteq!(next_up(nan2), nan2); } -/* -fn select_nth_unstable() { - use core::cmp::Ordering::{Equal, Greater, Less}; - - use rand::seq::SliceRandom; - use rand::Rng; - let mut rng = rand::thread_rng(); - let mut v = [0; 500]; - for pivot in 0..v.len() { - println!("{}:{}", file!(), line!()); - v.select_nth_unstable_by(pivot, |_, _| { - *[Less, Equal, Greater].choose(&mut rng).unwrap() - }); - v.sort(); - for i in 0..v.len() { - println!("{}:{}", file!(), line!()); - assert_eq!(v[i], i as i32); - } - } - for len in (2..21).chain(500..501) { - let mut orig = vec![0; len]; - println!("{}:{}", file!(), line!()); - for &modulus in &[5, 10, 1000] { - for _ in 0..10 { - 
println!("{}:{}", file!(), line!()); - for i in 0..len { - println!("{}:{}", file!(), line!()); - orig[i] = rng.gen::() % modulus; - } +#[test] +fn test_next_up() { + let tiny = f64::from_bits(TINY_BITS); + let tiny_up = f64::from_bits(TINY_UP_BITS); + let max_down = f64::from_bits(MAX_DOWN_BITS); + let largest_subnormal = f64::from_bits(LARGEST_SUBNORMAL_BITS); + let smallest_normal = f64::from_bits(SMALLEST_NORMAL_BITS); + assert_f64_biteq!(f64::NEG_INFINITY.next_up(), f64::MIN); + assert_f64_biteq!(f64::MIN.next_up(), -max_down); + assert_f64_biteq!((-1.0 - f64::EPSILON).next_up(), -1.0); + assert_f64_biteq!((-smallest_normal).next_up(), -largest_subnormal); + assert_f64_biteq!((-tiny_up).next_up(), -tiny); + assert_f64_biteq!((-tiny).next_up(), -0.0f64); + assert_f64_biteq!((-0.0f64).next_up(), tiny); + assert_f64_biteq!(0.0f64.next_up(), tiny); + assert_f64_biteq!(tiny.next_up(), tiny_up); + assert_f64_biteq!(largest_subnormal.next_up(), smallest_normal); + assert_f64_biteq!(1.0f64.next_up(), 1.0 + f64::EPSILON); + assert_f64_biteq!(f64::MAX.next_up(), f64::INFINITY); + assert_f64_biteq!(f64::INFINITY.next_up(), f64::INFINITY); - let v_sorted = { - let mut v = orig.clone(); - v.sort(); - v - }; + let nan0 = f64::NAN; + let nan1 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK1); + let nan2 = f64::from_bits(f64::NAN.to_bits() ^ NAN_MASK2); + assert_f64_biteq!(next_up(nan0), nan0); + assert_f64_biteq!(next_up(nan1), nan1); + assert_f64_biteq!(next_up(nan2), nan2); +} +pub fn next_up(val: f64) -> f64 { + // Some targets violate Rust's assumption of IEEE semantics, e.g. by flushing + // denormals to zero. This is in general unsound and unsupported, but here + // we do our best to still produce the correct result on such targets. + let bits = val.to_bits(); + eprintln!("bits:{bits:?}"); + if val.is_nan() || bits == f64::INFINITY.to_bits() { + return val; + } - // Sort in default order. 
- for pivot in 0..len { - println!("{}:{}", file!(), line!()); - let mut v = orig.clone(); - v.select_nth_unstable(pivot); + let abs = bits & !SIGN_MASK; + let next_bits = if abs == 0 { + TINY_BITS + } else if bits == abs { + bits + 1 + } else { + bits - 1 + }; + eprintln!("next_bits:{next_bits:?}"); + f64::from_bits(next_bits) +} +/// Sign bit +const SIGN_MASK: u64 = 0x8000_0000_0000_0000; - assert_eq!(v_sorted[pivot], v[pivot]); - for i in 0..pivot { - for j in pivot..len { - assert!(v[i] <= v[j]); - } - } - } +/// Exponent mask +const EXP_MASK: u64 = 0x7ff0_0000_0000_0000; - // Sort in ascending order. - for pivot in 0..len { - println!("{}:{}", file!(), line!()); - let mut v = orig.clone(); - let (left, pivot, right) = v.select_nth_unstable_by(pivot, |a, b| a.cmp(b)); +/// Mantissa mask +const MAN_MASK: u64 = 0x000f_ffff_ffff_ffff; - assert_eq!(left.len() + right.len(), len - 1); +/// Minimum representable positive value (min subnormal) - for l in left { - assert!(l <= pivot); - for r in right.iter_mut() { - assert!(l <= r); - assert!(pivot <= r); - } - } - } +/// Minimum representable negative value (min negative subnormal) +const NEG_TINY_BITS: u64 = TINY_BITS | SIGN_MASK; +/// Smallest number +#[allow(dead_code)] // unused on x86 +const TINY_BITS: u64 = 0x1; - // Sort in descending order. - let sort_descending_comparator = |a: &i32, b: &i32| b.cmp(a); - let v_sorted_descending = { - let mut v = orig.clone(); - v.sort_by(sort_descending_comparator); - v - }; +/// Next smallest number +#[allow(dead_code)] // unused on x86 +const TINY_UP_BITS: u64 = 0x2; - for pivot in 0..len { - println!("{}:{}", file!(), line!()); - let mut v = orig.clone(); - v.select_nth_unstable_by(pivot, sort_descending_comparator); +/// Exponent = 0b11...10, Sifnificand 0b1111..10. 
Min val > 0 +#[allow(dead_code)] // unused on x86 +const MAX_DOWN_BITS: u64 = 0x7fef_ffff_ffff_fffe; - assert_eq!(v_sorted_descending[pivot], v[pivot]); - for i in 0..pivot { - for j in pivot..len { - assert!(v[j] <= v[i]); - } - } - } - } - } - } +/// Zeroed exponent, full significant +#[allow(dead_code)] // unused on x86 +const LARGEST_SUBNORMAL_BITS: u64 = 0x000f_ffff_ffff_ffff; - // Sort at index using a completely random comparison function. - // This will reorder the elements *somehow*, but won't panic. - let mut v = [0; 500]; - for i in 0..v.len() { - println!("{}:{}", file!(), line!()); - v[i] = i as i32; - } +/// Exponent = 0b1, zeroed significand +#[allow(dead_code)] // unused on x86 +const SMALLEST_NORMAL_BITS: u64 = 0x0010_0000_0000_0000; - // Should not panic. - [(); 10].select_nth_unstable(0); - [(); 10].select_nth_unstable(5); - [(); 10].select_nth_unstable(9); - [(); 100].select_nth_unstable(0); - [(); 100].select_nth_unstable(50); - [(); 100].select_nth_unstable(99); +/// First pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK1: u64 = 0x000a_aaaa_aaaa_aaaa; - let mut v = [0xDEADBEEFu64]; - v.select_nth_unstable(0); - assert!(v == [0xDEADBEEF]); - println!("v:{v:?}"); -} -*/ -pub fn test(start: u32, end: u32) -> u32 { - let mut sum = 0; - for i in start..end { - sum += i; - } - sum -} -pub fn test2(mut start: u32, end: u32) -> u32 { - let mut sum = 0; - while start < end { - sum += start; - start += 1; - } - sum -} +/// Second pattern over the mantissa +#[allow(dead_code)] // unused on x86 +const NAN_MASK2: u64 = 0x0005_5555_5555_5555; diff --git a/cilly/src/asm.rs b/cilly/src/asm.rs index f902072f..22923b14 100644 --- a/cilly/src/asm.rs +++ b/cilly/src/asm.rs @@ -12,7 +12,7 @@ use crate::{ static_field_desc::StaticFieldDescriptor, type_def::TypeDef, utilis::MemoryUsage, - v2::opt::SideEffectInfoCache, + v2::opt::{OptFuel, SideEffectInfoCache}, DotnetTypeRef, FnSig, IString, Type, }; @@ -194,8 +194,8 @@ impl Assembly { 
} let main_module = self.inner.main_module(); - let mut def = crate::v2::MethodDef::from_v1(&method, &mut self.inner, main_module); - def.optimize(&mut self.inner, &mut SideEffectInfoCache::default(), 4); + let def = crate::v2::MethodDef::from_v1(&method, &mut self.inner, main_module); + //def.optimize(&mut self.inner, &mut SideEffectInfoCache::default(), 4); self.inner.new_method(def); } diff --git a/cilly/src/bin/linker/main.rs b/cilly/src/bin/linker/main.rs index 74116a7d..d79e6ffd 100644 --- a/cilly/src/bin/linker/main.rs +++ b/cilly/src/bin/linker/main.rs @@ -351,7 +351,8 @@ fn main() { println!("Eliminating dead code"); final_assembly.eliminate_dead_code(); } - + let mut fuel = final_assembly.fuel_from_env(); + final_assembly.opt(&mut fuel); final_assembly .save_tmp(&mut std::fs::File::create(path.with_extension("cilly2")).unwrap()) .unwrap(); diff --git a/cilly/src/libc_fns.rs b/cilly/src/libc_fns.rs index 3bb059df..61b063e1 100644 --- a/cilly/src/libc_fns.rs +++ b/cilly/src/libc_fns.rs @@ -124,6 +124,7 @@ pub const LIBC_FNS: &[&str] = &[ "fstat64", "fsync", "ftime", + "ftruncate64", "ftok", "ftw64", "ftw", @@ -316,15 +317,19 @@ pub const LIBC_FNS: &[&str] = &[ "posix_spawnattr_setsigdefault", "posix_spawn_file_actions_adddup2", "posix_spawnattr_setflags", + "posix_spawnattr_setpgroup", + "posix_spawn_file_actions_addchdir_np", "posix_spawn_file_actions_destroy", "posix_spawnattr_destroy", "posix_spawnp", "pidfd_spawnp", + "pthread_sigmask", "pipe2", "poll", "ppoll", "preadv2", "preadv64", + "pread64", "preadv64v2", "preadv", "printf", @@ -544,6 +549,8 @@ pub const LIBC_FNS: &[&str] = &[ "vwarnx", "vwprintf", "vwscanf", + "waitid", + "waitpid", "warn", "warnx", "wcscspn", @@ -589,6 +596,7 @@ pub const LIBC_MODIFIES_ERRNO: &[&str] = &[ "create", "fchmod", "fdopendir", + "ftruncate64", "fstat64", "futimens", "getcwd", @@ -612,8 +620,15 @@ pub const LIBC_MODIFIES_ERRNO: &[&str] = &[ "posix_spawnattr_setsigdefault", "posix_spawn_file_actions_adddup2", 
"posix_spawnattr_setflags", + "posix_spawnattr_setpgroup", + "posix_spawn_file_actions_addchdir_np", + "pthread_sigmask", "posix_spawnp", "pipe2", + "preadv2", + "preadv64", + "pread64", + "preadv64v2", "pidfd_getpid", "pidfd_spawnp", "pwrite64", @@ -644,6 +659,8 @@ pub const LIBC_MODIFIES_ERRNO: &[&str] = &[ "statx", "symlink", "symlinkat", + "waitid", + "waitpid", "write", "writev", ]; diff --git a/cilly/src/v2/asm.rs b/cilly/src/v2/asm.rs index 6579999a..93c7249c 100644 --- a/cilly/src/v2/asm.rs +++ b/cilly/src/v2/asm.rs @@ -1,6 +1,8 @@ use super::{ bimap::{calculate_hash, BiMap, BiMapIndex, IntoBiMapIndex}, + cache::{CachedAssemblyInfo, NonMaxU32, StackUsage}, cilnode::{BinOp, MethodKind, UnOp}, + opt::{OptFuel, SideEffectInfoCache}, Access, CILNode, CILRoot, ClassDef, ClassDefIdx, ClassRef, ClassRefIdx, Const, Exporter, FieldDesc, FieldIdx, FnSig, MethodDef, MethodDefIdx, MethodRef, MethodRefIdx, NodeIdx, RootIdx, SigIdx, StaticFieldDesc, StaticFieldIdx, StringIdx, Type, TypeIdx, @@ -11,6 +13,7 @@ use fxhash::{FxHashMap, FxHashSet}; use lazy_static::*; use serde::{Deserialize, Serialize}; use std::any::type_name; + #[derive(Default, Serialize, Deserialize, Eq, PartialEq, Clone, Debug)] pub(super) struct IStringWrapper(pub(super) IString); impl std::hash::Hash for IStringWrapper { @@ -25,7 +28,9 @@ pub type MissingMethodPatcher = FxHashMap MethodImpl>>; #[derive(Default, Serialize, Deserialize, Clone)] pub struct Assembly { + /// A list of strings used in this assembly strings: BiMap, + /// A list of all types in this assembly types: BiMap, class_refs: BiMap, class_defs: FxHashMap, @@ -36,8 +41,57 @@ pub struct Assembly { fields: BiMap, statics: BiMap, method_defs: FxHashMap, + // Cache containing information about the stack usage of a CIL node. 
+ //#[serde(skip)] + //cache: CachedAssemblyInfo, } impl Assembly { + pub fn fuel_from_env(&self) -> OptFuel { + match std::env::var("OPT_FUEL") { + Ok(fuel) => match fuel.parse::<u32>() { + Ok(fuel) => OptFuel::new(fuel), + Err(_) => self.default_fuel(), + }, + Err(_) => self.default_fuel(), + } + } + pub fn default_fuel(&self) -> OptFuel { + OptFuel::new((self.method_defs.len() * 4 + self.roots.len()) as u32) + } + pub(crate) fn borrow_methoddef(&mut self, def_id: MethodDefIdx) -> MethodDef { + self.method_defs.remove(&def_id).unwrap() + } + pub(crate) fn return_methoddef(&mut self, def_id: MethodDefIdx, def: MethodDef) { + assert!( + self.method_defs.insert(def_id, def).is_none(), + "Could not return a methoddef, because a method def is already present." + ); + } + /// Optimizes the assembly until all fuel is consumed, or no more progress can be made + pub fn opt(&mut self, fuel: &mut OptFuel) { + let mut cache = SideEffectInfoCache::default(); + while !fuel.exchausted() { + let prev = fuel.clone(); + self.opt_sigle_pass(fuel, &mut cache); + // No fuel consumed, progress can't be made, break. + if *fuel == prev { + break; + } + let _pass_min_cost: bool = fuel.consume(1); + } + } + /// Optimizes the assembly, consuming some fuel. This performs a single optimization pass.
+ pub fn opt_sigle_pass(&mut self, fuel: &mut OptFuel, cache: &mut SideEffectInfoCache) { + let method_def_idxs: Box<[_]> = self.method_defs.keys().copied().collect(); + for method in method_def_idxs { + let mut tmp_method = self.borrow_methoddef(method); + tmp_method.optimize(self, cache, fuel); + self.return_methoddef(method, tmp_method); + if fuel.exchausted() { + break; + } + } + } pub fn find_methods_matching<'a, P: std::str::pattern::Pattern + Clone + 'a>( &self, pat: P, diff --git a/cilly/src/v2/bimap.rs b/cilly/src/v2/bimap.rs index 7b7a22b4..a36ed984 100644 --- a/cilly/src/v2/bimap.rs +++ b/cilly/src/v2/bimap.rs @@ -30,6 +30,14 @@ impl &Value { self.0.get(key.as_bimap_index().get() as usize - 1).unwrap() } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } pub type BiMapIndex = NonZeroU32; pub trait IntoBiMapIndex { diff --git a/cilly/src/v2/builtins/atomics.rs b/cilly/src/v2/builtins/atomics.rs index e2d75142..2c6d8707 100644 --- a/cilly/src/v2/builtins/atomics.rs +++ b/cilly/src/v2/builtins/atomics.rs @@ -1,13 +1,32 @@ use crate::v2::{ asm::MissingMethodPatcher, cilnode::MethodKind, cilroot::BranchCond, BasicBlock, BinOp, - CILNode, CILRoot, ClassRef, Int, MethodDef, MethodImpl, MethodRef, Type, + CILNode, CILRoot, ClassRef, Const, Int, MethodDef, MethodImpl, MethodRef, Type, }; use super::{ super::{Assembly, NodeIdx}, math::{int_max, int_min}, }; - +/// Emulates operations on bytes using operations on int32s. Endianness dependent, can cause segfaults when used on a page boundary. +/// TODO: remove when .NET 9 is out. +pub fn emulate_uint8_cmp_xchng(asm: &mut Assembly, patcher: &mut MissingMethodPatcher) { + generate_atomic( + asm, + patcher, + "cmpxchng", + |asm, prev, arg, _| { + // 1st, mask the previous value + let prev_mask = asm.alloc_node(Const::I32(0x00FFFFFF)); + let prev = asm.alloc_node(CILNode::BinOp(prev, prev_mask, BinOp::And)); + // 2nd.
Shift the byte into the most significant position (by 24 bits) + let shift_ammount = asm.alloc_node(Const::I32(24)); + let arg = asm.alloc_node(CILNode::BinOp(arg, shift_ammount, BinOp::Shl)); + // Assemble those into a new value for the target memory. + asm.alloc_node(CILNode::BinOp(prev, arg, BinOp::Or)) + }, + Int::I32, + ) +} pub fn generate_atomic( asm: &mut Assembly, patcher: &mut MissingMethodPatcher, @@ -98,7 +117,9 @@ pub fn generate_all_atomics(asm: &mut Assembly, patcher: &mut MissingMethodPatch // Max generate_atomic_for_ints(asm, patcher, "max", int_max); // Max - generate_atomic_for_ints(asm, patcher, "min", int_min) + generate_atomic_for_ints(asm, patcher, "min", int_min); + // Emulates 1 byte compare exchange + emulate_uint8_cmp_xchng(asm, patcher) } /* .method public hidebysig static diff --git a/cilly/src/v2/cache.rs b/cilly/src/v2/cache.rs new file mode 100644 index 00000000..ba54146b --- /dev/null +++ b/cilly/src/v2/cache.rs @@ -0,0 +1,116 @@ +use std::marker::PhantomData; + +use super::{bimap::IntoBiMapIndex, Assembly, NodeIdx}; + +/// Cached information about some data.
+#[derive(Clone)] +pub struct CachedAssemblyInfo { + data: Vec>, + function: PhantomData, + key: PhantomData, +} +impl Default for CachedAssemblyInfo { + fn default() -> Self { + Self { + data: Default::default(), + function: Default::default(), + key: Default::default(), + } + } +} +impl> + CachedAssemblyInfo +{ + pub fn get( + &mut self, + cached: &mut CachedAssemblyInfo, + asm: &mut Assembly, + key: Key, + ) -> Cached { + let index = key.as_bimap_index().get() as usize - 1; + // 1st expand the internal data if that is needed + if self.data.len() <= index { + self.data.extend((self.data.len()..=index).map(|_| None)); + } + assert!(self.data.len() > index); + match &self.data[index] { + Some(val) => val.clone(), + None => { + self.data[index] = Some(CachedAssemblyFunction::not_in_cache(cached, asm, key)); + self.data[index].clone().unwrap() + } + } + } +} +pub trait CachedAssemblyFunction: Sized { + fn not_in_cache( + cached: &mut CachedAssemblyInfo, + asm: &mut Assembly, + key: Key, + ) -> Cached; +} +/// Cached information about the stack usage of a [`CILNode`] +#[derive(Clone, Copy)] +pub struct StackUsage; +impl CachedAssemblyFunction for StackUsage { + fn not_in_cache( + cached: &mut CachedAssemblyInfo, + asm: &mut Assembly, + key: NodeIdx, + ) -> NonMaxU32 { + match asm.get_node(key) { + super::CILNode::Const(_) => todo!(), + super::CILNode::BinOp(_, _, _) => todo!(), + super::CILNode::UnOp(_, _) => todo!(), + super::CILNode::LdLoc(_) + | super::CILNode::LdLocA(_) + | super::CILNode::LdArg(_) + | super::CILNode::LdArgA(_) => NonMaxU32::new(1), + super::CILNode::Call(_) => todo!(), + super::CILNode::IntCast { + input, + target, + extend, + } => todo!(), + super::CILNode::FloatCast { + input, + target, + is_signed, + } => todo!(), + super::CILNode::RefToPtr(_) => todo!(), + super::CILNode::PtrCast(_, _) => todo!(), + super::CILNode::LdFieldAdress { addr, field } => todo!(), + super::CILNode::LdField { addr, field } => todo!(), + super::CILNode::LdInd { +
addr, + tpe, + volitale, + } => todo!(), + super::CILNode::SizeOf(_) => NonMaxU32::new(1), + super::CILNode::GetException => NonMaxU32::new(1), + super::CILNode::IsInst(_, _) => todo!(), + super::CILNode::CheckedCast(_, _) => todo!(), + super::CILNode::CallI(_) => todo!(), + super::CILNode::LocAlloc { size } => todo!(), + super::CILNode::LdStaticField(_) => todo!(), + super::CILNode::LdFtn(_) => todo!(), + super::CILNode::LdTypeToken(_) => todo!(), + super::CILNode::LdLen(_) => todo!(), + super::CILNode::LocAllocAlgined { tpe, align } => todo!(), + super::CILNode::LdElelemRef { array, index } => todo!(), + super::CILNode::UnboxAny { object, tpe } => todo!(), + } + } +} +/// A u32 integer which is never u32::MAX; stored internally as `val + 1` in a NonZeroU32. +#[derive(Clone, Copy, Eq, PartialEq)] +pub struct NonMaxU32(std::num::NonZeroU32); +impl NonMaxU32 { + pub fn new(val: u32) -> Self { + assert_ne!(val, u32::MAX); + NonMaxU32(std::num::NonZeroU32::new(val + 1).unwrap()) + } + pub fn get(self) -> u32 { + self.0.get() - 1 + } +} diff --git a/cilly/src/v2/class.rs b/cilly/src/v2/class.rs index 01072787..59c2de4b 100644 --- a/cilly/src/v2/class.rs +++ b/cilly/src/v2/class.rs @@ -8,6 +8,7 @@ use crate::{v2::MethodDef, DotnetTypeRef as V1ClassRef}; use super::{ access::Access, bimap::{BiMapIndex, IntoBiMapIndex}, + opt::{OptFuel, SideEffectInfoCache}, Assembly, MethodDefIdx, StringIdx, Type, }; @@ -290,6 +291,9 @@ impl ClassDef { } self.methods_mut().push(ref_idx); } + /// Optimizes this class definition, consuming fuel + pub fn opt(&mut self, fuel: &mut OptFuel, asm: &mut Assembly, cache: &mut SideEffectInfoCache) { + } } #[derive(Hash, PartialEq, Eq, Clone, Debug, Copy, Serialize, Deserialize)] pub struct ClassDefIdx(pub ClassRefIdx); diff --git a/cilly/src/v2/mod.rs b/cilly/src/v2/mod.rs index 05b6eea3..01aaf8dd 100644 --- a/cilly/src/v2/mod.rs +++ b/cilly/src/v2/mod.rs @@ -24,6 +24,7 @@ pub mod basic_block; pub mod bimap; pub mod builtins; pub mod c_exporter; +pub mod cache; pub mod cilnode; pub mod cilroot;
pub mod class; diff --git a/cilly/src/v2/opt/mod.rs b/cilly/src/v2/opt/mod.rs index d5138be8..7e946061 100644 --- a/cilly/src/v2/opt/mod.rs +++ b/cilly/src/v2/opt/mod.rs @@ -3,10 +3,30 @@ use std::collections::HashMap; use crate::v2::{ asm::ILASM_FLAVOUR, cilnode::MethodKind, il_exporter::ILExporter, Assembly, MethodDef, }; - +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct OptFuel(u32); +impl OptFuel { + /// Creates *fuel* fuel + pub fn new(fuel: u32) -> Self { + Self(fuel) + } + /// Decreases the amount of fuel available by `cost` and returns true if at least `cost` fuel is present; otherwise leaves the fuel untouched and returns false. + pub fn consume(&mut self, cost: u32) -> bool { + if self.0 >= cost { + self.0 -= cost; + true + } else { + false + } + } + /// Checks if no fuel remains + pub fn exchausted(&self) -> bool { + self.0 == 0 + } +} use super::{ - method::LocalDef, BasicBlock, BinOp, CILIter, CILNode, CILRoot, Float, Int, MethodImpl, - NodeIdx, Type, + method::LocalDef, BasicBlock, BinOp, CILIter, CILIterElem, CILNode, CILRoot, Float, Int, + MethodImpl, NodeIdx, Type, }; impl CILNode { pub fn propagate_locals( @@ -15,24 +35,28 @@ impl CILNode { idx: u32, tpe: Type, new_node: NodeIdx, + fuel: &mut OptFuel, ) -> Self { match self { CILNode::Const(_) => self.clone(), CILNode::BinOp(rhs, lhs, biop) => { let rhs = asm.get_node(*rhs).clone(); let lhs = asm.get_node(*lhs).clone(); - let rhs = rhs.propagate_locals(asm, idx, tpe, new_node); - let lhs = lhs.propagate_locals(asm, idx, tpe, new_node); + let rhs = rhs.propagate_locals(asm, idx, tpe, new_node, fuel); + let lhs = lhs.propagate_locals(asm, idx, tpe, new_node, fuel); CILNode::BinOp(asm.alloc_node(rhs), asm.alloc_node(lhs), *biop) } CILNode::UnOp(input, unop) => { let input = asm.get_node(*input).clone(); - let input = input.propagate_locals(asm, idx, tpe, new_node); + let input = input.propagate_locals(asm, idx, tpe, new_node, fuel); let input = asm.alloc_node(input); CILNode::UnOp(input, unop.clone()) } CILNode::LdLoc(loc) => { if *loc == idx { + if
!fuel.consume(1) { + return self.clone(); + } match tpe { Type::Float(_) | Type::Bool @@ -57,7 +81,7 @@ impl CILNode { extend, } => { let input = asm.get_node(*input).clone(); - let input = input.propagate_locals(asm, idx, tpe, new_node); + let input = input.propagate_locals(asm, idx, tpe, new_node, fuel); let input = asm.alloc_node(input); CILNode::IntCast { input, @@ -71,7 +95,7 @@ impl CILNode { is_signed, } => { let input = asm.get_node(*input).clone(); - let input = input.propagate_locals(asm, idx, tpe, new_node); + let input = input.propagate_locals(asm, idx, tpe, new_node, fuel); let input = asm.alloc_node(input); CILNode::FloatCast { input, @@ -81,19 +105,19 @@ impl CILNode { } CILNode::RefToPtr(ptr) => { let ptr = asm.get_node(*ptr).clone(); - let ptr = ptr.propagate_locals(asm, idx, tpe, new_node); + let ptr = ptr.propagate_locals(asm, idx, tpe, new_node, fuel); let ptr = asm.alloc_node(ptr); CILNode::RefToPtr(ptr) } CILNode::PtrCast(ptr, cast_res) => { let ptr = asm.get_node(*ptr).clone(); - let ptr = ptr.propagate_locals(asm, idx, tpe, new_node); + let ptr = ptr.propagate_locals(asm, idx, tpe, new_node, fuel); let ptr = asm.alloc_node(ptr); CILNode::PtrCast(ptr, cast_res.clone()) } CILNode::LdFieldAdress { addr, field } => { let addr = asm.get_node(*addr).clone(); - let addr = addr.propagate_locals(asm, idx, tpe, new_node); + let addr = addr.propagate_locals(asm, idx, tpe, new_node, fuel); let addr = asm.alloc_node(addr); CILNode::LdFieldAdress { addr, @@ -102,7 +126,7 @@ impl CILNode { } CILNode::LdField { addr, field } => { let addr = asm.get_node(*addr).clone(); - let addr = addr.propagate_locals(asm, idx, tpe, new_node); + let addr = addr.propagate_locals(asm, idx, tpe, new_node, fuel); let addr = asm.alloc_node(addr); CILNode::LdField { addr, @@ -115,7 +139,7 @@ impl CILNode { volitale, } => { let addr = asm.get_node(*addr).clone(); - let addr = addr.propagate_locals(asm, idx, tpe, new_node); + let addr = addr.propagate_locals(asm, idx, tpe, 
new_node, fuel); let addr = asm.alloc_node(addr); CILNode::LdInd { addr, @@ -135,7 +159,7 @@ impl CILNode { | CILNode::LocAllocAlgined { .. } => self.clone(), CILNode::LdLen(arr) => { let arr = asm.get_node(*arr).clone(); - let arr = arr.propagate_locals(asm, idx, tpe, new_node); + let arr = arr.propagate_locals(asm, idx, tpe, new_node, fuel); let arr = asm.alloc_node(arr); CILNode::LdLen(arr) } @@ -153,6 +177,7 @@ impl BasicBlock { asm: &mut Assembly, locals: &[LocalDef], cache: &mut SideEffectInfoCache, + fuel: &mut OptFuel, ) { let root_iter: Vec<_> = self .roots_mut() @@ -174,6 +199,12 @@ impl BasicBlock { if cache.has_side_effects(tree, asm) { break 'm; } + // Check that it does not depend on itself + if CILIter::new(asm.get_node(tree).clone(), asm) + .any(|node| node == CILIterElem::Node(CILNode::LdLoc(loc))) + { + break 'm; + } let mut tmp_root = asm.get_root(*root).clone(); for node in tmp_root.nodes_mut() { @@ -189,6 +220,7 @@ impl BasicBlock { loc as u32, *asm.get_type(locals[loc as usize].1), tree, + fuel, ); *node = asm.alloc_node(new_node); } @@ -203,7 +235,12 @@ impl BasicBlock { } impl MethodImpl { /// Propagates writes to local variables. - pub fn propagate_locals(&mut self, asm: &mut Assembly, cache: &mut SideEffectInfoCache) { + pub fn propagate_locals( + &mut self, + asm: &mut Assembly, + cache: &mut SideEffectInfoCache, + fuel: &mut OptFuel, + ) { // Optimization only suported for methods with locals let MethodImpl::MethodBody { blocks, locals } = self else { return; @@ -211,10 +248,15 @@ impl MethodImpl { blocks .iter_mut() - .for_each(|block| block.local_opt(asm, locals, cache)); + .for_each(|block| block.local_opt(asm, locals, cache, fuel)); } /// Replaces writes to locals, which are never read, with pops. 
- pub fn remove_dead_writes(&mut self, asm: &mut Assembly, cache: &mut SideEffectInfoCache) { + pub fn remove_dead_writes( + &mut self, + asm: &mut Assembly, + cache: &mut SideEffectInfoCache, + fuel: &mut OptFuel, + ) { // Optimization only suported for methods with locals let MethodImpl::MethodBody { blocks, locals } = self else { return; @@ -222,6 +264,9 @@ impl MethodImpl { // Check if each local is ever read or its address is taken let mut local_reads = vec![false; locals.len()]; let mut local_address_of = vec![false; locals.len()]; + if !fuel.consume(8) { + return; + } for node in blocks .iter() .flat_map(|block| block.iter_roots()) @@ -305,11 +350,15 @@ impl SideEffectInfoCache { } } impl MethodDef { - pub fn optimize(&mut self, asm: &mut Assembly, cache: &mut SideEffectInfoCache, passes: usize) { - for _ in 0..passes { - //self.implementation_mut().propagate_locals(asm, cache); - self.implementation_mut().remove_dead_writes(asm, cache); - } + pub fn optimize( + &mut self, + asm: &mut Assembly, + cache: &mut SideEffectInfoCache, + fuel: &mut OptFuel, + ) { + self.implementation_mut().propagate_locals(asm, cache, fuel); + self.implementation_mut() + .remove_dead_writes(asm, cache, fuel); } } #[test] @@ -344,10 +393,6 @@ fn opt_mag() { blocks: vec![bb], locals, }; - for _ in 0..3 { - mimpl.propagate_locals(&mut asm, &mut cache); - mimpl.remove_dead_writes(&mut asm, &mut cache); - } let main_module = asm.main_module(); let sig = asm.sig( @@ -374,6 +419,10 @@ fn opt_mag() { MethodImpl::Missing, vec![None, None], )); + let mut fuel = OptFuel::new(77); + + asm.opt(&mut fuel); + /* .method public hidebysig static float32 'mag'(float32 'x',float32 'y') cil managed {// Method ID MethodDefIdx(MethodRefIdx(18)) .maxstack 8 diff --git a/dotnet_aot/Cargo.toml b/dotnet_aot/Cargo.toml new file mode 100644 index 00000000..8e1b840d --- /dev/null +++ b/dotnet_aot/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "dotnet_aot" +version = "0.1.0" +edition = "2021" + +[dependencies] 
diff --git a/dotnet_aot/src/lib.rs b/dotnet_aot/src/lib.rs new file mode 100644 index 00000000..b93cf3ff --- /dev/null +++ b/dotnet_aot/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: u64, right: u64) -> u64 { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index 99c83474..f7933787 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -245,55 +245,64 @@ pub fn insert_ffi_functions(asm: &mut Assembly, tcx: TyCtxt) { let mut __rust_alloc = Method::new( AccessModifer::Private, MethodType::Static, - FnSig::new(&[Type::USize, Type::USize], Type::Ptr(Type::U8.into())), + FnSig::new(&[Type::USize, Type::USize], ptr!(Type::U8)), "__rust_alloc", - vec![], - if *crate::config::CHECK_ALLOCATIONS { - vec![ - BasicBlock::new( - vec![CILRoot::BTrue { - target: 2, - sub_target: 0, - cond: lt_un!(conv_usize!(ldc_u64!(MAX_ALLOC_SIZE)), CILNode::LDArg(0)), - } - .into()], - 0, - None, - ), - BasicBlock::new( - vec![CILRoot::Ret { + vec![(None, ptr!(Type::U8))], + vec![ + BasicBlock::new( + vec![ + CILRoot::STLoc { + local: 0, tree: call!( CallSite::aligned_alloc(), [CILNode::LDArg(0), CILNode::LDArg(1)] - ), + ) + .cast_ptr(ptr!(Type::U8)), } - .into()], - 1, - None, - ), - BasicBlock::new( + .into(), + CILRoot::GoTo { + target: 0, + sub_target: 2, + } + .into(), + CILRoot::JumpingPad { + source: 0, + target: 2, + } + .into(), + ], + 0, + Some(Handler::Blocks(vec![BasicBlock::new( vec![ - CILRoot::throw(&format!("Max alloc size of {MAX_ALLOC_SIZE} exceeded.")) - .into(), + CILRoot::STLoc { + local: 0, + tree: conv_usize!(ldc_i32!(0)).cast_ptr(ptr!(Type::U8)), + } + .into(), + CILRoot::GoTo { + target: 0, + sub_target: 2, + } + .into(), + CILRoot::JumpingPad { + source: 0, + target: 2, + } + .into(), ], - 2, + 1, None, - ), - ] - } else { - vec![BasicBlock::new( + )])), + ), + BasicBlock::new( vec![CILRoot::Ret { - tree: 
call!( - CallSite::aligned_alloc(), - [CILNode::LDArg(0), CILNode::LDArg(1)] - ) - .cast_ptr(ptr!(Type::U8)), + tree: CILNode::LDLoc(0), } .into()], - 0, + 2, None, - )] - }, + ), + ], vec![Some("size".into()), Some("align".into())], ); @@ -301,7 +310,7 @@ pub fn insert_ffi_functions(asm: &mut Assembly, tcx: TyCtxt) { let mut __rust_alloc_zeroed = Method::new( AccessModifer::Private, MethodType::Static, - FnSig::new(&[Type::USize, Type::USize], Type::Ptr(Type::U8.into())), + FnSig::new(&[Type::USize, Type::USize], ptr!(Type::U8)), "__rust_alloc_zeroed", vec![(Some("alloc_ptr".into()), ptr!((Type::U8)))], if *crate::config::CHECK_ALLOCATIONS { @@ -370,10 +379,7 @@ pub fn insert_ffi_functions(asm: &mut Assembly, tcx: TyCtxt) { let mut __rust_dealloc = Method::new( AccessModifer::Private, MethodType::Static, - FnSig::new( - &[Type::Ptr(Type::U8.into()), Type::USize, Type::USize], - Type::Void, - ), + FnSig::new(&[ptr!(Type::U8), Type::USize, Type::USize], Type::Void), "__rust_dealloc", vec![], vec![BasicBlock::new( @@ -430,13 +436,8 @@ pub fn insert_ffi_functions(asm: &mut Assembly, tcx: TyCtxt) { AccessModifer::Private, MethodType::Static, FnSig::new( - &[ - Type::Ptr(Type::U8.into()), - Type::USize, - Type::USize, - Type::USize, - ], - Type::Ptr(Type::U8.into()), + &[ptr!(Type::U8), Type::USize, Type::USize, Type::USize], + ptr!(Type::U8), ), "__rust_realloc", vec![], diff --git a/src/terminator/intrinsics/mod.rs b/src/terminator/intrinsics/mod.rs index d403ae75..81879f98 100644 --- a/src/terminator/intrinsics/mod.rs +++ b/src/terminator/intrinsics/mod.rs @@ -436,6 +436,15 @@ pub fn handle_intrinsic<'tcx>( ) .cast_ptr(src_type.clone()) } + // TODO: this is a bug, on purpose. The 1 byte compare exchange is not supported untill .NET 9. Remove after November, when .NET 9 Releases. 
+ /* + + + + + + */ + Type::U8 => comaprand, _ => { let call_site = CallSite::new( Some(interlocked), diff --git a/test/intrinsics/atomics.rs b/test/intrinsics/atomics.rs index dee8a4bb..85b2872e 100644 --- a/test/intrinsics/atomics.rs +++ b/test/intrinsics/atomics.rs @@ -24,9 +24,10 @@ extern crate core; extern "C" { fn atomic_xor_u32(addr: &mut u32, xorand: u32) -> u32; fn atomic_nand_u32(addr: &mut u32, xorand: u32) -> u32; + //fn atomic_cmpxchng_i32(addr: *mut i32, bytes: i32) -> i32; } use core::ptr::addr_of_mut; - +//fn compare_exchange_byte(addr:&mut u8, byte:u8)->u8 fn main() { let mut u: u32 = black_box(20); test_eq!( diff --git a/test/intrinsics/malloc.rs b/test/intrinsics/malloc.rs index 7418ad76..aa683bbd 100644 --- a/test/intrinsics/malloc.rs +++ b/test/intrinsics/malloc.rs @@ -16,6 +16,7 @@ )] #![no_std] include!("../common.rs"); + fn main() { unsafe { let mut buff = malloc(64); @@ -23,5 +24,6 @@ fn main() { buff = realloc(buff, 128); test_ne!(buff, 0_usize as *mut _); free(buff); + let tmp = __rust_alloc(64, 8); } }