From 2a9707549f626464d6ce9cca7847d52b2b5de52d Mon Sep 17 00:00:00 2001 From: lmittmann <3458786+lmittmann@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:34:10 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Chumsky=20Parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added lexer * added parser (wip) * added instructions * cleanup * added abi types (wip) * extracted `Span` * fix * fixed `sol_type` parser * use choice instead of or * or to choice * dropped old grammar * added parse method * dropped clap for argh * ci on pr * cleanup * refactored ast to separate mod * support public/external, view/pure, payable/nonpayable keywords in sol function definitions * simplify instruction parser * updated lexing error handling * added ariadne (utf8-not working) * fixed span positions of src with utf8 chars * return parser errors in `parse` --------- Co-authored-by: lmittmann --- .github/workflows/ci.yml | 4 +- Cargo.lock | 262 +++++----- Cargo.toml | 4 +- crates/ast/Cargo.toml | 2 +- crates/ast/build.rs | 3 - crates/ast/src/ast.rs | 94 ++++ crates/ast/src/error.rs | 18 - crates/ast/src/grammar.lalrpop | 237 --------- crates/ast/src/lexer.rs | 172 ++++++ crates/ast/src/lib.rs | 101 +--- crates/ast/src/parser.rs | 921 +++++++++++++++++++++++++-------- crates/ast/src/util.rs | 55 ++ crates/cli/Cargo.toml | 3 +- crates/cli/src/main.rs | 27 +- 14 files changed, 1199 insertions(+), 704 deletions(-) delete mode 100644 crates/ast/build.rs create mode 100644 crates/ast/src/ast.rs delete mode 100644 crates/ast/src/error.rs delete mode 100644 crates/ast/src/grammar.lalrpop create mode 100644 crates/ast/src/lexer.rs create mode 100644 crates/ast/src/util.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64490e5..bb36831 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,5 @@ name: Rust CI -on: [push] +on: [push, pull_request] env: CARGO_TERM_COLOR: always @@ -49,4 +49,4 @@ jobs: 
cache-on-failure: true - name: build id: build - run: cargo build --workspace --all --locked + run: cargo build --workspace --all --locked diff --git a/Cargo.lock b/Cargo.lock index 9a3dfe6..1cd1a19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,18 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -11,11 +23,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "alloy-dyn-abi" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f95d76a38cae906fd394a5afb0736aaceee5432efe76addfd71048e623e208af" +checksum = "e6228abfc751a29cde117b0879b805a3e0b3b641358f063272c83ca459a56886" dependencies = [ "alloy-json-abi", "alloy-primitives", @@ -30,9 +48,9 @@ dependencies = [ [[package]] name = "alloy-json-abi" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c66eec1acdd96b39b995b8f5ee5239bc0c871d62c527ae1ac9fd1d7fecd455" +checksum = "d46eb5871592c216d39192499c95a99f7175cb94104f88c307e6dc960676d9f1" dependencies = [ "alloy-primitives", "alloy-sol-type-parser", @@ -42,9 +60,9 @@ dependencies = [ [[package]] name = "alloy-primitives" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecb848c43f6b06ae3de2e4a67496cbbabd78ae87db0f1248934f15d76192c6a" +checksum = "38f35429a652765189c1c5092870d8360ee7b7769b09b06d89ebaefd34676446" dependencies = [ 
"alloy-rlp", "bytes", @@ -52,7 +70,7 @@ dependencies = [ "const-hex", "derive_more", "foldhash", - "hashbrown", + "hashbrown 0.15.0", "hex-literal", "indexmap", "itoa", @@ -80,9 +98,9 @@ dependencies = [ [[package]] name = "alloy-sol-macro" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "661c516eb1fa3294cc7f2fb8955b3b609d639c282ac81a4eedb14d3046db503a" +checksum = "3b2395336745358cc47207442127c47c63801a7065ecc0aa928da844f8bb5576" dependencies = [ "alloy-sol-macro-expander", "alloy-sol-macro-input", @@ -94,9 +112,9 @@ dependencies = [ [[package]] name = "alloy-sol-macro-expander" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecbabb8fc3d75a0c2cea5215be22e7a267e3efde835b0f2a8922f5e3f5d47683" +checksum = "9ed5047c9a241df94327879c2b0729155b58b941eae7805a7ada2e19436e6b39" dependencies = [ "alloy-sol-macro-input", "const-hex", @@ -112,9 +130,9 @@ dependencies = [ [[package]] name = "alloy-sol-macro-input" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16517f2af03064485150d89746b8ffdcdbc9b6eeb3d536fb66efd7c2846fbc75" +checksum = "5dee02a81f529c415082235129f0df8b8e60aa1601b9c9298ffe54d75f57210b" dependencies = [ "const-hex", "dunce", @@ -127,9 +145,9 @@ dependencies = [ [[package]] name = "alloy-sol-type-parser" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c07ebb0c1674ff8cbb08378d7c2e0e27919d2a2dae07ad3bca26174deda8d389" +checksum = "f631f0bd9a9d79619b27c91b6b1ab2c4ef4e606a65192369a1ee05d40dcf81cc" dependencies = [ "serde", "winnow", @@ -137,9 +155,9 @@ dependencies = [ [[package]] name = "alloy-sol-types" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e448d879903624863f608c552d10efb0e0905ddbee98b0049412799911eb062" +checksum = 
"c2841af22d99e2c0f82a78fe107b6481be3dd20b89bfb067290092794734343a" dependencies = [ "alloy-json-abi", "alloy-primitives", @@ -149,52 +167,34 @@ dependencies = [ ] [[package]] -name = "anstream" -version = "0.6.15" +name = "argh" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219" dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", + "argh_derive", + "argh_shared", ] [[package]] -name = "anstyle" -version = "1.0.8" +name = "argh_derive" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" - -[[package]] -name = "anstyle-parse" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a" dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" -dependencies = [ - "windows-sys 0.52.0", + "argh_shared", + "proc-macro2", + "quote", + "syn 2.0.79", ] [[package]] -name = "anstyle-wincon" -version = "3.0.4" +name = "argh_shared" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531" dependencies = [ - "anstyle", - "windows-sys 0.52.0", + "serde", ] [[package]] @@ -462,9 +462,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" 
+version = "1.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" dependencies = [ "shlex", ] @@ -476,51 +476,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "clap" -version = "4.5.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" -dependencies = [ - "clap_builder", - "clap_derive", -] - -[[package]] -name = "clap_builder" -version = "4.5.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" -dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", -] - -[[package]] -name = "clap_derive" -version = "4.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +name = "chumsky" +version = "1.0.0-alpha.7" +source = "git+https://github.com/zesterer/chumsky.git?rev=716bec8#716bec878a861204b9d817c07c4f170de33575c9" dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.79", + "hashbrown 0.14.5", + "regex-automata 0.3.9", + "serde", + "stacker", + "unicode-ident", ] -[[package]] -name = "clap_lex" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" - -[[package]] -name = "colorchoice" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" - [[package]] name = "concolor" version = "0.1.1" @@ -835,6 +801,16 @@ dependencies = [ "subtle", ] +[[package]] +name = 
"hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.0" @@ -896,9 +872,9 @@ version = "0.0.1" dependencies = [ "alloy-dyn-abi", "alloy-primitives", + "chumsky", "evm-glue", "lalrpop", - "lalrpop-util", "thiserror", ] @@ -906,10 +882,9 @@ dependencies = [ name = "huff-cli" version = "0.0.1" dependencies = [ + "argh", "ariadne", - "clap", "huff-ast", - "lalrpop-util", "thiserror", ] @@ -940,7 +915,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.0", "serde", ] @@ -955,12 +930,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "is_terminal_polyfill" -version = "1.70.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" - [[package]] name = "itertools" version = "0.10.5" @@ -1031,7 +1000,7 @@ dependencies = [ "petgraph", "pico-args", "regex", - "regex-syntax", + "regex-syntax 0.8.5", "sha3", "string_cache", "term", @@ -1045,7 +1014,7 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "feee752d43abd0f4807a921958ab4131f692a44d4d599733d4419c5d586176ce" dependencies = [ - "regex-automata", + "regex-automata 0.4.8", "rustversion", ] @@ -1057,9 +1026,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.159" +version = "0.2.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "f0b21006cd1874ae9e650973c565615676dc4a274c965bb0a73796dac838ce4f" [[package]] name = 
"libm" @@ -1193,9 +1162,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pest" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdbef9d1d47087a895abd220ed25eb4ad973a5e26f6a4367b038c25e28dfc2d9" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", "thiserror", @@ -1296,9 +1265,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] @@ -1317,12 +1286,21 @@ dependencies = [ "rand", "rand_chacha", "rand_xorshift", - "regex-syntax", + "regex-syntax 0.8.5", "rusty-fork", "tempfile", "unarray", ] +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1401,8 +1379,19 @@ checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.8", + "regex-syntax 0.8.5", +] + +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", ] [[package]] @@ -1413,9 +1402,15 @@ checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.5", ] +[[package]] +name = "regex-syntax" 
+version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -1517,9 +1512,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "rusty-fork" @@ -1693,6 +1688,19 @@ dependencies = [ "der", ] +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -1712,12 +1720,6 @@ dependencies = [ "precomputed-hash", ] -[[package]] -name = "strsim" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" - [[package]] name = "subtle" version = "2.6.1" @@ -1748,9 +1750,9 @@ dependencies = [ [[package]] name = "syn-solidity" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20e7b52ad118b2153644eea95c6fc740b6c1555b2344fdab763fc9de4075f665" +checksum = "ebfc1bfd06acc78f16d8fd3ef846bc222ee7002468d10a7dce8d703d6eab89a3" dependencies = [ "paste", "proc-macro2", @@ -1881,12 +1883,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - [[package]] name = "valuable" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 9930c11..fd76113 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,9 +15,9 @@ huff-ast = { path = "crates/ast" } alloy-dyn-abi = "0.8" alloy-primitives = "0.8" ariadne = { version = "0.4.1", features = ["auto-color"] } -clap = { version = "4", features = ["derive"] } +argh = "0.1" evm-glue = { git = "https://github.com/Philogy/evm-glue.git", rev = "b51f6a0" } -lalrpop-util = { version = "0.22", features = ["lexer", "unicode"] } +chumsky = { git = "https://github.com/zesterer/chumsky.git", rev = "716bec8" } thiserror = "1" [profile.profiling] diff --git a/crates/ast/Cargo.toml b/crates/ast/Cargo.toml index 2340264..f024bd2 100644 --- a/crates/ast/Cargo.toml +++ b/crates/ast/Cargo.toml @@ -11,5 +11,5 @@ lalrpop = "0.22" alloy-dyn-abi.workspace = true alloy-primitives.workspace = true evm-glue.workspace = true -lalrpop-util.workspace = true +chumsky.workspace = true thiserror.workspace = true diff --git a/crates/ast/build.rs b/crates/ast/build.rs deleted file mode 100644 index 7e68f91..0000000 --- a/crates/ast/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - lalrpop::process_src().unwrap(); -} diff --git a/crates/ast/src/ast.rs b/crates/ast/src/ast.rs new file mode 100644 index 0000000..b7d7af9 --- /dev/null +++ b/crates/ast/src/ast.rs @@ -0,0 +1,94 @@ +use alloy_dyn_abi::DynSolType; +use alloy_primitives::U256; +use chumsky::span::SimpleSpan; +use evm_glue::opcodes::Opcode; + +#[derive(Debug, PartialEq, Eq)] +pub struct Root<'src>(pub Box<[Definition<'src>]>); + +#[derive(Debug, PartialEq, Eq)] +pub enum Definition<'src> { + Macro(Macro<'src>), + Constant { + name: Spanned<&'src str>, + value: U256, + }, + Jumptable(Jumptable<'src>), + Table { + name: Spanned<&'src str>, + data: Box<[u8]>, + }, + SolFunction(SolFunction<'src>), + SolEvent(SolEvent<'src>), + SolError(SolError<'src>), +} + +#[derive(Debug, 
PartialEq, Eq)] +pub struct Macro<'src> { + pub name: Spanned<&'src str>, + pub args: Box<[Spanned<&'src str>]>, + pub takes_returns: Option<(Spanned, Spanned)>, + pub body: Box<[MacroStatement<'src>]>, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum MacroStatement<'src> { + LabelDefinition(Spanned<&'src str>), + Instruction(Instruction<'src>), + Invoke(Invoke<'src>), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Instruction<'src> { + Op(Spanned), + LabelReference(Spanned<&'src str>), + MacroArgReference(Spanned<&'src str>), + ConstantReference(Spanned<&'src str>), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Invoke<'src> { + Macro { + name: Spanned<&'src str>, + args: Box<[Instruction<'src>]>, + }, + BuiltinTableStart(Spanned<&'src str>), + BuiltinTableSize(Spanned<&'src str>), + BuiltinCodeSize(Spanned<&'src str>), + BuiltinCodeOffset(Spanned<&'src str>), + BuiltinFuncSig(Spanned<&'src str>), + BuiltinEventHash(Spanned<&'src str>), + BuiltinError(Spanned<&'src str>), +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Jumptable<'src> { + pub name: (Span, &'src str), + pub size: u8, + pub labels: Box<[&'src str]>, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct SolFunction<'src> { + pub name: Spanned<&'src str>, + pub args: Box<[Spanned]>, + pub rets: Box<[Spanned]>, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct SolEvent<'src> { + pub name: Spanned<&'src str>, + pub args: Box<[Spanned]>, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct SolError<'src> { + pub name: Spanned<&'src str>, + pub args: Box<[Spanned]>, +} + +/// A span. +pub type Span = SimpleSpan; + +/// A spanned value. 
+pub type Spanned = (T, Span); diff --git a/crates/ast/src/error.rs b/crates/ast/src/error.rs deleted file mode 100644 index 7ae4468..0000000 --- a/crates/ast/src/error.rs +++ /dev/null @@ -1,18 +0,0 @@ -use alloy_primitives::{hex, ruint}; -use thiserror::Error as ThisError; - -#[derive(ThisError, Debug, PartialEq)] -pub enum Error { - #[error("{0}")] - WordOverflow(#[from] ruint::ParseError), - - #[error("{0}")] - BytesOddLength(#[from] hex::FromHexError), - - #[error("{0}")] - InvalidSolType(#[from] alloy_dyn_abi::Error), - - /// Placeholder - #[error("TODO: {0}")] - Todo(String), -} diff --git a/crates/ast/src/grammar.lalrpop b/crates/ast/src/grammar.lalrpop deleted file mode 100644 index ed69323..0000000 --- a/crates/ast/src/grammar.lalrpop +++ /dev/null @@ -1,237 +0,0 @@ -use alloy_dyn_abi::DynSolType; -use alloy_primitives::{hex::FromHex, Bytes, U256}; -use evm_glue::opcodes::Opcode; -use lalrpop_util::ParseError; -use std::str::FromStr; -use crate as ast; -use crate::parser::{u256_as_push, u256_as_push_data}; - -grammar; - -extern { - type Error = ast::Error; -} - -pub Root: ast::Root<'input> = { - => ast::Root(def.into_iter().collect()), -}; - -pub Definition: ast::Definition<'input> = { - "#define" => m, - "#define" => c, - "#define" => t, - "#define" => f, - "#define" => e, - "#define" => e, -}; - -pub Macro: ast::Definition<'input> = { - "macro" > - "=" - "{" *> "}" - => { - let takes_returns = if let Some((_, _, takes, _, _, _, returns, _)) = tr { - Some((takes.parse::().unwrap(), returns.parse::().unwrap())) - } else { - None - }; - ast::Definition::Macro(ast::Macro{ name, args, takes_returns, body: stmts.into_boxed_slice() }) - } -}; - -pub MacroArgs: Box<[(ast::Span, &'input str)]> = { - "(" ")" => Box::new([]), - "(" > )*> ")" => { - let mut list = vec![first]; - list.extend(rest.into_iter().map(|(_, t)| t)); - list.into_boxed_slice() - } -}; - -pub MacroStatement: ast::MacroStatement<'input> = { - ":" => ast::MacroStatement::LabelDefinition(label), 
- => ast::MacroStatement::Instruction(inst), - => ast::MacroStatement::Invoke(invoke), -}; - -pub Instruction: ast::Instruction<'input> = { - => ast::Instruction::Op(u256_as_push(word)), - "push1" =>? u256_as_push_data::<1>(word).map(Opcode::PUSH1).map(ast::Instruction::Op), - "push2" =>? u256_as_push_data::<2>(word).map(Opcode::PUSH2).map(ast::Instruction::Op), - "push3" =>? u256_as_push_data::<3>(word).map(Opcode::PUSH3).map(ast::Instruction::Op), - "push4" =>? u256_as_push_data::<4>(word).map(Opcode::PUSH4).map(ast::Instruction::Op), - "push5" =>? u256_as_push_data::<5>(word).map(Opcode::PUSH5).map(ast::Instruction::Op), - "push6" =>? u256_as_push_data::<6>(word).map(Opcode::PUSH6).map(ast::Instruction::Op), - "push7" =>? u256_as_push_data::<7>(word).map(Opcode::PUSH7).map(ast::Instruction::Op), - "push8" =>? u256_as_push_data::<8>(word).map(Opcode::PUSH8).map(ast::Instruction::Op), - "push9" =>? u256_as_push_data::<9>(word).map(Opcode::PUSH9).map(ast::Instruction::Op), - "push10" =>? u256_as_push_data::<10>(word).map(Opcode::PUSH10).map(ast::Instruction::Op), - "push11" =>? u256_as_push_data::<11>(word).map(Opcode::PUSH11).map(ast::Instruction::Op), - "push12" =>? u256_as_push_data::<12>(word).map(Opcode::PUSH12).map(ast::Instruction::Op), - "push13" =>? u256_as_push_data::<13>(word).map(Opcode::PUSH13).map(ast::Instruction::Op), - "push14" =>? u256_as_push_data::<14>(word).map(Opcode::PUSH14).map(ast::Instruction::Op), - "push15" =>? u256_as_push_data::<15>(word).map(Opcode::PUSH15).map(ast::Instruction::Op), - "push16" =>? u256_as_push_data::<16>(word).map(Opcode::PUSH16).map(ast::Instruction::Op), - "push17" =>? u256_as_push_data::<17>(word).map(Opcode::PUSH17).map(ast::Instruction::Op), - "push18" =>? u256_as_push_data::<18>(word).map(Opcode::PUSH18).map(ast::Instruction::Op), - "push19" =>? u256_as_push_data::<19>(word).map(Opcode::PUSH19).map(ast::Instruction::Op), - "push20" =>? 
u256_as_push_data::<20>(word).map(Opcode::PUSH20).map(ast::Instruction::Op), - "push21" =>? u256_as_push_data::<21>(word).map(Opcode::PUSH21).map(ast::Instruction::Op), - "push22" =>? u256_as_push_data::<22>(word).map(Opcode::PUSH22).map(ast::Instruction::Op), - "push23" =>? u256_as_push_data::<23>(word).map(Opcode::PUSH23).map(ast::Instruction::Op), - "push24" =>? u256_as_push_data::<24>(word).map(Opcode::PUSH24).map(ast::Instruction::Op), - "push25" =>? u256_as_push_data::<25>(word).map(Opcode::PUSH25).map(ast::Instruction::Op), - "push26" =>? u256_as_push_data::<26>(word).map(Opcode::PUSH26).map(ast::Instruction::Op), - "push27" =>? u256_as_push_data::<27>(word).map(Opcode::PUSH27).map(ast::Instruction::Op), - "push28" =>? u256_as_push_data::<28>(word).map(Opcode::PUSH28).map(ast::Instruction::Op), - "push29" =>? u256_as_push_data::<29>(word).map(Opcode::PUSH29).map(ast::Instruction::Op), - "push30" =>? u256_as_push_data::<30>(word).map(Opcode::PUSH30).map(ast::Instruction::Op), - "push31" =>? u256_as_push_data::<31>(word).map(Opcode::PUSH31).map(ast::Instruction::Op), - "push32" =>? 
u256_as_push_data::<32>(word).map(Opcode::PUSH32).map(ast::Instruction::Op), - => { - if let Ok(op) = Opcode::from_str(id) { - ast::Instruction::Op(op) - }else { - ast::Instruction::LabelReference(id) - } - }, - "<" ">" => ast::Instruction::MacroArgReference(referenc), - "[" "]" => ast::Instruction::ConstantReference(referenc), -} - -pub Invoke: ast::Invoke<'input> = { - > => ast::Invoke::Macro{ name, args }, - "__tablestart" "(" ")" => ast::Invoke::BuiltinTableStart(arg), - "__tablesize" "(" ")" => ast::Invoke::BuiltinTableSize(arg), - "__codesize" "(" ")" => ast::Invoke::BuiltinCodeSize(arg), - "__codeoffset" "(" ")" => ast::Invoke::BuiltinCodeOffset(arg), - "__FUNC_SIG" "(" ")" => ast::Invoke::BuiltinFuncSig(arg), - "__EVENT_HASH" "(" ")" => ast::Invoke::BuiltinEventHash(arg), - "__ERROR" "(" ")" => ast::Invoke::BuiltinError(arg), -}; - -pub InvokeMacroArgs: Box<[(ast::Span, ast::Instruction<'input>)]> = { - "(" ")" => Box::new([]), - "(" > )*> ")" => { - let mut list = vec![first]; - list.extend(rest.into_iter().map(|(_, t)| t)); - list.into_boxed_slice() - } -} - -pub Constant: ast::Definition<'input> = { - "constant" > "=" => ast::Definition::Constant { name, value } -}; - -pub Table: ast::Definition<'input> = { - "table" > "{" "}" => { - let data = code.into_iter().flatten().collect::>().into_boxed_slice(); - ast::Definition::Codetable { name, data } - }, -}; - -pub SolFunction: ast::Definition<'input> = { - "function" => { - let rets = rets.map(|(_, t)| t).unwrap_or_else(|| Box::new([])); - ast::Definition::SolFunction(ast::SolFunction{ name, args, rets }) - } -}; - -pub SolEvent: ast::Definition<'input> = { - "event" => ast::Definition::SolEvent(ast::SolEvent{ name, args }) -}; - -pub SolError: ast::Definition<'input> = { - "error" => ast::Definition::SolError(ast::SolError{ name, args }) -}; - -pub SolTypeList: Box<[DynSolType]> = { - "(" ")" => Box::new([]), - "(" ")" => { - let mut list = vec![first]; - list.extend(rest.into_iter().map(|(_, t)| t)); - 
list.into_boxed_slice() - } -}; - -pub SolType: DynSolType = { - =>? DynSolType::parse(&t).map_err(|e| ParseError::User { error: ast::Error::InvalidSolType(e) }) -}; - -pub SolRawType: String = { - IDENT? => { - let mut result = t.to_string(); - if let Some(ex) = ex { - result.push_str(ex.0); - if let Some(num) = ex.1 { - result.push_str(num); - } - result.push_str(ex.2); - } - result - }, - IDENT? => { - let mut result = t.to_string(); - if let Some(ex) = ex { - result.push_str(ex.0); - if let Some(num) = ex.1 { - result.push_str(num); - } - result.push_str(ex.2); - } - result - }, -}; - -pub SolRawPrimitiveType: &'input str = { - => t -}; - -pub SolRawTupleType: String = { - "(" ")" => { - let mut result = "(".to_string(); - result.push_str(&fist); - rest.into_iter().for_each(|(_, t)| { - result.push_str(","); - result.push_str(&t); - }); - result.push_str(")"); - result - } -} - -pub Word: U256 = { - =>? U256::from_str_radix(&s[2..], 16).map_err(|e| { - ParseError::User { error: ast::Error::WordOverflow(e) } - }), - =>? U256::from_str_radix(&s[2..], 2).map_err(|e| { - ParseError::User { error: ast::Error::WordOverflow(e) } - }), - =>? U256::from_str_radix(s, 10).map_err(|e| { - ParseError::User { error: ast::Error::WordOverflow(e) } - }), -}; - -pub Code: Vec = { - =>? 
Bytes::from_hex(s) - .map(|b| b.to_vec()) - .map_err(|e| ParseError::User{ error: ast::Error::BytesOddLength(e) }) -} - -Spanned: (ast::Span, T) = { - => (start..end, inner) -} - -match { - r"\s+" => {}, // whitespace - r"//[^\n\r]*[\n\r]*" => {}, // single-line comments - r"/\*([^*]|\*[^/])*\*/" => {}, // multi-line comments - - r"[a-zA-Z_][a-zA-Z0-9_]*" => IDENT, // identifier - - r"0|[1-9][0-9]*" => DEC, // decimal number - r"0x[0-9a-f]+" => HEX, // hexadecimal number - r"0b[01]+" => BIN, // binary number - - _ -} diff --git a/crates/ast/src/lexer.rs b/crates/ast/src/lexer.rs new file mode 100644 index 0000000..b6a9321 --- /dev/null +++ b/crates/ast/src/lexer.rs @@ -0,0 +1,172 @@ +use crate::Spanned; +use chumsky::{ + error::Rich, + extra, + primitive::{any, choice, just, one_of}, + text::{self, ascii::keyword}, + IterParser, Parser, +}; +use std::fmt; + +/// Lex the given source code string into tokens. +pub(crate) fn lex(src: &str) -> Result>, Vec>>> { + lexer().parse(src).into_result().map_err(|e| { + e.into_iter() + .map(|errs| errs.map_token(|c| Token::Error(c))) + .collect::>() + }) +} + +/// Lexer token +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Token<'src> { + Comment(&'src str), + Keyword(&'src str), + Ident(&'src str), + Punct(char), + Dec(&'src str), + Hex(&'src str), + Bin(&'src str), + + Error(char), +} + +impl<'src> fmt::Display for Token<'src> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::Comment(s) + | Token::Keyword(s) + | Token::Ident(s) + | Token::Dec(s) + | Token::Hex(s) + | Token::Bin(s) => write!(f, "{}", s), + Token::Punct(c) | Token::Error(c) => write!(f, "{}", c), + } + } +} + +fn lexer<'src>( +) -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { + let keyword = just("#") + .ignore_then(keyword("define").or(keyword("include"))) + .map(Token::Keyword); + + let ident = text::ident().map(Token::Ident); + + let punct = one_of("(){}[]<>:=,").map(Token::Punct); + + let hex = just("0x") + 
.ignore_then(text::digits(16)) + .to_slice() + .map(Token::Hex); + + let bin = just("0b") + .ignore_then(text::digits(2)) + .to_slice() + .map(Token::Bin); + + let dec = text::digits(10).to_slice().map(Token::Dec); + + let token = choice((keyword, ident, punct, hex, bin, dec)); + + // comments + let single_line_comment = just("//") + .then(any().and_is(just('\n').not()).repeated()) + .padded(); + let multi_line_comment = just("/*") + .then(any().and_is(just("*/").not()).repeated()) + .then_ignore(just("*/")) + .padded(); + let comment = single_line_comment.or(multi_line_comment); + + token + .map_with(|tok, ex| (tok, ex.span())) + .padded_by(comment.repeated()) + .padded() + // .recover_with(skip_then_retry_until(any().ignored(), end())) + .repeated() + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use chumsky::span::SimpleSpan; + + macro_rules! assert_ok { + ($input:expr, $($expected:tt)*) => { + assert_eq!( + lexer().parse($input).into_result(), + Ok(vec![$($expected)*]), + ); + }; + } + + // macro_rules! 
assert_err { + // ($input:expr, $expected:expr) => { + // assert_eq!(lexer().parse($input).into_result(), Err(vec![$expected]),); + // }; + // } + + #[test] + fn lex_keyword() { + assert_ok!("#define", (Token::Keyword("define"), SimpleSpan::new(0, 7))); + assert_ok!( + "#include", + (Token::Keyword("include"), SimpleSpan::new(0, 8)) + ); + } + + #[test] + fn lex_ident() { + assert_ok!("foo", (Token::Ident("foo"), SimpleSpan::new(0, 3))); + assert_ok!( + "foo bar", + (Token::Ident("foo"), SimpleSpan::new(0, 3)), + (Token::Ident("bar"), SimpleSpan::new(4, 7)) + ); + // assert_err!( + // "foo#define", + // Rich::custom(SimpleSpan::new(0, 10), "invalid token") + // ); + } + + #[test] + fn lex_punct() { + assert_ok!("(", (Token::Punct('('), SimpleSpan::new(0, 1))); + assert_ok!( + "()", + (Token::Punct('('), SimpleSpan::new(0, 1)), + (Token::Punct(')'), SimpleSpan::new(1, 2)) + ); + assert_ok!( + "{} // comment", + (Token::Punct('{'), SimpleSpan::new(0, 1)), + (Token::Punct('}'), SimpleSpan::new(1, 2)) + ); + assert_ok!( + "{ /* comment */ }", + (Token::Punct('{'), SimpleSpan::new(0, 1)), + (Token::Punct('}'), SimpleSpan::new(16, 17)) + ); + } + + #[test] + fn lex_hex() { + assert_ok!("0x0", (Token::Hex("0x0"), SimpleSpan::new(0, 3))); + assert_ok!("0x123", (Token::Hex("0x123"), SimpleSpan::new(0, 5))); + // assert_err!("0x", SimpleSpan::new(2, 2)); + } + + #[test] + fn lex_dec() { + assert_ok!("0", (Token::Dec("0"), SimpleSpan::new(0, 1))); + assert_ok!("123", (Token::Dec("123"), SimpleSpan::new(0, 3))); + } + + #[test] + fn lex_bin() { + assert_ok!("0b101", (Token::Bin("0b101"), SimpleSpan::new(0, 5))); + assert_ok!("0b0", (Token::Bin("0b0"), SimpleSpan::new(0, 3))); + } +} diff --git a/crates/ast/src/lib.rs b/crates/ast/src/lib.rs index 1073d7f..be650a9 100644 --- a/crates/ast/src/lib.rs +++ b/crates/ast/src/lib.rs @@ -1,100 +1,7 @@ -mod error; +mod ast; +mod lexer; mod parser; +mod util; -pub use error::Error; +pub use ast::*; pub use parser::parse; - -pub type Span 
= std::ops::Range; - -lalrpop_util::lalrpop_mod!( - #[allow(clippy::all)] - grammar -); - -use alloy_dyn_abi::DynSolType; -use alloy_primitives::U256; -use evm_glue::opcodes::Opcode; - -#[derive(Debug, PartialEq, Eq)] -pub struct Root<'src>(pub Box<[Definition<'src>]>); - -#[derive(Debug, PartialEq, Eq)] -pub enum Definition<'src> { - Macro(Macro<'src>), - Constant { - name: (Span, &'src str), - value: U256, - }, - Jumptable(Jumptable<'src>), - Codetable { - name: (Span, &'src str), - data: Box<[u8]>, - }, - SolFunction(SolFunction<'src>), - SolEvent(SolEvent<'src>), - SolError(SolError<'src>), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Macro<'src> { - pub name: (Span, &'src str), - pub args: Box<[(Span, &'src str)]>, - pub takes_returns: Option<(usize, usize)>, - pub body: Box<[(Span, MacroStatement<'src>)]>, -} - -#[derive(Debug, PartialEq, Eq)] -pub enum MacroStatement<'src> { - LabelDefinition(&'src str), - Instruction(Instruction<'src>), - Invoke(Invoke<'src>), -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Instruction<'src> { - Op(Opcode), - LabelReference(&'src str), - MacroArgReference(&'src str), - ConstantReference(&'src str), -} - -#[derive(Debug, PartialEq, Eq)] -pub enum Invoke<'src> { - Macro { - name: (Span, &'src str), - args: Box<[(Span, Instruction<'src>)]>, - }, - BuiltinTableStart(&'src str), - BuiltinTableSize(&'src str), - BuiltinCodeSize(&'src str), - BuiltinCodeOffset(&'src str), - BuiltinFuncSig(&'src str), - BuiltinEventHash(&'src str), - BuiltinError(&'src str), -} - -#[derive(Debug, PartialEq, Eq)] -pub struct Jumptable<'src> { - pub name: (Span, &'src str), - pub size: u8, - pub labels: Box<[&'src str]>, -} - -#[derive(Debug, PartialEq, Eq)] -pub struct SolFunction<'src> { - pub name: &'src str, - pub args: Box<[DynSolType]>, - pub rets: Box<[DynSolType]>, -} - -#[derive(Debug, PartialEq, Eq)] -pub struct SolEvent<'src> { - pub name: &'src str, - pub args: Box<[DynSolType]>, -} - -#[derive(Debug, PartialEq, Eq)] -pub struct 
SolError<'src> { - pub name: &'src str, - pub args: Box<[DynSolType]>, -} diff --git a/crates/ast/src/parser.rs b/crates/ast/src/parser.rs index ad81ad9..e3faf27 100644 --- a/crates/ast/src/parser.rs +++ b/crates/ast/src/parser.rs @@ -1,282 +1,797 @@ -use crate as ast; -use crate::grammar; -use alloy_primitives::U256; +use crate::{ + ast, + lexer::{ + lex, + Token::{self, *}, + }, + util::{u256_as_push, u256_as_push_data}, + Span, Spanned, +}; +use alloy_dyn_abi::DynSolType; +use alloy_primitives::{hex::FromHex, Bytes, U256}; +use chumsky::{ + error::Rich, + extra, + input::{Input, SpannedInput}, + primitive::{choice, just}, + recursive::recursive, + select, + span::SimpleSpan, + IterParser, Parser as ChumskyParser, +}; use evm_glue::opcodes::Opcode; -use lalrpop_util::{lexer::Token, ParseError}; +use std::str::FromStr; -pub fn parse(src: &str) -> Result, ast::Error>> { - grammar::RootParser::new().parse(src) +/// Parse the given source code string into AST. +/// +/// # Arguments +/// +/// * `src` - A string that holds the source code to be parsed. 
+pub fn parse(src: &str) -> Result, Vec>>> { + let tokens = lex(src)?; + + let eoi: Span = SimpleSpan::new(src.len(), src.len()); + let tokens = tokens.as_slice().spanned(eoi); + let ast = root() + .parse(tokens) + .into_result() + .map_err(|errs| errs.into_iter().map(|e| e.into_owned()).collect::>()) + .map_err(|e| e)?; + + Ok(ast) +} + +type ParserInput<'tokens, 'src> = SpannedInput, Span, &'tokens [Spanned>]>; + +trait Parser<'tokens, 'src: 'tokens, T>: + ChumskyParser<'tokens, ParserInput<'tokens, 'src>, T, extra::Err, Span>>> +{ +} +impl<'tokens, 'src: 'tokens, P, T> Parser<'tokens, 'src, T> for P where + P: ChumskyParser< + 'tokens, + ParserInput<'tokens, 'src>, + T, + extra::Err, Span>>, + > +{ +} + +fn root<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Root<'src>> { + definition() + .repeated() + .collect::>() + .map(|defs| ast::Root(defs.into_boxed_slice())) +} + +fn definition<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + just(Keyword("define")).ignore_then(choice(( + r#macro(), + constant(), + table(), + sol_function(), + sol_event(), + sol_error(), + ))) +} + +fn r#macro<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + let macro_args = ident().separated_by(just(Punct(','))).collect::>(); + + just(Ident("macro")) + .ignore_then(ident()) + .then_ignore(just(Punct('('))) + .then(macro_args) + .then_ignore(just(Punct(')'))) + .then_ignore(just(Punct('='))) + .then( + just(Ident("takes")) + .ignore_then(just(Punct('('))) + .ignore_then(dec()) + .then_ignore(just(Punct(')'))) + .then_ignore(just(Ident("returns"))) + .then_ignore(just(Punct('('))) + .then(dec()) + .then_ignore(just(Punct(')'))) + .or_not(), + ) + .then_ignore(just(Punct('{'))) + .then(macro_statement().repeated().collect::>()) + .then_ignore(just(Punct('}'))) + .map(|(((name, args), takes_returns), body)| ast::Macro { + name, + args: args.into_boxed_slice(), + takes_returns, + body: body.into_boxed_slice(), + 
}) + .map(ast::Definition::Macro) +} + +fn macro_statement<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::MacroStatement<'src>> +{ + let label = ident() + .then_ignore(just(Punct(':'))) + .map(ast::MacroStatement::LabelDefinition); + let instruction = instruction().map(ast::MacroStatement::Instruction); + let invoke = invoke().map(ast::MacroStatement::Invoke); + + choice((label, instruction, invoke)) } -pub(crate) fn u256_as_push_data<'a, const N: usize>( - value: U256, -) -> Result<[u8; N], ParseError, ast::Error>> { - if value.byte_len() > N { - return Err(ParseError::User { - error: ast::Error::Todo(format!("word too large for PUSH{}", N)), - }); +fn instruction<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Instruction<'src>> { + let push_auto = word().map(|(value, span)| (ast::Instruction::Op((u256_as_push(value), span)))); + + let push = select! { + Ident("push1") => 1, + Ident("push2") => 2, + Ident("push3") => 3, + Ident("push4") => 4, + Ident("push5") => 5, + Ident("push6") => 6, + Ident("push7") => 7, + Ident("push8") => 8, + Ident("push9") => 9, + Ident("push10") => 10, + Ident("push11") => 11, + Ident("push12") => 12, + Ident("push13") => 13, + Ident("push14") => 14, + Ident("push15") => 15, + Ident("push16") => 16, + Ident("push17") => 17, + Ident("push18") => 18, + Ident("push19") => 19, + Ident("push20") => 20, + Ident("push21") => 21, + Ident("push22") => 22, + Ident("push23") => 23, + Ident("push24") => 24, + Ident("push25") => 25, + Ident("push26") => 26, + Ident("push27") => 27, + Ident("push28") => 28, + Ident("push29") => 29, + Ident("push30") => 30, + Ident("push31") => 31, + Ident("push32") => 32, } - let input = value.to_be_bytes::<32>(); - let mut output = [0u8; N]; - output.copy_from_slice(&input[32 - N..32]); + .then(word()) + .map(|(n, (value, span))| { + ( + match n { + 1 => Opcode::PUSH1(u256_as_push_data::<1>(value).unwrap()), + 2 => Opcode::PUSH2(u256_as_push_data::<2>(value).unwrap()), + 3 => 
Opcode::PUSH3(u256_as_push_data::<3>(value).unwrap()), + 4 => Opcode::PUSH4(u256_as_push_data::<4>(value).unwrap()), + 5 => Opcode::PUSH5(u256_as_push_data::<5>(value).unwrap()), + 6 => Opcode::PUSH6(u256_as_push_data::<6>(value).unwrap()), + 7 => Opcode::PUSH7(u256_as_push_data::<7>(value).unwrap()), + 8 => Opcode::PUSH8(u256_as_push_data::<8>(value).unwrap()), + 9 => Opcode::PUSH9(u256_as_push_data::<9>(value).unwrap()), + 10 => Opcode::PUSH10(u256_as_push_data::<10>(value).unwrap()), + 11 => Opcode::PUSH11(u256_as_push_data::<11>(value).unwrap()), + 12 => Opcode::PUSH12(u256_as_push_data::<12>(value).unwrap()), + 13 => Opcode::PUSH13(u256_as_push_data::<13>(value).unwrap()), + 14 => Opcode::PUSH14(u256_as_push_data::<14>(value).unwrap()), + 15 => Opcode::PUSH15(u256_as_push_data::<15>(value).unwrap()), + 16 => Opcode::PUSH16(u256_as_push_data::<16>(value).unwrap()), + 17 => Opcode::PUSH17(u256_as_push_data::<17>(value).unwrap()), + 18 => Opcode::PUSH18(u256_as_push_data::<18>(value).unwrap()), + 19 => Opcode::PUSH19(u256_as_push_data::<19>(value).unwrap()), + 20 => Opcode::PUSH20(u256_as_push_data::<20>(value).unwrap()), + 21 => Opcode::PUSH21(u256_as_push_data::<21>(value).unwrap()), + 22 => Opcode::PUSH22(u256_as_push_data::<22>(value).unwrap()), + 23 => Opcode::PUSH23(u256_as_push_data::<23>(value).unwrap()), + 24 => Opcode::PUSH24(u256_as_push_data::<24>(value).unwrap()), + 25 => Opcode::PUSH25(u256_as_push_data::<25>(value).unwrap()), + 26 => Opcode::PUSH26(u256_as_push_data::<26>(value).unwrap()), + 27 => Opcode::PUSH27(u256_as_push_data::<27>(value).unwrap()), + 28 => Opcode::PUSH28(u256_as_push_data::<28>(value).unwrap()), + 29 => Opcode::PUSH29(u256_as_push_data::<29>(value).unwrap()), + 30 => Opcode::PUSH30(u256_as_push_data::<30>(value).unwrap()), + 31 => Opcode::PUSH31(u256_as_push_data::<31>(value).unwrap()), + 32 => Opcode::PUSH32(u256_as_push_data::<32>(value).unwrap()), + _ => unreachable!(), + }, + span, + ) + }) + .map(ast::Instruction::Op); + 
+ let op = ident().map(|(ident, span)| { + if let Ok(op) = Opcode::from_str(ident) { + ast::Instruction::Op((op, span)) + } else { + ast::Instruction::LabelReference((ident, span)) + } + }); + let macro_arg_ref = just(Punct('<')) + .ignore_then(ident()) + .then_ignore(just(Punct('>'))) + .map(ast::Instruction::MacroArgReference); + let constant_ref = just(Punct('[')) + .ignore_then(ident()) + .then_ignore(just(Punct(']'))) + .map(ast::Instruction::ConstantReference); + + choice((push_auto, push, op, macro_arg_ref, constant_ref)) +} + +fn invoke<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Invoke<'src>> { + let invoke_macro_args = just(Punct('(')) + .ignore_then( + instruction() + .separated_by(just(Punct(','))) + .collect::>(), + ) + .then_ignore(just(Punct(')'))) + .map(|args| args.into_boxed_slice()); + + let invoke_macro = ident() + .then(invoke_macro_args) + .map(|(name, args)| ast::Invoke::Macro { name, args }); - Ok(output) + let invoke_builtin = |name, constructor: fn((_, Span)) -> ast::Invoke<'src>| { + just(Ident(name)) + .ignore_then(just(Punct('('))) + .ignore_then(ident()) + .then_ignore(just(Punct(')'))) + .map(constructor) + }; + + choice(( + invoke_macro, + invoke_builtin("__tablestart", ast::Invoke::BuiltinTableStart), + invoke_builtin("__tablesize", ast::Invoke::BuiltinTableSize), + invoke_builtin("__codesize", ast::Invoke::BuiltinCodeSize), + invoke_builtin("__codeoffset", ast::Invoke::BuiltinCodeOffset), + invoke_builtin("__FUNC_SIG", ast::Invoke::BuiltinFuncSig), + invoke_builtin("__EVENT_HASH", ast::Invoke::BuiltinEventHash), + invoke_builtin("__ERROR", ast::Invoke::BuiltinError), + )) +} + +fn constant<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + just(Ident("constant")) + .ignore_then(ident()) + .then_ignore(just(Punct('='))) + .then(word()) + .map(|(name, (value, _))| ast::Definition::Constant { name, value }) +} + +fn table<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, 
ast::Definition<'src>> { + just(Ident("table")) + .ignore_then(ident()) + .then_ignore(just(Punct('{'))) + .then(code().repeated().collect::>()) + .then_ignore(just(Punct('}'))) + .map(|(name, code)| ast::Definition::Table { + name, + data: code + .into_iter() + .flatten() + .collect::>() + .into_boxed_slice(), + }) +} + +fn sol_function<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + just(Ident("function")) + .ignore_then(ident()) + .then(sol_type_list()) + .then( + choice((just(Ident("public")), just(Ident("external")))) + .then_ignore(choice((just(Ident("view")), just(Ident("pure")))).or_not()) + .then_ignore(choice((just(Ident("payable")), just(Ident("nonpayable")))).or_not()) + .or_not() + .ignore_then(just(Ident("returns"))) + .ignore_then(sol_type_list()) + .or_not(), + ) + .map(|((name, args), rets)| { + let rets = rets.unwrap_or(Box::new([])); + ast::Definition::SolFunction(ast::SolFunction { name, args, rets }) + }) +} + +fn sol_event<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + just(Ident("event")) + .ignore_then(ident()) + .then(sol_type_list()) + .map(|(name, args)| ast::Definition::SolEvent(ast::SolEvent { name, args })) +} + +fn sol_error<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, ast::Definition<'src>> { + just(Ident("error")) + .ignore_then(ident()) + .then(sol_type_list()) + .map(|(name, args)| ast::Definition::SolError(ast::SolError { name, args })) } -pub(crate) fn u256_as_push(value: U256) -> Opcode { - match value.byte_len() { - 0..=1 => u256_as_push_data::<1>(value).map(Opcode::PUSH1).unwrap(), - 2 => u256_as_push_data::<2>(value).map(Opcode::PUSH2).unwrap(), - 3 => u256_as_push_data::<3>(value).map(Opcode::PUSH3).unwrap(), - 4 => u256_as_push_data::<4>(value).map(Opcode::PUSH4).unwrap(), - 5 => u256_as_push_data::<5>(value).map(Opcode::PUSH5).unwrap(), - 6 => u256_as_push_data::<6>(value).map(Opcode::PUSH6).unwrap(), - 7 => 
u256_as_push_data::<7>(value).map(Opcode::PUSH7).unwrap(), - 8 => u256_as_push_data::<8>(value).map(Opcode::PUSH8).unwrap(), - 9 => u256_as_push_data::<9>(value).map(Opcode::PUSH9).unwrap(), - 10 => u256_as_push_data::<10>(value).map(Opcode::PUSH10).unwrap(), - 11 => u256_as_push_data::<11>(value).map(Opcode::PUSH11).unwrap(), - 12 => u256_as_push_data::<12>(value).map(Opcode::PUSH12).unwrap(), - 13 => u256_as_push_data::<13>(value).map(Opcode::PUSH13).unwrap(), - 14 => u256_as_push_data::<14>(value).map(Opcode::PUSH14).unwrap(), - 15 => u256_as_push_data::<15>(value).map(Opcode::PUSH15).unwrap(), - 16 => u256_as_push_data::<16>(value).map(Opcode::PUSH16).unwrap(), - 17 => u256_as_push_data::<17>(value).map(Opcode::PUSH17).unwrap(), - 18 => u256_as_push_data::<18>(value).map(Opcode::PUSH18).unwrap(), - 19 => u256_as_push_data::<19>(value).map(Opcode::PUSH19).unwrap(), - 20 => u256_as_push_data::<20>(value).map(Opcode::PUSH20).unwrap(), - 21 => u256_as_push_data::<21>(value).map(Opcode::PUSH21).unwrap(), - 22 => u256_as_push_data::<22>(value).map(Opcode::PUSH22).unwrap(), - 23 => u256_as_push_data::<23>(value).map(Opcode::PUSH23).unwrap(), - 24 => u256_as_push_data::<24>(value).map(Opcode::PUSH24).unwrap(), - 25 => u256_as_push_data::<25>(value).map(Opcode::PUSH25).unwrap(), - 26 => u256_as_push_data::<26>(value).map(Opcode::PUSH26).unwrap(), - 27 => u256_as_push_data::<27>(value).map(Opcode::PUSH27).unwrap(), - 28 => u256_as_push_data::<28>(value).map(Opcode::PUSH28).unwrap(), - 29 => u256_as_push_data::<29>(value).map(Opcode::PUSH29).unwrap(), - 30 => u256_as_push_data::<30>(value).map(Opcode::PUSH30).unwrap(), - 31 => u256_as_push_data::<31>(value).map(Opcode::PUSH31).unwrap(), - 32 => u256_as_push_data::<32>(value).map(Opcode::PUSH32).unwrap(), - _ => unreachable!(), +fn sol_type_list<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Box<[Spanned]>> +{ + just(Punct('(')) + .ignore_then( + sol_type() + .separated_by(just(Punct(','))) + .collect::>(), + ) + 
.then_ignore(just(Punct(')'))) + .map(|args| args.into_boxed_slice()) +} + +fn sol_type<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Spanned> { + recursive(|sol_raw_type| { + let sol_raw_primitive_type = ident().map(|(typ, _)| typ.to_string()).boxed(); + + let sol_raw_tuple_type = just(Punct('(')) + .ignore_then( + sol_raw_type + .separated_by(just(Punct(','))) + .collect::>(), + ) + .then_ignore(just(Punct(')'))) + .map(|types| { + let mut result = "(".to_string(); + let types = types.into_iter().collect::>().join(","); + result.push_str(&types); + result.push(')'); + result + }) + .boxed(); + + choice((sol_raw_primitive_type, sol_raw_tuple_type)) + .then( + just(Punct('[')) + .ignore_then(dec().or_not()) + .then_ignore(just(Punct(']'))) + .or_not(), + ) + .then_ignore(ident().or_not()) + .map(|(typ, slice)| { + let mut result = typ; + if let Some(size) = slice { + result.push('['); + if let Some((n, _span)) = size { + result.push_str(&n.to_string()); + } + result.push(']'); + } + result + }) + .boxed() + }) + .try_map_with(|typ, ex| { + DynSolType::parse(&typ) + .map(|typ| (typ, ex.span())) + .map_err(|e| Rich::custom(ex.span(), e)) + }) +} + +fn ident<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Spanned<&'src str>> { + select! {Ident(s) => s}.map_with(|s, ex| (s, ex.span())) +} + +fn dec<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Spanned> { + select! {Dec(s) => s.parse::().unwrap()}.map_with(|s, ex| (s, ex.span())) +} + +fn word<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Spanned> { + select! { + Hex(s) => U256::from_str_radix(&s[2..], 16), + Bin(s) => U256::from_str_radix(&s[2..], 2), + Dec(s) => U256::from_str_radix(s, 10) + } + .try_map_with(|value, ex| value.map_err(|_e| Rich::custom(ex.span(), "word overflows"))) + .map_with(|value, ex| (value, ex.span())) +} + +fn code<'tokens, 'src: 'tokens>() -> impl Parser<'tokens, 'src, Vec> { + select! 
{ + Hex(s) => Bytes::from_hex(s) } + .try_map_with(|code, ex| code.map_err(|_e| Rich::custom(ex.span(), "odd length"))) + .map(|code| code.to_vec()) } #[cfg(test)] mod tests { use super::*; - use alloy_dyn_abi::DynSolType; - use alloy_primitives::{hex, ruint, uint, U256}; + use alloy_primitives::uint; + use chumsky::input::Input; + + /// Macro to assert that a parser successfully parses a given set of tokens + /// and produces the expected result. + /// + /// # Arguments + /// + /// * `$parser` - The parser to be tested. + /// * `$tokens` - A collection of tokens to be parsed. + /// * `$expected` - The expected result after parsing. + macro_rules! assert_ok { + ($parser:expr, $tokens:expr, $expected:expr) => { + let tokens: Vec<(Token<'_>, SimpleSpan)> = $tokens + .into_iter() + .map(|tok| (tok.clone(), SimpleSpan::new(0, 0))) + .collect(); + assert_eq!( + $parser + .parse(tokens.as_slice().spanned(SimpleSpan::new(0, 0))) + .into_result(), + Ok($expected), + ); + }; + } + + /// Macro to assert that a parser returns an expected error when parsing a + /// given set of tokens. + /// + /// # Arguments + /// + /// * `$parser` - The parser to be tested. + /// * `$tokens` - A collection of tokens to be parsed. + /// * `$expected` - The expected error message after parsing. + macro_rules! 
assert_err { + ($parser:expr, $tokens:expr, $expected:expr) => { + let tokens: Vec<(Token<'_>, SimpleSpan)> = $tokens + .into_iter() + .map(|tok| (tok.clone(), SimpleSpan::new(0, 0))) + .collect(); + let expected = vec![Rich::custom(SimpleSpan::new(0, 0), $expected)]; + assert_eq!( + $parser + .parse(tokens.as_slice().spanned(SimpleSpan::new(0, 0))) + .into_result(), + Err(expected), + ); + }; + } #[test] - fn word_parser() { - assert_eq!(grammar::WordParser::new().parse("0x0"), Ok(U256::ZERO)); - assert_eq!(grammar::WordParser::new().parse("0x1"), Ok(uint!(1_U256))); - assert_eq!(grammar::WordParser::new().parse("0b10"), Ok(uint!(2_U256))); - assert_eq!( - grammar::WordParser::new() - .parse("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), - Ok(U256::MAX) + fn parse_word() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!(word(), vec![Hex("0x0")], (U256::ZERO, span)); + assert_ok!(word(), vec![Hex("0x1")], (uint!(1_U256), span)); + assert_ok!(word(), vec![Bin("0b10")], (uint!(2_U256), span)); + assert_ok!(word(), vec![Dec("2")], (uint!(2_U256), span)); + assert_ok!( + word(), + vec![Hex("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff")], + (U256::MAX, span) ); - assert_eq!( - grammar::WordParser::new() - .parse("0x10000000000000000000000000000000000000000000000000000000000000000"), - Err(ParseError::User { - error: ast::Error::WordOverflow(ruint::ParseError::BaseConvertError( - ruint::BaseConvertError::Overflow - )) - }) + assert_err!( + word(), + vec![Hex("0x10000000000000000000000000000000000000000000000000000000000000000")], + "word overflows" ); } #[test] - fn code_parser() { - assert_eq!( - grammar::CodeParser::new().parse("0xc0de"), - Ok(vec![0xc0, 0xde]) - ); - assert_eq!( - grammar::CodeParser::new().parse("0x0"), - Err(ParseError::User { - error: ast::Error::BytesOddLength(hex::FromHexError::OddLength) - }) - ); + fn parse_code() { + assert_ok!(code(), vec![Hex("0xc0de")], vec![0xc0, 0xde]); + 
assert_err!(code(), vec![Hex("0x0")], "odd length"); } #[test] - fn macro_parser() { - assert_eq!( - grammar::MacroParser::new().parse("macro MAIN() = { }"), - Ok(ast::Definition::Macro(ast::Macro { - name: (6..10, "MAIN"), + fn parse_macro() { + let span: Span = SimpleSpan::new(0, 0); + assert_ok!( + r#macro(), + vec![ + Ident("macro"), + Ident("MAIN"), + Punct('('), + Punct(')'), + Punct('='), + Punct('{'), + Punct('}') + ], + ast::Definition::Macro(ast::Macro { + name: ("MAIN", span), args: Box::new([]), takes_returns: None, body: Box::new([]) - })) + }) ); - assert_eq!( - grammar::MacroParser::new() - .parse("macro READ_ADDRESS(offset) = takes (0) returns (1) { stop }"), - Ok(ast::Definition::Macro(ast::Macro { - name: (6..18, "READ_ADDRESS"), - args: Box::new([(19..25, "offset")]), - takes_returns: Some((0, 1)), - body: Box::new([( - 53..57, - ast::MacroStatement::Instruction(ast::Instruction::Op(Opcode::STOP)) - )]) - })) + assert_ok!( + r#macro(), + vec![ + Ident("macro"), + Ident("READ_ADDRESS"), + Punct('('), + Ident("offset"), + Punct(')'), + Punct('='), + Ident("takes"), + Punct('('), + Dec("0"), + Punct(')'), + Ident("returns"), + Punct('('), + Dec("1"), + Punct(')'), + Punct('{'), + Ident("stop"), + Punct('}') + ], + ast::Definition::Macro(ast::Macro { + name: ("READ_ADDRESS", span), + args: Box::new([("offset", span)]), + takes_returns: Some(((0, span), (1, span))), + body: Box::new([ast::MacroStatement::Instruction(ast::Instruction::Op(( + Opcode::STOP, + span + )))]), + }) ); } #[test] - fn macro_statement_parser() { - assert_eq!( - grammar::MacroStatementParser::new().parse("x:"), - Ok(ast::MacroStatement::LabelDefinition("x")) + fn parse_macro_statement() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + macro_statement(), + vec![Ident("x"), Punct(':')], + ast::MacroStatement::LabelDefinition(("x", span)) ); - assert_eq!( - grammar::MacroStatementParser::new().parse("__tablestart(TABLE)"), - 
Ok(ast::MacroStatement::Invoke(ast::Invoke::BuiltinTableStart( - "TABLE" - ))) + assert_ok!( + macro_statement(), + vec![Ident("__tablestart"), Punct('('), Ident("TABLE"), Punct(')')], + ast::MacroStatement::Invoke(ast::Invoke::BuiltinTableStart(("TABLE", span))) ); - assert_eq!( - grammar::MacroStatementParser::new().parse("READ_ADDRESS(0x4)"), - Ok(ast::MacroStatement::Invoke(ast::Invoke::Macro { - name: (0..12, "READ_ADDRESS"), - args: Box::new([(13..16, ast::Instruction::Op(Opcode::PUSH1([0x04])))]) - })) + assert_ok!( + macro_statement(), + vec![Ident("READ_ADDRESS"), Punct('('), Hex("0x4"), Punct(')')], + ast::MacroStatement::Invoke(ast::Invoke::Macro { + name: ("READ_ADDRESS", span), + args: Box::new([ast::Instruction::Op((Opcode::PUSH1([0x04]), span))]) + }) ); } #[test] - fn instruction_parser() { - assert_eq!( - grammar::InstructionParser::new().parse("add"), - Ok(ast::Instruction::Op(Opcode::ADD)) + fn parse_instruction() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + instruction(), + vec![Ident("add")], + ast::Instruction::Op((Opcode::ADD, span)) ); - assert_eq!( - grammar::InstructionParser::new().parse("0x1"), - Ok(ast::Instruction::Op(Opcode::PUSH1([0x01]))) + assert_ok!( + instruction(), + vec![Hex("0x1")], + ast::Instruction::Op((Opcode::PUSH1([0x01]), span)) ); - assert_eq!( - grammar::InstructionParser::new().parse("push2 0x1"), - Ok(ast::Instruction::Op(Opcode::PUSH2([0x00, 0x01]))) + assert_ok!( + instruction(), + vec![Ident("push2"), Hex("0x1")], + ast::Instruction::Op((Opcode::PUSH2([0x00, 0x01]), span)) ); - assert_eq!( - grammar::InstructionParser::new().parse("x"), - Ok(ast::Instruction::LabelReference("x")) + assert_ok!( + instruction(), + vec![Ident("x")], + ast::Instruction::LabelReference(("x", span)) ); - assert_eq!( - grammar::InstructionParser::new().parse(""), - Ok(ast::Instruction::MacroArgReference("x")) + assert_ok!( + instruction(), + vec![Punct('<'), Ident("x"), Punct('>')], + 
ast::Instruction::MacroArgReference(("x", span)) ); - assert_eq!( - grammar::InstructionParser::new().parse("[x]"), - Ok(ast::Instruction::ConstantReference("x")) + assert_ok!( + instruction(), + vec![Punct('['), Ident("x"), Punct(']')], + ast::Instruction::ConstantReference(("x", span)) ); } #[test] - fn constant_parser() { - assert_eq!( - grammar::ConstantParser::new().parse("constant TEST = 0x1"), - Ok(ast::Definition::Constant { - name: (9..13, "TEST"), - value: uint!(1_U256), - }) - ); - assert_eq!( - grammar::ConstantParser::new() - .parse(" constant TEST /* comment */ = 0b1101 // comment"), - Ok(ast::Definition::Constant { - name: (10..14, "TEST"), - value: uint!(13_U256), - }) + fn parse_constant() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + constant(), + vec![Ident("constant"), Ident("TEST"), Punct('='), Hex("0x1")], + ast::Definition::Constant { + name: ("TEST", span), + value: uint!(1_U256) + } ); } #[test] - fn table_parser() { - assert_eq!( - grammar::TableParser::new().parse("table TEST { 0xc0de }"), - Ok(ast::Definition::Codetable { - name: (6..10, "TEST"), + fn parse_table() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + table(), + vec![Ident("table"), Ident("TEST"), Punct('{'), Hex("0xc0de"), Punct('}')], + ast::Definition::Table { + name: ("TEST", span), data: Box::new([0xc0, 0xde]) - }) + } ); - assert_eq!( - grammar::TableParser::new().parse("table TEST { 0xc0de 0xcc00ddee }"), - Ok(ast::Definition::Codetable { - name: (6..10, "TEST"), + assert_ok!( + table(), + vec![ + Ident("table"), + Ident("TEST"), + Punct('{'), + Hex("0xc0de"), + Hex("0xcc00ddee"), + Punct('}') + ], + ast::Definition::Table { + name: ("TEST", span), data: Box::new([0xc0, 0xde, 0xcc, 0x00, 0xdd, 0xee]) - }) + } ); } #[test] - fn sol_type_list_parser() { - assert_eq!( - grammar::SolTypeListParser::new().parse("(address, uint256)"), - Ok( - vec![DynSolType::parse("address").unwrap(), DynSolType::parse("uint256").unwrap()] - .into_boxed_slice() 
- ) + fn parse_sol_type() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + sol_type(), + vec![Ident("address"),], + (DynSolType::parse("address").unwrap(), span) + ); + assert_ok!( + sol_type(), + vec![Ident("address"), Ident("token")], + (DynSolType::parse("address").unwrap(), span) + ); + assert_ok!( + sol_type(), + vec![Ident("address"), Punct('['), Punct(']')], + (DynSolType::parse("address[]").unwrap(), span) ); - assert_eq!( - grammar::SolTypeListParser::new().parse("(address[] tokens)"), - Ok(vec![DynSolType::parse("address[]").unwrap(),].into_boxed_slice()) + assert_ok!( + sol_type(), + vec![Ident("address"), Punct('['), Dec("3"), Punct(']'),], + (DynSolType::parse("address[3]").unwrap(), span) ); - assert_eq!( - grammar::SolTypeListParser::new().parse("(address[3] tokens)"), - Ok(vec![DynSolType::parse("address[3]").unwrap(),].into_boxed_slice()) + assert_ok!( + sol_type(), + vec![ + Punct('('), + Ident("address"), + Ident("to"), + Punct(','), + Ident("uint256"), + Ident("amount"), + Punct(')'), + Punct('['), + Punct(']'), + ], + (DynSolType::parse("(address,uint256)[]").unwrap(), span) ); - assert_eq!( - grammar::SolTypeListParser::new().parse("((address, (address to, uint256 amount)[]))"), - Ok( - vec![DynSolType::parse("(address,(address,uint256)[])").unwrap(),] - .into_boxed_slice() + assert_ok!( + sol_type(), + vec![ + Punct('('), + Ident("address"), + Punct(','), + Punct('('), + Ident("address"), + Punct(','), + Ident("uint256"), + Punct(')'), + Punct('['), + Punct(']'), + Punct(')'), + Punct('['), + Punct(']'), + ], + ( + DynSolType::parse("(address,(address,uint256)[])[]").unwrap(), + span ) ); } #[test] - fn sol_function_parser() { - assert_eq!( - grammar::SolFunctionParser::new() - .parse("function balanceOf(address) returns (uint256)"), - Ok(ast::Definition::SolFunction(ast::SolFunction { - name: "balanceOf", - args: Box::new([DynSolType::parse("address").unwrap()]), - rets: Box::new([DynSolType::parse("uint256").unwrap()]), - })) + 
fn parse_sol_type_list() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + sol_type_list(), + vec![Punct('('), Ident("address"), Punct(','), Ident("uint256"), Punct(')')], + vec![ + (DynSolType::parse("address").unwrap(), span), + (DynSolType::parse("uint256").unwrap(), span) + ] + .into_boxed_slice() ); } #[test] - fn sol_event_parser() { - assert_eq!( - grammar::SolEventParser::new() - .parse("event Transfer(address from, address to, uint256 value)"), - Ok(ast::Definition::SolEvent(ast::SolEvent { - name: "Transfer", + fn parse_sol_function() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + sol_function(), + vec![ + Ident("function"), + Ident("balanceOf"), + Punct('('), + Ident("address"), + Punct(')'), + Ident("returns"), + Punct('('), + Ident("uint256"), + Punct(')') + ], + ast::Definition::SolFunction(ast::SolFunction { + name: ("balanceOf", span), + args: Box::new([(DynSolType::parse("address").unwrap(), span)]), + rets: Box::new([(DynSolType::parse("uint256").unwrap(), span)]), + }) + ); + assert_ok!( + sol_function(), + vec![ + Ident("function"), + Ident("balanceOf"), + Punct('('), + Ident("address"), + Punct(')'), + Ident("public"), + Ident("view"), + Ident("returns"), + Punct('('), + Ident("uint256"), + Punct(')') + ], + ast::Definition::SolFunction(ast::SolFunction { + name: ("balanceOf", span), + args: Box::new([(DynSolType::parse("address").unwrap(), span)]), + rets: Box::new([(DynSolType::parse("uint256").unwrap(), span)]), + }) + ); + } + + #[test] + fn parse_sol_event() { + let span: Span = SimpleSpan::new(0, 0); + assert_ok!( + sol_event(), + vec![ + Ident("event"), + Ident("Transfer"), + Punct('('), + Ident("address"), + Punct(','), + Ident("address"), + Punct(','), + Ident("uint256"), + Punct(')') + ], + ast::Definition::SolEvent(ast::SolEvent { + name: ("Transfer", span), args: Box::new([ - DynSolType::parse("address").unwrap(), - DynSolType::parse("address").unwrap(), - DynSolType::parse("uint256").unwrap() + 
(DynSolType::parse("address").unwrap(), span), + (DynSolType::parse("address").unwrap(), span), + (DynSolType::parse("uint256").unwrap(), span), ]), - })) + }) ); } #[test] - fn sol_error_parser() { - assert_eq!( - grammar::SolErrorParser::new().parse("error PanicError(uint256)"), - Ok(ast::Definition::SolError(ast::SolError { - name: "PanicError", - args: Box::new([DynSolType::parse("uint256").unwrap(),]), - })) + fn parse_sol_error() { + let span: Span = SimpleSpan::new(0, 0); + + assert_ok!( + sol_error(), + vec![Ident("error"), Ident("PanicError"), Punct('('), Ident("uint256"), Punct(')')], + ast::Definition::SolError(ast::SolError { + name: ("PanicError", span), + args: Box::new([(DynSolType::parse("uint256").unwrap(), span)]), + }) ); } } diff --git a/crates/ast/src/util.rs b/crates/ast/src/util.rs new file mode 100644 index 0000000..382ce3d --- /dev/null +++ b/crates/ast/src/util.rs @@ -0,0 +1,55 @@ +use alloy_primitives::U256; +use evm_glue::opcodes::Opcode; + +pub(crate) fn u256_as_push_data<const N: usize>(value: U256) -> Result<[u8; N], String> { + if value.byte_len() > N { + return Err(format!( + "word with {} bytes is too large for PUSH{}", + value.byte_len(), + N + )); + } + let input = value.to_be_bytes::<32>(); + let mut output = [0u8; N]; + output.copy_from_slice(&input[32 - N..32]); + + Ok(output) +} + +pub(crate) fn u256_as_push(value: U256) -> Opcode { + match value.byte_len() { + 0..=1 => u256_as_push_data::<1>(value).map(Opcode::PUSH1).unwrap(), + 2 => u256_as_push_data::<2>(value).map(Opcode::PUSH2).unwrap(), + 3 => u256_as_push_data::<3>(value).map(Opcode::PUSH3).unwrap(), + 4 => u256_as_push_data::<4>(value).map(Opcode::PUSH4).unwrap(), + 5 => u256_as_push_data::<5>(value).map(Opcode::PUSH5).unwrap(), + 6 => u256_as_push_data::<6>(value).map(Opcode::PUSH6).unwrap(), + 7 => u256_as_push_data::<7>(value).map(Opcode::PUSH7).unwrap(), + 8 => u256_as_push_data::<8>(value).map(Opcode::PUSH8).unwrap(), + 9 => 
u256_as_push_data::<9>(value).map(Opcode::PUSH9).unwrap(), + 10 => u256_as_push_data::<10>(value).map(Opcode::PUSH10).unwrap(), + 11 => u256_as_push_data::<11>(value).map(Opcode::PUSH11).unwrap(), + 12 => u256_as_push_data::<12>(value).map(Opcode::PUSH12).unwrap(), + 13 => u256_as_push_data::<13>(value).map(Opcode::PUSH13).unwrap(), + 14 => u256_as_push_data::<14>(value).map(Opcode::PUSH14).unwrap(), + 15 => u256_as_push_data::<15>(value).map(Opcode::PUSH15).unwrap(), + 16 => u256_as_push_data::<16>(value).map(Opcode::PUSH16).unwrap(), + 17 => u256_as_push_data::<17>(value).map(Opcode::PUSH17).unwrap(), + 18 => u256_as_push_data::<18>(value).map(Opcode::PUSH18).unwrap(), + 19 => u256_as_push_data::<19>(value).map(Opcode::PUSH19).unwrap(), + 20 => u256_as_push_data::<20>(value).map(Opcode::PUSH20).unwrap(), + 21 => u256_as_push_data::<21>(value).map(Opcode::PUSH21).unwrap(), + 22 => u256_as_push_data::<22>(value).map(Opcode::PUSH22).unwrap(), + 23 => u256_as_push_data::<23>(value).map(Opcode::PUSH23).unwrap(), + 24 => u256_as_push_data::<24>(value).map(Opcode::PUSH24).unwrap(), + 25 => u256_as_push_data::<25>(value).map(Opcode::PUSH25).unwrap(), + 26 => u256_as_push_data::<26>(value).map(Opcode::PUSH26).unwrap(), + 27 => u256_as_push_data::<27>(value).map(Opcode::PUSH27).unwrap(), + 28 => u256_as_push_data::<28>(value).map(Opcode::PUSH28).unwrap(), + 29 => u256_as_push_data::<29>(value).map(Opcode::PUSH29).unwrap(), + 30 => u256_as_push_data::<30>(value).map(Opcode::PUSH30).unwrap(), + 31 => u256_as_push_data::<31>(value).map(Opcode::PUSH31).unwrap(), + 32 => u256_as_push_data::<32>(value).map(Opcode::PUSH32).unwrap(), + _ => unreachable!(), + } +} diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index da2a8fc..ac97c02 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -7,8 +7,7 @@ license.workspace = true [dependencies] huff-ast.workspace = true ariadne.workspace = true -clap.workspace = true -lalrpop-util.workspace = true +argh.workspace = 
true thiserror.workspace = true [[bin]] diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 6e502fb..b01ca56 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,10 +1,11 @@ -use clap::Parser as ClapParser; +use argh::FromArgs; +use ariadne::{sources, Color, Config, IndexType, Label, Report, ReportKind}; use huff_ast::parse; use std::{fs::read_to_string, io, process::exit}; use thiserror::Error; fn main() { - let cli = Cli::parse(); + let cli = argh::from_env(); if let Err(e) = run(cli) { eprintln!("error: {}", e); exit(1); @@ -13,9 +14,22 @@ fn main() { fn run(cli: Cli) -> HuffResult { let src = read_to_string(&cli.filename)?; + let filename: String = cli.filename; match parse(&src) { Ok(ast) => println!("{:?}", ast), - Err(e) => println!("error: {}", e), + Err(errs) => errs.into_iter().for_each(|e| { + Report::build(ReportKind::Error, filename.clone(), e.span().start) + .with_config(Config::default().with_index_type(IndexType::Byte)) + .with_message(e.reason()) + .with_label( + Label::new((filename.clone(), e.span().into_range())) + .with_message(e.reason()) + .with_color(Color::Red), + ) + .finish() + .print(sources([(filename.clone(), &src)])) + .unwrap() + }), } Ok(()) @@ -33,9 +47,10 @@ enum HuffError { type HuffResult = Result<(), HuffError>; -#[derive(ClapParser)] -#[command(name = "huff")] +#[derive(FromArgs)] +/// Huff Language Compiler struct Cli { - /// Filename + /// filename + #[argh(positional)] filename: String, }