From dd7c3eff43207f4f09ae9e7ba319abff6e39d8f2 Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Wed, 8 Jan 2025 23:22:28 -0500 Subject: [PATCH 1/8] test: failing round-trip proptest case: `mut[0]` --- quil-rs/proptest-regressions/expression/mod.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/quil-rs/proptest-regressions/expression/mod.txt b/quil-rs/proptest-regressions/expression/mod.txt index 0fad84e1..b24d9cb8 100644 --- a/quil-rs/proptest-regressions/expression/mod.txt +++ b/quil-rs/proptest-regressions/expression/mod.txt @@ -6,3 +6,4 @@ # everyone who runs the test benefits from these saved cases. cc 4c32128d724ed0f840715fae4e194c99262dc153c64be39d2acf45b8903b20f7 # shrinks to value = Complex { re: 0.0, im: -0.13530277317700273 } cc 5cc95f2159ad7120bbaf296d3a9fb26fef30f57b61e76b3e0dc99f4759009fdb # shrinks to e = Number(Complex { re: 0.0, im: -2.772221265116396 }) +cc de70a1853ccef983fac85a87761ba08bfb2d54b2d4e880d5d90e7b4a75ecafb5 # shrinks to e = Address(MemoryReference { name: "mut", index: 0 }) From 91b88f41ccaba7ab4ea1a34a7f69975bc503f2e0 Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 01:04:22 -0500 Subject: [PATCH 2/8] test: add some failing test cases of the lexing errors discovered later --- quil-rs/src/parser/lexer/mod.rs | 10 ++++++++-- quil-rs/src/parser/mod.rs | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/quil-rs/src/parser/lexer/mod.rs b/quil-rs/src/parser/lexer/mod.rs index 9c043cc6..8f21e8fe 100644 --- a/quil-rs/src/parser/lexer/mod.rs +++ b/quil-rs/src/parser/lexer/mod.rs @@ -399,7 +399,7 @@ mod tests { use nom_locate::LocatedSpan; use rstest::*; - use crate::parser::common::tests::KITCHEN_SINK_QUIL; + use crate::parser::{common::tests::KITCHEN_SINK_QUIL, DataType}; use super::{lex, Command, Operator, Token}; @@ -568,7 +568,13 @@ mod tests { Token::Identifier("a-2".to_string()), Token::Operator(Operator::Minus), Token::Variable("var".to_string()) - ]) + ]), + case("BIT", 
vec![Token::DataType(DataType::Bit)]), + case("BITS", vec![Token::Identifier("BITS".to_string())]), + case("NaN", vec![Token::Identifier("NaN".to_string())]), + case("nan", vec![Token::Identifier("nan".to_string())]), + case("NaNa", vec![Token::Identifier("NaNa".to_string())]), + case("nana", vec![Token::Identifier("nana".to_string())]), )] fn it_lexes_identifier(input: &str, expected: Vec) { let input = LocatedSpan::new(input); diff --git a/quil-rs/src/parser/mod.rs b/quil-rs/src/parser/mod.rs index d4d00cf1..dd4727f3 100644 --- a/quil-rs/src/parser/mod.rs +++ b/quil-rs/src/parser/mod.rs @@ -31,7 +31,7 @@ mod token; pub(crate) use error::{ErrorInput, InternalParseError}; pub use error::{ParseError, ParserErrorKind}; -pub use lexer::LexError; +pub use lexer::{DataType, LexError}; pub use token::{Token, TokenWithLocation}; pub(crate) type ParserInput<'a> = &'a [TokenWithLocation<'a>]; From b48558109e8ede10ca6672f51c414d0a825bbe52 Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Wed, 8 Jan 2025 23:22:50 -0500 Subject: [PATCH 3/8] feat!: make `mut` lowercase-only and add it to `ReservedKeyword` We now fail on `muta` --- quil-rs/src/parser/lexer/mod.rs | 4 ++-- quil-rs/src/parser/token.rs | 5 ++++- quil-rs/src/reserved.rs | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/quil-rs/src/parser/lexer/mod.rs b/quil-rs/src/parser/lexer/mod.rs index 8f21e8fe..450a7173 100644 --- a/quil-rs/src/parser/lexer/mod.rs +++ b/quil-rs/src/parser/lexer/mod.rs @@ -17,7 +17,7 @@ mod quoted_strings; mod wrapped_parsers; use nom::{ - bytes::complete::{is_a, tag_no_case, take_till, take_while, take_while1}, + bytes::complete::{is_a, take_till, take_while, take_while1}, character::complete::{digit1, one_of}, combinator::{all_consuming, map, recognize, value}, multi::many0, @@ -327,7 +327,7 @@ fn lex_modifier(input: LexInput) -> InternalLexResult { value(Token::Modifier(Modifier::Controlled), tag("CONTROLLED")), value(Token::Modifier(Modifier::Dagger), 
tag("DAGGER")), value(Token::Modifier(Modifier::Forked), tag("FORKED")), - value(Token::Mutable, tag_no_case("MUT")), + value(Token::Mutable, tag("mut")), value(Token::Offset, tag("OFFSET")), value(Token::PauliSum, tag("PAULI-SUM")), value(Token::Permutation, tag("PERMUTATION")), diff --git a/quil-rs/src/parser/token.rs b/quil-rs/src/parser/token.rs index 53164b57..bf253014 100644 --- a/quil-rs/src/parser/token.rs +++ b/quil-rs/src/parser/token.rs @@ -67,6 +67,9 @@ where } } +// When adding tokens that are keywords, you also need to update +// [`crate::reserved::ReservedKeyword`], and similarly for gates ([`crate::reserved::ReservedGate`]) +// and constants ([`crate::reserved::ReservedConstant`]). #[derive(Clone, PartialEq)] pub enum Token { As, @@ -118,7 +121,7 @@ impl fmt::Display for Token { Token::NonBlocking => write!(f, "NONBLOCKING"), Token::Matrix => write!(f, "MATRIX"), Token::Modifier(m) => write!(f, "{m}"), - Token::Mutable => write!(f, "MUT"), + Token::Mutable => write!(f, "mut"), Token::NewLine => write!(f, "NEWLINE"), Token::Operator(op) => write!(f, "{op}"), Token::Offset => write!(f, "OFFSET"), diff --git a/quil-rs/src/reserved.rs b/quil-rs/src/reserved.rs index c0af544e..8f0c9a07 100644 --- a/quil-rs/src/reserved.rs +++ b/quil-rs/src/reserved.rs @@ -77,6 +77,8 @@ pub enum ReservedKeyword { Measure, Move, Mul, + #[strum(serialize = "mut")] + Mutable, Neg, Nop, Not, From 8d75e67dd94049de64dac30f1a05b792b64d4dea Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 00:42:48 -0500 Subject: [PATCH 4/8] fix!: only parse keywords if they form the whole identifier This prevents `BITS` from being treated as the keyword `BIT` followed by the identifier `S`. 
--- quil-rs/src/parser/lexer/mod.rs | 116 +++++--------------------------- quil-rs/src/parser/mod.rs | 4 +- quil-rs/src/parser/token.rs | 91 ++++++++++++++++++++++--- quil-rs/src/reserved.rs | 86 +++++++---------------- 4 files changed, 122 insertions(+), 175 deletions(-) diff --git a/quil-rs/src/parser/lexer/mod.rs b/quil-rs/src/parser/lexer/mod.rs index 450a7173..4f22ea93 100644 --- a/quil-rs/src/parser/lexer/mod.rs +++ b/quil-rs/src/parser/lexer/mod.rs @@ -16,6 +16,8 @@ mod error; mod quoted_strings; mod wrapped_parsers; +use std::str::FromStr; + use nom::{ bytes::complete::{is_a, take_till, take_while, take_while1}, character::complete::{digit1, one_of}, @@ -28,7 +30,7 @@ use nom::{ use nom_locate::LocatedSpan; use wrapped_parsers::{alt, tag}; -pub use super::token::{Token, TokenWithLocation}; +pub use super::token::{KeywordToken, Token, TokenWithLocation}; use crate::parser::lexer::wrapped_parsers::expecting; use crate::parser::token::token_with_location; pub(crate) use error::InternalLexError; @@ -92,7 +94,7 @@ pub enum Command { Xor, } -#[derive(Debug, Clone, PartialEq, Eq, strum::Display)] +#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)] #[strum(serialize_all = "UPPERCASE")] pub enum DataType { Bit, @@ -101,7 +103,7 @@ pub enum DataType { Integer, } -#[derive(Debug, Clone, PartialEq, Eq, strum::Display)] +#[derive(Debug, Clone, PartialEq, Eq, strum::Display, strum::EnumString)] #[strum(serialize_all = "UPPERCASE")] pub enum Modifier { Controlled, @@ -163,8 +165,6 @@ fn lex_token(input: LexInput) -> InternalLexResult { "a token", ( token_with_location(lex_comment), - token_with_location(lex_data_type), - token_with_location(lex_modifier), token_with_location(lex_punctuation), token_with_location(lex_target), token_with_location(lex_string), @@ -172,21 +172,7 @@ fn lex_token(input: LexInput) -> InternalLexResult { token_with_location(lex_operator), token_with_location(lex_number), token_with_location(lex_variable), - 
token_with_location(lex_non_blocking), - // This should come last because it's sort of a catch all - token_with_location(lex_command_or_identifier), - ), - )(input) -} - -fn lex_data_type(input: LexInput) -> InternalLexResult { - alt( - "a data type", - ( - value(Token::DataType(DataType::Bit), tag("BIT")), - value(Token::DataType(DataType::Integer), tag("INTEGER")), - value(Token::DataType(DataType::Octet), tag("OCTET")), - value(Token::DataType(DataType::Real), tag("REAL")), + token_with_location(lex_keyword_or_identifier), ), )(input) } @@ -197,62 +183,16 @@ fn lex_comment(input: LexInput) -> InternalLexResult { Ok((input, Token::Comment(content.to_string()))) } -/// If the given identifier string matches a command keyword, return the keyword; -/// otherwise, return the original identifier as a token. -fn recognize_command_or_identifier(identifier: String) -> Token { - use Command::*; - - match identifier.as_str() { - "DEFGATE" => Token::Command(DefGate), - "ADD" => Token::Command(Add), - "AND" => Token::Command(And), - "CALL" => Token::Command(Call), - "CONVERT" => Token::Command(Convert), - "DIV" => Token::Command(Div), - "EQ" => Token::Command(Eq), - "EXCHANGE" => Token::Command(Exchange), - "GE" => Token::Command(GE), - "GT" => Token::Command(GT), - "IOR" => Token::Command(Ior), - "LE" => Token::Command(LE), - "LOAD" => Token::Command(Load), - "LT" => Token::Command(LT), - "MOVE" => Token::Command(Move), - "MUL" => Token::Command(Mul), - "NEG" => Token::Command(Neg), - "NOT" => Token::Command(Not), - "STORE" => Token::Command(Store), - "SUB" => Token::Command(Sub), - "XOR" => Token::Command(Xor), - "DEFCIRCUIT" => Token::Command(DefCircuit), - "MEASURE" => Token::Command(Measure), - "HALT" => Token::Command(Halt), - "WAIT" => Token::Command(Wait), - "JUMP-WHEN" => Token::Command(JumpWhen), - "JUMP-UNLESS" => Token::Command(JumpUnless), - "JUMP" => Token::Command(Jump), - "RESET" => Token::Command(Reset), - "NOP" => Token::Command(Nop), - "INCLUDE" => 
Token::Command(Include), - "PRAGMA" => Token::Command(Pragma), - "DECLARE" => Token::Command(Declare), - "CAPTURE" => Token::Command(Capture), - "DEFCAL" => Token::Command(DefCal), - "DEFFRAME" => Token::Command(DefFrame), - "DEFWAVEFORM" => Token::Command(DefWaveform), - "DELAY" => Token::Command(Delay), - "FENCE" => Token::Command(Fence), - "PULSE" => Token::Command(Pulse), - "RAW-CAPTURE" => Token::Command(RawCapture), - "SET-FREQUENCY" => Token::Command(SetFrequency), - "SET-PHASE" => Token::Command(SetPhase), - "SET-SCALE" => Token::Command(SetScale), - "SWAP-PHASES" => Token::Command(SwapPhases), - "SHIFT-FREQUENCY" => Token::Command(ShiftFrequency), - "SHIFT-PHASE" => Token::Command(ShiftPhase), - "LABEL" => Token::Command(Label), - _ => Token::Identifier(identifier), +fn keyword_or_identifier(identifier: String) -> Token { + fn parse(token: impl Fn(T) -> Token, identifier: &str) -> Result { + T::from_str(identifier).map(token) } + + parse(KeywordToken::into, &identifier) + .or_else(|_| parse(Token::Command, &identifier)) + .or_else(|_| parse(Token::DataType, &identifier)) + .or_else(|_| parse(Token::Modifier, &identifier)) + .unwrap_or(Token::Identifier(identifier)) } fn is_valid_identifier_leading_character(chr: char) -> bool { @@ -286,9 +226,9 @@ fn lex_identifier_raw(input: LexInput) -> InternalLexResult { )(input) } -fn lex_command_or_identifier(input: LexInput) -> InternalLexResult { +fn lex_keyword_or_identifier(input: LexInput) -> InternalLexResult { let (input, identifier) = lex_identifier_raw(input)?; - let token = recognize_command_or_identifier(identifier); + let token = keyword_or_identifier(identifier); Ok((input, token)) } @@ -298,10 +238,6 @@ fn lex_target(input: LexInput) -> InternalLexResult { Ok((input, Token::Target(label))) } -fn lex_non_blocking(input: LexInput) -> InternalLexResult { - value(Token::NonBlocking, tag("NONBLOCKING"))(input) -} - fn lex_number(input: LexInput) -> InternalLexResult { let (input, float_string): (LexInput, 
LexInput) = recognize(double)(input)?; let integer_parse_result: IResult = all_consuming(digit1)(float_string); @@ -318,24 +254,6 @@ fn lex_number(input: LexInput) -> InternalLexResult { )) } -fn lex_modifier(input: LexInput) -> InternalLexResult { - alt( - "a modifier token", - ( - value(Token::As, tag("AS")), - value(Token::Matrix, tag("MATRIX")), - value(Token::Modifier(Modifier::Controlled), tag("CONTROLLED")), - value(Token::Modifier(Modifier::Dagger), tag("DAGGER")), - value(Token::Modifier(Modifier::Forked), tag("FORKED")), - value(Token::Mutable, tag("mut")), - value(Token::Offset, tag("OFFSET")), - value(Token::PauliSum, tag("PAULI-SUM")), - value(Token::Permutation, tag("PERMUTATION")), - value(Token::Sharing, tag("SHARING")), - ), - )(input) -} - fn lex_operator(input: LexInput) -> InternalLexResult { use Operator::*; map( diff --git a/quil-rs/src/parser/mod.rs b/quil-rs/src/parser/mod.rs index dd4727f3..5a90658b 100644 --- a/quil-rs/src/parser/mod.rs +++ b/quil-rs/src/parser/mod.rs @@ -31,8 +31,8 @@ mod token; pub(crate) use error::{ErrorInput, InternalParseError}; pub use error::{ParseError, ParserErrorKind}; -pub use lexer::{DataType, LexError}; -pub use token::{Token, TokenWithLocation}; +pub use lexer::{Command, DataType, LexError, Modifier}; +pub use token::{KeywordToken, Token, TokenWithLocation}; pub(crate) type ParserInput<'a> = &'a [TokenWithLocation<'a>]; type InternalParserResult<'a, R, E = InternalParseError<'a>> = IResult, R, E>; diff --git a/quil-rs/src/parser/token.rs b/quil-rs/src/parser/token.rs index bf253014..24b5d4e3 100644 --- a/quil-rs/src/parser/token.rs +++ b/quil-rs/src/parser/token.rs @@ -67,9 +67,78 @@ where } } -// When adding tokens that are keywords, you also need to update -// [`crate::reserved::ReservedKeyword`], and similarly for gates ([`crate::reserved::ReservedGate`]) -// and constants ([`crate::reserved::ReservedConstant`]). 
+/// The subset of [`Token`]s which (a) do not contain more specific data and (b) are keywords. Used +/// to ensure that keyword-checking remains in sync with the definition of [`Token`]. +#[derive(Debug, Copy, Clone, PartialEq, Eq, strum::Display, strum::EnumString)] +#[strum(serialize_all = "SCREAMING-KEBAB-CASE")] +pub enum KeywordToken { + As, + Matrix, + #[strum(serialize = "mut")] + Mutable, + #[strum(serialize = "NONBLOCKING")] + NonBlocking, + Offset, + PauliSum, + Permutation, + Sharing, +} + +impl From for Token { + fn from(token: KeywordToken) -> Self { + match token { + KeywordToken::As => Token::As, + KeywordToken::Matrix => Token::Matrix, + KeywordToken::Mutable => Token::Mutable, + KeywordToken::NonBlocking => Token::NonBlocking, + KeywordToken::Offset => Token::Offset, + KeywordToken::PauliSum => Token::PauliSum, + KeywordToken::Permutation => Token::Permutation, + KeywordToken::Sharing => Token::Sharing, + } + } +} + +impl TryFrom for KeywordToken { + type Error = (); + + fn try_from(token: Token) -> Result { + // This match is explicit so that if you add a new [`Token`] constructor you have to decide + // if it's a keyword. Please do not add a top-level wildcard match here. 
+ match token { + Token::As => Ok(KeywordToken::As), + Token::Matrix => Ok(KeywordToken::Matrix), + Token::Mutable => Ok(KeywordToken::Mutable), + Token::NonBlocking => Ok(KeywordToken::NonBlocking), + Token::Offset => Ok(KeywordToken::Offset), + Token::PauliSum => Ok(KeywordToken::PauliSum), + Token::Permutation => Ok(KeywordToken::Permutation), + Token::Sharing => Ok(KeywordToken::Sharing), + + Token::Colon + | Token::Comma + | Token::Command(_) + | Token::Comment(_) + | Token::DataType(_) + | Token::Float(_) + | Token::Identifier(_) + | Token::Indentation + | Token::Integer(_) + | Token::Target(_) + | Token::LBracket + | Token::LParenthesis + | Token::Modifier(_) + | Token::NewLine + | Token::Operator(_) + | Token::RBracket + | Token::RParenthesis + | Token::Semicolon + | Token::String(_) + | Token::Variable(_) => Err(()), + } + } +} + #[derive(Clone, PartialEq)] pub enum Token { As, @@ -105,7 +174,7 @@ pub enum Token { impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Token::As => write!(f, "AS"), + Token::As => write!(f, "{}", KeywordToken::As), Token::Colon => write!(f, ":"), Token::Comma => write!(f, ","), Token::Command(cmd) => write!(f, "{cmd}"), @@ -118,19 +187,19 @@ impl fmt::Display for Token { Token::Target(label) => write!(f, "{label}"), Token::LBracket => write!(f, "["), Token::LParenthesis => write!(f, "("), - Token::NonBlocking => write!(f, "NONBLOCKING"), - Token::Matrix => write!(f, "MATRIX"), + Token::NonBlocking => write!(f, "{}", KeywordToken::NonBlocking), + Token::Matrix => write!(f, "{}", KeywordToken::Matrix), Token::Modifier(m) => write!(f, "{m}"), - Token::Mutable => write!(f, "mut"), + Token::Mutable => write!(f, "{}", KeywordToken::Mutable), Token::NewLine => write!(f, "NEWLINE"), Token::Operator(op) => write!(f, "{op}"), - Token::Offset => write!(f, "OFFSET"), - Token::PauliSum => write!(f, "PAULI-SUM"), - Token::Permutation => write!(f, "PERMUTATION"), + Token::Offset => write!(f, "{}", KeywordToken::Offset), + 
Token::PauliSum => write!(f, "{}", KeywordToken::PauliSum), + Token::Permutation => write!(f, "{}", KeywordToken::Permutation), Token::RBracket => write!(f, "]"), Token::RParenthesis => write!(f, ")"), Token::Semicolon => write!(f, ";"), - Token::Sharing => write!(f, "SHARING"), + Token::Sharing => write!(f, "{}", KeywordToken::Sharing), Token::String(s) => write!(f, "{}", QuotedString(s)), Token::Variable(v) => write!(f, "{v}"), } diff --git a/quil-rs/src/reserved.rs b/quil-rs/src/reserved.rs index 8f0c9a07..eeab6e2f 100644 --- a/quil-rs/src/reserved.rs +++ b/quil-rs/src/reserved.rs @@ -4,10 +4,15 @@ use std::{fmt::Display, str::FromStr}; use strum; +pub use crate::parser::{Command, DataType, KeywordToken, Modifier}; + /// An enum that can represent any reserved token in quil. #[derive(Clone, Debug, PartialEq, Eq)] pub enum ReservedToken { - Keyword(ReservedKeyword), + Command(Command), + DataType(DataType), + Modifier(Modifier), + OtherKeyword(KeywordToken), Gate(ReservedGate), Constant(ReservedConstant), } @@ -20,81 +25,36 @@ impl FromStr for ReservedToken { type Err = NotReservedToken; fn from_str(s: &str) -> Result { - if let Ok(keyword) = ReservedKeyword::from_str(s) { - Ok(Self::Keyword(keyword)) - } else if let Ok(gate) = ReservedGate::from_str(s) { - Ok(Self::Gate(gate)) - } else if let Ok(constant) = ReservedConstant::from_str(s) { - Ok(Self::Constant(constant)) - } else { - Err(NotReservedToken(s.to_string())) + fn parse( + reserved: impl Fn(T) -> ReservedToken, + s: &str, + ) -> Result { + T::from_str(s).map(reserved) } + + parse(Self::Command, s) + .or_else(|_| parse(Self::DataType, s)) + .or_else(|_| parse(Self::Modifier, s)) + .or_else(|_| parse(Self::OtherKeyword, s)) + .or_else(|_| parse(Self::Gate, s)) + .or_else(|_| parse(Self::Constant, s)) + .map_err(|_| NotReservedToken(s.to_string())) } } impl Display for ReservedToken { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Keyword(keyword) => write!(f, 
"{keyword}"), + Self::Command(command) => write!(f, "{command}"), + Self::DataType(data_type) => write!(f, "{data_type}"), + Self::Modifier(modifier) => write!(f, "{modifier}"), + Self::OtherKeyword(keyword_token) => write!(f, "{keyword_token}"), Self::Gate(gate) => write!(f, "{gate}"), Self::Constant(constant) => write!(f, "{constant}"), } } } -/// Any reserved keyword that isn't specifically a gate identifier or constant -#[derive(Clone, Copy, Debug, PartialEq, Eq, strum::Display, strum::EnumString)] -#[strum(serialize_all = "UPPERCASE")] -pub enum ReservedKeyword { - Add, - And, - As, - Controlled, - Convert, - Dagger, - Declare, - DefCircuit, - DefGate, - Div, - Eq, - Exchange, - Forked, - Ge, - Gt, - Halt, - Include, - Ior, - Jump, - #[strum(serialize = "JUMP-UNLESS")] - JumpUnless, - #[strum(serialize = "JUMP-WHEN")] - JumpWhen, - Label, - Le, - Load, - Lt, - Matrix, - Measure, - Move, - Mul, - #[strum(serialize = "mut")] - Mutable, - Neg, - Nop, - Not, - Offset, - #[strum(serialize = "PAULI-SUM")] - PauliSum, - Permutation, - Pragma, - Reset, - Sharing, - Store, - Sub, - Wait, - Xor, -} - /// Every reserved Gate identifier #[derive(Clone, Copy, Debug, PartialEq, Eq, strum::Display, strum::EnumString)] #[strum(serialize_all = "UPPERCASE")] From e29e57862c02e8a950d3fd1d3584d8929813da11 Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 00:44:06 -0500 Subject: [PATCH 5/8] test: new failing `round_trip` test, unrelated to prior changes: `iNf[0]` --- quil-rs/proptest-regressions/expression/mod.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/quil-rs/proptest-regressions/expression/mod.txt b/quil-rs/proptest-regressions/expression/mod.txt index b24d9cb8..846843a0 100644 --- a/quil-rs/proptest-regressions/expression/mod.txt +++ b/quil-rs/proptest-regressions/expression/mod.txt @@ -7,3 +7,4 @@ cc 4c32128d724ed0f840715fae4e194c99262dc153c64be39d2acf45b8903b20f7 # shrinks to value = Complex { re: 0.0, im: -0.13530277317700273 } cc 
5cc95f2159ad7120bbaf296d3a9fb26fef30f57b61e76b3e0dc99f4759009fdb # shrinks to e = Number(Complex { re: 0.0, im: -2.772221265116396 }) cc de70a1853ccef983fac85a87761ba08bfb2d54b2d4e880d5d90e7b4a75ecafb5 # shrinks to e = Address(MemoryReference { name: "mut", index: 0 }) +cc 9ad50859b68cb403ce1a67af0feef1f55d25587466878e364ba2810be5910b14 # shrinks to e = Address(MemoryReference { name: "iNf", index: 0 }) From 977ada1e7135298b524ac14a927061d5c933f72a Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 01:10:49 -0500 Subject: [PATCH 6/8] fix!: don't parse `NaN`, `inf`, and `infinity` as floats We could have resolved this the other way, by marking these keywords as reserved, but the Quil spec doesn't allow for `NaN` or `Infinity` and I think this is more useful for our purposes. --- quil-rs/src/parser/lexer/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/quil-rs/src/parser/lexer/mod.rs b/quil-rs/src/parser/lexer/mod.rs index 4f22ea93..4102ec82 100644 --- a/quil-rs/src/parser/lexer/mod.rs +++ b/quil-rs/src/parser/lexer/mod.rs @@ -170,9 +170,12 @@ fn lex_token(input: LexInput) -> InternalLexResult { token_with_location(lex_string), // Operator must come before number (or it may be parsed as a prefix) token_with_location(lex_operator), - token_with_location(lex_number), token_with_location(lex_variable), + // Identifiers must come before numbers so that `NaN`, `Inf`, and `Infinity` aren't + // parsed as floats; Nom, as of version 7.1.1, will parse those strings, + // case-insensitively, as floats token_with_location(lex_keyword_or_identifier), + token_with_location(lex_number), ), )(input) } From 97334832c0c0063a9afe06dc1a2b493caf38104c Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 12:32:26 -0500 Subject: [PATCH 7/8] chore: respond to review from @jselig-rigetti --- quil-rs/src/parser/lexer/mod.rs | 19 ++++++++++++++++--- quil-rs/src/parser/token.rs | 7 ++++--- 2 files changed, 20 
insertions(+), 6 deletions(-) diff --git a/quil-rs/src/parser/lexer/mod.rs b/quil-rs/src/parser/lexer/mod.rs index 4102ec82..9731eadd 100644 --- a/quil-rs/src/parser/lexer/mod.rs +++ b/quil-rs/src/parser/lexer/mod.rs @@ -486,9 +486,9 @@ mod tests { case("a", vec![Token::Identifier("a".to_string())]), case("_a-2_b-2_", vec![Token::Identifier("_a-2_b-2_".to_string())]), case("a-2-%var", vec![ - Token::Identifier("a-2".to_string()), - Token::Operator(Operator::Minus), - Token::Variable("var".to_string()) + Token::Identifier("a-2".to_string()), + Token::Operator(Operator::Minus), + Token::Variable("var".to_string()) ]), case("BIT", vec![Token::DataType(DataType::Bit)]), case("BITS", vec![Token::Identifier("BITS".to_string())]), @@ -496,6 +496,19 @@ mod tests { case("nan", vec![Token::Identifier("nan".to_string())]), case("NaNa", vec![Token::Identifier("NaNa".to_string())]), case("nana", vec![Token::Identifier("nana".to_string())]), + case("INF", vec![Token::Identifier("INF".to_string())]), + case("Infinity", vec![Token::Identifier("Infinity".to_string())]), + case("Inferior", vec![Token::Identifier("Inferior".to_string())]), + case("-NaN", vec![Token::Operator(Operator::Minus), Token::Identifier("NaN".to_string())]), + case("-inf", vec![Token::Operator(Operator::Minus), Token::Identifier("inf".to_string())]), + case("-Infinity", vec![ + Token::Operator(Operator::Minus), + Token::Identifier("Infinity".to_string()) + ]), + case("-inferior", vec![ + Token::Operator(Operator::Minus), + Token::Identifier("inferior".to_string()) + ]), )] fn it_lexes_identifier(input: &str, expected: Vec) { let input = LocatedSpan::new(input); diff --git a/quil-rs/src/parser/token.rs b/quil-rs/src/parser/token.rs index 24b5d4e3..47dc1623 100644 --- a/quil-rs/src/parser/token.rs +++ b/quil-rs/src/parser/token.rs @@ -67,8 +67,8 @@ where } } -/// The subset of [`Token`]s which (a) do not contain more specific data and (b) are keywords. 
Used -/// to ensure that keyword-checking remains in sync with the definition of [`Token`]. +/// The subset of [`Token`]s which (a) do not have arguments and (b) are keywords. Used to ensure +/// that keyword-checking remains in sync with the definition of [`Token`]. #[derive(Debug, Copy, Clone, PartialEq, Eq, strum::Display, strum::EnumString)] #[strum(serialize_all = "SCREAMING-KEBAB-CASE")] pub enum KeywordToken { @@ -104,7 +104,8 @@ impl TryFrom for KeywordToken { fn try_from(token: Token) -> Result { // This match is explicit so that if you add a new [`Token`] constructor you have to decide - // if it's a keyword. Please do not add a top-level wildcard match here. + // if it's a keyword. + #[deny(clippy::wildcard_enum_match_arm, clippy::wildcard_in_or_patterns)] match token { Token::As => Ok(KeywordToken::As), Token::Matrix => Ok(KeywordToken::Matrix), From 4a65159b0afc663219028cef701e30031432a2ab Mon Sep 17 00:00:00 2001 From: Antal Spector-Zabusky Date: Thu, 9 Jan 2025 19:03:00 -0500 Subject: [PATCH 8/8] chore: update Cargo.lock --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index abfd1315..f4112525 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1139,7 +1139,7 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quil-cli" -version = "0.6.2" +version = "0.6.3-rc.0" dependencies = [ "anyhow", "clap", @@ -1148,7 +1148,7 @@ dependencies = [ [[package]] name = "quil-py" -version = "0.13.3-rc.0" +version = "0.13.3-rc.1" dependencies = [ "indexmap", "ndarray", @@ -1163,7 +1163,7 @@ dependencies = [ [[package]] name = "quil-rs" -version = "0.29.2" +version = "0.29.3-rc.0" dependencies = [ "approx", "clap",