From 6daa4b059cde8b77b67a3699b174ef0f8edff350 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 27 Dec 2024 09:17:52 -0500 Subject: [PATCH] Refactor advancing token to avoid duplication, avoid borrow checker issues (#1618) Co-authored-by: Ifeanyi Ubah --- src/parser/mod.rs | 102 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2756ed6ca..65991d324 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1315,7 +1315,9 @@ impl<'a> Parser<'a> { let dialect = self.dialect; - let (next_token, next_token_index) = self.next_token_ref_with_index(); + self.advance_token(); + let next_token_index = self.get_current_index(); + let next_token = self.get_current_token(); let span = next_token.span; let expr = match &next_token.token { Token::Word(w) => { @@ -2953,7 +2955,9 @@ impl<'a> Parser<'a> { let dialect = self.dialect; - let (tok, tok_index) = self.next_token_ref_with_index(); + self.advance_token(); + let tok = self.get_current_token(); + let tok_index = self.get_current_index(); let span = tok.span; let regular_binary_operator = match &tok.token { Token::Spaceship => Some(BinaryOperator::Spaceship), @@ -3033,7 +3037,8 @@ impl<'a> Parser<'a> { // See https://www.postgresql.org/docs/current/sql-createoperator.html let mut idents = vec![]; loop { - idents.push(self.next_token_ref().to_string()); + self.advance_token(); + idents.push(self.get_current_token().to_string()); if !self.consume_token(&Token::Period) { break; } @@ -3480,6 +3485,8 @@ impl<'a> Parser<'a> { /// Return the first non-whitespace token that has not yet been processed /// or Token::EOF + /// + /// See [`Self::peek_token_ref`] to avoid the copy. pub fn peek_token(&self) -> TokenWithSpan { self.peek_nth_token(0) } @@ -3594,21 +3601,31 @@ impl<'a> Parser<'a> { /// Advances to the next non-whitespace token and returns a copy. /// - /// See [`Self::next_token_ref`] to avoid the copy. + /// Please use [`Self::advance_token`] and [`Self::get_current_token`] to + /// avoid the copy. pub fn next_token(&mut self) -> TokenWithSpan { - self.next_token_ref().clone() + self.advance_token(); + self.get_current_token().clone() } - pub fn next_token_ref(&mut self) -> &TokenWithSpan { - self.next_token_ref_with_index().0 + /// Returns the index of the current token + /// + /// This can be used with APIs that expect an index, such as + /// [`Self::token_at`] + pub fn get_current_index(&self) -> usize { + self.index.saturating_sub(1) } - /// Return the first non-whitespace token that has not yet been processed - /// and that tokens index and advances the tokens + /// Return the next unprocessed token, possibly whitespace. + pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { + self.index += 1; + self.tokens.get(self.index - 1) + } + + /// Advances the current token to the next non-whitespace token /// - /// # Notes: - /// OK to call repeatedly after reaching EOF. - pub fn next_token_ref_with_index(&mut self) -> (&TokenWithSpan, usize) { + /// See [`Self::get_current_token`] to get the current token after advancing + pub fn advance_token(&mut self) { loop { self.index += 1; match self.tokens.get(self.index - 1) { @@ -3616,25 +3633,38 @@ impl<'a> Parser<'a> { token: Token::Whitespace(_), span: _, }) => continue, - token => return (token.unwrap_or(&EOF_TOKEN), self.index - 1), + _ => break, } } } /// Returns a reference to the current token - pub fn current_token(&self) -> &TokenWithSpan { - self.tokens.get(self.index - 1).unwrap_or(&EOF_TOKEN) + /// + /// Does not advance the current token. + pub fn get_current_token(&self) -> &TokenWithSpan { + self.token_at(self.index.saturating_sub(1)) } - /// Return the first unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { - self.index += 1; - self.tokens.get(self.index - 1) + /// Returns a reference to the previous token + /// + /// Does not advance the current token. + pub fn get_previous_token(&self) -> &TokenWithSpan { + self.token_at(self.index.saturating_sub(2)) } - /// Push back the last one non-whitespace token. Must be called after - /// `next_token()`, otherwise might panic. OK to call after - /// `next_token()` indicates an EOF. + /// Returns a reference to the next token + /// + /// Does not advance the current token. + pub fn get_next_token(&self) -> &TokenWithSpan { + self.token_at(self.index) + } + + /// Seek back the last one non-whitespace token. + /// + /// Must be called after `next_token()`, otherwise might panic. OK to call + /// after `next_token()` indicates an EOF. + /// + // TODO rename to backup_token and deprecate prev_token? pub fn prev_token(&mut self) { loop { assert!(self.index > 0); @@ -3680,22 +3710,30 @@ impl<'a> Parser<'a> { #[must_use] pub fn parse_keyword(&mut self, expected: Keyword) -> bool { if self.peek_keyword(expected) { - self.next_token_ref(); + self.advance_token(); true } else { false } } + /// If the current token is the `expected` keyword, consume it and returns + /// + /// See [`Self::parse_keyword_token_ref`] to avoid the copy. #[must_use] pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option { self.parse_keyword_token_ref(expected).cloned() } + /// If the current token is the `expected` keyword, consume it and returns a reference to the next token. + /// #[must_use] pub fn parse_keyword_token_ref(&mut self, expected: Keyword) -> Option<&TokenWithSpan> { match &self.peek_token_ref().token { - Token::Word(w) if expected == w.keyword => Some(self.next_token_ref()), + Token::Word(w) if expected == w.keyword => { + self.advance_token(); + Some(self.get_current_token()) + } _ => None, } } @@ -3722,7 +3760,7 @@ impl<'a> Parser<'a> { } // consume all tokens for _ in 0..(tokens.len() + 1) { - self.next_token_ref(); + self.advance_token(); } true } @@ -3758,7 +3796,7 @@ impl<'a> Parser<'a> { .iter() .find(|keyword| **keyword == w.keyword) .map(|keyword| { - self.next_token_ref(); + self.advance_token(); *keyword }) } @@ -3813,10 +3851,12 @@ impl<'a> Parser<'a> { } /// Consume the next token if it matches the expected token, otherwise return false + /// + /// See [Self::advance_token] to consume the token unconditionally #[must_use] pub fn consume_token(&mut self, expected: &Token) -> bool { if self.peek_token_ref() == expected { - self.next_token_ref(); + self.advance_token(); true } else { false @@ -8338,9 +8378,9 @@ impl<'a> Parser<'a> { &mut self, ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { let dialect = self.dialect; - let (next_token, next_token_index) = self.next_token_ref_with_index(); - let _ = next_token; // release ref - let next_token = self.current_token(); + self.advance_token(); + let next_token = self.get_current_token(); + let next_token_index = self.get_current_index(); let mut trailing_bracket: MatchedTrailingBracket = false.into(); let mut data = match &next_token.token { @@ -8866,7 +8906,7 @@ impl<'a> Parser<'a> { Token::EOF | Token::Eq => break, _ => {} } - self.next_token_ref(); + self.advance_token(); } Ok(idents) }