From 1227fddd48882a19c99b45185e60396dbf06cd3a Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sat, 11 May 2019 02:32:31 +0300
Subject: [PATCH 1/2] Reduce cloning of tokens

- Avoid cloning whitespace tokens in `peek_nth_token()` by using a &Token
  from `tokens.get()` instead of a cloned `Token` from `token_at()`
- Similarly avoid cloning in `next_token_no_skip`, and clone the
  non-whitespace tokens in `next_token` instead.
- Remove `token_at`, which was only used in `peek_token` and `peek_nth_token`
- Fold `prev_token_no_skip()` into `prev_token()` and make `prev_token`
  return nothing, as the return value isn't used anyway.
---
 src/sqlparser.rs | 57 ++++++++++++++++-----------------------------------------
 1 file changed, 16 insertions(+), 41 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index b8a001bed..6bb69c75a 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -567,13 +567,13 @@ impl Parser {
     pub fn peek_nth_token(&self, mut n: usize) -> Option<Token> {
         let mut index = self.index;
         loop {
-            match self.token_at(index) {
+            match self.tokens.get(index) {
                 Some(Token::Whitespace(_)) => {
                     index += 1;
                 }
                 Some(token) => {
                     if n == 0 {
-                        return Some(token);
+                        return Some(token.clone());
                     }
                     index += 1;
                     n -= 1;
@@ -589,56 +589,32 @@ impl Parser {
     pub fn next_token(&mut self) -> Option<Token> {
         loop {
             match self.next_token_no_skip() {
-                Some(Token::Whitespace(_)) => {
-                    continue;
-                }
-                token => {
-                    return token;
-                }
+                Some(Token::Whitespace(_)) => continue,
+                token => return token.cloned(),
             }
         }
     }
 
-    /// see the token at this index
-    fn token_at(&self, n: usize) -> Option<Token> {
-        if let Some(token) = self.tokens.get(n) {
-            Some(token.clone())
-        } else {
-            None
-        }
-    }
-
-    pub fn next_token_no_skip(&mut self) -> Option<Token> {
+    pub fn next_token_no_skip(&mut self) -> Option<&Token> {
         if self.index < self.tokens.len() {
             self.index += 1;
-            Some(self.tokens[self.index - 1].clone())
+            Some(&self.tokens[self.index - 1])
         } else {
             None
         }
     }
 
     /// Push back the last one non-whitespace token
-    pub fn prev_token(&mut self) -> Option<Token> {
-        // TODO: returned value is unused (available via peek_token)
+    pub fn prev_token(&mut self) {
         loop {
-            match self.prev_token_no_skip() {
-                Some(Token::Whitespace(_)) => {
+            assert!(self.index > 0);
+            if self.index > 0 {
+                self.index -= 1;
+                if let Token::Whitespace(_) = &self.tokens[self.index] {
                     continue;
                 }
-                token => {
-                    return token;
-                }
-            }
-        }
-    }
-
-    /// Get the previous token and decrement the token index
-    fn prev_token_no_skip(&mut self) -> Option<Token> {
-        if self.index > 0 {
-            self.index -= 1;
-            Some(self.tokens[self.index].clone())
-        } else {
-            None
+            };
+            return;
         }
     }
 
@@ -1776,13 +1752,12 @@ mod tests {
     fn test_prev_index() {
         let sql = "SELECT version()";
         all_dialects().run_parser_method(sql, |parser| {
-            assert_eq!(parser.prev_token(), None);
             assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
             assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
-            assert_eq!(parser.prev_token(), Some(Token::make_word("version", None)));
+            parser.prev_token();
             assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
-            assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT")));
-            assert_eq!(parser.prev_token(), None);
+            parser.prev_token();
+            assert_eq!(parser.peek_token(), Some(Token::make_keyword("SELECT")));
         });
    }
 }
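
A caller-side sketch of what the first patch above means in practice. This is
not part of the patch: `rewind_and_peek` is a made-up helper, written as it
might appear inside src/sqlparser.rs, using the existing `Parser` and `Token`
types. Since `prev_token()` no longer returns the token it skips back over
(and now asserts that at least one token has already been consumed), a caller
that wants to look at that token peeks explicitly:

    // Rewind by one non-whitespace token, then look at it without consuming
    // it again; only the single token actually inspected gets cloned.
    fn rewind_and_peek(parser: &mut Parser) -> Option<Token> {
        parser.prev_token();
        parser.peek_token()
    }
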
From e02625719e4d9d90392c11076e8bab558c82e0ca Mon Sep 17 00:00:00 2001
From: Nickolay Ponomarev
Date: Sun, 12 May 2019 01:23:23 +0300
Subject: [PATCH 2/2] Allow calling `prev_token()` after EOF

Before this, `next_token()` would only increment the index when returning
`Some(token)`. This meant that a caller wishing to rewind had to be careful
not to call `prev_token()` on EOF (`None`), while not forgetting to call it
for `Some`. Not doing this resulted in bugs in the under-tested
error-handling code.

After making this mistake several times, I'm changing `next_token()` /
`prev_token()` so that calling `next_token(); prev_token(); next_token()`
returns the same token in the first and the last invocation.
---
 src/sqlparser.rs | 76 +++++++++++++++++++++++++++++++++-------------------------------------
 1 file changed, 37 insertions(+), 39 deletions(-)

diff --git a/src/sqlparser.rs b/src/sqlparser.rs
index 6bb69c75a..85121eef4 100644
--- a/src/sqlparser.rs
+++ b/src/sqlparser.rs
@@ -66,6 +66,7 @@ impl Error for ParserError {}
 /// SQL Parser
 pub struct Parser {
     tokens: Vec<Token>,
+    /// The index of the first unprocessed token in `self.tokens`
     index: usize,
 }
 
@@ -558,7 +559,8 @@ impl Parser {
         }
     }
 
-    /// Return first non-whitespace token that has not yet been processed
+    /// Return the first non-whitespace token that has not yet been processed
+    /// (or None if reached end-of-file)
     pub fn peek_token(&self) -> Option<Token> {
         self.peek_nth_token(0)
     }
@@ -567,53 +569,48 @@ impl Parser {
     pub fn peek_nth_token(&self, mut n: usize) -> Option<Token> {
         let mut index = self.index;
         loop {
-            match self.tokens.get(index) {
-                Some(Token::Whitespace(_)) => {
-                    index += 1;
-                }
-                Some(token) => {
+            index += 1;
+            match self.tokens.get(index - 1) {
+                Some(Token::Whitespace(_)) => continue,
+                non_whitespace => {
                     if n == 0 {
-                        return Some(token.clone());
+                        return non_whitespace.cloned();
                     }
-                    index += 1;
                     n -= 1;
                 }
-                None => {
-                    return None;
-                }
             }
         }
     }
 
-    /// Get the next token skipping whitespace and increment the token index
+    /// Return the first non-whitespace token that has not yet been processed
+    /// (or None if reached end-of-file) and mark it as processed. OK to call
+    /// repeatedly after reaching EOF.
    pub fn next_token(&mut self) -> Option<Token> {
         loop {
-            match self.next_token_no_skip() {
+            self.index += 1;
+            match self.tokens.get(self.index - 1) {
                 Some(Token::Whitespace(_)) => continue,
                 token => return token.cloned(),
             }
         }
     }
 
+    /// Return the first unprocessed token, possibly whitespace.
     pub fn next_token_no_skip(&mut self) -> Option<&Token> {
-        if self.index < self.tokens.len() {
-            self.index += 1;
-            Some(&self.tokens[self.index - 1])
-        } else {
-            None
-        }
+        self.index += 1;
+        self.tokens.get(self.index - 1)
     }
 
-    /// Push back the last one non-whitespace token
+    /// Push back the last one non-whitespace token. Must be called after
+    /// `next_token()`, otherwise might panic. OK to call after
+    /// `next_token()` indicates an EOF.
     pub fn prev_token(&mut self) {
         loop {
             assert!(self.index > 0);
-            if self.index > 0 {
-                self.index -= 1;
-                if let Token::Whitespace(_) = &self.tokens[self.index] {
-                    continue;
-                }
-            };
+            self.index -= 1;
+            if let Some(Token::Whitespace(_)) = self.tokens.get(self.index) {
+                continue;
+            }
             return;
         }
     }
 
@@ -929,9 +926,7 @@ impl Parser {
             if name.is_some() {
                 self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", unexpected)
             } else {
-                if unexpected.is_some() {
-                    self.prev_token();
-                }
+                self.prev_token();
                 Ok(None)
             }
         }
@@ -1149,8 +1144,7 @@ impl Parser {
         reserved_kwds: &[&str],
     ) -> Result<Option<SQLIdent>, ParserError> {
         let after_as = self.parse_keyword("AS");
-        let maybe_alias = self.next_token();
-        match maybe_alias {
+        match self.next_token() {
             // Accept any identifier after `AS` (though many dialects have restrictions on
             // keywords that may appear here). If there's no `AS`: don't parse keywords,
             // which may start a construct allowed in this position, to be parsed as aliases.
@@ -1168,9 +1162,7 @@ impl Parser {
             if after_as {
                 return self.expected("an identifier after AS", not_an_ident);
             }
-            if not_an_ident.is_some() {
-                self.prev_token();
-            }
+            self.prev_token();
             Ok(None) // no alias found
         }
     }
@@ -1192,9 +1184,7 @@ impl Parser {
                     continue;
                 }
                 _ => {
-                    if token.is_some() {
-                        self.prev_token();
-                    }
+                    self.prev_token();
                     break;
                 }
             }
@@ -1750,14 +1740,22 @@ mod tests {
 
     #[test]
     fn test_prev_index() {
-        let sql = "SELECT version()";
+        let sql = "SELECT version";
         all_dialects().run_parser_method(sql, |parser| {
+            assert_eq!(parser.peek_token(), Some(Token::make_keyword("SELECT")));
+            assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
+            parser.prev_token();
             assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT")));
             assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
             parser.prev_token();
             assert_eq!(parser.peek_token(), Some(Token::make_word("version", None)));
+            assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
+            assert_eq!(parser.peek_token(), None);
+            parser.prev_token();
+            assert_eq!(parser.next_token(), Some(Token::make_word("version", None)));
+            assert_eq!(parser.next_token(), None);
+            assert_eq!(parser.next_token(), None);
             parser.prev_token();
-            assert_eq!(parser.peek_token(), Some(Token::make_keyword("SELECT")));
         });
     }
 }
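
To illustrate the invariant the second patch establishes, here is a hedged
sketch of the kind of error-handling helper it makes safe to write. The
function `consume_comma` is hypothetical (not added by these patches); it
assumes the existing `Parser` type and the tokenizer's `Token::Comma` variant,
and would live inside src/sqlparser.rs:

    // With the new behaviour `next_token()` advances the index even at EOF,
    // so rewinding unconditionally is correct whether the match saw
    // Some(non-comma token) or None.
    fn consume_comma(parser: &mut Parser) -> bool {
        match parser.next_token() {
            Some(Token::Comma) => true,
            _ => {
                // Before this patch, the caller had to check is_some() first.
                parser.prev_token();
                false
            }
        }
    }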