diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index a8993e685..eaa3e9b77 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -128,14 +128,39 @@ pub trait Dialect: Debug + Any { ch == '"' || ch == '`' } - /// Return the character used to quote identifiers. - fn identifier_quote_style(&self, _identifier: &str) -> Option { + /// Determine if a character starts a potential nested quoted identifier. + /// Example: RedShift supports the following quote styles to all mean the same thing: + /// ```sql + /// SELECT 1 AS foo; + /// SELECT 1 AS "foo"; + /// SELECT 1 AS [foo]; + /// SELECT 1 AS ["foo"]; + /// ``` + fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool { + false + } + + /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true + /// If the next sequence of tokens potentially represent a nested identifier, then this method + /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style. + /// + /// Example (Redshift): + /// ```text + /// `["foo"]` => Some(`[`, Some(`"`)) + /// `[foo]` => Some(`[`, None) + /// `[0]` => None + /// `"foo"` => None + /// ``` + fn peek_nested_delimited_identifier_quotes( + &self, + mut _chars: Peekable>, + ) -> Option<(char, Option)> { None } - /// Determine if quoted characters are proper for identifier - fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable>) -> bool { - true + /// Return the character used to quote identifiers. + fn identifier_quote_style(&self, _identifier: &str) -> Option { + None } /// Determine if a character is a valid start character for an unquoted identifier @@ -848,6 +873,17 @@ mod tests { self.0.is_delimited_identifier_start(ch) } + fn is_nested_delimited_identifier_start(&self, ch: char) -> bool { + self.0.is_nested_delimited_identifier_start(ch) + } + + fn peek_nested_delimited_identifier_quotes( + &self, + chars: std::iter::Peekable>, + ) -> Option<(char, Option)> { + self.0.peek_nested_delimited_identifier_quotes(chars) + } + fn identifier_quote_style(&self, identifier: &str) -> Option { self.0.identifier_quote_style(identifier) } @@ -856,13 +892,6 @@ mod tests { self.0.supports_string_literal_backslash_escape() } - fn is_proper_identifier_inside_quotes( - &self, - chars: std::iter::Peekable>, - ) -> bool { - self.0.is_proper_identifier_inside_quotes(chars) - } - fn supports_filter_during_aggregation(&self) -> bool { self.0.supports_filter_during_aggregation() } diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 48eb00ab1..55405ba53 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {} // in the Postgres dialect, the query will be parsed as an array, while in the Redshift dialect it will // be a json path impl Dialect for RedshiftSqlDialect { - fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '"' || ch == '[' + /// Determine if a character starts a potential nested quoted identifier. + /// Example: RedShift supports the following quote styles to all mean the same thing: + /// ```sql + /// SELECT 1 AS foo; + /// SELECT 1 AS "foo"; + /// SELECT 1 AS [foo]; + /// SELECT 1 AS ["foo"]; + /// ``` + fn is_nested_delimited_identifier_start(&self, ch: char) -> bool { + ch == '[' } - /// Determine if quoted characters are proper for identifier - /// It's needed to distinguish treating square brackets as quotes from - /// treating them as json path. If there is identifier then we assume - /// there is no json path. - fn is_proper_identifier_inside_quotes(&self, mut chars: Peekable>) -> bool { + /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true + /// If the next sequence of tokens potentially represent a nested identifier, then this method + /// returns a tuple containing the outer quote style, and if present, the inner (nested) quote style. + /// + /// Example (Redshift): + /// ```text + /// `["foo"]` => Some(`[`, Some(`"`)) + /// `[foo]` => Some(`[`, None) + /// `[0]` => None + /// `"foo"` => None + /// ``` + fn peek_nested_delimited_identifier_quotes( + &self, + mut chars: Peekable>, + ) -> Option<(char, Option)> { + if chars.peek() != Some(&'[') { + return None; + } + chars.next(); + let mut not_white_chars = chars.skip_while(|ch| ch.is_whitespace()).peekable(); + if let Some(&ch) = not_white_chars.peek() { - return self.is_identifier_start(ch); + if ch == '"' { + return Some(('[', Some('"'))); + } + if self.is_identifier_start(ch) { + return Some(('[', None)); + } } - false + + None } fn is_identifier_start(&self, ch: char) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index aacfc16fa..9269f4fe6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::DoubleQuotedString(s))) } // delimited (quoted) identifier + quote_start if self.dialect.is_delimited_identifier_start(ch) => { + let word = self.tokenize_quoted_identifier(quote_start, chars)?; + Ok(Some(Token::make_word(&word, Some(quote_start)))) + } + // Potentially nested delimited (quoted) identifier quote_start - if self.dialect.is_delimited_identifier_start(ch) + if self + .dialect + .is_nested_delimited_identifier_start(quote_start) && self .dialect - .is_proper_identifier_inside_quotes(chars.peekable.clone()) => + .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) + .is_some() => { - let error_loc = chars.location(); - chars.next(); // consume the opening quote + let Some((quote_start, nested_quote_start)) = self + .dialect + .peek_nested_delimited_identifier_quotes(chars.peekable.clone()) + else { + return self.tokenizer_error( + chars.location(), + format!("Expected nested delimiter '{quote_start}' before EOF."), + ); + }; + + let Some(nested_quote_start) = nested_quote_start else { + let word = self.tokenize_quoted_identifier(quote_start, chars)?; + return Ok(Some(Token::make_word(&word, Some(quote_start)))); + }; + + let mut word = vec![]; let quote_end = Word::matching_end_quote(quote_start); - let (s, last_char) = self.parse_quoted_ident(chars, quote_end); + let nested_quote_end = Word::matching_end_quote(nested_quote_start); + let error_loc = chars.location(); - if last_char == Some(quote_end) { - Ok(Some(Token::make_word(&s, Some(quote_start)))) - } else { - self.tokenizer_error( + chars.next(); // skip the first delimiter + peeking_take_while(chars, |ch| ch.is_whitespace()); + if chars.peek() != Some(&nested_quote_start) { + return self.tokenizer_error( + error_loc, + format!("Expected nested delimiter '{nested_quote_start}' before EOF."), + ); + } + word.push(nested_quote_start.into()); + word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?); + word.push(nested_quote_end.into()); + peeking_take_while(chars, |ch| ch.is_whitespace()); + if chars.peek() != Some("e_end) { + return self.tokenizer_error( error_loc, format!("Expected close delimiter '{quote_end}' before EOF."), - ) + ); } + chars.next(); // skip close delimiter + + Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) } // numbers and period '0'..='9' | '.' => { @@ -1597,6 +1633,27 @@ impl<'a> Tokenizer<'a> { s } + /// Read a quoted identifier + fn tokenize_quoted_identifier( + &self, + quote_start: char, + chars: &mut State, + ) -> Result { + let error_loc = chars.location(); + chars.next(); // consume the opening quote + let quote_end = Word::matching_end_quote(quote_start); + let (s, last_char) = self.parse_quoted_ident(chars, quote_end); + + if last_char == Some(quote_end) { + Ok(s) + } else { + self.tokenizer_error( + error_loc, + format!("Expected close delimiter '{quote_end}' before EOF."), + ) + } + } + /// Read a single quoted string, starting with the opening quote. fn tokenize_escaped_single_quoted_string( &self, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index f0c1f0c74..2f206d15b 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -176,6 +176,8 @@ fn parse_delimited_identifiers() { } redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); + // An alias starting with a number + redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#); redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#); //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); } @@ -222,7 +224,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "o_orderkey".to_string(), @@ -245,7 +247,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Bracket { key: Expr::Value(Value::SingleQuotedString("id".to_owned())) @@ -269,7 +271,7 @@ fn test_redshift_json_path() { path: JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Bracket { key: Expr::Value(Value::SingleQuotedString("id".to_owned())) @@ -279,6 +281,31 @@ fn test_redshift_json_path() { }, expr_from_projection(only(&select.projection)) ); + + let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM customer_orders_lineitem"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + &Expr::JsonAccess { + value: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("db1"), + Ident::new("sc1"), + Ident::new("tbl1"), + Ident::new("col1") + ])), + path: JsonPath { + path: vec![ + JsonPathElem::Bracket { + key: Expr::Value(number("0")) + }, + JsonPathElem::Dot { + key: "id".to_string(), + quoted: true, + } + ] + } + }, + expr_from_projection(only(&select.projection)) + ); } #[test] @@ -295,7 +322,7 @@ fn test_parse_json_path_from() { &Some(JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "a".to_string(), @@ -319,7 +346,7 @@ fn test_parse_json_path_from() { &Some(JsonPath { path: vec![ JsonPathElem::Bracket { - key: Expr::Value(Value::Number("0".parse().unwrap(), false)) + key: Expr::Value(number("0")) }, JsonPathElem::Dot { key: "a".to_string(), @@ -353,3 +380,24 @@ fn test_parse_json_path_from() { _ => panic!(), } } + +#[test] +fn test_parse_select_numbered_columns() { + // An alias starting with a number + redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#); + redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#); +} + +#[test] +fn test_parse_nested_quoted_identifier() { + redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#); + redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#); + // trim spaces + redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 AS [" 1 "]"#); + // invalid query + assert!(redshift() + .parse_sql_statements(r#"SELECT 1 AS ["1]"#) + .is_err()); +}