diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 64dbc4b1b..c69253b76 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -820,6 +820,20 @@ pub trait Dialect: Debug + Any { fn supports_set_stmt_without_operator(&self) -> bool { false } + + /// Returns true if the specified keyword should be parsed as a select item alias. + /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided + /// to enable looking ahead if needed. + fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { + explicit || !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) + } + + /// Returns true if the specified keyword should be parsed as a table factor alias. + /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided + /// to enable looking ahead if needed. + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool { + explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw) + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 6b8380d63..f6e9c9eba 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -251,6 +251,51 @@ impl Dialect for SnowflakeDialect { fn supports_partiql(&self) -> bool { true } + + fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + explicit + || match kw { + // The following keywords can be considered an alias as long as + // they are not followed by other tokens that may change their meaning + // e.g. `SELECT * EXCEPT (col1) FROM tbl` + Keyword::EXCEPT + // e.g. `SELECT 1 LIMIT 5` + | Keyword::LIMIT + // e.g. `SELECT 1 OFFSET 5 ROWS` + | Keyword::OFFSET + // e.g. `INSERT INTO t SELECT 1 RETURNING *` + | Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) => + { + false + } + + // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` + // which would give it a different meanins, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias + Keyword::FETCH + if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) => + { + false + } + + // Reserved keywords by the Snowflake dialect, which seem to be less strictive + // than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following + // keywords were tested with the this statement: `SELECT 1 `. + Keyword::FROM + | Keyword::GROUP + | Keyword::HAVING + | Keyword::INTERSECT + | Keyword::INTO + | Keyword::MINUS + | Keyword::ORDER + | Keyword::SELECT + | Keyword::UNION + | Keyword::WHERE + | Keyword::WITH => false, + + // Any other word is considered an alias + _ => true, + } + } } fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ac764a535..aa4eb31d1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8837,38 +8837,76 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } - /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) - /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, - /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` + /// Optionally parses an alias for a select list item + fn maybe_parse_select_item_alias(&mut self) -> Result, ParserError> { + fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + parser.dialect.is_select_item_alias(explicit, kw, parser) + } + self.parse_optional_alias_inner(None, validator) + } + + /// Optionally parses an alias for a table like in `... FROM generate_series(1, 10) AS t (col)`. + /// In this case, the alias is allowed to optionally name the columns in the table, in + /// addition to the table itself. + pub fn maybe_parse_table_alias(&mut self) -> Result, ParserError> { + fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + parser.dialect.is_table_factor_alias(explicit, kw, parser) + } + match self.parse_optional_alias_inner(None, validator)? { + Some(name) => { + let columns = self.parse_table_alias_column_defs()?; + Ok(Some(TableAlias { name, columns })) + } + None => Ok(None), + } + } + + /// Wrapper for parse_optional_alias_inner, left for backwards-compatibility + /// but new flows should use the context-specific methods such as `maybe_parse_select_item_alias` + /// and `maybe_parse_table_alias`. pub fn parse_optional_alias( &mut self, reserved_kwds: &[Keyword], ) -> Result, ParserError> { + fn validator(_explicit: bool, _kw: &Keyword, _parser: &mut Parser) -> bool { + false + } + self.parse_optional_alias_inner(Some(reserved_kwds), validator) + } + + /// Parses an optional alias after a SQL element such as a select list item + /// or a table name. + /// + /// This method accepts an optional list of reserved keywords or a function + /// to call to validate if a keyword should be parsed as an alias, to allow + /// callers to customize the parsing logic based on their context. + fn parse_optional_alias_inner( + &mut self, + reserved_kwds: Option<&[Keyword]>, + validator: F, + ) -> Result, ParserError> + where + F: Fn(bool, &Keyword, &mut Parser) -> bool, + { let after_as = self.parse_keyword(Keyword::AS); + let next_token = self.next_token(); match next_token.token { - // Accept any identifier after `AS` (though many dialects have restrictions on - // keywords that may appear here). If there's no `AS`: don't parse keywords, - // which may start a construct allowed in this position, to be parsed as aliases. - // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, - // not an alias.) - Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => { + // By default, if a word is located after the `AS` keyword we consider it an alias + // as long as it's not reserved. + Token::Word(w) + if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) => + { Ok(Some(w.into_ident(next_token.span))) } - // MSSQL supports single-quoted strings as aliases for columns - // We accept them as table aliases too, although MSSQL does not. - // - // Note, that this conflicts with an obscure rule from the SQL - // standard, which we don't implement: - // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s - // "[Obscure Rule] SQL allows you to break a long up into two or more smaller s, split by a that includes a newline - // character. When it sees such a , your DBMS will - // ignore the and treat the multiple strings as - // a single ." + // This pattern allows for customizing the acceptance of words as aliases based on the caller's + // context, such as to what SQL element this word is a potential alias of (select item alias, table name + // alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords. + Token::Word(w) if validator(after_as, &w.keyword, self) => { + Ok(Some(w.into_ident(next_token.span))) + } + // For backwards-compatibility, we accept quoted strings as aliases regardless of the context. Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - // Support for MySql dialect double-quoted string, `AS "HOUR"` for example Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))), _ => { if after_as { @@ -8880,23 +8918,6 @@ impl<'a> Parser<'a> { } } - /// Parse `AS identifier` when the AS is describing a table-valued object, - /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case - /// the alias is allowed to optionally name the columns in the table, in - /// addition to the table itself. - pub fn parse_optional_table_alias( - &mut self, - reserved_kwds: &[Keyword], - ) -> Result, ParserError> { - match self.parse_optional_alias(reserved_kwds)? { - Some(name) => { - let columns = self.parse_table_alias_column_defs()?; - Ok(Some(TableAlias { name, columns })) - } - None => Ok(None), - } - } - pub fn parse_optional_group_by(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { let expressions = if self.parse_keyword(Keyword::ALL) { @@ -10898,7 +10919,7 @@ impl<'a> Parser<'a> { let name = self.parse_object_name(false)?; self.expect_token(&Token::LParen)?; let args = self.parse_optional_args()?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::Function { lateral: true, name, @@ -10911,7 +10932,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::TableFunction { expr, alias }) } else if self.consume_token(&Token::LParen) { // A left paren introduces either a derived table (i.e., a subquery) @@ -10960,7 +10981,7 @@ impl<'a> Parser<'a> { #[allow(clippy::if_same_then_else)] if !table_and_joins.joins.is_empty() { self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::NestedJoin { table_with_joins: Box::new(table_and_joins), alias, @@ -10973,7 +10994,7 @@ impl<'a> Parser<'a> { // (B): `table_and_joins` (what we found inside the parentheses) // is a nested join `(foo JOIN bar)`, not followed by other joins. self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::NestedJoin { table_with_joins: Box::new(table_and_joins), alias, @@ -10987,9 +11008,7 @@ impl<'a> Parser<'a> { // [AS alias])`) as well. self.expect_token(&Token::RParen)?; - if let Some(outer_alias) = - self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? - { + if let Some(outer_alias) = self.maybe_parse_table_alias()? { // Snowflake also allows specifying an alias *after* parens // e.g. `FROM (mytable) AS alias` match &mut table_and_joins.relation { @@ -11042,7 +11061,7 @@ impl<'a> Parser<'a> { // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) // where there are no parentheses around the VALUES clause. let values = SetExpr::Values(self.parse_values(false)?); - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::Derived { lateral: false, subquery: Box::new(Query { @@ -11068,7 +11087,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); - let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { + let alias = match self.maybe_parse_table_alias() { Ok(Some(alias)) => Some(alias), Ok(None) => None, Err(e) => return Err(e), @@ -11105,7 +11124,7 @@ impl<'a> Parser<'a> { let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; self.expect_token(&Token::RParen)?; self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::JsonTable { json_expr, json_path, @@ -11150,7 +11169,7 @@ impl<'a> Parser<'a> { } } - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; // MSSQL-specific table hints: let mut with_hints = vec![]; @@ -11328,7 +11347,7 @@ impl<'a> Parser<'a> { } else { Vec::new() }; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::OpenJsonTable { json_expr, json_path, @@ -11427,7 +11446,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::MatchRecognize { table: Box::new(table), @@ -11671,7 +11690,7 @@ impl<'a> Parser<'a> { ) -> Result { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::Derived { lateral: match lateral { Lateral => true, @@ -11765,7 +11784,7 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::Pivot { table: Box::new(table), aggregate_functions, @@ -11787,7 +11806,7 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::IN)?; let columns = self.parse_parenthesized_column_list(Mandatory, false)?; self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + let alias = self.maybe_parse_table_alias()?; Ok(TableFactor::Unpivot { table: Box::new(table), value, @@ -12613,7 +12632,7 @@ impl<'a> Parser<'a> { }) } expr => self - .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) + .maybe_parse_select_item_alias() .map(|alias| match alias { Some(alias) => SelectItem::ExprWithAlias { expr, alias }, None => SelectItem::UnnamedExpr(expr), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 112aa5264..fe6439b36 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3022,3 +3022,32 @@ fn parse_ls_and_rm() { snowflake().verified_stmt(r#"LIST @"STAGE_WITH_QUOTES""#); } + +#[test] +fn test_sql_keywords_as_select_item_aliases() { + // Some keywords that should be parsed as an alias + let unreserved_kws = vec!["CLUSTER", "FETCH", "RETURNING", "LIMIT", "EXCEPT"]; + for kw in unreserved_kws { + snowflake() + .one_statement_parses_to(&format!("SELECT 1 {kw}"), &format!("SELECT 1 AS {kw}")); + } + + // Some keywords that should not be parsed as an alias + let reserved_kws = vec![ + "FROM", + "GROUP", + "HAVING", + "INTERSECT", + "INTO", + "ORDER", + "SELECT", + "UNION", + "WHERE", + "WITH", + ]; + for kw in reserved_kws { + assert!(snowflake() + .parse_sql_statements(&format!("SELECT 1 {kw}")) + .is_err()); + } +}