From dc334628e78e70157a80d9ec6226262da6b5dc8b Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Mon, 29 Jul 2024 09:34:58 +0200 Subject: [PATCH 1/6] Parse SETTINGS clause for ClickHouse table-valued functions --- src/ast/mod.rs | 4 +- src/ast/query.rs | 17 ++++++- src/parser/mod.rs | 96 ++++++++++++++++++++++++----------- tests/sqlparser_clickhouse.rs | 32 ++++++++++++ 4 files changed, 115 insertions(+), 34 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d27baadc4..f3e951ee5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -50,8 +50,8 @@ pub use self::query::{ OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, - Values, WildcardAdditionalOptions, With, WithFill, + TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 978604266..fc22303fc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -899,6 +899,14 @@ impl fmt::Display for ExprWithAlias { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableFunctionArgs { + pub args: Vec, + pub settings: Option>, +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -916,7 +924,7 @@ pub enum TableFactor { /// This field's value is `Some(v)`, where `v` is a (possibly empty) /// vector of arguments, in the case of a table-valued function call, /// whereas it's `None` in the case of a regular table name. - args: Option>, + args: Option, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, /// Optional version qualifier to facilitate table time-travel, as @@ -1314,7 +1322,12 @@ impl fmt::Display for TableFactor { write!(f, "PARTITION ({})", display_comma_separated(partitions))?; } if let Some(args) = args { - write!(f, "({})", display_comma_separated(args))?; + write!(f, "(")?; + write!(f, "{}", display_comma_separated(&args.args))?; + if let Some(ref settings) = args.settings { + write!(f, ", SETTINGS {}", display_comma_separated(settings))?; + } + write!(f, ")")?; } if *with_ordinality { write!(f, " WITH ORDINALITY")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b1c1ab7f..477449491 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3426,6 +3426,27 @@ impl<'a> Parser<'a> { Ok(values) } + fn parse_comma_separated_end(&mut self) -> Option { + if !self.consume_token(&Token::Comma) { + Some(Token::Comma) + } else if self.options.trailing_commas { + let token = self.peek_token().token; + match token { + Token::Word(ref kw) + if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => + { + Some(token) + } + Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { + Some(token) + } + _ => None, + } + } else { + None + } + } + /// Parse a comma-separated list of 1+ items accepted by `F` pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> where @@ -3434,22 +3455,8 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if !self.consume_token(&Token::Comma) { + if self.parse_comma_separated_end().is_some() { break; - } else if self.options.trailing_commas { - match self.peek_token().token { - Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - break; - } - Token::RParen - | Token::SemiColon - | Token::EOF - | Token::RBracket - | Token::RBrace => break, - _ => continue, - } } } Ok(values) @@ -8099,19 +8106,7 @@ impl<'a> Parser<'a> { vec![] }; - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) - { - let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - p.expect_token(&Token::Eq)?; - let value = p.parse_value()?; - Ok(Setting { key, value }) - })?; - Some(key_values) - } else { - None - }; + let settings = self.parse_settings()?; let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) @@ -8158,6 +8153,23 @@ impl<'a> Parser<'a> { } } + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause pub fn parse_for_clause(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::XML) { @@ -9372,9 +9384,9 @@ impl<'a> Parser<'a> { // Parse potential version qualifier let version = self.parse_table_version()?; - // Postgres, MSSQL: table-valued functions: + // Postgres, MSSQL, ClickHouse: table-valued functions: let args = if self.consume_token(&Token::LParen) { - Some(self.parse_optional_args()?) + Some(self.parse_table_function_args()?) } else { None }; @@ -10317,6 +10329,30 @@ impl<'a> Parser<'a> { } } + fn parse_table_function_args(&mut self) -> Result { + { + let settings = self.parse_settings()?; + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings, + }); + } + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? { + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.parse_comma_separated_end().is_some() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + /// Parses a potentially empty list of arguments to a window function /// (including the closing parenthesis). /// diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 6fdadc366..8344ec83d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1091,6 +1091,38 @@ fn parse_create_table_on_commit_and_as_query() { } } +#[test] +fn parse_select_table_function_settings() { + let sql = r#"SELECT * FROM table_function(arg, SETTINGS setting = 3)"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + let from = &q.body.as_select().unwrap().from; + assert_eq!(from.len(), 1); + assert_eq!(from[0].joins, vec![]); + match &from[0].relation { + Table { args, .. } => { + let args = args.as_ref().unwrap(); + assert_eq!( + args.args, + vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()) + ))] + ); + assert_eq!( + args.settings, + Some(vec![Setting { + key: "setting".into(), + value: Value::Number("3".into(), false) + }]) + ) + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], From 44563f9380e116666b8a97c74accd51a0dcf2764 Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Tue, 30 Jul 2024 08:51:13 +0200 Subject: [PATCH 2/6] Add documentation for `TableFunctionArgs::settings` --- src/ast/query.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index fc22303fc..ddf8d9277 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -899,11 +899,16 @@ impl fmt::Display for ExprWithAlias { } } +/// Arguments to a table-valued function #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableFunctionArgs { pub args: Vec, + /// ClickHouse-specific SETTINGS clause. + /// For example, + /// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)` + /// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable) pub settings: Option>, } From 155e17ffa4ffca54be4864d6a346946d1f3ff4b0 Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Tue, 30 Jul 2024 08:51:46 +0200 Subject: [PATCH 3/6] Expand test case for ClickHouse table-function SETTINGS clause --- tests/sqlparser_clickhouse.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 8344ec83d..704e1071f 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1093,7 +1093,7 @@ fn parse_create_table_on_commit_and_as_query() { #[test] fn parse_select_table_function_settings() { - let sql = r#"SELECT * FROM table_function(arg, SETTINGS setting = 3)"#; + let sql = r#"SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')"#; match clickhouse_and_generic().verified_stmt(sql) { Statement::Query(q) => { let from = &q.body.as_select().unwrap().from; @@ -1110,10 +1110,16 @@ fn parse_select_table_function_settings() { ); assert_eq!( args.settings, - Some(vec![Setting { - key: "setting".into(), - value: Value::Number("3".into(), false) - }]) + Some(vec![ + Setting { + key: "s0".into(), + value: Value::Number("3".into(), false) + }, + Setting { + key: "s1".into(), + value: Value::SingleQuotedString("s".into()) + } + ]) ) } _ => unreachable!(), From c6b9d6a1ed7193ef6faf21c1a869744db2196b7a Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Tue, 30 Jul 2024 09:10:34 +0200 Subject: [PATCH 4/6] Fix test --- tests/sqlparser_clickhouse.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 704e1071f..7fcba0c99 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1113,7 +1113,7 @@ fn parse_select_table_function_settings() { Some(vec![ Setting { key: "s0".into(), - value: Value::Number("3".into(), false) + value: Value::Number("3".parse().unwrap(), false) }, Setting { key: "s1".into(), From 9257400c3d260e50b3ff6a7fb213e472305fb3b1 Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Wed, 31 Jul 2024 13:08:56 +0200 Subject: [PATCH 5/6] Expand test case and fix display impl --- src/ast/query.rs | 5 +- tests/sqlparser_clickhouse.rs | 99 ++++++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 31 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index ddf8d9277..6162eae1a 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1330,7 +1330,10 @@ impl fmt::Display for TableFactor { write!(f, "(")?; write!(f, "{}", display_comma_separated(&args.args))?; if let Some(ref settings) = args.settings { - write!(f, ", SETTINGS {}", display_comma_separated(settings))?; + if !args.args.is_empty() { + write!(f, ", ")?; + } + write!(f, "SETTINGS {}", display_comma_separated(settings))?; } write!(f, ")")?; } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 7fcba0c99..100075e58 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1093,39 +1093,78 @@ fn parse_create_table_on_commit_and_as_query() { #[test] fn parse_select_table_function_settings() { - let sql = r#"SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')"#; - match clickhouse_and_generic().verified_stmt(sql) { - Statement::Query(q) => { - let from = &q.body.as_select().unwrap().from; - assert_eq!(from.len(), 1); - assert_eq!(from[0].joins, vec![]); - match &from[0].relation { - Table { args, .. } => { - let args = args.as_ref().unwrap(); - assert_eq!( - args.args, - vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( - Expr::Identifier("arg".into()) - ))] - ); - assert_eq!( - args.settings, - Some(vec![ - Setting { - key: "s0".into(), - value: Value::Number("3".parse().unwrap(), false) - }, - Setting { - key: "s1".into(), - value: Value::SingleQuotedString("s".into()) - } - ]) - ) + fn check_settings(sql: &str, expected: &TableFunctionArgs) { + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + let from = &q.body.as_select().unwrap().from; + assert_eq!(from.len(), 1); + assert_eq!(from[0].joins, vec![]); + match &from[0].relation { + Table { args, .. } => { + let args = args.as_ref().unwrap(); + assert_eq!(args, expected); + } + _ => unreachable!(), } - _ => unreachable!(), } + _ => unreachable!(), } - _ => unreachable!(), + } + check_settings( + "SELECT * FROM table_function(arg, SETTINGS s0 = 3, s1 = 's')", + &TableFunctionArgs { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()), + ))], + + settings: Some(vec![ + Setting { + key: "s0".into(), + value: Value::Number("3".parse().unwrap(), false), + }, + Setting { + key: "s1".into(), + value: Value::SingleQuotedString("s".into()), + }, + ]), + }, + ); + check_settings( + r#"SELECT * FROM table_function(arg)"#, + &TableFunctionArgs { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()), + ))], + settings: None, + }, + ); + check_settings( + "SELECT * FROM table_function(SETTINGS s0 = 3, s1 = 's')", + &TableFunctionArgs { + args: vec![], + settings: Some(vec![ + Setting { + key: "s0".into(), + value: Value::Number("3".parse().unwrap(), false), + }, + Setting { + key: "s1".into(), + value: Value::SingleQuotedString("s".into()), + }, + ]), + }, + ); + let invalid_cases = vec![ + "SELECT * FROM t(SETTINGS a)", + "SELECT * FROM t(SETTINGS a=)", + "SELECT * FROM t(SETTINGS a=1, b)", + "SELECT * FROM t(SETTINGS a=1, b=)", + "SELECT * FROM t(SETTINGS a=1, b=c)", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: SETTINGS key = value, found: "); } } From e174bd3e8a25b32c48a7fd0b64bdc5c302923e3f Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Wed, 31 Jul 2024 13:09:08 +0200 Subject: [PATCH 6/6] Clean up implementation --- src/parser/mod.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 477449491..5dfe7dc26 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3426,24 +3426,26 @@ impl<'a> Parser<'a> { Ok(values) } - fn parse_comma_separated_end(&mut self) -> Option { + /// Parse the comma of a comma-separated syntax element. + /// Returns true if there is a next element + fn is_parse_comma_separated_end(&mut self) -> bool { if !self.consume_token(&Token::Comma) { - Some(Token::Comma) + true } else if self.options.trailing_commas { let token = self.peek_token().token; match token { Token::Word(ref kw) if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => { - Some(token) + true } Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { - Some(token) + true } - _ => None, + _ => false, } } else { - None + false } } @@ -3455,7 +3457,7 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if self.parse_comma_separated_end().is_some() { + if self.is_parse_comma_separated_end() { break; } } @@ -10330,14 +10332,11 @@ impl<'a> Parser<'a> { } fn parse_table_function_args(&mut self) -> Result { - { - let settings = self.parse_settings()?; - if self.consume_token(&Token::RParen) { - return Ok(TableFunctionArgs { - args: vec![], - settings, - }); - } + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings: None, + }); } let mut args = vec![]; let settings = loop { @@ -10345,7 +10344,7 @@ impl<'a> Parser<'a> { break Some(settings); } args.push(self.parse_function_args()?); - if self.parse_comma_separated_end().is_some() { + if self.is_parse_comma_separated_end() { break None; } };