From 54184460b5d873a67c2801e8b7c6e4f145bc65df Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Thu, 16 May 2024 11:16:43 -0700 Subject: [PATCH 01/53] Snowflake: support IGNORE/RESPECT NULLS inside function argument list (#1263) --- src/dialect/snowflake.rs | 6 ++++++ tests/sqlparser_snowflake.rs | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 19267d7c56..21bc535548 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -71,6 +71,12 @@ impl Dialect for SnowflakeDialect { true } + // Snowflake doesn't document this but `FIRST_VALUE(arg, { IGNORE | RESPECT } NULLS)` + // works (i.e. inside the argument list instead of after). + fn supports_window_function_null_treatment_arg(&self) -> bool { + true + } + /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/set#syntax) fn supports_parenthesized_set_variables(&self) -> bool { true diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 25eaa2f719..30f2cc601d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1576,3 +1576,12 @@ fn test_select_wildcard_with_ilike_replace() { "sql parser error: Expected end of statement, found: EXCLUDE" ); } + +#[test] +fn first_value_ignore_nulls() { + snowflake().verified_only_select(concat!( + "SELECT FIRST_VALUE(column2 IGNORE NULLS) ", + "OVER (PARTITION BY column1 ORDER BY column2) ", + "FROM some_table" + )); +} From 9d15f7e9a92a4ef36fcc6d3166fd0221a22a20e2 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 20 May 2024 17:15:40 -0700 Subject: [PATCH 02/53] Support: Databricks and generic: fix for `values` as table name (#1278) --- src/ast/query.rs | 11 ++++++ src/parser/mod.rs | 13 ++++++- tests/sqlparser_databricks.rs | 66 ++++++++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 08a0bc5af6..07863bd7ca 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs 
@@ -108,6 +108,17 @@ pub enum SetExpr { Table(Box), } +impl SetExpr { + /// If this `SetExpr` is a `SELECT`, returns the [`Select`]. + pub fn as_select(&self) -> Option<&Select> { + if let Self::Select(select) = self { + Some(&**select) + } else { + None + } + } +} + impl fmt::Display for SetExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a7ec4d0936..8132921f14 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8594,8 +8594,19 @@ impl<'a> Parser<'a> { self.expected("joined table", self.peek_token()) } } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) - && self.parse_keyword(Keyword::VALUES) + && matches!( + self.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::VALUES, + .. + }), + Token::LParen + ] + ) { + self.expect_keyword(Keyword::VALUES)?; + // Snowflake and Databricks allow syntax like below: // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) // where there are no parentheses around the VALUES clause. 
diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 8f0579fc9b..430647ded8 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -1,5 +1,5 @@ use sqlparser::ast::*; -use sqlparser::dialect::DatabricksDialect; +use sqlparser::dialect::{DatabricksDialect, GenericDialect}; use sqlparser::parser::ParserError; use test_utils::*; @@ -13,6 +13,13 @@ fn databricks() -> TestedDialects { } } +fn databricks_and_generic() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(DatabricksDialect {}), Box::new(GenericDialect {})], + options: None, + } +} + #[test] fn test_databricks_identifiers() { // databricks uses backtick for delimited identifiers @@ -124,3 +131,60 @@ fn test_databricks_lambdas() { ); databricks().verified_expr("transform(array(1, 2, 3), x -> x + 1)"); } + +#[test] +fn test_values_clause() { + let values = Values { + explicit_row: false, + rows: vec![ + vec![ + Expr::Value(Value::DoubleQuotedString("one".to_owned())), + Expr::Value(number("1")), + ], + vec![ + Expr::Value(Value::SingleQuotedString("two".to_owned())), + Expr::Value(number("2")), + ], + ], + }; + + let query = databricks().verified_query(r#"VALUES ("one", 1), ('two', 2)"#); + assert_eq!(SetExpr::Values(values.clone()), *query.body); + + // VALUES is permitted in a FROM clause without a subquery + let query = databricks().verified_query_with_canonical( + r#"SELECT * FROM VALUES ("one", 1), ('two', 2)"#, + r#"SELECT * FROM (VALUES ("one", 1), ('two', 2))"#, + ); + let Some(TableFactor::Derived { subquery, .. 
}) = query + .body + .as_select() + .map(|select| &select.from[0].relation) + else { + panic!("expected subquery"); + }; + assert_eq!(SetExpr::Values(values), *subquery.body); + + // values is also a valid table name + let query = databricks_and_generic().verified_query(concat!( + "WITH values AS (SELECT 42) ", + "SELECT * FROM values", + )); + assert_eq!( + Some(&TableFactor::Table { + name: ObjectName(vec![Ident::new("values")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![] + }), + query + .body + .as_select() + .map(|select| &select.from[0].relation) + ); + + // TODO: support this example from https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html#examples + // databricks().verified_query("VALUES 1, 2, 3"); +} From d5faf3c54bba16a0b617117a312eb96eee59706a Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Thu, 23 May 2024 10:30:05 -0700 Subject: [PATCH 03/53] Support expression in AT TIME ZONE and fix precedence (#1272) --- src/ast/mod.rs | 4 +-- src/parser/mod.rs | 62 +++++++++---------------------------- tests/sqlparser_common.rs | 12 +++++-- tests/sqlparser_postgres.rs | 40 ++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 53 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d937b72753..c9de747c7d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -584,7 +584,7 @@ pub enum Expr { /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { timestamp: Box, - time_zone: String, + time_zone: Box, }, /// Extract a field from a timestamp e.g. 
`EXTRACT(MONTH FROM foo)` /// @@ -1270,7 +1270,7 @@ impl fmt::Display for Expr { timestamp, time_zone, } => { - write!(f, "{timestamp} AT TIME ZONE '{time_zone}'") + write!(f, "{timestamp} AT TIME ZONE {time_zone}") } Expr::Interval(interval) => { write!(f, "{interval}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8132921f14..f88aefd102 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2469,26 +2469,11 @@ impl<'a> Parser<'a> { } } Keyword::AT => { - // if self.parse_keyword(Keyword::TIME) { - // self.expect_keyword(Keyword::ZONE)?; - if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - let time_zone = self.next_token(); - match time_zone.token { - Token::SingleQuotedString(time_zone) => { - log::trace!("Peek token: {:?}", self.peek_token()); - Ok(Expr::AtTimeZone { - timestamp: Box::new(expr), - time_zone, - }) - } - _ => self.expected( - "Expected Token::SingleQuotedString after AT TIME ZONE", - time_zone, - ), - } - } else { - self.expected("Expected Token::Word after AT", tok) - } + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + Ok(Expr::AtTimeZone { + timestamp: Box::new(expr), + time_zone: Box::new(self.parse_subexpr(precedence)?), + }) } Keyword::NOT | Keyword::IN @@ -2545,35 +2530,12 @@ impl<'a> Parser<'a> { ), } } else if Token::DoubleColon == tok { - let data_type = self.parse_data_type()?; - - let cast_expr = Expr::Cast { + Ok(Expr::Cast { kind: CastKind::DoubleColon, expr: Box::new(expr), - data_type: data_type.clone(), + data_type: self.parse_data_type()?, format: None, - }; - - match data_type { - DataType::Date - | DataType::Datetime(_) - | DataType::Timestamp(_, _) - | DataType::Time(_, _) => { - let value = self.parse_optional_time_zone()?; - match value { - Some(Value::SingleQuotedString(tz)) => Ok(Expr::AtTimeZone { - timestamp: Box::new(cast_expr), - time_zone: tz, - }), - None => Ok(cast_expr), - _ => Err(ParserError::ParserError(format!( - "Expected Token::SingleQuotedString after AT TIME ZONE, but found: 
{}", - value.unwrap() - ))), - } - } - _ => Ok(cast_expr), - } + }) } else if Token::ExclamationMark == tok { // PostgreSQL factorial operation Ok(Expr::UnaryOp { @@ -2784,10 +2746,14 @@ impl<'a> Parser<'a> { // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference // higher number = higher precedence + // + // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator + // actually has higher precedence than addition. + // See https://postgrespro.com/list/thread-id/2673331. + const AT_TZ_PREC: u8 = 41; const MUL_DIV_MOD_OP_PREC: u8 = 40; const PLUS_MINUS_PREC: u8 = 30; const XOR_PREC: u8 = 24; - const TIME_ZONE_PREC: u8 = 20; const BETWEEN_PREC: u8 = 20; const LIKE_PREC: u8 = 19; const IS_PREC: u8 = 17; @@ -2817,7 +2783,7 @@ impl<'a> Parser<'a> { (Token::Word(w), Token::Word(w2)) if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => { - Ok(Self::TIME_ZONE_PREC) + Ok(Self::AT_TZ_PREC) } _ => Ok(0), } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6668ce8f46..f8b7d02657 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4995,7 +4995,9 @@ fn parse_at_timezone() { assert_eq!( &Expr::AtTimeZone { timestamp: Box::new(call("FROM_UNIXTIME", [zero.clone()])), - time_zone: "UTC-06:00".to_string(), + time_zone: Box::new(Expr::Value(Value::SingleQuotedString( + "UTC-06:00".to_string() + ))), }, expr_from_projection(only(&select.projection)), ); @@ -5009,7 +5011,9 @@ fn parse_at_timezone() { [ Expr::AtTimeZone { timestamp: Box::new(call("FROM_UNIXTIME", [zero])), - time_zone: "UTC-06:00".to_string(), + time_zone: Box::new(Expr::Value(Value::SingleQuotedString( + "UTC-06:00".to_string() + ))), }, Expr::Value(Value::SingleQuotedString("%Y-%m-%dT%H".to_string()),) ] @@ -7037,7 +7041,9 @@ fn parse_double_colon_cast_at_timezone() { data_type: DataType::Timestamp(None, TimezoneInfo::None), format: None }), - time_zone: "Europe/Brussels".to_string() + time_zone: 
Box::new(Expr::Value(Value::SingleQuotedString( + "Europe/Brussels".to_string() + ))), }, expr_from_projection(only(&select.projection)), ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d68ebd5568..5c3b653dda 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3882,3 +3882,43 @@ fn parse_mat_cte() { let sql2 = r#"WITH cte AS NOT MATERIALIZED (SELECT id FROM accounts) SELECT id FROM cte"#; pg().verified_stmt(sql2); } + +#[test] +fn parse_at_time_zone() { + pg_and_generic().verified_expr("CURRENT_TIMESTAMP AT TIME ZONE tz"); + pg_and_generic().verified_expr("CURRENT_TIMESTAMP AT TIME ZONE ('America/' || 'Los_Angeles')"); + + // check precedence + let expr = Expr::BinaryOp { + left: Box::new(Expr::AtTimeZone { + timestamp: Box::new(Expr::TypedString { + data_type: DataType::Timestamp(None, TimezoneInfo::None), + value: "2001-09-28 01:00".to_owned(), + }), + time_zone: Box::new(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(Value::SingleQuotedString( + "America/Los_Angeles".to_owned(), + ))), + data_type: DataType::Text, + format: None, + }), + }), + op: BinaryOperator::Plus, + right: Box::new(Expr::Interval(Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "23 hours".to_owned(), + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + })), + }; + pretty_assertions::assert_eq!( + pg_and_generic().verified_expr( + "TIMESTAMP '2001-09-28 01:00' AT TIME ZONE 'America/Los_Angeles'::TEXT + INTERVAL '23 hours'", + ), + expr + ); +} From 792e389baaee74d2c035e8467eda0440417e0523 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Thu, 30 May 2024 18:18:41 +0200 Subject: [PATCH 04/53] Support CREATE FUNCTION for BigQuery (#1253) --- src/ast/mod.rs | 213 +++++++++++++++++------- src/keywords.rs | 1 + src/parser/mod.rs | 322 ++++++++++++++++++++++++++---------- tests/sqlparser_bigquery.rs | 139 ++++++++++++++++ 
tests/sqlparser_hive.rs | 27 +-- tests/sqlparser_postgres.rs | 22 +-- 6 files changed, 554 insertions(+), 170 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c9de747c7d..1227ce9357 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2454,14 +2454,64 @@ pub enum Statement { /// Supported variants: /// 1. [Hive](https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction) /// 2. [Postgres](https://www.postgresql.org/docs/15/sql-createfunction.html) + /// 3. [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement) CreateFunction { or_replace: bool, temporary: bool, + if_not_exists: bool, name: ObjectName, args: Option>, return_type: Option, - /// Optional parameters. - params: CreateFunctionBody, + /// The expression that defines the function. + /// + /// Examples: + /// ```sql + /// AS ((SELECT 1)) + /// AS "console.log();" + /// ``` + function_body: Option, + /// Behavior attribute for the function + /// + /// IMMUTABLE | STABLE | VOLATILE + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + behavior: Option, + /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + called_on_null: Option, + /// PARALLEL { UNSAFE | RESTRICTED | SAFE } + /// + /// [Postgres](https://www.postgresql.org/docs/current/sql-createfunction.html) + parallel: Option, + /// USING ... (Hive only) + using: Option, + /// Language used in a UDF definition. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION foo() LANGUAGE js AS "console.log();" + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_javascript_udf) + language: Option, + /// Determinism keyword used for non-sql UDF definitions. 
+ /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11) + determinism_specifier: Option, + /// List of options for creating the function. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11) + options: Option>, + /// Connection resource for a remote function. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION foo() + /// RETURNS FLOAT64 + /// REMOTE WITH CONNECTION us.myconnection + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_a_remote_function) + remote_connection: Option, }, /// ```sql /// CREATE PROCEDURE @@ -3152,16 +3202,26 @@ impl fmt::Display for Statement { Statement::CreateFunction { or_replace, temporary, + if_not_exists, name, args, return_type, - params, + function_body, + language, + behavior, + called_on_null, + parallel, + using, + determinism_specifier, + options, + remote_connection, } => { write!( f, - "CREATE {or_replace}{temp}FUNCTION {name}", + "CREATE {or_replace}{temp}FUNCTION {if_not_exists}{name}", temp = if *temporary { "TEMPORARY " } else { "" }, or_replace = if *or_replace { "OR REPLACE " } else { "" }, + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, )?; if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; @@ -3169,7 +3229,43 @@ impl fmt::Display for Statement { if let Some(return_type) = return_type { write!(f, " RETURNS {return_type}")?; } - write!(f, "{params}")?; + if let Some(determinism_specifier) = determinism_specifier { + write!(f, " {determinism_specifier}")?; + } + if let Some(language) = language { + write!(f, " LANGUAGE {language}")?; + } + if let Some(behavior) = behavior { + write!(f, " {behavior}")?; + } + if let Some(called_on_null) = called_on_null { + write!(f, " {called_on_null}")?; + } + if let Some(parallel) = parallel { + write!(f, " {parallel}")?; + } + if let 
Some(remote_connection) = remote_connection { + write!(f, " REMOTE WITH CONNECTION {remote_connection}")?; + } + if let Some(CreateFunctionBody::AsBeforeOptions(function_body)) = function_body { + write!(f, " AS {function_body}")?; + } + if let Some(CreateFunctionBody::Return(function_body)) = function_body { + write!(f, " RETURN {function_body}")?; + } + if let Some(using) = using { + write!(f, " {using}")?; + } + if let Some(options) = options { + write!( + f, + " OPTIONS({})", + display_comma_separated(options.as_slice()) + )?; + } + if let Some(CreateFunctionBody::AsAfterOptions(function_body)) = function_body { + write!(f, " AS {function_body}")?; + } Ok(()) } Statement::CreateProcedure { @@ -6143,75 +6239,74 @@ impl fmt::Display for FunctionParallel { } } +/// [BigQuery] Determinism specifier used in a UDF definition. +/// +/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum FunctionDefinition { - SingleQuotedDef(String), - DoubleDollarDef(String), +pub enum FunctionDeterminismSpecifier { + Deterministic, + NotDeterministic, } -impl fmt::Display for FunctionDefinition { +impl fmt::Display for FunctionDeterminismSpecifier { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FunctionDefinition::SingleQuotedDef(s) => write!(f, "'{s}'")?, - FunctionDefinition::DoubleDollarDef(s) => write!(f, "$${s}$$")?, + FunctionDeterminismSpecifier::Deterministic => { + write!(f, "DETERMINISTIC") + } + FunctionDeterminismSpecifier::NotDeterministic => { + write!(f, "NOT DETERMINISTIC") + } } - Ok(()) } } -/// Postgres specific feature. +/// Represent the expression body of a `CREATE FUNCTION` statement as well as +/// where within the statement, the body shows up. 
/// -/// See [Postgres docs](https://www.postgresql.org/docs/15/sql-createfunction.html) -/// for more details -#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +/// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 +/// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct CreateFunctionBody { - /// LANGUAGE lang_name - pub language: Option, - /// IMMUTABLE | STABLE | VOLATILE - pub behavior: Option, - /// CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT - pub called_on_null: Option, - /// PARALLEL { UNSAFE | RESTRICTED | SAFE } - pub parallel: Option, - /// AS 'definition' +pub enum CreateFunctionBody { + /// A function body expression using the 'AS' keyword and shows up + /// before any `OPTIONS` clause. /// - /// Note that Hive's `AS class_name` is also parsed here. - pub as_: Option, - /// RETURN expression - pub return_: Option, - /// USING ... 
(Hive only) - pub using: Option, -} - -impl fmt::Display for CreateFunctionBody { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if let Some(language) = &self.language { - write!(f, " LANGUAGE {language}")?; - } - if let Some(behavior) = &self.behavior { - write!(f, " {behavior}")?; - } - if let Some(called_on_null) = &self.called_on_null { - write!(f, " {called_on_null}")?; - } - if let Some(parallel) = &self.parallel { - write!(f, " {parallel}")?; - } - if let Some(definition) = &self.as_ { - write!(f, " AS {definition}")?; - } - if let Some(expr) = &self.return_ { - write!(f, " RETURN {expr}")?; - } - if let Some(using) = &self.using { - write!(f, " {using}")?; - } - Ok(()) - } + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(x FLOAT64, y FLOAT64) RETURNS FLOAT64 + /// AS (x * y) + /// OPTIONS(description="desc"); + /// ``` + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 + AsBeforeOptions(Expr), + /// A function body expression using the 'AS' keyword and shows up + /// after any `OPTIONS` clause. + /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(x FLOAT64, y FLOAT64) RETURNS FLOAT64 + /// OPTIONS(description="desc") + /// AS (x * y); + /// ``` + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_11 + AsAfterOptions(Expr), + /// Function body expression using the 'RETURN' keyword. 
+ /// + /// Example: + /// ```sql + /// CREATE FUNCTION myfunc(a INTEGER, IN b INTEGER = 1) RETURNS INTEGER + /// LANGUAGE SQL + /// RETURN a + b; + /// ``` + /// + /// [Postgres]: https://www.postgresql.org/docs/current/sql-createfunction.html + Return(Expr), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index e67fffd97d..06086297c4 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -572,6 +572,7 @@ define_keywords!( RELATIVE, RELAY, RELEASE, + REMOTE, RENAME, REORG, REPAIR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f88aefd102..f88f2c1899 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3590,95 +3590,53 @@ impl<'a> Parser<'a> { temporary: bool, ) -> Result { if dialect_of!(self is HiveDialect) { - let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; - let class_name = self.parse_function_definition()?; - let params = CreateFunctionBody { - as_: Some(class_name), - using: self.parse_optional_create_function_using()?, - ..Default::default() - }; - - Ok(Statement::CreateFunction { - or_replace, - temporary, - name, - args: None, - return_type: None, - params, - }) + self.parse_hive_create_function(or_replace, temporary) } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_function_arg)?) - }; - - self.expect_token(&Token::RParen)?; - - let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) 
- } else { - None - }; - - let params = self.parse_create_function_body()?; - - Ok(Statement::CreateFunction { - or_replace, - temporary, - name, - args, - return_type, - params, - }) + self.parse_postgres_create_function(or_replace, temporary) } else if dialect_of!(self is DuckDbDialect) { self.parse_create_macro(or_replace, temporary) + } else if dialect_of!(self is BigQueryDialect) { + self.parse_bigquery_create_function(or_replace, temporary) } else { self.prev_token(); self.expected("an object type after CREATE", self.peek_token()) } } - fn parse_function_arg(&mut self) -> Result { - let mode = if self.parse_keyword(Keyword::IN) { - Some(ArgMode::In) - } else if self.parse_keyword(Keyword::OUT) { - Some(ArgMode::Out) - } else if self.parse_keyword(Keyword::INOUT) { - Some(ArgMode::InOut) - } else { + /// Parse `CREATE FUNCTION` for [Postgres] + /// + /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html + fn parse_postgres_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); None + } else { + Some(self.parse_comma_separated(Parser::parse_function_arg)?) }; - // parse: [ argname ] argtype - let mut name = None; - let mut data_type = self.parse_data_type()?; - if let DataType::Custom(n, _) = &data_type { - // the first token is actually a name - name = Some(n.0[0].clone()); - data_type = self.parse_data_type()?; - } + self.expect_token(&Token::RParen)?; - let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) - { - Some(self.parse_expr()?) + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) 
} else { None }; - Ok(OperateFunctionArg { - mode, - name, - data_type, - default_expr, - }) - } - fn parse_create_function_body(&mut self) -> Result { - let mut body = CreateFunctionBody::default(); + #[derive(Default)] + struct Body { + language: Option, + behavior: Option, + function_body: Option, + called_on_null: Option, + parallel: Option, + } + let mut body = Body::default(); loop { fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { if field.is_some() { @@ -3689,8 +3647,10 @@ impl<'a> Parser<'a> { Ok(()) } if self.parse_keyword(Keyword::AS) { - ensure_not_set(&body.as_, "AS")?; - body.as_ = Some(self.parse_function_definition()?); + ensure_not_set(&body.function_body, "AS")?; + body.function_body = Some(CreateFunctionBody::AsBeforeOptions( + self.parse_create_function_body_string()?, + )); } else if self.parse_keyword(Keyword::LANGUAGE) { ensure_not_set(&body.language, "LANGUAGE")?; body.language = Some(self.parse_identifier(false)?); @@ -3744,12 +3704,186 @@ impl<'a> Parser<'a> { return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); } } else if self.parse_keyword(Keyword::RETURN) { - ensure_not_set(&body.return_, "RETURN")?; - body.return_ = Some(self.parse_expr()?); + ensure_not_set(&body.function_body, "RETURN")?; + body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); + } else { + break; + } + } + + Ok(Statement::CreateFunction { + or_replace, + temporary, + name, + args, + return_type, + behavior: body.behavior, + called_on_null: body.called_on_null, + parallel: body.parallel, + language: body.language, + function_body: body.function_body, + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + } + + /// Parse `CREATE FUNCTION` for [Hive] + /// + /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction + fn parse_hive_create_function( + &mut self, + 
or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let as_ = self.parse_create_function_body_string()?; + let using = self.parse_optional_create_function_using()?; + + Ok(Statement::CreateFunction { + or_replace, + temporary, + name, + function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)), + using, + if_not_exists: false, + args: None, + return_type: None, + behavior: None, + called_on_null: None, + parallel: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }) + } + + /// Parse `CREATE FUNCTION` for [BigQuery] + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement + fn parse_bigquery_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + let parse_function_param = + |parser: &mut Parser| -> Result { + let name = parser.parse_identifier(false)?; + let data_type = parser.parse_data_type()?; + Ok(OperateFunctionArg { + mode: None, + name: Some(name), + data_type, + default_expr: None, + }) + }; + self.expect_token(&Token::LParen)?; + let args = self.parse_comma_separated0(parse_function_param)?; + self.expect_token(&Token::RParen)?; + + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) + } else { + None + }; + + let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) { + Some(FunctionDeterminismSpecifier::Deterministic) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) { + Some(FunctionDeterminismSpecifier::NotDeterministic) + } else { + None + }; + + let language = if self.parse_keyword(Keyword::LANGUAGE) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + + let remote_connection = + if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) { + Some(self.parse_object_name(false)?) } else { - return Ok(body); + None + }; + + // `OPTIONS` may come before or after the function body but + // may be specified at most once. + let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; + + let function_body = if remote_connection.is_none() { + self.expect_keyword(Keyword::AS)?; + let expr = self.parse_expr()?; + if options.is_none() { + options = self.maybe_parse_options(Keyword::OPTIONS)?; + Some(CreateFunctionBody::AsBeforeOptions(expr)) + } else { + Some(CreateFunctionBody::AsAfterOptions(expr)) } + } else { + None + }; + + Ok(Statement::CreateFunction { + or_replace, + temporary, + if_not_exists, + name, + args: Some(args), + return_type, + function_body, + language, + determinism_specifier, + options, + remote_connection, + using: None, + behavior: None, + called_on_null: None, + parallel: None, + }) + } + + fn parse_function_arg(&mut self) -> Result { + let mode = if self.parse_keyword(Keyword::IN) { + Some(ArgMode::In) + } else if self.parse_keyword(Keyword::OUT) { + Some(ArgMode::Out) + } else if self.parse_keyword(Keyword::INOUT) { + Some(ArgMode::InOut) + } else { + None + }; + + // parse: [ argname ] argtype + let mut name = None; + let mut data_type = self.parse_data_type()?; + if let DataType::Custom(n, _) = &data_type { + // the first token is actually a name + name = Some(n.0[0].clone()); + data_type = self.parse_data_type()?; } + + let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) + { + Some(self.parse_expr()?) 
+ } else { + None + }; + Ok(OperateFunctionArg { + mode, + name, + data_type, + default_expr, + }) } pub fn parse_create_macro( @@ -3893,12 +4027,9 @@ impl<'a> Parser<'a> { }; if dialect_of!(self is BigQueryDialect | GenericDialect) { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - let opts = self.parse_options(Keyword::OPTIONS)?; - if !opts.is_empty() { - options = CreateTableOptions::Options(opts); - } + if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? { + if !opts.is_empty() { + options = CreateTableOptions::Options(opts); } }; } @@ -5680,6 +5811,18 @@ impl<'a> Parser<'a> { } } + pub fn maybe_parse_options( + &mut self, + keyword: Keyword, + ) -> Result>, ParserError> { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == keyword { + return Ok(Some(self.parse_options(keyword)?)); + } + }; + Ok(None) + } + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; @@ -6521,19 +6664,22 @@ impl<'a> Parser<'a> { } } - pub fn parse_function_definition(&mut self) -> Result { + /// Parse the body of a `CREATE FUNCTION` specified as a string. + /// e.g. `CREATE FUNCTION ... AS $$ body $$`. 
+ fn parse_create_function_body_string(&mut self) -> Result { let peek_token = self.peek_token(); match peek_token.token { - Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { self.next_token(); - Ok(FunctionDefinition::DoubleDollarDef(value.value)) + Ok(Expr::Value(Value::DollarQuotedString(s))) } - _ => Ok(FunctionDefinition::SingleQuotedDef( + _ => Ok(Expr::Value(Value::SingleQuotedString( self.parse_literal_string()?, - )), + ))), } } + /// Parse a literal string pub fn parse_literal_string(&mut self) -> Result { let next_token = self.next_token(); diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 179755e0c1..0bb91cb361 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1950,6 +1950,145 @@ fn parse_map_access_expr() { bigquery().verified_only_select(sql); } +#[test] +fn test_bigquery_create_function() { + let sql = concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "project1.mydataset.myfunction(x FLOAT64) ", + "RETURNS FLOAT64 ", + "OPTIONS(x = 'y') ", + "AS 42" + ); + + let stmt = bigquery().verified_stmt(sql); + assert_eq!( + stmt, + Statement::CreateFunction { + or_replace: true, + temporary: true, + if_not_exists: false, + name: ObjectName(vec![ + Ident::new("project1"), + Ident::new("mydataset"), + Ident::new("myfunction"), + ]), + args: Some(vec![OperateFunctionArg::with_name("x", DataType::Float64),]), + return_type: Some(DataType::Float64), + function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value(number( + "42" + )))), + options: Some(vec![SqlOption { + name: Ident::new("x"), + value: Expr::Value(Value::SingleQuotedString("y".into())), + }]), + behavior: None, + using: None, + language: None, + determinism_specifier: None, + remote_connection: None, + called_on_null: None, + parallel: None, + } + ); + + let sqls = [ + // Arbitrary Options 
expressions. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "OPTIONS(a = [1, 2], b = 'two', c = [('k1', 'v1'), ('k2', 'v2')]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // Options after body. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "AS ((SELECT 1 FROM mytable)) ", + "OPTIONS(a = [1, 2], b = 'two', c = [('k1', 'v1'), ('k2', 'v2')])", + ), + // IF NOT EXISTS + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS ARRAY ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // No return type. + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable))" + ), + // With language - body after options + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "DETERMINISTIC ", + "LANGUAGE js ", + "OPTIONS(a = [1, 2]) ", + "AS \"console.log('hello');\"" + ), + // With language - body before options + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "NOT DETERMINISTIC ", + "LANGUAGE js ", + "AS \"console.log('hello');\" ", + "OPTIONS(a = [1, 2])", + ), + // Remote + concat!( + "CREATE OR REPLACE TEMPORARY FUNCTION ", + "myfunction(a FLOAT64, b INT64, c STRING) ", + "RETURNS INT64 ", + "REMOTE WITH CONNECTION us.myconnection ", + "OPTIONS(a = [1, 2])", + ), + ]; + for sql in sqls { + bigquery().verified_stmt(sql); + } + + let error_sqls = [ + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "OPTIONS(a = [1, 2]) ", + "AS ((SELECT 1 FROM mytable)) ", + "OPTIONS(a = [1, 2])", + ), + "Expected end of statement, found: OPTIONS", + ), + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "IMMUTABLE ", + "AS ((SELECT 1 FROM mytable)) 
", + ), + "Expected AS, found: IMMUTABLE", + ), + ( + concat!( + "CREATE TEMPORARY FUNCTION myfunction() ", + "AS \"console.log('hello');\" ", + "LANGUAGE js ", + ), + "Expected end of statement, found: LANGUAGE", + ), + ]; + for (sql, error) in error_sqls { + assert_eq!( + ParserError::ParserError(error.to_owned()), + bigquery().parse_sql_statements(sql).unwrap_err() + ); + } +} + #[test] fn test_bigquery_trim() { let real_sql = r#"SELECT customer_id, TRIM(item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 78db48ec25..b661b6cd3e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -17,8 +17,8 @@ use sqlparser::ast::{ CreateFunctionBody, CreateFunctionUsing, Expr, Function, FunctionArgumentList, - FunctionArguments, FunctionDefinition, Ident, ObjectName, OneOrManyWithParens, SelectItem, - Statement, TableFactor, UnaryOperator, + FunctionArguments, Ident, ObjectName, OneOrManyWithParens, SelectItem, Statement, TableFactor, + UnaryOperator, Value, }; use sqlparser::dialect::{GenericDialect, HiveDialect, MsSqlDialect}; use sqlparser::parser::{ParserError, ParserOptions}; @@ -296,22 +296,23 @@ fn parse_create_function() { Statement::CreateFunction { temporary, name, - params, + function_body, + using, .. 
} => { assert!(temporary); assert_eq!(name.to_string(), "mydb.myfunc"); assert_eq!( - params, - CreateFunctionBody { - as_: Some(FunctionDefinition::SingleQuotedDef( - "org.random.class.Name".to_string() - )), - using: Some(CreateFunctionUsing::Jar( - "hdfs://somewhere.com:8020/very/far".to_string() - )), - ..Default::default() - } + function_body, + Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + Value::SingleQuotedString("org.random.class.Name".to_string()) + ))) + ); + assert_eq!( + using, + Some(CreateFunctionUsing::Jar( + "hdfs://somewhere.com:8020/very/far".to_string() + )), ) } _ => unreachable!(), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 5c3b653dda..ffcd783f06 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3285,16 +3285,18 @@ fn parse_create_function() { OperateFunctionArg::unnamed(DataType::Integer(None)), ]), return_type: Some(DataType::Integer(None)), - params: CreateFunctionBody { - language: Some("SQL".into()), - behavior: Some(FunctionBehavior::Immutable), - called_on_null: Some(FunctionCalledOnNull::Strict), - parallel: Some(FunctionParallel::Safe), - as_: Some(FunctionDefinition::SingleQuotedDef( - "select $1 + $2;".into() - )), - ..Default::default() - }, + language: Some("SQL".into()), + behavior: Some(FunctionBehavior::Immutable), + called_on_null: Some(FunctionCalledOnNull::Strict), + parallel: Some(FunctionParallel::Safe), + function_body: Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( + Value::SingleQuotedString("select $1 + $2;".into()) + ))), + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, } ); } From c2d84f568371d249d35c673432604a4b9a7988ba Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Thu, 30 May 2024 09:20:16 -0700 Subject: [PATCH 05/53] Support for Snowflake dynamic pivot (#1280) --- src/ast/mod.rs | 9 +++--- src/ast/query.rs | 49 +++++++++++++++++++++++++++++---- src/parser/mod.rs | 32 
++++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++--- tests/sqlparser_snowflake.rs | 53 ++++++++++++++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 15 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1227ce9357..ed2cba5e3e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -46,10 +46,11 @@ pub use self::query::{ GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, - NonBlock, Offset, OffsetRows, OrderByExpr, Query, RenameSelectItem, RepetitionQuantifier, - ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, - SetOperator, SetQuantifier, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, - TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, + NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, SymbolDefinition, Table, + TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, + Values, WildcardAdditionalOptions, With, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 07863bd7ca..c0fa738cd3 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -957,7 +957,8 @@ pub enum TableFactor { table: Box, aggregate_functions: Vec, // Function expression value_column: Vec, - pivot_values: Vec, + value_source: PivotValueSource, + default_on_null: Option, alias: Option, }, /// An UNPIVOT operation on a table. @@ -998,6 +999,41 @@ pub enum TableFactor { }, } +/// The source of values in a `PIVOT` operation. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PivotValueSource { + /// Pivot on a static list of values. + /// + /// See . + List(Vec), + /// Pivot on all distinct values of the pivot column. + /// + /// See . + Any(Vec), + /// Pivot on all values returned by a subquery. + /// + /// See . + Subquery(Query), +} + +impl fmt::Display for PivotValueSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PivotValueSource::List(values) => write!(f, "{}", display_comma_separated(values)), + PivotValueSource::Any(order_by) => { + write!(f, "ANY")?; + if !order_by.is_empty() { + write!(f, " ORDER BY {}", display_comma_separated(order_by))?; + } + Ok(()) + } + PivotValueSource::Subquery(query) => write!(f, "{query}"), + } + } +} + /// An item in the `MEASURES` subclause of a `MATCH_RECOGNIZE` operation. /// /// See . @@ -1324,17 +1360,20 @@ impl fmt::Display for TableFactor { table, aggregate_functions, value_column, - pivot_values, + value_source, + default_on_null, alias, } => { write!( f, - "{} PIVOT({} FOR {} IN ({}))", - table, + "{table} PIVOT({} FOR {} IN ({value_source})", display_comma_separated(aggregate_functions), Expr::CompoundIdentifier(value_column.to_vec()), - display_comma_separated(pivot_values) )?; + if let Some(expr) = default_on_null { + write!(f, " DEFAULT ON NULL ({expr})")?; + } + write!(f, ")")?; if alias.is_some() { write!(f, " AS {}", alias.as_ref().unwrap())?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f88f2c1899..ea110ec34d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9191,16 +9191,44 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::FOR)?; let value_column = self.parse_object_name(false)?.0; self.expect_keyword(Keyword::IN)?; + self.expect_token(&Token::LParen)?; - let pivot_values = 
self.parse_comma_separated(Self::parse_expr_with_alias)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_some() + { + self.prev_token(); + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + }; self.expect_token(&Token::RParen)?; + + let default_on_null = + if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + self.expect_token(&Token::RParen)?; let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Pivot { table: Box::new(table), aggregate_functions, value_column, - pivot_values, + value_source, + default_on_null, alias, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f8b7d02657..5ae867278c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8618,7 +8618,7 @@ fn parse_pivot_table() { expected_function("c", Some("u")), ], value_column: vec![Ident::new("a"), Ident::new("MONTH")], - pivot_values: vec![ + value_source: PivotValueSource::List(vec![ ExprWithAlias { expr: Expr::Value(number("1")), alias: Some(Ident::new("x")) @@ -8631,7 +8631,8 @@ fn parse_pivot_table() { expr: Expr::Identifier(Ident::new("three")), alias: Some(Ident::new("y")) }, - ], + ]), + default_on_null: None, alias: Some(TableAlias { name: Ident { value: "p".to_string(), @@ -8769,7 +8770,7 @@ fn parse_pivot_unpivot_table() { alias: None }], value_column: vec![Ident::new("year")], - pivot_values: vec![ + value_source: PivotValueSource::List(vec![ ExprWithAlias { expr: 
Expr::Value(Value::SingleQuotedString("population_2000".to_string())), alias: None @@ -8778,7 +8779,8 @@ fn parse_pivot_unpivot_table() { expr: Expr::Value(Value::SingleQuotedString("population_2010".to_string())), alias: None }, - ], + ]), + default_on_null: None, alias: Some(TableAlias { name: Ident::new("p"), columns: vec![] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 30f2cc601d..e1dba252d6 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1585,3 +1585,56 @@ fn first_value_ignore_nulls() { "FROM some_table" )); } + +#[test] +fn test_pivot() { + // pivot on static list of values with default + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) ", + "FOR quarter IN (", + "'2023_Q1', ", + "'2023_Q2', ", + "'2023_Q3', ", + "'2023_Q4', ", + "'2024_Q1') ", + "DEFAULT ON NULL (0)", + ") ", + "ORDER BY empid", + )); + + // dynamic pivot from subquery + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) FOR quarter IN (", + "SELECT DISTINCT quarter ", + "FROM ad_campaign_types_by_quarter ", + "WHERE television = true ", + "ORDER BY quarter)", + ") ", + "ORDER BY empid", + )); + + // dynamic pivot on any value (with order by) + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM quarterly_sales ", + "PIVOT(SUM(amount) FOR quarter IN (ANY ORDER BY quarter)) ", + "ORDER BY empid", + )); + + // dynamic pivot on any value (without order by) + #[rustfmt::skip] + snowflake().verified_only_select(concat!( + "SELECT * ", + "FROM sales_data ", + "PIVOT(SUM(total_sales) FOR fis_quarter IN (ANY)) ", + "WHERE fis_year IN (2023) ", + "ORDER BY region", + )); +} From 029a9996459e23db987cbe521f86d1a9ec1315ea Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Thu, 30 May 2024 18:21:39 +0200 Subject: [PATCH 06/53] Add support for view comments for 
Snowflake (#1287) Co-authored-by: Joey Hain --- src/ast/mod.rs | 11 ++++++++ src/parser/mod.rs | 14 ++++++++++ tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_common.rs | 14 ++++++++++ tests/sqlparser_snowflake.rs | 52 +++++++++++++++++++++++++++++++++++- tests/sqlparser_sqlite.rs | 2 ++ 6 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ed2cba5e3e..ee39294a37 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1959,6 +1959,9 @@ pub enum Statement { query: Box, options: CreateTableOptions, cluster_by: Vec, + /// Snowflake: Views can have comments in Snowflake. + /// + comment: Option, /// if true, has RedShift [`WITH NO SCHEMA BINDING`] clause with_no_schema_binding: bool, /// if true, has SQLite `IF NOT EXISTS` clause @@ -3323,6 +3326,7 @@ impl fmt::Display for Statement { materialized, options, cluster_by, + comment, with_no_schema_binding, if_not_exists, temporary, @@ -3336,6 +3340,13 @@ impl fmt::Display for Statement { temporary = if *temporary { "TEMPORARY " } else { "" }, if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } )?; + if let Some(comment) = comment { + write!( + f, + " COMMENT = '{}'", + value::escape_single_quote_string(comment) + )?; + } if matches!(options, CreateTableOptions::With(_)) { write!(f, " {options}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ea110ec34d..f3e42de00c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4034,6 +4034,19 @@ impl<'a> Parser<'a> { }; } + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT) + { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("string literal", next_token)?, + } + } else { + None + }; + self.expect_keyword(Keyword::AS)?; let query = self.parse_boxed_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely 
supported here. @@ -4054,6 +4067,7 @@ impl<'a> Parser<'a> { or_replace, options, cluster_by, + comment, with_no_schema_binding, if_not_exists, temporary, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 0bb91cb361..ea40f67d19 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -309,6 +309,7 @@ fn parse_create_view_if_not_exists() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -320,6 +321,7 @@ fn parse_create_view_if_not_exists() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(if_not_exists); assert!(!temporary); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5ae867278c..c475388950 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6251,6 +6251,7 @@ fn parse_create_view() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6262,6 +6263,7 @@ fn parse_create_view() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); @@ -6305,6 +6307,7 @@ fn parse_create_view_with_columns() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6325,6 +6328,7 @@ fn parse_create_view_with_columns() { assert!(!materialized); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); @@ -6345,6 +6349,7 @@ fn parse_create_view_temporary() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6356,6 +6361,7 @@ fn parse_create_view_temporary() { assert!(!or_replace); 
assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(temporary); @@ -6376,6 +6382,7 @@ fn parse_create_or_replace_view() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6387,6 +6394,7 @@ fn parse_create_or_replace_view() { assert!(!materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); @@ -6411,6 +6419,7 @@ fn parse_create_or_replace_materialized_view() { query, materialized, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6422,6 +6431,7 @@ fn parse_create_or_replace_materialized_view() { assert!(materialized); assert!(or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); @@ -6442,6 +6452,7 @@ fn parse_create_materialized_view() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6453,6 +6464,7 @@ fn parse_create_materialized_view() { assert_eq!(options, CreateTableOptions::None); assert!(!or_replace); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); @@ -6473,6 +6485,7 @@ fn parse_create_materialized_view_with_cluster_by() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -6484,6 +6497,7 @@ fn parse_create_materialized_view_with_cluster_by() { assert_eq!(options, CreateTableOptions::None); assert!(!or_replace); assert_eq!(cluster_by, vec![Ident::new("foo")]); + assert!(comment.is_none()); assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 
e1dba252d6..c11f609935 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -18,7 +18,7 @@ use sqlparser::ast::helpers::stmt_data_loading::{ DataLoadingOption, DataLoadingOptionType, StageLoadSelectItem, }; use sqlparser::ast::*; -use sqlparser::dialect::{GenericDialect, SnowflakeDialect}; +use sqlparser::dialect::{Dialect, GenericDialect, SnowflakeDialect}; use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::*; use test_utils::*; @@ -91,6 +91,56 @@ fn test_snowflake_single_line_tokenize() { assert_eq!(expected, tokens); } +#[test] +fn parse_sf_create_or_replace_view_with_comment_missing_equal() { + assert!(snowflake_and_generic() + .parse_sql_statements("CREATE OR REPLACE VIEW v COMMENT = 'hello, world' AS SELECT 1") + .is_ok()); + + assert!(snowflake_and_generic() + .parse_sql_statements("CREATE OR REPLACE VIEW v COMMENT 'hello, world' AS SELECT 1") + .is_err()); +} + +#[test] +fn parse_sf_create_or_replace_with_comment_for_snowflake() { + let sql = "CREATE OR REPLACE VIEW v COMMENT = 'hello, world' AS SELECT 1"; + let dialect = test_utils::TestedDialects { + dialects: vec![Box::new(SnowflakeDialect {}) as Box], + options: None, + }; + + match dialect.verified_stmt(sql) { + Statement::CreateView { + name, + columns, + or_replace, + options, + query, + materialized, + cluster_by, + comment, + with_no_schema_binding: late_binding, + if_not_exists, + temporary, + } => { + assert_eq!("v", name.to_string()); + assert_eq!(columns, vec![]); + assert_eq!(options, CreateTableOptions::None); + assert_eq!("SELECT 1", query.to_string()); + assert!(!materialized); + assert!(or_replace); + assert_eq!(cluster_by, vec![]); + assert!(comment.is_some()); + assert_eq!(comment.expect("expected comment"), "hello, world"); + assert!(!late_binding); + assert!(!if_not_exists); + assert!(!temporary); + } + _ => unreachable!(), + } +} + #[test] fn test_sf_derived_table_in_parenthesis() { // Nesting a subquery in an extra set of 
parentheses is non-standard, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 5742754c06..fe5346f149 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -167,6 +167,7 @@ fn parse_create_view_temporary_if_not_exists() { materialized, options, cluster_by, + comment, with_no_schema_binding: late_binding, if_not_exists, temporary, @@ -178,6 +179,7 @@ fn parse_create_view_temporary_if_not_exists() { assert!(!or_replace); assert_eq!(options, CreateTableOptions::None); assert_eq!(cluster_by, vec![]); + assert!(comment.is_none()); assert!(!late_binding); assert!(if_not_exists); assert!(temporary); From 375742d1fa2e3255078dd1c9ced0decc80d270e4 Mon Sep 17 00:00:00 2001 From: Aleksei Piianin Date: Thu, 30 May 2024 18:24:12 +0200 Subject: [PATCH 07/53] ClickHouse: create view with fields and data types (#1292) --- src/ast/ddl.rs | 7 +++++- src/parser/mod.rs | 11 +++++++++- tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_clickhouse.rs | 41 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 1 + 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index de514550be..9c30999ab1 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -815,7 +815,7 @@ impl fmt::Display for ColumnDef { /// /// Syntax /// ```markdown -/// [OPTIONS(option, ...)] +/// [data_type][OPTIONS(option, ...)] /// /// option: = /// ``` @@ -824,18 +824,23 @@ impl fmt::Display for ColumnDef { /// ```sql /// name /// age OPTIONS(description = "age column", tag = "prod") +/// created_at DateTime64 /// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ViewColumnDef { pub name: Ident, + pub data_type: Option, pub options: Option>, } impl fmt::Display for ViewColumnDef { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.name)?; + if let 
Some(data_type) = self.data_type.as_ref() { + write!(f, " {}", data_type)?; + } if let Some(options) = self.options.as_ref() { write!( f, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f3e42de00c..fef307106d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7282,7 +7282,16 @@ impl<'a> Parser<'a> { } else { None }; - Ok(ViewColumnDef { name, options }) + let data_type = if dialect_of!(self is ClickHouseDialect) { + Some(self.parse_data_type()?) + } else { + None + }; + Ok(ViewColumnDef { + name, + data_type, + options, + }) } /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ea40f67d19..1cec15c30c 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -261,10 +261,12 @@ fn parse_create_view_with_options() { vec![ ViewColumnDef { name: Ident::new("name"), + data_type: None, options: None, }, ViewColumnDef { name: Ident::new("age"), + data_type: None, options: Some(vec![SqlOption { name: Ident::new("description"), value: Expr::Value(Value::DoubleQuotedString("field age".to_string())), diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 7150a94890..a693936bc7 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -220,6 +220,47 @@ fn parse_create_table() { ); } +#[test] +fn parse_create_view_with_fields_data_types() { + match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) { + Statement::CreateView { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["v".into()])); + assert_eq!( + columns, + vec![ + ViewColumnDef { + name: "i".into(), + data_type: Some(DataType::Custom( + ObjectName(vec![Ident { + value: "int".into(), + quote_style: Some('"') + }]), + vec![] + )), + options: None + }, + ViewColumnDef { + name: "f".into(), + data_type: Some(DataType::Custom( + ObjectName(vec![Ident { + value: "String".into(), + quote_style: Some('"') + }]), + vec![] + )), + options: None + }, + ] + ); + } + _ => unreachable!(), + } + + clickhouse() + .parse_sql_statements(r#"CREATE VIEW v (i, f) AS SELECT * FROM t"#) + .expect_err("CREATE VIEW with fields and without data types should be invalid"); +} + #[test] fn parse_double_equal() { clickhouse().one_statement_parses_to( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c475388950..5f2d2cc025 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6319,6 +6319,7 @@ fn parse_create_view_with_columns() { .into_iter() .map(|name| ViewColumnDef { name, + data_type: None, options: None }) .collect::>() From 80c03f5c6a42c2414d93dc411c013155dea676d4 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Fri, 31 May 2024 02:45:07 -0700 Subject: [PATCH 08/53] Support for Snowflake ASOF joins (#1288) --- src/ast/query.rs | 17 +++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 26 ++++++++++++-- src/test_utils.rs | 14 ++++++++ tests/sqlparser_snowflake.rs | 67 ++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index c0fa738cd3..fcd5b970d1 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1562,6 +1562,15 @@ impl fmt::Display for Join { ), JoinOperator::CrossApply => write!(f, " CROSS APPLY {}", self.relation), JoinOperator::OuterApply => write!(f, " OUTER APPLY {}", self.relation), + JoinOperator::AsOf { + match_condition, + constraint, + } => write!( + f, + " ASOF JOIN {} MATCH_CONDITION ({match_condition}){}", + 
self.relation, + suffix(constraint) + ), } } } @@ -1587,6 +1596,14 @@ pub enum JoinOperator { CrossApply, /// OUTER APPLY (non-standard) OuterApply, + /// `ASOF` joins are used for joining tables containing time-series data + /// whose timestamp columns do not match exactly. + /// + /// See . + AsOf { + match_condition: Expr, + constraint: JoinConstraint, + }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index 06086297c4..6c6c642c36 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -91,6 +91,7 @@ define_keywords!( AS, ASC, ASENSITIVE, + ASOF, ASSERT, ASYMMETRIC, AT, @@ -418,6 +419,7 @@ define_keywords!( MATCH, MATCHED, MATCHES, + MATCH_CONDITION, MATCH_RECOGNIZE, MATERIALIZED, MAX, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fef307106d..123af045ad 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3196,6 +3196,16 @@ impl<'a> Parser<'a> { Ok(values) } + pub fn parse_parenthesized(&mut self, mut f: F) -> Result + where + F: FnMut(&mut Parser<'a>) -> Result, + { + self.expect_token(&Token::LParen)?; + let res = f(self)?; + self.expect_token(&Token::RParen)?; + Ok(res) + } + /// Parse a comma-separated list of 0+ items accepted by `F` pub fn parse_comma_separated0(&mut self, f: F) -> Result, ParserError> where @@ -8505,6 +8515,18 @@ impl<'a> Parser<'a> { relation: self.parse_table_factor()?, join_operator: JoinOperator::OuterApply, } + } else if self.parse_keyword(Keyword::ASOF) { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + self.expect_keyword(Keyword::MATCH_CONDITION)?; + let match_condition = self.parse_parenthesized(Self::parse_expr)?; + Join { + relation, + join_operator: JoinOperator::AsOf { + match_condition, + constraint: self.parse_join_constraint(false)?, + }, + } } else { let natural = self.parse_keyword(Keyword::NATURAL); let peek_keyword = if let Token::Word(w) = self.peek_token().token { @@ -8951,9 +8973,7 @@ impl<'a> 
Parser<'a> { }; self.expect_keyword(Keyword::PATTERN)?; - self.expect_token(&Token::LParen)?; - let pattern = self.parse_pattern()?; - self.expect_token(&Token::RParen)?; + let pattern = self.parse_parenthesized(Self::parse_pattern)?; self.expect_keyword(Keyword::DEFINE)?; diff --git a/src/test_utils.rs b/src/test_utils.rs index 464366ae49..9af9c80986 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -312,6 +312,20 @@ pub fn table(name: impl Into) -> TableFactor { } } +pub fn table_with_alias(name: impl Into, alias: impl Into) -> TableFactor { + TableFactor::Table { + name: ObjectName(vec![Ident::new(name)]), + alias: Some(TableAlias { + name: Ident::new(alias), + columns: vec![], + }), + args: None, + with_hints: vec![], + version: None, + partitions: vec![], + } +} + pub fn join(relation: TableFactor) -> Join { Join { relation, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index c11f609935..7492802c7e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1688,3 +1688,70 @@ fn test_pivot() { "ORDER BY region", )); } + +#[test] +fn asof_joins() { + #[rustfmt::skip] + let query = snowflake_and_generic().verified_only_select(concat!( + "SELECT * ", + "FROM trades_unixtime AS tu ", + "ASOF JOIN quotes_unixtime AS qu ", + "MATCH_CONDITION (tu.trade_time >= qu.quote_time)", + )); + + assert_eq!( + query.from[0], + TableWithJoins { + relation: table_with_alias("trades_unixtime", "tu"), + joins: vec![Join { + relation: table_with_alias("quotes_unixtime", "qu"), + join_operator: JoinOperator::AsOf { + match_condition: Expr::BinaryOp { + left: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("tu"), + Ident::new("trade_time"), + ])), + op: BinaryOperator::GtEq, + right: Box::new(Expr::CompoundIdentifier(vec![ + Ident::new("qu"), + Ident::new("quote_time"), + ])), + }, + constraint: JoinConstraint::None, + }, + }], + } + ); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT 
t.stock_symbol, t.trade_time, t.quantity, q.quote_time, q.price ", + "FROM trades AS t ASOF JOIN quotes AS q ", + "MATCH_CONDITION (t.trade_time >= quote_time) ", + "ON t.stock_symbol = q.stock_symbol ", + "ORDER BY t.stock_symbol", + )); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT t.stock_symbol, c.company_name, t.trade_time, t.quantity, q.quote_time, q.price ", + "FROM trades AS t ASOF JOIN quotes AS q ", + "MATCH_CONDITION (t.trade_time <= quote_time) ", + "USING(stock_symbol) ", + "JOIN companies AS c ON c.stock_symbol = t.stock_symbol ", + "ORDER BY t.stock_symbol", + )); + + #[rustfmt::skip] + snowflake_and_generic().verified_query(concat!( + "SELECT * ", + "FROM snowtime AS s ", + "ASOF JOIN raintime AS r ", + "MATCH_CONDITION (s.observed >= r.observed) ", + "ON s.state = r.state ", + "ASOF JOIN preciptime AS p ", + "MATCH_CONDITION (s.observed >= p.observed) ", + "ON s.state = p.state ", + "ORDER BY s.observed", + )); +} From afa5f08db9b1f3a4805f21fea6b1e72710cdb138 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Fri, 31 May 2024 14:38:35 -0700 Subject: [PATCH 09/53] Support for Postgres array slice syntax (#1290) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 83 +++++++++++++-- src/parser/mod.rs | 94 +++++++++++++--- tests/sqlparser_duckdb.rs | 8 +- tests/sqlparser_postgres.rs | 200 +++++++++++++++++++++++++++++------ tests/sqlparser_snowflake.rs | 30 ++++++ 5 files changed, 355 insertions(+), 60 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ee39294a37..320dfc60e3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -679,7 +679,7 @@ pub enum Expr { }, /// Access a map-like object by field (e.g. 
`column['field']` or `column[4]` /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess) + /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) /// MapAccess { column: Box, @@ -746,10 +746,10 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), - /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` - ArrayIndex { - obj: Box, - indexes: Vec, + /// An access of nested data using subscript syntax, for example `array[2]`. + Subscript { + expr: Box, + subscript: Box, }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), @@ -805,6 +805,68 @@ pub enum Expr { Lambda(LambdaFunction), } +/// The contents inside the `[` and `]` in a subscript expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Subscript { + /// Accesses the element of the array at the given index. + Index { index: Expr }, + + /// Accesses a slice of an array on PostgreSQL, e.g. + /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[2:5]; + /// ----------- + /// {2,3,4,5} + /// ``` + /// + /// The lower and/or upper bound can be omitted to slice from the start or + /// end of the array respectively. + /// + /// See . + /// + /// Also supports an optional "stride" as the last element (this is not + /// supported by postgres), e.g. 
+ /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[1:6:2]; + /// ----------- + /// {1,3,5} + /// ``` + Slice { + lower_bound: Option, + upper_bound: Option, + stride: Option, + }, +} + +impl fmt::Display for Subscript { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subscript::Index { index } => write!(f, "{index}"), + Subscript::Slice { + lower_bound, + upper_bound, + stride, + } => { + if let Some(lower) = lower_bound { + write!(f, "{lower}")?; + } + write!(f, ":")?; + if let Some(upper) = upper_bound { + write!(f, "{upper}")?; + } + if let Some(stride) = stride { + write!(f, ":")?; + write!(f, "{stride}")?; + } + Ok(()) + } + } + } +} + /// A lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1251,12 +1313,11 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } - Expr::ArrayIndex { obj, indexes } => { - write!(f, "{obj}")?; - for i in indexes { - write!(f, "[{i}]")?; - } - Ok(()) + Expr::Subscript { + expr, + subscript: key, + } => { + write!(f, "{expr}[{key}]") } Expr::Array(set) => { write!(f, "{set}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 123af045ad..c6750644cd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> { }) } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - // parse index - self.parse_array_index(expr) + self.parse_subscript(expr) } else if dialect_of!(self is SnowflakeDialect) { self.prev_token(); self.parse_json_access(expr) @@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> { } } - pub fn parse_array_index(&mut self, expr: Expr) -> Result { - let index = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - let mut indexes: Vec = vec![index]; - while self.consume_token(&Token::LBracket) { - let index = self.parse_expr()?; + 
/// Parses an array subscript like + /// * `[:]` + /// * `[l]` + /// * `[l:]` + /// * `[:u]` + /// * `[l:u]` + /// * `[l:u:s]` + /// + /// Parser is right after `[` + fn parse_subscript_inner(&mut self) -> Result { + // at either `:(rest)` or `:(rest)]` + let lower_bound = if self.consume_token(&Token::Colon) { + None + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + if let Some(lower_bound) = lower_bound { + return Ok(Subscript::Index { index: lower_bound }); + }; + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } + + // consume the `:` + if lower_bound.is_some() { + self.expect_token(&Token::Colon)?; + } + + // we are now at either `]`, `(rest)]` + let upper_bound = if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride: None, + }); + } + + // we are now at `:]` or `:stride]` + self.expect_token(&Token::Colon)?; + let stride = if self.consume_token(&Token::RBracket) { + None + } else { + Some(self.parse_expr()?) 
+ }; + + if stride.is_some() { self.expect_token(&Token::RBracket)?; - indexes.push(index); } - Ok(Expr::ArrayIndex { - obj: Box::new(expr), - indexes, + + Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride, + }) + } + + /// Parses an array subscript like `[1:3]` + /// + /// Parser is right after `[` + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let subscript = self.parse_subscript_inner()?; + Ok(Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(subscript), }) } @@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> { Ok(Self::MUL_DIV_MOD_OP_PREC) } Token::DoubleColon => Ok(50), - Token::Colon => Ok(50), + Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), Token::ExclamationMark => Ok(50), Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), Token::Arrow diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a84da53780..8d12945ddd 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -528,8 +528,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Array(Array { + &Expr::Subscript { + expr: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -537,7 +537,9 @@ fn test_array_index() { ], named: false })), - indexes: vec![Expr::Value(number("3"))] + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("3")) + }) }, expr ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ffcd783f06..677246a51c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1873,9 +1873,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone()], + &Expr::Subscript { + expr: 
Box::new(Expr::Identifier(Ident::new("foo"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1883,9 +1885,16 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone(), num[0].clone()], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("foo"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1893,19 +1902,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("bar"))), - indexes: vec![ - num[0].clone(), - Expr::Identifier(Ident { - value: "baz".to_string(), - quote_style: Some('"') + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("bar"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }) }), - Expr::Identifier(Ident { + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { + value: "baz".to_string(), + quote_style: Some('"') + }) + }) + }), + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { value: "fooz".to_string(), quote_style: Some('"') }) - ], + }) }, expr_from_projection(only(&select.projection)), ); @@ -1913,26 +1930,33 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: 
CastKind::Cast, - expr: Box::new(Expr::Array(Array { - elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], + named: true, + })], named: true, - })], - named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), + None + ))), None - ))), - None - )), - format: None, - }))), - indexes: vec![num[1].clone(), num[2].clone()], + )), + format: None, + }))), + subscript: Box::new(Subscript::Index { + index: num[1].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[2].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1948,6 +1972,116 @@ fn parse_array_index_expr() { ); } +#[test] +fn parse_array_subscript() { + let tests = [ + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2]", + Subscript::Index { + index: Expr::Value(number("2")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[foo]", + Subscript::Index { + index: Expr::Identifier(Ident::new("foo")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + stride: Some(Expr::Value(number("3"))), + }, + ), + ( + "arr[array_length(arr) - 3:array_length(arr) - 1]", + Subscript::Slice { + lower_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", 
[Expr::Identifier(Ident::new("arr"))])), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("3"))), + }), + upper_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("1"))), + }), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:5]", + Subscript::Slice { + lower_bound: None, + upper_bound: Some(Expr::Value(number("5"))), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: None, + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:]", + Subscript::Slice { + lower_bound: None, + upper_bound: None, + stride: None, + }, + ), + ]; + for (sql, expect) in tests { + let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + panic!("expected subscript expr"); + }; + assert_eq!(expect, *subscript); + } + + pg_and_generic().verified_expr("schedule[:2][2:]"); +} + +#[test] +fn parse_array_multi_subscript() { + let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); + assert_eq!( + Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(call( + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), + subscript: Box::new(Subscript::Slice { + lower_bound: Some(Expr::Value(number("1"))), + upper_bound: Some(Expr::Value(number("2"))), + stride: None, + }), + }), + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("2")), + }), + }, + expr, + ); +} + #[test] fn parse_create_index() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 7492802c7e..d213efd7b3 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -394,6 +394,36 @@ fn parse_semi_structured_data_traversal() { })], 
select.projection ); + + // a json access used as a key to another json access + assert_eq!( + snowflake().verified_expr("a[b:c]"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Bracket { + key: Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("b"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "c".to_owned(), + quoted: false + }] + } + } + }] + } + } + ); + + // unquoted object keys cannot start with a digit + assert_eq!( + snowflake() + .parse_sql_statements("SELECT a:42") + .unwrap_err() + .to_string(), + "sql parser error: Expected variant object key name, found: 42" + ); } #[test] From 521a2c9e7aa4102db78eaf81ebbba941342976bb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Jun 2024 06:22:06 -0400 Subject: [PATCH 10/53] Add CHANGELOG for 0.47.0 (#1295) --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab654525fb..18df2e33ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,33 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. 
+ +## [0.47.0] 2024-06-01 + +### Fixed +* Re-support Postgres array slice syntax (#1290) - Thanks @jmhain +* Fix DoubleColon cast skipping AT TIME ZONE #1266 (#1267) - Thanks @dmitrybugakov +* Fix for values as table name in Databricks and generic (#1278) - Thanks @jmhain + +### Added +* Support `ASOF` joins in Snowflake (#1288) - Thanks @jmhain +* Support `CREATE VIEW` with fields and data types ClickHouse (#1292) - Thanks @7phs +* Support view comments for Snowflake (#1287) - Thanks @bombsimon +* Support dynamic pivot in Snowflake (#1280) - Thanks @jmhain +* Support `CREATE FUNCTION` for BigQuery, generalize AST (#1253) - Thanks @iffyio +* Support expression in `AT TIME ZONE` and fix precedence (#1272) - Thanks @jmhain +* Support `IGNORE/RESPECT NULLS` inside function argument list for Databricks (#1263) - Thanks @jmhain +* Support `SELECT * EXCEPT` Databricks (#1261) - Thanks @jmhain +* Support triple quoted strings (#1262) - Thanks @iffyio +* Support array indexing for duckdb (#1265) - Thanks @JichaoS +* Support multiple SET variables (#1252) - Thanks @iffyio +* Support `ANY_VALUE` `HAVING` clause (#1258) in BigQuery - Thanks @jmhain +* Support keywords as field names in BigQuery struct syntax (#1254) - Thanks @iffyio +* Support `GROUP_CONCAT()` in MySQL (#1256) - Thanks @jmhain +* Support lambda functions in Databricks (#1257) - Thanks @jmhain +* Add const generic peek_tokens method to parser (#1255) - Thanks @jmhain + + ## [0.46.0] 2024-05-03 ### Changed From f3f5de51e55cccdde9c10b4804cf790ccd532970 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 1 Jun 2024 06:23:18 -0400 Subject: [PATCH 11/53] chore: Release sqlparser version 0.47.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c9bf58bbb3..8d015968b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.46.0" 
+version = "0.47.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From a0f511cb21efd9e992fb4c6ef273ef41802d308c Mon Sep 17 00:00:00 2001 From: Philip Cristiano Date: Wed, 5 Jun 2024 05:25:42 -0400 Subject: [PATCH 12/53] Encapsulate `CreateTable`, `CreateIndex` into specific structs (#1291) --- src/ast/dml.rs | 292 +++++++++++++++++++++++- src/ast/helpers/stmt_create_table.rs | 9 +- src/ast/mod.rs | 318 +-------------------------- src/parser/mod.rs | 4 +- tests/sqlparser_bigquery.rs | 8 +- tests/sqlparser_common.rs | 54 ++--- tests/sqlparser_mysql.rs | 42 ++-- tests/sqlparser_postgres.rs | 44 ++-- tests/sqlparser_snowflake.rs | 6 +- tests/sqlparser_sqlite.rs | 10 +- 10 files changed, 387 insertions(+), 400 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index badc58a7d4..91232218f9 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -11,18 +11,304 @@ // limitations under the License. #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, vec::Vec}; +use alloc::{boxed::Box, string::String, vec::Vec}; +use core::fmt::{self, Display}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +pub use super::ddl::{ColumnDef, TableConstraint}; + use super::{ - Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr, - Query, SelectItem, SqliteOnConflict, TableWithJoins, + display_comma_separated, display_separated, Expr, FileFormat, FromTable, HiveDistributionStyle, + HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, + OnCommit, OnInsert, OrderByExpr, Query, SelectItem, SqlOption, SqliteOnConflict, + TableWithJoins, }; +/// CREATE INDEX statement. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateIndex { + /// index name + pub name: Option, + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + pub using: Option, + pub columns: Vec, + pub unique: bool, + pub concurrently: bool, + pub if_not_exists: bool, + pub include: Vec, + pub nulls_distinct: Option, + pub predicate: Option, +} +/// CREATE TABLE statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateTable { + pub or_replace: bool, + pub temporary: bool, + pub external: bool, + pub global: Option, + pub if_not_exists: bool, + pub transient: bool, + /// Table name + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub name: ObjectName, + /// Optional schema + pub columns: Vec, + pub constraints: Vec, + pub hive_distribution: HiveDistributionStyle, + pub hive_formats: Option, + pub table_properties: Vec, + pub with_options: Vec, + pub file_format: Option, + pub location: Option, + pub query: Option>, + pub without_rowid: bool, + pub like: Option, + pub clone: Option, + pub engine: Option, + pub comment: Option, + pub auto_increment_offset: Option, + pub default_charset: Option, + pub collation: Option, + pub on_commit: Option, + /// ClickHouse "ON CLUSTER" clause: + /// + pub on_cluster: Option, + /// ClickHouse "ORDER BY " clause. Note that omitted ORDER BY is different + /// than empty (represented as ()), the latter meaning "no sorting". + /// + pub order_by: Option>, + /// BigQuery: A partition expression for the table. + /// + pub partition_by: Option>, + /// BigQuery: Table clustering column list. + /// + pub cluster_by: Option>, + /// BigQuery: Table options list. 
+ /// + pub options: Option>, + /// SQLite "STRICT" clause. + /// if the "STRICT" table-option keyword is added to the end, after the closing ")", + /// then strict typing rules apply to that table. + pub strict: bool, +} + +impl Display for CreateTable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // We want to allow the following options + // Empty column list, allowed by PostgreSQL: + // `CREATE TABLE t ()` + // No columns provided for CREATE TABLE AS: + // `CREATE TABLE t AS SELECT a from t2` + // Columns provided for CREATE TABLE AS: + // `CREATE TABLE t (a INT) AS SELECT a from t2` + write!( + f, + "CREATE {or_replace}{external}{global}{temporary}{transient}TABLE {if_not_exists}{name}", + or_replace = if self.or_replace { "OR REPLACE " } else { "" }, + external = if self.external { "EXTERNAL " } else { "" }, + global = self.global + .map(|global| { + if global { + "GLOBAL " + } else { + "LOCAL " + } + }) + .unwrap_or(""), + if_not_exists = if self.if_not_exists { "IF NOT EXISTS " } else { "" }, + temporary = if self.temporary { "TEMPORARY " } else { "" }, + transient = if self.transient { "TRANSIENT " } else { "" }, + name = self.name, + )?; + if let Some(on_cluster) = &self.on_cluster { + write!( + f, + " ON CLUSTER {}", + on_cluster.replace('{', "'{").replace('}', "}'") + )?; + } + if !self.columns.is_empty() || !self.constraints.is_empty() { + write!(f, " ({}", display_comma_separated(&self.columns))?; + if !self.columns.is_empty() && !self.constraints.is_empty() { + write!(f, ", ")?; + } + write!(f, "{})", display_comma_separated(&self.constraints))?; + } else if self.query.is_none() && self.like.is_none() && self.clone.is_none() { + // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens + write!(f, " ()")?; + } + // Only for SQLite + if self.without_rowid { + write!(f, " WITHOUT ROWID")?; + } + + // Only for Hive + if let Some(l) = &self.like { + write!(f, " LIKE {l}")?; + } + + if let Some(c) = &self.clone { + write!(f, " 
CLONE {c}")?; + } + + match &self.hive_distribution { + HiveDistributionStyle::PARTITIONED { columns } => { + write!(f, " PARTITIONED BY ({})", display_comma_separated(columns))?; + } + HiveDistributionStyle::CLUSTERED { + columns, + sorted_by, + num_buckets, + } => { + write!(f, " CLUSTERED BY ({})", display_comma_separated(columns))?; + if !sorted_by.is_empty() { + write!(f, " SORTED BY ({})", display_comma_separated(sorted_by))?; + } + if *num_buckets > 0 { + write!(f, " INTO {num_buckets} BUCKETS")?; + } + } + HiveDistributionStyle::SKEWED { + columns, + on, + stored_as_directories, + } => { + write!( + f, + " SKEWED BY ({})) ON ({})", + display_comma_separated(columns), + display_comma_separated(on) + )?; + if *stored_as_directories { + write!(f, " STORED AS DIRECTORIES")?; + } + } + _ => (), + } + + if let Some(HiveFormat { + row_format, + serde_properties, + storage, + location, + }) = &self.hive_formats + { + match row_format { + Some(HiveRowFormat::SERDE { class }) => write!(f, " ROW FORMAT SERDE '{class}'")?, + Some(HiveRowFormat::DELIMITED { delimiters }) => { + write!(f, " ROW FORMAT DELIMITED")?; + if !delimiters.is_empty() { + write!(f, " {}", display_separated(delimiters, " "))?; + } + } + None => (), + } + match storage { + Some(HiveIOFormat::IOF { + input_format, + output_format, + }) => write!( + f, + " STORED AS INPUTFORMAT {input_format} OUTPUTFORMAT {output_format}" + )?, + Some(HiveIOFormat::FileFormat { format }) if !self.external => { + write!(f, " STORED AS {format}")? 
+ } + _ => (), + } + if let Some(serde_properties) = serde_properties.as_ref() { + write!( + f, + " WITH SERDEPROPERTIES ({})", + display_comma_separated(serde_properties) + )?; + } + if !self.external { + if let Some(loc) = location { + write!(f, " LOCATION '{loc}'")?; + } + } + } + if self.external { + if let Some(file_format) = self.file_format { + write!(f, " STORED AS {file_format}")?; + } + write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; + } + if !self.table_properties.is_empty() { + write!( + f, + " TBLPROPERTIES ({})", + display_comma_separated(&self.table_properties) + )?; + } + if !self.with_options.is_empty() { + write!(f, " WITH ({})", display_comma_separated(&self.with_options))?; + } + if let Some(engine) = &self.engine { + write!(f, " ENGINE={engine}")?; + } + if let Some(comment) = &self.comment { + write!(f, " COMMENT '{comment}'")?; + } + if let Some(auto_increment_offset) = self.auto_increment_offset { + write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; + } + if let Some(order_by) = &self.order_by { + write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; + } + if let Some(partition_by) = self.partition_by.as_ref() { + write!(f, " PARTITION BY {partition_by}")?; + } + if let Some(cluster_by) = self.cluster_by.as_ref() { + write!( + f, + " CLUSTER BY {}", + display_comma_separated(cluster_by.as_slice()) + )?; + } + if let Some(options) = self.options.as_ref() { + write!( + f, + " OPTIONS({})", + display_comma_separated(options.as_slice()) + )?; + } + if let Some(query) = &self.query { + write!(f, " AS {query}")?; + } + if let Some(default_charset) = &self.default_charset { + write!(f, " DEFAULT CHARSET={default_charset}")?; + } + if let Some(collation) = &self.collation { + write!(f, " COLLATE={collation}")?; + } + + if self.on_commit.is_some() { + let on_commit = match self.on_commit { + Some(OnCommit::DeleteRows) => "ON COMMIT DELETE ROWS", + Some(OnCommit::PreserveRows) => "ON COMMIT PRESERVE ROWS", + 
Some(OnCommit::Drop) => "ON COMMIT DROP", + None => "", + }; + write!(f, " {on_commit}")?; + } + if self.strict { + write!(f, " STRICT")?; + } + Ok(()) + } +} + /// INSERT statement. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 1265423791..c50e7bbd9c 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -7,6 +7,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; +use super::super::dml::CreateTable; use crate::ast::{ ColumnDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, Query, SqlOption, Statement, TableConstraint, @@ -263,7 +264,7 @@ impl CreateTableBuilder { } pub fn build(self) -> Statement { - Statement::CreateTable { + Statement::CreateTable(CreateTable { or_replace: self.or_replace, temporary: self.temporary, external: self.external, @@ -295,7 +296,7 @@ impl CreateTableBuilder { cluster_by: self.cluster_by, options: self.options, strict: self.strict, - } + }) } } @@ -306,7 +307,7 @@ impl TryFrom for CreateTableBuilder { // ownership. 
fn try_from(stmt: Statement) -> Result { match stmt { - Statement::CreateTable { + Statement::CreateTable(CreateTable { or_replace, temporary, external, @@ -338,7 +339,7 @@ impl TryFrom for CreateTableBuilder { cluster_by, options, strict, - } => Ok(Self { + }) => Ok(Self { or_replace, temporary, external, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 320dfc60e3..e29a8df049 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -38,7 +38,7 @@ pub use self::ddl::{ ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; -pub use self::dml::{Delete, Insert}; +pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, @@ -75,7 +75,7 @@ mod value; #[cfg(feature = "visitor")] mod visitor; -struct DisplaySeparated<'a, T> +pub struct DisplaySeparated<'a, T> where T: fmt::Display, { @@ -98,14 +98,14 @@ where } } -fn display_separated<'a, T>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T> +pub fn display_separated<'a, T>(slice: &'a [T], sep: &'static str) -> DisplaySeparated<'a, T> where T: fmt::Display, { DisplaySeparated { slice, sep } } -fn display_comma_separated(slice: &[T]) -> DisplaySeparated<'_, T> +pub fn display_comma_separated(slice: &[T]) -> DisplaySeparated<'_, T> where T: fmt::Display, { @@ -2033,56 +2033,7 @@ pub enum Statement { /// ```sql /// CREATE TABLE /// ``` - CreateTable { - or_replace: bool, - temporary: bool, - external: bool, - global: Option, - if_not_exists: bool, - transient: bool, - /// Table name - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - name: ObjectName, - /// Optional schema - columns: Vec, - constraints: Vec, - hive_distribution: HiveDistributionStyle, - hive_formats: Option, - table_properties: Vec, - with_options: Vec, - file_format: Option, - location: Option, - query: 
Option>, - without_rowid: bool, - like: Option, - clone: Option, - engine: Option, - comment: Option, - auto_increment_offset: Option, - default_charset: Option, - collation: Option, - on_commit: Option, - /// ClickHouse "ON CLUSTER" clause: - /// - on_cluster: Option, - /// ClickHouse "ORDER BY " clause. Note that omitted ORDER BY is different - /// than empty (represented as ()), the latter meaning "no sorting". - /// - order_by: Option>, - /// BigQuery: A partition expression for the table. - /// - partition_by: Option>, - /// BigQuery: Table clustering column list. - /// - cluster_by: Option>, - /// BigQuery: Table options list. - /// - options: Option>, - /// SQLite "STRICT" clause. - /// if the "STRICT" table-option keyword is added to the end, after the closing ")", - /// then strict typing rules apply to that table. - strict: bool, - }, + CreateTable(CreateTable), /// ```sql /// CREATE VIRTUAL TABLE .. USING ()` /// ``` @@ -2097,20 +2048,7 @@ pub enum Statement { /// ```sql /// `CREATE INDEX` /// ``` - CreateIndex { - /// index name - name: Option, - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - using: Option, - columns: Vec, - unique: bool, - concurrently: bool, - if_not_exists: bool, - include: Vec, - nulls_distinct: Option, - predicate: Option, - }, + CreateIndex(CreateIndex), /// ```sql /// CREATE ROLE /// ``` @@ -3426,245 +3364,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateTable { - name, - columns, - constraints, - table_properties, - with_options, - or_replace, - if_not_exists, - transient, - hive_distribution, - hive_formats, - external, - global, - temporary, - file_format, - location, - query, - without_rowid, - like, - clone, - default_charset, - engine, - comment, - auto_increment_offset, - collation, - on_commit, - on_cluster, - order_by, - partition_by, - cluster_by, - options, - strict, - } => { - // We want to allow the following options - // Empty column list, allowed by 
PostgreSQL: - // `CREATE TABLE t ()` - // No columns provided for CREATE TABLE AS: - // `CREATE TABLE t AS SELECT a from t2` - // Columns provided for CREATE TABLE AS: - // `CREATE TABLE t (a INT) AS SELECT a from t2` - write!( - f, - "CREATE {or_replace}{external}{global}{temporary}{transient}TABLE {if_not_exists}{name}", - or_replace = if *or_replace { "OR REPLACE " } else { "" }, - external = if *external { "EXTERNAL " } else { "" }, - global = global - .map(|global| { - if global { - "GLOBAL " - } else { - "LOCAL " - } - }) - .unwrap_or(""), - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - temporary = if *temporary { "TEMPORARY " } else { "" }, - transient = if *transient { "TRANSIENT " } else { "" }, - name = name, - )?; - if let Some(on_cluster) = on_cluster { - write!( - f, - " ON CLUSTER {}", - on_cluster.replace('{', "'{").replace('}', "}'") - )?; - } - if !columns.is_empty() || !constraints.is_empty() { - write!(f, " ({}", display_comma_separated(columns))?; - if !columns.is_empty() && !constraints.is_empty() { - write!(f, ", ")?; - } - write!(f, "{})", display_comma_separated(constraints))?; - } else if query.is_none() && like.is_none() && clone.is_none() { - // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens - write!(f, " ()")?; - } - // Only for SQLite - if *without_rowid { - write!(f, " WITHOUT ROWID")?; - } - - // Only for Hive - if let Some(l) = like { - write!(f, " LIKE {l}")?; - } - - if let Some(c) = clone { - write!(f, " CLONE {c}")?; - } - - match hive_distribution { - HiveDistributionStyle::PARTITIONED { columns } => { - write!(f, " PARTITIONED BY ({})", display_comma_separated(columns))?; - } - HiveDistributionStyle::CLUSTERED { - columns, - sorted_by, - num_buckets, - } => { - write!(f, " CLUSTERED BY ({})", display_comma_separated(columns))?; - if !sorted_by.is_empty() { - write!(f, " SORTED BY ({})", display_comma_separated(sorted_by))?; - } - if *num_buckets > 0 { - write!(f, " INTO {num_buckets} 
BUCKETS")?; - } - } - HiveDistributionStyle::SKEWED { - columns, - on, - stored_as_directories, - } => { - write!( - f, - " SKEWED BY ({})) ON ({})", - display_comma_separated(columns), - display_comma_separated(on) - )?; - if *stored_as_directories { - write!(f, " STORED AS DIRECTORIES")?; - } - } - _ => (), - } - - if let Some(HiveFormat { - row_format, - serde_properties, - storage, - location, - }) = hive_formats - { - match row_format { - Some(HiveRowFormat::SERDE { class }) => { - write!(f, " ROW FORMAT SERDE '{class}'")? - } - Some(HiveRowFormat::DELIMITED { delimiters }) => { - write!(f, " ROW FORMAT DELIMITED")?; - if !delimiters.is_empty() { - write!(f, " {}", display_separated(delimiters, " "))?; - } - } - None => (), - } - match storage { - Some(HiveIOFormat::IOF { - input_format, - output_format, - }) => write!( - f, - " STORED AS INPUTFORMAT {input_format} OUTPUTFORMAT {output_format}" - )?, - Some(HiveIOFormat::FileFormat { format }) if !*external => { - write!(f, " STORED AS {format}")? 
- } - _ => (), - } - if let Some(serde_properties) = serde_properties.as_ref() { - write!( - f, - " WITH SERDEPROPERTIES ({})", - display_comma_separated(serde_properties) - )?; - } - if !*external { - if let Some(loc) = location { - write!(f, " LOCATION '{loc}'")?; - } - } - } - if *external { - if let Some(file_format) = &file_format { - write!(f, " STORED AS {file_format}")?; - } - write!(f, " LOCATION '{}'", location.as_ref().unwrap())?; - } - if !table_properties.is_empty() { - write!( - f, - " TBLPROPERTIES ({})", - display_comma_separated(table_properties) - )?; - } - if !with_options.is_empty() { - write!(f, " WITH ({})", display_comma_separated(with_options))?; - } - if let Some(engine) = engine { - write!(f, " ENGINE={engine}")?; - } - if let Some(comment) = comment { - write!(f, " COMMENT '{comment}'")?; - } - if let Some(auto_increment_offset) = auto_increment_offset { - write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; - } - if let Some(order_by) = order_by { - write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; - } - if let Some(partition_by) = partition_by.as_ref() { - write!(f, " PARTITION BY {partition_by}")?; - } - if let Some(cluster_by) = cluster_by.as_ref() { - write!( - f, - " CLUSTER BY {}", - display_comma_separated(cluster_by.as_slice()) - )?; - } - if let Some(options) = options.as_ref() { - write!( - f, - " OPTIONS({})", - display_comma_separated(options.as_slice()) - )?; - } - if let Some(query) = query { - write!(f, " AS {query}")?; - } - if let Some(default_charset) = default_charset { - write!(f, " DEFAULT CHARSET={default_charset}")?; - } - if let Some(collation) = collation { - write!(f, " COLLATE={collation}")?; - } - - if on_commit.is_some() { - let on_commit = match on_commit { - Some(OnCommit::DeleteRows) => "ON COMMIT DELETE ROWS", - Some(OnCommit::PreserveRows) => "ON COMMIT PRESERVE ROWS", - Some(OnCommit::Drop) => "ON COMMIT DROP", - None => "", - }; - write!(f, " {on_commit}")?; - } - if *strict { - 
write!(f, " STRICT")?; - } - Ok(()) - } + Statement::CreateTable(create_table) => create_table.fmt(f), Statement::CreateVirtualTable { name, if_not_exists, @@ -3683,7 +3383,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name, table_name, using, @@ -3694,7 +3394,7 @@ impl fmt::Display for Statement { include, nulls_distinct, predicate, - } => { + }) => { write!( f, "CREATE {unique}INDEX {concurrently}{if_not_exists}", diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c6750644cd..a2468af3dd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4963,7 +4963,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::CreateIndex { + Ok(Statement::CreateIndex(CreateIndex { name: index_name, table_name, using, @@ -4974,7 +4974,7 @@ impl<'a> Parser<'a> { include, nulls_distinct, predicate, - }) + })) } pub fn parse_create_extension(&mut self) -> Result { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 1cec15c30c..3b6d6bfcb4 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -354,7 +354,7 @@ fn parse_create_view_with_unquoted_hyphen() { fn parse_create_table_with_unquoted_hyphen() { let sql = "CREATE TABLE my-pro-ject.mydataset.mytable (x INT64)"; match bigquery().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!( name, ObjectName(vec![ @@ -388,14 +388,14 @@ fn parse_create_table_with_options() { r#"OPTIONS(partition_expiration_days = 1, description = "table option description")"# ); match bigquery().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, partition_by, cluster_by, options, .. 
- } => { + }) => { assert_eq!( name, ObjectName(vec!["mydataset".into(), "newtable".into()]) @@ -477,7 +477,7 @@ fn parse_create_table_with_options() { fn parse_nested_data_types() { let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; match bigquery_and_generic().one_statement_parses_to(sql, sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( columns, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5f2d2cc025..580ae9867f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2747,7 +2747,7 @@ fn parse_create_table() { FOREIGN KEY (lng) REFERENCES othertable4(longitude) ON UPDATE SET NULL)", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -2757,7 +2757,7 @@ fn parse_create_table() { file_format: None, location: None, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -2936,7 +2936,7 @@ fn parse_create_table_with_constraint_characteristics() { FOREIGN KEY (lng) REFERENCES othertable4(longitude) ON UPDATE SET NULL NOT DEFERRABLE INITIALLY IMMEDIATE ENFORCED)", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -2946,7 +2946,7 @@ fn parse_create_table_with_constraint_characteristics() { file_format: None, location: None, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3104,7 +3104,7 @@ fn parse_create_table_column_constraint_characteristics() { }; match ast { - Statement::CreateTable { columns, .. } => { + Statement::CreateTable(CreateTable { columns, .. 
}) => { assert_eq!( columns, vec![ColumnDef { @@ -3214,12 +3214,12 @@ fn parse_create_table_hive_array() { }; match dialects.one_statement_parses_to(sql.as_str(), sql.as_str()) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { if_not_exists, name, columns, .. - } => { + }) => { assert!(if_not_exists); assert_eq!(name, ObjectName(vec!["something".into()])); assert_eq!( @@ -3373,7 +3373,7 @@ fn parse_create_table_as() { let sql = "CREATE TABLE t AS SELECT * FROM a"; match verified_stmt(sql) { - Statement::CreateTable { name, query, .. } => { + Statement::CreateTable(CreateTable { name, query, .. }) => { assert_eq!(name.to_string(), "t".to_string()); assert_eq!(query, Some(Box::new(verified_query("SELECT * FROM a")))); } @@ -3385,7 +3385,7 @@ fn parse_create_table_as() { // (without data types) in a CTAS, but we have yet to support that. let sql = "CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a"; match verified_stmt(sql) { - Statement::CreateTable { columns, query, .. } => { + Statement::CreateTable(CreateTable { columns, query, .. }) => { assert_eq!(columns.len(), 2); assert_eq!(columns[0].to_string(), "a INT".to_string()); assert_eq!(columns[1].to_string(), "b INT".to_string()); @@ -3418,7 +3418,7 @@ fn parse_create_table_as_table() { }); match verified_stmt(sql1) { - Statement::CreateTable { query, name, .. } => { + Statement::CreateTable(CreateTable { query, name, .. }) => { assert_eq!(name, ObjectName(vec![Ident::new("new_table")])); assert_eq!(query.unwrap(), expected_query1); } @@ -3443,7 +3443,7 @@ fn parse_create_table_as_table() { }); match verified_stmt(sql2) { - Statement::CreateTable { query, name, .. } => { + Statement::CreateTable(CreateTable { query, name, .. 
}) => { assert_eq!(name, ObjectName(vec![Ident::new("new_table")])); assert_eq!(query.unwrap(), expected_query2); } @@ -3456,7 +3456,7 @@ fn parse_create_table_on_cluster() { // Using single-quote literal to define current cluster let sql = "CREATE TABLE t ON CLUSTER '{cluster}' (a INT, b INT)"; match verified_stmt(sql) { - Statement::CreateTable { on_cluster, .. } => { + Statement::CreateTable(CreateTable { on_cluster, .. }) => { assert_eq!(on_cluster.unwrap(), "{cluster}".to_string()); } _ => unreachable!(), @@ -3465,7 +3465,7 @@ fn parse_create_table_on_cluster() { // Using explicitly declared cluster name let sql = "CREATE TABLE t ON CLUSTER my_cluster (a INT, b INT)"; match verified_stmt(sql) { - Statement::CreateTable { on_cluster, .. } => { + Statement::CreateTable(CreateTable { on_cluster, .. }) => { assert_eq!(on_cluster.unwrap(), "my_cluster".to_string()); } _ => unreachable!(), @@ -3477,9 +3477,9 @@ fn parse_create_or_replace_table() { let sql = "CREATE OR REPLACE TABLE t (a INT)"; match verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, or_replace, .. - } => { + }) => { assert_eq!(name.to_string(), "t".to_string()); assert!(or_replace); } @@ -3488,7 +3488,7 @@ fn parse_create_or_replace_table() { let sql = "CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a"; match verified_stmt(sql) { - Statement::CreateTable { columns, query, .. } => { + Statement::CreateTable(CreateTable { columns, query, .. }) => { assert_eq!(columns.len(), 2); assert_eq!(columns[0].to_string(), "a INT".to_string()); assert_eq!(columns[1].to_string(), "b INT".to_string()); @@ -3519,7 +3519,7 @@ fn parse_create_table_with_on_delete_on_update_2in_any_order() -> Result<(), Par fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; match verified_stmt(sql) { - Statement::CreateTable { with_options, .. } => { + Statement::CreateTable(CreateTable { with_options, .. 
}) => { assert_eq!( vec![ SqlOption { @@ -3542,7 +3542,7 @@ fn parse_create_table_with_options() { fn parse_create_table_clone() { let sql = "CREATE OR REPLACE TABLE a CLONE a_tmp"; match verified_stmt(sql) { - Statement::CreateTable { name, clone, .. } => { + Statement::CreateTable(CreateTable { name, clone, .. }) => { assert_eq!(ObjectName(vec![Ident::new("a")]), name); assert_eq!(Some(ObjectName(vec![(Ident::new("a_tmp"))])), clone) } @@ -3572,7 +3572,7 @@ fn parse_create_external_table() { STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3582,7 +3582,7 @@ fn parse_create_external_table() { file_format, location, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3643,7 +3643,7 @@ fn parse_create_or_replace_external_table() { STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3654,7 +3654,7 @@ fn parse_create_or_replace_external_table() { location, or_replace, .. - } => { + }) => { assert_eq!("uk_cities", name.to_string()); assert_eq!( columns, @@ -3700,7 +3700,7 @@ fn parse_create_external_table_lowercase() { lng DOUBLE) \ STORED AS PARQUET LOCATION '/tmp/example.csv'", ); - assert_matches!(ast, Statement::CreateTable { .. }); + assert_matches!(ast, Statement::CreateTable(CreateTable { .. })); } #[test] @@ -7210,14 +7210,14 @@ fn parse_create_index() { }, ]; match verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(name), table_name, columns, unique, if_not_exists, .. 
- } => { + }) => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!(indexed_columns, columns); @@ -7244,7 +7244,7 @@ fn test_create_index_with_using_function() { }, ]; match verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(name), table_name, using, @@ -7255,7 +7255,7 @@ fn test_create_index_with_using_function() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq!("idx_name", name.to_string()); assert_eq!("test", table_name.to_string()); assert_eq!("btree", using.unwrap().to_string()); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1e185915b7..3041b60012 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -470,7 +470,7 @@ fn parse_set_variables() { fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTO_INCREMENT)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -541,12 +541,12 @@ fn parse_create_table_primary_and_unique_key() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -609,9 +609,9 @@ fn parse_create_table_primary_and_unique_key_with_index_options() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, constraints, .. 
- } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -647,9 +647,9 @@ fn parse_create_table_primary_and_unique_key_with_index_type() { for (sql, index_type_display) in sqls.iter().zip(index_type_display) { match mysql_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, constraints, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); let expected_constraint = table_constraint_unique_primary_ctor( @@ -690,7 +690,7 @@ fn parse_create_table_comment() { for sql in [canonical, with_equal] { match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { name, comment, .. } => { + Statement::CreateTable(CreateTable { name, comment, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!(comment.expect("Should exist").to_string(), "baz"); } @@ -708,11 +708,11 @@ fn parse_create_table_auto_increment_offset() { for sql in [canonical, with_equal] { match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, auto_increment_offset, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( auto_increment_offset.expect("Should exist").to_string(), @@ -728,7 +728,7 @@ fn parse_create_table_auto_increment_offset() { fn parse_create_table_set_enum() { let sql = "CREATE TABLE foo (bar SET('a', 'b'), baz ENUM('a', 'b'))"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -756,13 +756,13 @@ fn parse_create_table_set_enum() { fn parse_create_table_engine_default_charset() { let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3"; match mysql().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, engine, default_charset, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -784,12 +784,12 @@ fn parse_create_table_engine_default_charset() { fn parse_create_table_collate() { let sql = "CREATE TABLE foo (id INT(11)) COLLATE=utf8mb4_0900_ai_ci"; match mysql().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, collation, .. - } => { + }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -810,7 +810,7 @@ fn parse_create_table_collate() { fn parse_create_table_comment_character_set() { let sql = "CREATE TABLE foo (s TEXT CHARACTER SET utf8mb4 COMMENT 'comment')"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -857,7 +857,7 @@ fn parse_create_table_gencol() { fn parse_quote_identifiers() { let sql = "CREATE TABLE `PRIMARY` (`BEGIN` INT PRIMARY KEY)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "`PRIMARY`"); assert_eq!( vec![ColumnDef { @@ -1126,7 +1126,7 @@ fn check_roundtrip_of_escaped_string() { fn parse_create_table_with_minimum_display_width() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3), bar_smallint SMALLINT(5), bar_mediumint MEDIUMINT(6), bar_int INT(11), bar_bigint BIGINT(20))"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. 
} => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -1172,7 +1172,7 @@ fn parse_create_table_with_minimum_display_width() { fn parse_create_table_unsigned() { let sql = "CREATE TABLE foo (bar_tinyint TINYINT(3) UNSIGNED, bar_smallint SMALLINT(5) UNSIGNED, bar_mediumint MEDIUMINT(13) UNSIGNED, bar_int INT(11) UNSIGNED, bar_bigint BIGINT(20) UNSIGNED)"; match mysql().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ @@ -2321,7 +2321,7 @@ fn parse_kill() { fn parse_table_colum_option_on_update() { let sql1 = "CREATE TABLE foo (`modification_time` DATETIME ON UPDATE CURRENT_TIMESTAMP())"; match mysql().verified_stmt(sql1) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -2622,7 +2622,7 @@ fn parse_create_table_with_column_collate() { let sql = "CREATE TABLE tb (id TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci)"; let canonical = "CREATE TABLE tb (id TEXT COLLATE utf8mb4_0900_ai_ci CHARACTER SET utf8mb4)"; match mysql().one_statement_parses_to(sql, canonical) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { assert_eq!(name.to_string(), "tb"); assert_eq!( vec![ColumnDef { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 677246a51c..7118d650ee 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -317,7 +317,7 @@ fn parse_create_table_with_defaults() { active int NOT NULL ) WITH (fillfactor = 20, user_catalog_table = true, autovacuum_vacuum_threshold = 100)"; match pg_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -327,7 +327,7 @@ fn parse_create_table_with_defaults() { file_format: None, location: None, .. - } => { + }) => { use pretty_assertions::assert_eq; assert_eq!("public.customer", name.to_string()); assert_eq!( @@ -537,12 +537,12 @@ fn parse_create_table_constraints_only() { let sql = "CREATE TABLE t (CONSTRAINT positive CHECK (2 > 1))"; let ast = pg_and_generic().verified_stmt(sql); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, .. - } => { + }) => { assert_eq!("t", name.to_string()); assert!(columns.is_empty()); assert_eq!( @@ -718,11 +718,11 @@ fn parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; let ast = pg_and_generic().verified_stmt(sql); match ast { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, if_not_exists: true, .. 
- } => { + }) => { assert_eq!("uk_cities", name.to_string()); } _ => unreachable!(), @@ -2086,7 +2086,7 @@ fn parse_array_multi_subscript() { fn parse_create_index() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2097,7 +2097,7 @@ fn parse_create_index() { nulls_distinct: None, include, predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2115,7 +2115,7 @@ fn parse_create_index() { fn parse_create_anonymous_index() { let sql = "CREATE INDEX ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name, table_name: ObjectName(table_name), using, @@ -2126,7 +2126,7 @@ fn parse_create_anonymous_index() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq!(None, name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2144,7 +2144,7 @@ fn parse_create_anonymous_index() { fn parse_create_index_concurrently() { let sql = "CREATE INDEX CONCURRENTLY IF NOT EXISTS my_index ON my_table(col1,col2)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2155,7 +2155,7 @@ fn parse_create_index_concurrently() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2173,7 +2173,7 @@ fn parse_create_index_concurrently() { fn parse_create_index_with_predicate() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) WHERE col3 IS NULL"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + 
Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2184,7 +2184,7 @@ fn parse_create_index_with_predicate() { include, nulls_distinct: None, predicate: Some(_), - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2202,7 +2202,7 @@ fn parse_create_index_with_predicate() { fn parse_create_index_with_include() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) INCLUDE (col3)"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2213,7 +2213,7 @@ fn parse_create_index_with_include() { include, nulls_distinct: None, predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2231,7 +2231,7 @@ fn parse_create_index_with_include() { fn parse_create_index_with_nulls_distinct() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS NOT DISTINCT"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2242,7 +2242,7 @@ fn parse_create_index_with_nulls_distinct() { include, nulls_distinct: Some(nulls_distinct), predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -2258,7 +2258,7 @@ fn parse_create_index_with_nulls_distinct() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) NULLS DISTINCT"; match pg().verified_stmt(sql) { - Statement::CreateIndex { + Statement::CreateIndex(CreateIndex { name: Some(ObjectName(name)), table_name: ObjectName(table_name), using, @@ -2269,7 +2269,7 @@ fn parse_create_index_with_nulls_distinct() { include, nulls_distinct: 
Some(nulls_distinct), predicate: None, - } => { + }) => { assert_eq_vec(&["my_index"], &name); assert_eq_vec(&["my_table"], &table_name); assert_eq!(None, using); @@ -3704,7 +3704,7 @@ fn parse_create_table_with_alias() { bool_col BOOL, );"; match pg_and_generic().one_statement_parses_to(sql, "") { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, columns, constraints, @@ -3714,7 +3714,7 @@ fn parse_create_table_with_alias() { file_format: None, location: None, .. - } => { + }) => { assert_eq!("public.datatype_aliases", name.to_string()); assert_eq!( columns, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index d213efd7b3..a21e9d5d6b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -33,7 +33,7 @@ use pretty_assertions::assert_eq; fn test_snowflake_create_table() { let sql = "CREATE TABLE _my_$table (am00unt number)"; match snowflake_and_generic().verified_stmt(sql) { - Statement::CreateTable { name, .. } => { + Statement::CreateTable(CreateTable { name, .. }) => { assert_eq!("_my_$table", name.to_string()); } _ => unreachable!(), @@ -44,9 +44,9 @@ fn test_snowflake_create_table() { fn test_snowflake_create_transient_table() { let sql = "CREATE TRANSIENT TABLE CUSTOMER (id INT, name VARCHAR(255))"; match snowflake_and_generic().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, transient, .. - } => { + }) => { assert_eq!("CUSTOMER", name.to_string()); assert!(transient) } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index fe5346f149..16ea9eb8cb 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -122,11 +122,11 @@ fn pragma_eq_placeholder_style() { fn parse_create_table_without_rowid() { let sql = "CREATE TABLE t (a INT) WITHOUT ROWID"; match sqlite_and_generic().verified_stmt(sql) { - Statement::CreateTable { + Statement::CreateTable(CreateTable { name, without_rowid: true, .. 
- } => { + }) => { assert_eq!("t", name.to_string()); } _ => unreachable!(), @@ -200,7 +200,7 @@ fn double_equality_operator() { fn parse_create_table_auto_increment() { let sql = "CREATE TABLE foo (bar INT PRIMARY KEY AUTOINCREMENT)"; match sqlite_and_generic().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "foo"); assert_eq!( vec![ColumnDef { @@ -234,7 +234,7 @@ fn parse_create_table_auto_increment() { fn parse_create_sqlite_quote() { let sql = "CREATE TABLE `PRIMARY` (\"KEY\" INT, [INDEX] INT)"; match sqlite().verified_stmt(sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name.to_string(), "`PRIMARY`"); assert_eq!( vec![ @@ -295,7 +295,7 @@ fn test_placeholder() { #[test] fn parse_create_table_with_strict() { let sql = "CREATE TABLE Fruits (id TEXT NOT NULL PRIMARY KEY) STRICT"; - if let Statement::CreateTable { name, strict, .. } = sqlite().verified_stmt(sql) { + if let Statement::CreateTable(CreateTable { name, strict, .. 
}) = sqlite().verified_stmt(sql) { assert_eq!(name.to_string(), "Fruits"); assert!(strict); } From 6d4776b4825dbc62f975a78e51b4ff69d8f49d34 Mon Sep 17 00:00:00 2001 From: Mohamed Abdeen <83442793+MohamedAbdeen21@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:44:04 +0300 Subject: [PATCH 13/53] Enhancing Trailing Comma Option (#1212) --- src/dialect/bigquery.rs | 4 ++ src/dialect/duckdb.rs | 4 ++ src/dialect/mod.rs | 8 ++++ src/dialect/snowflake.rs | 4 ++ src/parser/mod.rs | 28 ++++++++++--- tests/sqlparser_common.rs | 80 ++++++++++++++++++++++++++++++++++--- tests/sqlparser_postgres.rs | 2 +- 7 files changed, 118 insertions(+), 12 deletions(-) diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index d65de3a47a..d3673337f7 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -22,6 +22,10 @@ impl Dialect for BigQueryDialect { ch == '`' } + fn supports_projection_trailing_commas(&self) -> bool { + true + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index e141f941f9..c6edeac141 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -18,6 +18,10 @@ pub struct DuckDbDialect; // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. impl Dialect for DuckDbDialect { + fn supports_trailing_commas(&self) -> bool { + true + } + fn is_identifier_start(&self, ch: char) -> bool { ch.is_alphabetic() || ch == '_' } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index da5c8c5acc..e06c07a1c5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -251,6 +251,14 @@ pub trait Dialect: Debug + Any { // return None to fall back to the default behavior None } + /// Does the dialect support trailing commas around the query? + fn supports_trailing_commas(&self) -> bool { + false + } + /// Does the dialect support trailing commas in the projection list? 
+ fn supports_projection_trailing_commas(&self) -> bool { + self.supports_trailing_commas() + } /// Dialect-specific infix parser override fn parse_infix( &self, diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 21bc535548..894b004381 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -38,6 +38,10 @@ impl Dialect for SnowflakeDialect { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } + fn supports_projection_trailing_commas(&self) -> bool { + true + } + fn is_identifier_part(&self, ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a2468af3dd..e0a5b86ab1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -305,7 +305,7 @@ impl<'a> Parser<'a> { state: ParserState::Normal, dialect, recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH), - options: ParserOptions::default(), + options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()), } } @@ -3225,7 +3225,7 @@ impl<'a> Parser<'a> { // This pattern could be captured better with RAII type semantics, but it's quite a bit of // code to add for just one case, so we'll just do it manually here. 
let old_value = self.options.trailing_commas; - self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect); + self.options.trailing_commas |= self.dialect.supports_projection_trailing_commas(); let ret = self.parse_comma_separated(|p| p.parse_select_item()); self.options.trailing_commas = old_value; @@ -5413,12 +5413,17 @@ impl<'a> Parser<'a> { } else { return self.expected("column name or constraint definition", self.peek_token()); } + let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { + let rparen = self.peek_token().token == Token::RParen; + + if !comma && !rparen { return self.expected("',' or ')' after column definition", self.peek_token()); + }; + + if rparen && (!comma || self.options.trailing_commas) { + let _ = self.consume_token(&Token::RParen); + break; } } @@ -9411,6 +9416,9 @@ impl<'a> Parser<'a> { with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), } } else { + let old_value = self.options.trailing_commas; + self.options.trailing_commas = false; + let (actions, err): (Vec<_>, Vec<_>) = self .parse_comma_separated(Parser::parse_grant_permission)? 
.into_iter() @@ -9434,6 +9442,8 @@ impl<'a> Parser<'a> { }) .partition(Result::is_ok); + self.options.trailing_commas = old_value; + if !err.is_empty() { let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); return Err(ParserError::ParserError(format!( @@ -9939,6 +9949,12 @@ impl<'a> Parser<'a> { Expr::Wildcard => Ok(SelectItem::Wildcard( self.parse_wildcard_additional_options()?, )), + Expr::Identifier(v) if v.value.to_lowercase() == "from" => { + parser_err!( + format!("Expected an expression, found: {}", v), + self.peek_token().location + ) + } expr => self .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) .map(|alias| match alias { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 580ae9867f..8fe7b862c4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3552,8 +3552,13 @@ fn parse_create_table_clone() { #[test] fn parse_create_table_trailing_comma() { - let sql = "CREATE TABLE foo (bar int,)"; - all_dialects().one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); + let dialect = TestedDialects { + dialects: vec![Box::new(DuckDbDialect {})], + options: None, + }; + + let sql = "CREATE TABLE foo (bar int,);"; + dialect.one_statement_parses_to(sql, "CREATE TABLE foo (bar INT)"); } #[test] @@ -4418,7 +4423,7 @@ fn parse_window_clause() { ORDER BY C3"; verified_only_select(sql); - let sql = "SELECT from mytable WINDOW window1 AS window2"; + let sql = "SELECT * from mytable WINDOW window1 AS window2"; let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( @@ -8846,9 +8851,11 @@ fn parse_non_latin_identifiers() { #[test] fn parse_trailing_comma() { + // At the moment, Duck DB is the only dialect that allows + // trailing commas anywhere in the query let trailing_commas = TestedDialects { - dialects: vec![Box::new(GenericDialect {})], - options: Some(ParserOptions::new().with_trailing_commas(true)), + dialects: 
vec![Box::new(DuckDbDialect {})], + options: None, }; trailing_commas.one_statement_parses_to( @@ -8866,11 +8873,74 @@ fn parse_trailing_comma() { "SELECT DISTINCT ON (album_id) name FROM track", ); + trailing_commas.one_statement_parses_to( + "CREATE TABLE employees (name text, age int,)", + "CREATE TABLE employees (name TEXT, age INT)", + ); + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + + // doesn't allow any trailing commas + let trailing_commas = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + + assert_eq!( + trailing_commas + .parse_sql_statements("SELECT name, age, from employees;") + .unwrap_err(), + ParserError::ParserError("Expected an expression, found: from".to_string()) + ); + + assert_eq!( + trailing_commas + .parse_sql_statements("CREATE TABLE employees (name text, age int,)") + .unwrap_err(), + ParserError::ParserError( + "Expected column name or constraint definition, found: )".to_string() + ) + ); +} + +#[test] +fn parse_projection_trailing_comma() { + // Some dialects allow trailing commas only in the projection + let trailing_commas = TestedDialects { + dialects: vec![Box::new(SnowflakeDialect {}), Box::new(BigQueryDialect {})], + options: None, + }; + + trailing_commas.one_statement_parses_to( + "SELECT album_id, name, FROM track", + "SELECT album_id, name FROM track", + ); + + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); + + trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); + + trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + + assert_eq!( + trailing_commas + .parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,") + .unwrap_err(), + ParserError::ParserError("Expected an expression:, found: EOF".to_string()) + ); + + assert_eq!( + 
trailing_commas + .parse_sql_statements("CREATE TABLE employees (name text, age int,)") + .unwrap_err(), + ParserError::ParserError( + "Expected column name or constraint definition, found: )".to_string() + ), + ); } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7118d650ee..1df94b1005 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3701,7 +3701,7 @@ fn parse_create_table_with_alias() { int2_col INT2, float8_col FLOAT8, float4_col FLOAT4, - bool_col BOOL, + bool_col BOOL );"; match pg_and_generic().one_statement_parses_to(sql, "") { Statement::CreateTable(CreateTable { From 2fb919d8b21129bd8faf62fc4aadeb9629ac5e10 Mon Sep 17 00:00:00 2001 From: Aleksei Piianin Date: Fri, 7 Jun 2024 13:09:42 +0200 Subject: [PATCH 14/53] ClickHouse data types (#1285) --- src/ast/data_type.rs | 191 ++++++++++++++++++++++++++++++++-- src/ast/mod.rs | 2 +- src/keywords.rs | 18 ++++ src/parser/mod.rs | 151 ++++++++++++++++++++++++--- tests/sqlparser_clickhouse.rs | 190 +++++++++++++++++++++++++++++++++ 5 files changed, 528 insertions(+), 24 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index d71900bffe..7d0aec8fca 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -22,7 +22,7 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{display_comma_separated, ObjectName, StructField}; -use super::value::escape_single_quote_string; +use super::{value::escape_single_quote_string, ColumnDef}; /// SQL data types #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -129,10 +129,39 @@ pub enum DataType { /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Int4(Option), - /// Integer type in [bigquery] + /// Int8 as alias for Bigint in [postgresql] and integer type in [clickhouse] + /// Note: Int8 mean 8 bytes in [postgresql] (not 8 bits) + /// Int8 with optional display width e.g. 
INT8 or INT8(11) + /// Note: Int8 mean 8 bits in [clickhouse] + /// + /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int8(Option), + /// Integer type in [clickhouse] + /// Note: Int16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int16, + /// Integer type in [clickhouse] + /// Note: Int16 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int32, + /// Integer type in [bigquery], [clickhouse] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint Int64, + /// Integer type in [clickhouse] + /// Note: Int128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int128, + /// Integer type in [clickhouse] + /// Note: Int256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + Int256, /// Integer with optional display width e.g. INTEGER or INTEGER(11) Integer(Option), /// Unsigned int with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED @@ -141,25 +170,54 @@ pub enum DataType { UnsignedInt4(Option), /// Unsigned integer with optional display width e.g. 
INTGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), + /// Unsigned integer type in [clickhouse] + /// Note: UInt8 mean 8 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt8, + /// Unsigned integer type in [clickhouse] + /// Note: UInt16 mean 16 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt16, + /// Unsigned integer type in [clickhouse] + /// Note: UInt32 mean 32 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt32, + /// Unsigned integer type in [clickhouse] + /// Note: UInt64 mean 64 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt64, + /// Unsigned integer type in [clickhouse] + /// Note: UInt128 mean 128 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt128, + /// Unsigned integer type in [clickhouse] + /// Note: UInt256 mean 256 bits in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/int-uint + UInt256, /// Big integer with optional display width e.g. BIGINT or BIGINT(20) BigInt(Option), /// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED UnsignedBigInt(Option), - /// Int8 as alias for Bigint in [postgresql] - /// Note: Int8 mean 8 bytes in postgres (not 8 bits) - /// Int8 with optional display width e.g. INT8 or INT8(11) - /// - /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html - Int8(Option), /// Unsigned Int8 with optional display width e.g. 
INT8 UNSIGNED or INT8(11) UNSIGNED UnsignedInt8(Option), /// Float4 as alias for Real in [postgresql] /// /// [postgresql]: https://www.postgresql.org/docs/15/datatype.html Float4, + /// Floating point in [clickhouse] + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float + Float32, /// Floating point in [bigquery] /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/float Float64, /// Floating point e.g. REAL Real, @@ -182,6 +240,10 @@ pub enum DataType { Boolean, /// Date Date, + /// Date32 with the same range as Datetime64 + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/date32 + Date32, /// Time with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -190,6 +252,10 @@ pub enum DataType { /// /// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html Datetime(Option), + /// Datetime with time precision and optional timezone e.g. [ClickHouse][1]. + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + Datetime64(u64, Option), /// Timestamp with optional time precision and time zone information e.g. [standard][1]. /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type @@ -206,12 +272,28 @@ pub enum DataType { Text, /// String with optional length. String(Option), + /// A fixed-length string e.g [ClickHouse][1]. 
+ /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/fixedstring + FixedString(u64), /// Bytea Bytea, /// Custom type such as enums Custom(ObjectName, Vec), /// Arrays Array(ArrayElemTypeDef), + /// Map + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/map + Map(Box, Box), + /// Tuple + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + Tuple(Vec), + /// Nested + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested + Nested(Vec), /// Enums Enum(Vec), /// Set @@ -221,6 +303,14 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// Nullable - special marker NULL represents in ClickHouse as a data type. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable + Nullable(Box), + /// LowCardinality - changes the internal representation of other data types to be dictionary-encoded. + /// + /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality + LowCardinality(Box), /// No type specified - only used with /// [`SQLiteDialect`](crate::dialect::SQLiteDialect), from statements such /// as `CREATE TABLE t1 (a)`. 
@@ -296,9 +386,24 @@ impl fmt::Display for DataType { DataType::Int4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, false) } + DataType::Int8(zerofill) => { + format_type_with_optional_length(f, "INT8", zerofill, false) + } + DataType::Int16 => { + write!(f, "Int16") + } + DataType::Int32 => { + write!(f, "Int32") + } DataType::Int64 => { write!(f, "INT64") } + DataType::Int128 => { + write!(f, "Int128") + } + DataType::Int256 => { + write!(f, "Int256") + } DataType::UnsignedInt4(zerofill) => { format_type_with_optional_length(f, "INT4", zerofill, true) } @@ -314,14 +419,30 @@ impl fmt::Display for DataType { DataType::UnsignedBigInt(zerofill) => { format_type_with_optional_length(f, "BIGINT", zerofill, true) } - DataType::Int8(zerofill) => { - format_type_with_optional_length(f, "INT8", zerofill, false) - } DataType::UnsignedInt8(zerofill) => { format_type_with_optional_length(f, "INT8", zerofill, true) } + DataType::UInt8 => { + write!(f, "UInt8") + } + DataType::UInt16 => { + write!(f, "UInt16") + } + DataType::UInt32 => { + write!(f, "UInt32") + } + DataType::UInt64 => { + write!(f, "UInt64") + } + DataType::UInt128 => { + write!(f, "UInt128") + } + DataType::UInt256 => { + write!(f, "UInt256") + } DataType::Real => write!(f, "REAL"), DataType::Float4 => write!(f, "FLOAT4"), + DataType::Float32 => write!(f, "Float32"), DataType::Float64 => write!(f, "FLOAT64"), DataType::Double => write!(f, "DOUBLE"), DataType::Float8 => write!(f, "FLOAT8"), @@ -329,6 +450,7 @@ impl fmt::Display for DataType { DataType::Bool => write!(f, "BOOL"), DataType::Boolean => write!(f, "BOOLEAN"), DataType::Date => write!(f, "DATE"), + DataType::Date32 => write!(f, "Date32"), DataType::Time(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIME", precision, timezone_info) } @@ -338,6 +460,14 @@ impl fmt::Display for DataType { DataType::Timestamp(precision, timezone_info) => { format_datetime_precision_and_tz(f, "TIMESTAMP", precision, 
timezone_info) } + DataType::Datetime64(precision, timezone) => { + format_clickhouse_datetime_precision_and_timezone( + f, + "DateTime64", + precision, + timezone, + ) + } DataType::Interval => write!(f, "INTERVAL"), DataType::JSON => write!(f, "JSON"), DataType::JSONB => write!(f, "JSONB"), @@ -350,6 +480,7 @@ impl fmt::Display for DataType { ArrayElemTypeDef::SquareBracket(t, None) => write!(f, "{t}[]"), ArrayElemTypeDef::SquareBracket(t, Some(size)) => write!(f, "{t}[{size}]"), ArrayElemTypeDef::AngleBracket(t) => write!(f, "ARRAY<{t}>"), + ArrayElemTypeDef::Parenthesis(t) => write!(f, "Array({t})"), }, DataType::Custom(ty, modifiers) => { if modifiers.is_empty() { @@ -385,6 +516,25 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + // ClickHouse + DataType::Nullable(data_type) => { + write!(f, "Nullable({})", data_type) + } + DataType::FixedString(character_length) => { + write!(f, "FixedString({})", character_length) + } + DataType::LowCardinality(data_type) => { + write!(f, "LowCardinality({})", data_type) + } + DataType::Map(key_data_type, value_data_type) => { + write!(f, "Map({}, {})", key_data_type, value_data_type) + } + DataType::Tuple(fields) => { + write!(f, "Tuple({})", display_comma_separated(fields)) + } + DataType::Nested(fields) => { + write!(f, "Nested({})", display_comma_separated(fields)) + } DataType::Unspecified => Ok(()), } } @@ -439,6 +589,23 @@ fn format_datetime_precision_and_tz( Ok(()) } +fn format_clickhouse_datetime_precision_and_timezone( + f: &mut fmt::Formatter, + sql_type: &'static str, + len: &u64, + time_zone: &Option, +) -> fmt::Result { + write!(f, "{sql_type}({len}")?; + + if let Some(time_zone) = time_zone { + write!(f, ", '{time_zone}'")?; + } + + write!(f, ")")?; + + Ok(()) +} + /// Timestamp and Time data types information about TimeZone formatting. /// /// This is more related to a display information than real differences between each variant. 
To @@ -593,4 +760,6 @@ pub enum ArrayElemTypeDef { AngleBracket(Box), /// `INT[]` or `INT[2]` SquareBracket(Box, Option), + /// `Array(Int64)` + Parenthesis(Box), } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e29a8df049..0a0f8dd66f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -273,7 +273,7 @@ impl fmt::Display for Interval { } } -/// A field definition within a struct. +/// A field definition within a struct /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index 6c6c642c36..1b204a8d5a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -202,7 +202,9 @@ define_keywords!( DATA, DATABASE, DATE, + DATE32, DATETIME, + DATETIME64, DAY, DAYOFWEEK, DAYOFYEAR, @@ -292,7 +294,9 @@ define_keywords!( FILTER, FIRST, FIRST_VALUE, + FIXEDSTRING, FLOAT, + FLOAT32, FLOAT4, FLOAT64, FLOAT8, @@ -362,7 +366,11 @@ define_keywords!( INSERT, INSTALL, INT, + INT128, + INT16, INT2, + INT256, + INT32, INT4, INT64, INT8, @@ -411,6 +419,7 @@ define_keywords!( LOCKED, LOGIN, LOGS, + LOWCARDINALITY, LOWER, LOW_PRIORITY, MACRO, @@ -455,6 +464,7 @@ define_keywords!( NATURAL, NCHAR, NCLOB, + NESTED, NEW, NEXT, NO, @@ -475,6 +485,7 @@ define_keywords!( NTH_VALUE, NTILE, NULL, + NULLABLE, NULLIF, NULLS, NUMERIC, @@ -713,8 +724,15 @@ define_keywords!( TRUE, TRUNCATE, TRY_CAST, + TUPLE, TYPE, UESCAPE, + UINT128, + UINT16, + UINT256, + UINT32, + UINT64, + UINT8, UNBOUNDED, UNCACHE, UNCOMMITTED, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e0a5b86ab1..c0a00c9fec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2099,7 +2099,7 @@ impl<'a> Parser<'a> { /// ``` fn parse_bigquery_struct_literal(&mut self) -> Result { let (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; if trailing_bracket.0 { return 
parser_err!("unmatched > in STRUCT literal", self.peek_token().location); } @@ -2194,13 +2194,16 @@ impl<'a> Parser<'a> { )) } - /// Parse a field definition in a BigQuery struct. + /// Parse a field definition in a struct [1] or tuple [2]. /// Syntax: /// /// ```sql /// [field_name] field_type /// ``` - fn parse_big_query_struct_field_def( + /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { // Look beyond the next item to infer whether both field name @@ -2266,6 +2269,47 @@ impl<'a> Parser<'a> { }) } + /// Parse clickhouse map [1] + /// Syntax + /// ```sql + /// Map(key_data_type, value_data_type) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/map + fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { + self.expect_keyword(Keyword::MAP)?; + self.expect_token(&Token::LParen)?; + let key_data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let value_data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + Ok((key_data_type, value_data_type)) + } + + /// Parse clickhouse tuple [1] + /// Syntax + /// ```sql + /// Tuple([field_name] field_type, ...) 
+ /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::TUPLE)?; + self.expect_token(&Token::LParen)?; + let mut field_defs = vec![]; + loop { + let (def, _) = self.parse_struct_field_def()?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + + Ok(field_defs) + } + /// For nested types that use the angle bracket syntax, this matches either /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously /// matched `trailing_bracket` argument). It returns whether there is a trailing @@ -6820,6 +6864,7 @@ impl<'a> Parser<'a> { Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), Keyword::REAL => Ok(DataType::Real), Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT32 => Ok(DataType::Float32), Keyword::FLOAT64 => Ok(DataType::Float64), Keyword::FLOAT8 => Ok(DataType::Float8), Keyword::DOUBLE => { @@ -6877,7 +6922,19 @@ impl<'a> Parser<'a> { Ok(DataType::Int4(optional_precision?)) } } + Keyword::INT8 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } + Keyword::INT16 => Ok(DataType::Int16), + Keyword::INT32 => Ok(DataType::Int32), Keyword::INT64 => Ok(DataType::Int64), + Keyword::INT128 => Ok(DataType::Int128), + Keyword::INT256 => Ok(DataType::Int256), Keyword::INTEGER => { let optional_precision = self.parse_optional_precision(); if self.parse_keyword(Keyword::UNSIGNED) { @@ -6894,14 +6951,12 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } - Keyword::INT8 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else 
{ - Ok(DataType::Int8(optional_precision?)) - } - } + Keyword::UINT8 => Ok(DataType::UInt8), + Keyword::UINT16 => Ok(DataType::UInt16), + Keyword::UINT32 => Ok(DataType::UInt32), + Keyword::UINT64 => Ok(DataType::UInt64), + Keyword::UINT128 => Ok(DataType::UInt128), + Keyword::UINT256 => Ok(DataType::UInt256), Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), Keyword::NVARCHAR => { Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) @@ -6937,7 +6992,13 @@ impl<'a> Parser<'a> { Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), Keyword::UUID => Ok(DataType::Uuid), Keyword::DATE => Ok(DataType::Date), + Keyword::DATE32 => Ok(DataType::Date32), Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), + Keyword::DATETIME64 => { + self.prev_token(); + let (precision, time_zone) = self.parse_datetime_64()?; + Ok(DataType::Datetime64(precision, time_zone)) + } Keyword::TIMESTAMP => { let precision = self.parse_optional_precision()?; let tz = if self.parse_keyword(Keyword::WITH) { @@ -6980,6 +7041,12 @@ impl<'a> Parser<'a> { Keyword::JSONB => Ok(DataType::JSONB), Keyword::REGCLASS => Ok(DataType::Regclass), Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), + Keyword::FIXEDSTRING => { + self.expect_token(&Token::LParen)?; + let character_length = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(DataType::FixedString(character_length)) + } Keyword::TEXT => Ok(DataType::Text), Keyword::BYTEA => Ok(DataType::Bytea), Keyword::NUMERIC => Ok(DataType::Numeric( @@ -7002,6 +7069,10 @@ impl<'a> Parser<'a> { Keyword::ARRAY => { if dialect_of!(self is SnowflakeDialect) { Ok(DataType::Array(ArrayElemTypeDef::None)) + } else if dialect_of!(self is ClickHouseDialect) { + Ok(self.parse_sub_type(|internal_type| { + DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) + })?) 
} else { self.expect_token(&Token::Lt)?; let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; @@ -7014,10 +7085,35 @@ impl<'a> Parser<'a> { Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { self.prev_token(); let (field_defs, _trailing_bracket) = - self.parse_struct_type_def(Self::parse_big_query_struct_field_def)?; + self.parse_struct_type_def(Self::parse_struct_field_def)?; trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::Nullable)?) + } + Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::LowCardinality)?) + } + Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; + Ok(DataType::Map( + Box::new(key_data_type), + Box::new(value_data_type), + )) + } + Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(DataType::Nested(field_defs)) + } + Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let field_defs = self.parse_click_house_tuple_def()?; + Ok(DataType::Tuple(field_defs)) + } _ => { self.prev_token(); let type_name = self.parse_object_name(false)?; @@ -7416,6 +7512,26 @@ impl<'a> Parser<'a> { } } + /// Parse datetime64 [1] + /// Syntax + /// ```sql + /// DateTime64(precision[, timezone]) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { + self.expect_keyword(Keyword::DATETIME64)?; + self.expect_token(&Token::LParen)?; + let precision = 
self.parse_literal_uint()?; + let time_zone = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((precision, time_zone)) + } + pub fn parse_optional_character_length( &mut self, ) -> Result, ParserError> { @@ -7508,6 +7624,17 @@ impl<'a> Parser<'a> { } } + /// Parse a parenthesized sub data type + fn parse_sub_type(&mut self, parent_type: F) -> Result + where + F: FnOnce(Box) -> DataType, + { + self.expect_token(&Token::LParen)?; + let inside_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(parent_type(inside_type.into())) + } + pub fn parse_delete(&mut self) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a693936bc7..20c3d0569d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -220,6 +220,196 @@ fn parse_create_table() { ); } +fn column_def(name: Ident, data_type: DataType) -> ColumnDef { + ColumnDef { + name, + data_type, + collation: None, + options: vec![], + } +} + +#[test] +fn parse_clickhouse_data_types() { + let sql = concat!( + "CREATE TABLE table (", + "a1 UInt8, a2 UInt16, a3 UInt32, a4 UInt64, a5 UInt128, a6 UInt256,", + " b1 Int8, b2 Int16, b3 Int32, b4 Int64, b5 Int128, b6 Int256,", + " c1 Float32, c2 Float64,", + " d1 Date32, d2 DateTime64(3), d3 DateTime64(3, 'UTC'),", + " e1 FixedString(255),", + " f1 LowCardinality(Int32)", + ") ORDER BY (a1)", + ); + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql + .replace(" Int8", " INT8") + .replace(" Int64", " INT64") + .replace(" Float64", " FLOAT64"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("a1".into(), DataType::UInt8), + column_def("a2".into(), DataType::UInt16), + column_def("a3".into(), DataType::UInt32), + column_def("a4".into(), DataType::UInt64), + column_def("a5".into(), DataType::UInt128), + column_def("a6".into(), DataType::UInt256), + column_def("b1".into(), DataType::Int8(None)), + column_def("b2".into(), DataType::Int16), + column_def("b3".into(), DataType::Int32), + column_def("b4".into(), DataType::Int64), + column_def("b5".into(), DataType::Int128), + column_def("b6".into(), DataType::Int256), + column_def("c1".into(), DataType::Float32), + column_def("c2".into(), DataType::Float64), + column_def("d1".into(), DataType::Date32), + column_def("d2".into(), DataType::Datetime64(3, None)), + column_def("d3".into(), DataType::Datetime64(3, Some("UTC".into()))), + column_def("e1".into(), DataType::FixedString(255)), + column_def( + "f1".into(), + DataType::LowCardinality(Box::new(DataType::Int32)) + ), + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nullable() { + let sql = r#"CREATE TABLE table (k UInt8, `a` Nullable(String), `b` Nullable(DateTime64(9, 'UTC')), c Nullable(DateTime64(9)), d Date32 NULL) ENGINE=MergeTree ORDER BY (`k`)"#; + // ClickHouse has a case-sensitive definition of data type, but canonical representation is not + let canonical_sql = sql.replace("String", "STRING"); + + match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + column_def("k".into(), DataType::UInt8), + column_def( + Ident::with_quote('`', "a"), + DataType::Nullable(Box::new(DataType::String(None))) + ), + column_def( + Ident::with_quote('`', "b"), + DataType::Nullable(Box::new(DataType::Datetime64( + 9, + Some("UTC".to_string()) + ))) + ), + column_def( + "c".into(), + DataType::Nullable(Box::new(DataType::Datetime64(9, None))) + ), + ColumnDef { + name: "d".into(), + data_type: DataType::Date32, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Null + }], + } + ] + ); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_table_with_nested_data_types() { + let sql = concat!( + "CREATE TABLE table (", + " i Nested(a Array(Int16), b LowCardinality(String)),", + " k Array(Tuple(FixedString(128), Int128)),", + " l Tuple(a DateTime64(9), b Array(UUID)),", + " m Map(String, UInt16)", + ") ENGINE=MergeTree ORDER BY (k)" + ); + + match clickhouse().one_statement_parses_to(sql, "") { + Statement::CreateTable { name, columns, .. 
} => { + assert_eq!(name, ObjectName(vec!["table".into()])); + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("i"), + data_type: DataType::Nested(vec![ + column_def( + "a".into(), + DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Int16 + ),)) + ), + column_def( + "b".into(), + DataType::LowCardinality(Box::new(DataType::String(None))) + ) + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("k"), + data_type: DataType::Array(ArrayElemTypeDef::Parenthesis(Box::new( + DataType::Tuple(vec![ + StructField { + field_name: None, + field_type: DataType::FixedString(128) + }, + StructField { + field_name: None, + field_type: DataType::Int128 + } + ]) + ))), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("l"), + data_type: DataType::Tuple(vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Datetime64(9, None), + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Array(ArrayElemTypeDef::Parenthesis( + Box::new(DataType::Uuid) + )) + }, + ]), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::new("m"), + data_type: DataType::Map( + Box::new(DataType::String(None)), + Box::new(DataType::UInt16) + ), + collation: None, + options: vec![], + }, + ] + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) { From 4b60866bc7ae0c2cd44c4d35ca6cd2e625c1cd79 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Fri, 7 Jun 2024 13:12:18 +0200 Subject: [PATCH 15/53] add support for custom operators in postgres (#1302) Co-authored-by: Joey Hain --- src/ast/operator.rs | 2 +- src/dialect/mod.rs | 6 +++ src/dialect/postgresql.rs | 27 ++++++++++ src/parser/mod.rs | 9 ++-- src/tokenizer.rs | 103 +++++++++++++++++++++++++----------- tests/sqlparser_mssql.rs | 6 +++ tests/sqlparser_postgres.rs | 96 
+++++++++++++++++++++++++++++---- 7 files changed, 203 insertions(+), 46 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 3c4f192e34..e70df344a3 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -111,7 +111,7 @@ pub enum BinaryOperator { DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, - /// Support for custom operators (built by parsers outside this crate) + /// Support for custom operators (such as Postgres custom operators) Custom(String), /// Bitwise XOR, e.g. `a # b` (PostgreSQL-specific) PGBitwiseXor, diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index e06c07a1c5..b223ead479 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -122,6 +122,12 @@ pub trait Dialect: Debug + Any { fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; + + /// Most dialects do not have custom operators. Override this method to provide custom operators. + fn is_custom_operator_part(&self, _ch: char) -> bool { + false + } + /// Determine if the dialect supports escaping characters via '\' in string literals. 
/// /// Some dialects like BigQuery and Snowflake support this while others like diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index f179111e0a..0e04bfa273 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -25,6 +25,10 @@ impl Dialect for PostgreSqlDialect { Some('"') } + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' // Postgres does not support backticks to quote identifiers + } + fn is_identifier_start(&self, ch: char) -> bool { // See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS // We don't yet support identifiers beginning with "letters with @@ -36,6 +40,29 @@ impl Dialect for PostgreSqlDialect { ch.is_alphabetic() || ch.is_ascii_digit() || ch == '$' || ch == '_' } + /// See + fn is_custom_operator_part(&self, ch: char) -> bool { + matches!( + ch, + '+' | '-' + | '*' + | '/' + | '<' + | '>' + | '=' + | '~' + | '!' + | '@' + | '#' + | '%' + | '^' + | '&' + | '|' + | '`' + | '?' + ) + } + fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::COMMENT) { Some(parse_comment(parser)) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c0a00c9fec..7aaef555e0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2344,9 +2344,8 @@ impl<'a> Parser<'a> { return infix; } - let tok = self.next_token(); - - let regular_binary_operator = match &tok.token { + let mut tok = self.next_token(); + let regular_binary_operator = match &mut tok.token { Token::Spaceship => Some(BinaryOperator::Spaceship), Token::DoubleEq => Some(BinaryOperator::Eq), Token::Eq => Some(BinaryOperator::Eq), @@ -2410,6 +2409,7 @@ impl<'a> Parser<'a> { Token::Question => Some(BinaryOperator::Question), Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), + Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), Token::Word(w) => match w.keyword { Keyword::AND => 
Some(BinaryOperator::And), @@ -2964,7 +2964,8 @@ impl<'a> Parser<'a> { | Token::AtAt | Token::Question | Token::QuestionAnd - | Token::QuestionPipe => Ok(Self::PG_OTHER_PREC), + | Token::QuestionPipe + | Token::CustomBinaryOperator(_) => Ok(Self::PG_OTHER_PREC), _ => Ok(0), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index b6fed354d1..bcc5478bc7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -231,6 +231,10 @@ pub enum Token { /// jsonb ?| text[] -> boolean: Check whether any member of the text array exists as top-level /// keys within the jsonb object QuestionPipe, + /// Custom binary operator + /// This is used to represent any custom binary operator that is not part of the SQL standard. + /// PostgreSQL allows defining custom binary operators using CREATE OPERATOR. + CustomBinaryOperator(String), } impl fmt::Display for Token { @@ -320,6 +324,7 @@ impl fmt::Display for Token { Token::Question => write!(f, "?"), Token::QuestionAnd => write!(f, "?&"), Token::QuestionPipe => write!(f, "?|"), + Token::CustomBinaryOperator(s) => f.write_str(s), } } } @@ -961,15 +966,12 @@ impl<'a> Tokenizer<'a> { Some('>') => { chars.next(); match chars.peek() { - Some('>') => { - chars.next(); - Ok(Some(Token::LongArrow)) - } - _ => Ok(Some(Token::Arrow)), + Some('>') => self.consume_for_binop(chars, "->>", Token::LongArrow), + _ => self.start_binop(chars, "->", Token::Arrow), } } // a regular '-' operator - _ => Ok(Some(Token::Minus)), + _ => self.start_binop(chars, "-", Token::Minus), } } '/' => { @@ -999,26 +1001,28 @@ impl<'a> Tokenizer<'a> { '%' => { chars.next(); // advance past '%' match chars.peek() { - Some(' ') => Ok(Some(Token::Mod)), + Some(s) if s.is_whitespace() => Ok(Some(Token::Mod)), Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => Ok(Some(Token::Mod)), + _ => self.start_binop(chars, "%", Token::Mod), } } '|' => { chars.next(); // consume the '|' match chars.peek() { - Some('/') 
=> self.consume_and_return(chars, Token::PGSquareRoot), + Some('/') => self.consume_for_binop(chars, "|/", Token::PGSquareRoot), Some('|') => { chars.next(); // consume the second '|' match chars.peek() { - Some('/') => self.consume_and_return(chars, Token::PGCubeRoot), - _ => Ok(Some(Token::StringConcat)), + Some('/') => { + self.consume_for_binop(chars, "||/", Token::PGCubeRoot) + } + _ => self.start_binop(chars, "||", Token::StringConcat), } } // Bitshift '|' operator - _ => Ok(Some(Token::Pipe)), + _ => self.start_binop(chars, "|", Token::Pipe), } } '=' => { @@ -1061,22 +1065,22 @@ impl<'a> Tokenizer<'a> { Some('=') => { chars.next(); match chars.peek() { - Some('>') => self.consume_and_return(chars, Token::Spaceship), - _ => Ok(Some(Token::LtEq)), + Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), + _ => self.start_binop(chars, "<=", Token::LtEq), } } - Some('>') => self.consume_and_return(chars, Token::Neq), - Some('<') => self.consume_and_return(chars, Token::ShiftLeft), - Some('@') => self.consume_and_return(chars, Token::ArrowAt), - _ => Ok(Some(Token::Lt)), + Some('>') => self.consume_for_binop(chars, "<>", Token::Neq), + Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), + Some('@') => self.consume_for_binop(chars, "<@", Token::ArrowAt), + _ => self.start_binop(chars, "<", Token::Lt), } } '>' => { chars.next(); // consume match chars.peek() { - Some('=') => self.consume_and_return(chars, Token::GtEq), - Some('>') => self.consume_and_return(chars, Token::ShiftRight), - _ => Ok(Some(Token::Gt)), + Some('=') => self.consume_for_binop(chars, ">=", Token::GtEq), + Some('>') => self.consume_for_binop(chars, ">>", Token::ShiftRight), + _ => self.start_binop(chars, ">", Token::Gt), } } ':' => { @@ -1094,9 +1098,12 @@ impl<'a> Tokenizer<'a> { '&' => { chars.next(); // consume the '&' match chars.peek() { - Some('&') => self.consume_and_return(chars, Token::Overlap), + Some('&') => { + chars.next(); // consume the second '&' + 
self.start_binop(chars, "&&", Token::Overlap) + } // Bitshift '&' operator - _ => Ok(Some(Token::Ampersand)), + _ => self.start_binop(chars, "&", Token::Ampersand), } } '^' => { @@ -1119,38 +1126,37 @@ impl<'a> Tokenizer<'a> { '~' => { chars.next(); // consume match chars.peek() { - Some('*') => self.consume_and_return(chars, Token::TildeAsterisk), + Some('*') => self.consume_for_binop(chars, "~*", Token::TildeAsterisk), Some('~') => { chars.next(); match chars.peek() { Some('*') => { - self.consume_and_return(chars, Token::DoubleTildeAsterisk) + self.consume_for_binop(chars, "~~*", Token::DoubleTildeAsterisk) } - _ => Ok(Some(Token::DoubleTilde)), + _ => self.start_binop(chars, "~~", Token::DoubleTilde), } } - _ => Ok(Some(Token::Tilde)), + _ => self.start_binop(chars, "~", Token::Tilde), } } '#' => { chars.next(); match chars.peek() { - Some('-') => self.consume_and_return(chars, Token::HashMinus), + Some('-') => self.consume_for_binop(chars, "#-", Token::HashMinus), Some('>') => { chars.next(); match chars.peek() { Some('>') => { - chars.next(); - Ok(Some(Token::HashLongArrow)) + self.consume_for_binop(chars, "#>>", Token::HashLongArrow) } - _ => Ok(Some(Token::HashArrow)), + _ => self.start_binop(chars, "#>", Token::HashArrow), } } Some(' ') => Ok(Some(Token::Sharp)), Some(sch) if self.dialect.is_identifier_start('#') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } - _ => Ok(Some(Token::Sharp)), + _ => self.start_binop(chars, "#", Token::Sharp), } } '@' => { @@ -1206,6 +1212,39 @@ impl<'a> Tokenizer<'a> { } } + /// Consume the next character, then parse a custom binary operator. 
The next character should be included in the prefix + fn consume_for_binop( + &self, + chars: &mut State, + prefix: &str, + default: Token, + ) -> Result, TokenizerError> { + chars.next(); // consume the first char + self.start_binop(chars, prefix, default) + } + + /// parse a custom binary operator + fn start_binop( + &self, + chars: &mut State, + prefix: &str, + default: Token, + ) -> Result, TokenizerError> { + let mut custom = None; + while let Some(&ch) = chars.peek() { + if !self.dialect.is_custom_operator_part(ch) { + break; + } + + custom.get_or_insert_with(|| prefix.to_string()).push(ch); + chars.next(); + } + + Ok(Some( + custom.map(Token::CustomBinaryOperator).unwrap_or(default), + )) + } + /// Tokenize dollar preceded value (i.e: a string/placeholder) fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { let mut s = String::new(); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 5d61c6ab92..86d3990f61 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -437,6 +437,12 @@ fn parse_for_json_expect_ast() { ); } +#[test] +fn parse_ampersand_arobase() { + // In SQL Server, a&@b means (a) & (@b), in PostgreSQL it means (a) &@ (b) + ms().expr_parses_to("a&@b", "a & @b"); +} + #[test] fn parse_cast_varchar_max() { ms_and_generic().verified_expr("CAST('foo' AS VARCHAR(MAX))"); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 1df94b1005..93b3c044aa 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1757,6 +1757,29 @@ fn parse_pg_returning() { }; } +fn test_operator(operator: &str, dialect: &TestedDialects, expected: BinaryOperator) { + let operator_tokens = + sqlparser::tokenizer::Tokenizer::new(&PostgreSqlDialect {}, &format!("a{operator}b")) + .tokenize() + .unwrap(); + assert_eq!( + operator_tokens.len(), + 3, + "binary op should be 3 tokens, not {operator_tokens:?}" + ); + let expected_expr = Expr::BinaryOp { + left: 
Box::new(Expr::Identifier(Ident::new("a"))), + op: expected, + right: Box::new(Expr::Identifier(Ident::new("b"))), + }; + let str_expr_canonical = format!("a {operator} b"); + assert_eq!(expected_expr, dialect.verified_expr(&str_expr_canonical)); + assert_eq!( + expected_expr, + dialect.expr_parses_to(&format!("a{operator}b"), &str_expr_canonical) + ); +} + #[test] fn parse_pg_binary_ops() { let binary_ops = &[ @@ -1770,18 +1793,73 @@ fn parse_pg_binary_ops() { ]; for (str_op, op, dialects) in binary_ops { - let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op)); - assert_eq!( - SelectItem::UnnamedExpr(Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("a"))), - op: op.clone(), - right: Box::new(Expr::Identifier(Ident::new("b"))), - }), - select.projection[0] - ); + test_operator(str_op, dialects, op.clone()); + } +} + +#[test] +fn parse_pg_custom_binary_ops() { + // Postgres supports declaring custom binary operators, using any character in the following set: + // + - * / < > = ~ ! @ # % ^ & | ` ? + + // Here, we test the ones used by common extensions + let operators = [ + // PostGIS + "&&&", // n-D bounding boxes intersect + "&<", // (is strictly to the left of) + "&>", // (is strictly to the right of) + "|=|", // distance between A and B trajectories at their closest point of approach + "<<#>>", // n-D distance between A and B bounding boxes + "|>>", // A's bounding box is strictly above B's. 
+ "~=", // bounding box is the same + // PGroonga + "&@", // Full text search by a keyword + "&@~", // Full text search by easy to use query language + "&@*", // Similar search + "&`", // Advanced search by ECMAScript like query language + "&@|", // Full text search by an array of keywords + "&@~|", // Full text search by an array of queries in easy to use query language + // pgtrgm + "<<%", // second argument has a continuous extent of an ordered trigram set that matches word boundaries + "%>>", // commutator of <<% + "<<<->", // distance between arguments + // hstore + "#=", // Replace fields with matching values from hstore + // ranges + "-|-", // Is adjacent to + // pg_similarity + "~++", // L1 distance + "~##", // Cosine Distance + "~-~", // Dice Coefficient + "~!!", // Euclidean Distance + "~@~", // Hamming Distance + "~??", // Jaccard Coefficient + "~%%", // Jaro Distance + "~@@", // Jaro-Winkler Distance + "~==", // Levenshtein Distance + "~^^", // Matching Coefficient + "~||", // Monge-Elkan Coefficient + "~#~", // Needleman-Wunsch Coefficient + "~**", // Overlap Coefficient + "~~~", // Q-Gram Distance + "~=~", // Smith-Waterman Coefficient + "~!~", // Smith-Waterman-Gotoh Coefficient + "~*~", // Soundex Distance + // soundex_operator + ">@@<", // Soundex matches + "<@@>", // Soundex doesn't match + ]; + for op in &operators { + test_operator(op, &pg(), BinaryOperator::Custom(op.to_string())); } } +#[test] +fn parse_ampersand_arobase() { + // In SQL Server, a&@b means (a) & (@b), in PostgreSQL it means (a) &@ (b) + pg().expr_parses_to("a&@b", "a &@ b"); +} + #[test] fn parse_pg_unary_ops() { let pg_unary_ops = &[ From 3c33ac15bd9a33ff819d42bbeb2994049dd03fdf Mon Sep 17 00:00:00 2001 From: Aleksei Piianin Date: Fri, 7 Jun 2024 14:19:32 +0200 Subject: [PATCH 16/53] ClickHouse: support of create table query with primary key and parametrised table engine (#1289) --- src/ast/dml.rs | 16 +++-- src/ast/helpers/stmt_create_table.rs | 20 ++++-- src/ast/mod.rs | 23 
+++++++ src/parser/mod.rs | 31 +++++++-- tests/sqlparser_clickhouse.rs | 95 +++++++++++++++++++++++++--- tests/sqlparser_mysql.rs | 8 ++- 6 files changed, 168 insertions(+), 25 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 91232218f9..7238785ca9 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -24,8 +24,8 @@ pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ display_comma_separated, display_separated, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, - OnCommit, OnInsert, OrderByExpr, Query, SelectItem, SqlOption, SqliteOnConflict, - TableWithJoins, + OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, SelectItem, SqlOption, + SqliteOnConflict, TableEngine, TableWithJoins, }; /// CREATE INDEX statement. @@ -73,7 +73,7 @@ pub struct CreateTable { pub without_rowid: bool, pub like: Option, pub clone: Option, - pub engine: Option, + pub engine: Option, pub comment: Option, pub auto_increment_offset: Option, pub default_charset: Option, @@ -82,10 +82,13 @@ pub struct CreateTable { /// ClickHouse "ON CLUSTER" clause: /// pub on_cluster: Option, + /// ClickHouse "PRIMARY KEY " clause. + /// + pub primary_key: Option>, /// ClickHouse "ORDER BY " clause. Note that omitted ORDER BY is different /// than empty (represented as ()), the latter meaning "no sorting". /// - pub order_by: Option>, + pub order_by: Option>, /// BigQuery: A partition expression for the table. 
/// pub partition_by: Option>, @@ -263,8 +266,11 @@ impl Display for CreateTable { if let Some(auto_increment_offset) = self.auto_increment_offset { write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; } + if let Some(primary_key) = &self.primary_key { + write!(f, " PRIMARY KEY {}", primary_key)?; + } if let Some(order_by) = &self.order_by { - write!(f, " ORDER BY ({})", display_comma_separated(order_by))?; + write!(f, " ORDER BY {}", order_by)?; } if let Some(partition_by) = self.partition_by.as_ref() { write!(f, " PARTITION BY {partition_by}")?; diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index c50e7bbd9c..b2b3f56881 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -10,7 +10,7 @@ use sqlparser_derive::{Visit, VisitMut}; use super::super::dml::CreateTable; use crate::ast::{ ColumnDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, - Query, SqlOption, Statement, TableConstraint, + OneOrManyWithParens, Query, SqlOption, Statement, TableConstraint, TableEngine, }; use crate::parser::ParserError; @@ -65,14 +65,15 @@ pub struct CreateTableBuilder { pub without_rowid: bool, pub like: Option, pub clone: Option, - pub engine: Option, + pub engine: Option, pub comment: Option, pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, pub on_commit: Option, pub on_cluster: Option, - pub order_by: Option>, + pub primary_key: Option>, + pub order_by: Option>, pub partition_by: Option>, pub cluster_by: Option>, pub options: Option>, @@ -108,6 +109,7 @@ impl CreateTableBuilder { collation: None, on_commit: None, on_cluster: None, + primary_key: None, order_by: None, partition_by: None, cluster_by: None, @@ -203,7 +205,7 @@ impl CreateTableBuilder { self } - pub fn engine(mut self, engine: Option) -> Self { + pub fn engine(mut self, engine: Option) -> Self { self.engine = engine; self } @@ -238,7 +240,12 @@ impl 
CreateTableBuilder { self } - pub fn order_by(mut self, order_by: Option>) -> Self { + pub fn primary_key(mut self, primary_key: Option>) -> Self { + self.primary_key = primary_key; + self + } + + pub fn order_by(mut self, order_by: Option>) -> Self { self.order_by = order_by; self } @@ -291,6 +298,7 @@ impl CreateTableBuilder { collation: self.collation, on_commit: self.on_commit, on_cluster: self.on_cluster, + primary_key: self.primary_key, order_by: self.order_by, partition_by: self.partition_by, cluster_by: self.cluster_by, @@ -334,6 +342,7 @@ impl TryFrom for CreateTableBuilder { collation, on_commit, on_cluster, + primary_key, order_by, partition_by, cluster_by, @@ -366,6 +375,7 @@ impl TryFrom for CreateTableBuilder { collation, on_commit, on_cluster, + primary_key, order_by, partition_by, cluster_by, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 0a0f8dd66f..1747d677e0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6315,6 +6315,29 @@ impl Display for MySQLColumnPosition { } } +/// Engine of DB. Some warehouse has parameters of engine, e.g. 
[clickhouse] +/// +/// [clickhouse]: https://clickhouse.com/docs/en/engines/table-engines +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableEngine { + pub name: String, + pub parameters: Option>, +} + +impl Display for TableEngine { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.name)?; + + if let Some(parameters) = self.parameters.as_ref() { + write!(f, "({})", display_comma_separated(parameters))?; + } + + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7aaef555e0..6406bd4e51 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5262,7 +5262,15 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Eq)?; let next_token = self.next_token(); match next_token.token { - Token::Word(w) => Some(w.value), + Token::Word(w) => { + let name = w.value; + let parameters = if self.peek_token() == Token::LParen { + Some(self.parse_parenthesized_identifiers()?) + } else { + None + }; + Some(TableEngine { name, parameters }) + } _ => self.expected("identifier", next_token)?, } } else { @@ -5280,17 +5288,27 @@ impl<'a> Parser<'a> { None }; + // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key + let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { let columns = if self.peek_token() != Token::RParen { - self.parse_comma_separated(|p| p.parse_identifier(false))? + self.parse_comma_separated(|p| p.parse_expr())? 
} else { vec![] }; self.expect_token(&Token::RParen)?; - Some(columns) + Some(OneOrManyWithParens::Many(columns)) } else { - Some(vec![self.parse_identifier(false)?]) + Some(OneOrManyWithParens::One(self.parse_expr()?)) } } else { None @@ -5388,6 +5406,7 @@ impl<'a> Parser<'a> { .partition_by(big_query_config.partition_by) .cluster_by(big_query_config.cluster_by) .options(big_query_config.options) + .primary_key(primary_key) .strict(strict) .build()) } @@ -9041,7 +9060,7 @@ impl<'a> Parser<'a> { let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::PARTITION) { - self.parse_partitions()? + self.parse_parenthesized_identifiers()? } else { vec![] }; @@ -10969,7 +10988,7 @@ impl<'a> Parser<'a> { }) } - fn parse_partitions(&mut self) -> Result, ParserError> { + fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 20c3d0569d..ed3b2de22d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -211,12 +211,9 @@ fn parse_delimited_identifiers() { #[test] fn parse_create_table() { clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#); - clickhouse().one_statement_parses_to( - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#, - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x")"#, - ); + clickhouse().verified_stmt(r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x""#); clickhouse().verified_stmt( - r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY ("x") AS SELECT * FROM "t" WHERE true"#, + r#"CREATE TABLE "x" ("a" "int") ENGINE=MergeTree ORDER BY "x" AS SELECT * FROM "t" WHERE true"#, ); } @@ -248,7 +245,7 @@ fn parse_clickhouse_data_types() { .replace(" Float64", " 
FLOAT64"); match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( columns, @@ -289,7 +286,7 @@ fn parse_create_table_with_nullable() { let canonical_sql = sql.replace("String", "STRING"); match clickhouse_and_generic().one_statement_parses_to(sql, &canonical_sql) { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( columns, @@ -338,7 +335,7 @@ fn parse_create_table_with_nested_data_types() { ); match clickhouse().one_statement_parses_to(sql, "") { - Statement::CreateTable { name, columns, .. } => { + Statement::CreateTable(CreateTable { name, columns, .. }) => { assert_eq!(name, ObjectName(vec!["table".into()])); assert_eq!( columns, @@ -410,6 +407,88 @@ fn parse_create_table_with_nested_data_types() { } } +#[test] +fn parse_create_table_with_primary_key() { + match clickhouse_and_generic().verified_stmt(concat!( + r#"CREATE TABLE db.table (`i` INT, `k` INT)"#, + " ENGINE=SharedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')", + " PRIMARY KEY tuple(i)", + " ORDER BY tuple(i)", + )) { + Statement::CreateTable(CreateTable { + name, + columns, + engine, + primary_key, + order_by, + .. 
+ }) => { + assert_eq!(name.to_string(), "db.table"); + assert_eq!( + vec![ + ColumnDef { + name: Ident::with_quote('`', "i"), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::with_quote('`', "k"), + data_type: DataType::Int(None), + collation: None, + options: vec![], + }, + ], + columns + ); + assert_eq!( + engine, + Some(TableEngine { + name: "SharedMergeTree".to_string(), + parameters: Some(vec![ + Ident::with_quote('\'', "/clickhouse/tables/{uuid}/{shard}"), + Ident::with_quote('\'', "{replica}"), + ]), + }) + ); + fn assert_function(actual: &Function, name: &str, arg: &str) -> bool { + assert_eq!(actual.name, ObjectName(vec![Ident::new(name)])); + assert_eq!( + actual.args, + FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier( + Ident::new(arg) + )),)], + duplicate_treatment: None, + clauses: vec![], + }) + ); + true + } + match primary_key.unwrap().as_ref() { + Expr::Function(primary_key) => { + assert!(assert_function(primary_key, "tuple", "i")); + } + _ => panic!("unexpected primary key type"), + } + match order_by { + Some(OneOrManyWithParens::One(Expr::Function(order_by))) => { + assert!(assert_function(&order_by, "tuple", "i")); + } + _ => panic!("unexpected order by type"), + }; + } + _ => unreachable!(), + } + + clickhouse_and_generic() + .parse_sql_statements(concat!( + r#"CREATE TABLE db.table (`i` Int, `k` Int)"#, + " ORDER BY tuple(i), tuple(k)", + )) + .expect_err("ORDER BY supports one expression with tuple"); +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v (i "int", f "String") AS SELECT * FROM t"#) { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3041b60012..e65fc181b6 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -773,7 +773,13 @@ fn parse_create_table_engine_default_charset() { },], columns ); - 
assert_eq!(engine, Some("InnoDB".to_string())); + assert_eq!( + engine, + Some(TableEngine { + name: "InnoDB".to_string(), + parameters: None + }) + ); assert_eq!(default_charset, Some("utf8mb3".to_string())); } _ => unreachable!(), From be77ce50ca34958f94bc05d92795b38ca286614a Mon Sep 17 00:00:00 2001 From: Ilson Balliego Date: Sun, 9 Jun 2024 23:47:21 +0200 Subject: [PATCH 17/53] Add support for snowflake exclusive create table options (#1233) Co-authored-by: Ilson Roberto Balliego Junior --- src/ast/dml.rs | 114 ++++++- src/ast/helpers/stmt_create_table.rs | 121 +++++++- src/ast/mod.rs | 111 +++++++ src/dialect/snowflake.rs | 223 +++++++++++++- src/keywords.rs | 10 + src/parser/mod.rs | 8 +- tests/sqlparser_bigquery.rs | 5 +- tests/sqlparser_common.rs | 16 +- tests/sqlparser_postgres.rs | 23 ++ tests/sqlparser_snowflake.rs | 429 +++++++++++++++++++++++++++ 10 files changed, 1029 insertions(+), 31 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 7238785ca9..74bb5435c8 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -22,10 +22,11 @@ use sqlparser_derive::{Visit, VisitMut}; pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ - display_comma_separated, display_separated, Expr, FileFormat, FromTable, HiveDistributionStyle, - HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, - OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, SelectItem, SqlOption, - SqliteOnConflict, TableEngine, TableWithJoins, + display_comma_separated, display_separated, CommentDef, Expr, FileFormat, FromTable, + HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, + MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, + RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine, TableWithJoins, Tag, + WrappedCollection, }; /// CREATE INDEX statement. 
@@ -57,6 +58,7 @@ pub struct CreateTable { pub global: Option, pub if_not_exists: bool, pub transient: bool, + pub volatile: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, @@ -74,7 +76,7 @@ pub struct CreateTable { pub like: Option, pub clone: Option, pub engine: Option, - pub comment: Option, + pub comment: Option, pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, @@ -94,7 +96,7 @@ pub struct CreateTable { pub partition_by: Option>, /// BigQuery: Table clustering column list. /// - pub cluster_by: Option>, + pub cluster_by: Option>>, /// BigQuery: Table options list. /// pub options: Option>, @@ -102,6 +104,33 @@ pub struct CreateTable { /// if the "STRICT" table-option keyword is added to the end, after the closing ")", /// then strict typing rules apply to that table. pub strict: bool, + /// Snowflake "COPY GRANTS" clause + /// + pub copy_grants: bool, + /// Snowflake "ENABLE_SCHEMA_EVOLUTION" clause + /// + pub enable_schema_evolution: Option, + /// Snowflake "CHANGE_TRACKING" clause + /// + pub change_tracking: Option, + /// Snowflake "DATA_RETENTION_TIME_IN_DAYS" clause + /// + pub data_retention_time_in_days: Option, + /// Snowflake "MAX_DATA_EXTENSION_TIME_IN_DAYS" clause + /// + pub max_data_extension_time_in_days: Option, + /// Snowflake "DEFAULT_DDL_COLLATION" clause + /// + pub default_ddl_collation: Option, + /// Snowflake "WITH AGGREGATION POLICY" clause + /// + pub with_aggregation_policy: Option, + /// Snowflake "WITH ROW ACCESS POLICY" clause + /// + pub with_row_access_policy: Option, + /// Snowflake "WITH TAG" clause + /// + pub with_tags: Option>, } impl Display for CreateTable { @@ -115,7 +144,7 @@ impl Display for CreateTable { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}{global}{temporary}{transient}TABLE {if_not_exists}{name}", + "CREATE 
{or_replace}{external}{global}{temporary}{transient}{volatile}TABLE {if_not_exists}{name}", or_replace = if self.or_replace { "OR REPLACE " } else { "" }, external = if self.external { "EXTERNAL " } else { "" }, global = self.global @@ -130,6 +159,7 @@ impl Display for CreateTable { if_not_exists = if self.if_not_exists { "IF NOT EXISTS " } else { "" }, temporary = if self.temporary { "TEMPORARY " } else { "" }, transient = if self.transient { "TRANSIENT " } else { "" }, + volatile = if self.volatile { "VOLATILE " } else { "" }, name = self.name, )?; if let Some(on_cluster) = &self.on_cluster { @@ -260,9 +290,17 @@ impl Display for CreateTable { if let Some(engine) = &self.engine { write!(f, " ENGINE={engine}")?; } - if let Some(comment) = &self.comment { - write!(f, " COMMENT '{comment}'")?; + if let Some(comment_def) = &self.comment { + match comment_def { + CommentDef::WithEq(comment) => { + write!(f, " COMMENT = '{comment}'")?; + } + CommentDef::WithoutEq(comment) => { + write!(f, " COMMENT '{comment}'")?; + } + } } + if let Some(auto_increment_offset) = self.auto_increment_offset { write!(f, " AUTO_INCREMENT {auto_increment_offset}")?; } @@ -276,12 +314,9 @@ impl Display for CreateTable { write!(f, " PARTITION BY {partition_by}")?; } if let Some(cluster_by) = self.cluster_by.as_ref() { - write!( - f, - " CLUSTER BY {}", - display_comma_separated(cluster_by.as_slice()) - )?; + write!(f, " CLUSTER BY {cluster_by}")?; } + if let Some(options) = self.options.as_ref() { write!( f, @@ -289,6 +324,57 @@ impl Display for CreateTable { display_comma_separated(options.as_slice()) )?; } + + if self.copy_grants { + write!(f, " COPY GRANTS")?; + } + + if let Some(is_enabled) = self.enable_schema_evolution { + write!( + f, + " ENABLE_SCHEMA_EVOLUTION={}", + if is_enabled { "TRUE" } else { "FALSE" } + )?; + } + + if let Some(is_enabled) = self.change_tracking { + write!( + f, + " CHANGE_TRACKING={}", + if is_enabled { "TRUE" } else { "FALSE" } + )?; + } + + if let 
Some(data_retention_time_in_days) = self.data_retention_time_in_days { + write!( + f, + " DATA_RETENTION_TIME_IN_DAYS={data_retention_time_in_days}", + )?; + } + + if let Some(max_data_extension_time_in_days) = self.max_data_extension_time_in_days { + write!( + f, + " MAX_DATA_EXTENSION_TIME_IN_DAYS={max_data_extension_time_in_days}", + )?; + } + + if let Some(default_ddl_collation) = &self.default_ddl_collation { + write!(f, " DEFAULT_DDL_COLLATION='{default_ddl_collation}'",)?; + } + + if let Some(with_aggregation_policy) = &self.with_aggregation_policy { + write!(f, " WITH AGGREGATION POLICY {with_aggregation_policy}",)?; + } + + if let Some(row_access_policy) = &self.with_row_access_policy { + write!(f, " {row_access_policy}",)?; + } + + if let Some(tag) = &self.with_tags { + write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; + } + if let Some(query) = &self.query { write!(f, " AS {query}")?; } diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index b2b3f56881..d862a36aef 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -9,8 +9,9 @@ use sqlparser_derive::{Visit, VisitMut}; use super::super::dml::CreateTable; use crate::ast::{ - ColumnDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, OnCommit, - OneOrManyWithParens, Query, SqlOption, Statement, TableConstraint, TableEngine, + ColumnDef, CommentDef, Expr, FileFormat, HiveDistributionStyle, HiveFormat, Ident, ObjectName, + OnCommit, OneOrManyWithParens, Query, RowAccessPolicy, SqlOption, Statement, TableConstraint, + TableEngine, Tag, WrappedCollection, }; use crate::parser::ParserError; @@ -52,6 +53,7 @@ pub struct CreateTableBuilder { pub global: Option, pub if_not_exists: bool, pub transient: bool, + pub volatile: bool, pub name: ObjectName, pub columns: Vec, pub constraints: Vec, @@ -66,7 +68,7 @@ pub struct CreateTableBuilder { pub like: Option, pub clone: Option, pub engine: 
Option, - pub comment: Option, + pub comment: Option, pub auto_increment_offset: Option, pub default_charset: Option, pub collation: Option, @@ -75,9 +77,18 @@ pub struct CreateTableBuilder { pub primary_key: Option>, pub order_by: Option>, pub partition_by: Option>, - pub cluster_by: Option>, + pub cluster_by: Option>>, pub options: Option>, pub strict: bool, + pub copy_grants: bool, + pub enable_schema_evolution: Option, + pub change_tracking: Option, + pub data_retention_time_in_days: Option, + pub max_data_extension_time_in_days: Option, + pub default_ddl_collation: Option, + pub with_aggregation_policy: Option, + pub with_row_access_policy: Option, + pub with_tags: Option>, } impl CreateTableBuilder { @@ -89,6 +100,7 @@ impl CreateTableBuilder { global: None, if_not_exists: false, transient: false, + volatile: false, name, columns: vec![], constraints: vec![], @@ -115,6 +127,15 @@ impl CreateTableBuilder { cluster_by: None, options: None, strict: false, + copy_grants: false, + enable_schema_evolution: None, + change_tracking: None, + data_retention_time_in_days: None, + max_data_extension_time_in_days: None, + default_ddl_collation: None, + with_aggregation_policy: None, + with_row_access_policy: None, + with_tags: None, } } pub fn or_replace(mut self, or_replace: bool) -> Self { @@ -147,6 +168,11 @@ impl CreateTableBuilder { self } + pub fn volatile(mut self, volatile: bool) -> Self { + self.volatile = volatile; + self + } + pub fn columns(mut self, columns: Vec) -> Self { self.columns = columns; self @@ -210,7 +236,7 @@ impl CreateTableBuilder { self } - pub fn comment(mut self, comment: Option) -> Self { + pub fn comment(mut self, comment: Option) -> Self { self.comment = comment; self } @@ -255,7 +281,7 @@ impl CreateTableBuilder { self } - pub fn cluster_by(mut self, cluster_by: Option>) -> Self { + pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { self.cluster_by = cluster_by; self } @@ -270,6 +296,57 @@ impl CreateTableBuilder { self } + pub 
fn copy_grants(mut self, copy_grants: bool) -> Self { + self.copy_grants = copy_grants; + self + } + + pub fn enable_schema_evolution(mut self, enable_schema_evolution: Option) -> Self { + self.enable_schema_evolution = enable_schema_evolution; + self + } + + pub fn change_tracking(mut self, change_tracking: Option) -> Self { + self.change_tracking = change_tracking; + self + } + + pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { + self.data_retention_time_in_days = data_retention_time_in_days; + self + } + + pub fn max_data_extension_time_in_days( + mut self, + max_data_extension_time_in_days: Option, + ) -> Self { + self.max_data_extension_time_in_days = max_data_extension_time_in_days; + self + } + + pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { + self.default_ddl_collation = default_ddl_collation; + self + } + + pub fn with_aggregation_policy(mut self, with_aggregation_policy: Option) -> Self { + self.with_aggregation_policy = with_aggregation_policy; + self + } + + pub fn with_row_access_policy( + mut self, + with_row_access_policy: Option, + ) -> Self { + self.with_row_access_policy = with_row_access_policy; + self + } + + pub fn with_tags(mut self, with_tags: Option>) -> Self { + self.with_tags = with_tags; + self + } + pub fn build(self) -> Statement { Statement::CreateTable(CreateTable { or_replace: self.or_replace, @@ -278,6 +355,7 @@ impl CreateTableBuilder { global: self.global, if_not_exists: self.if_not_exists, transient: self.transient, + volatile: self.volatile, name: self.name, columns: self.columns, constraints: self.constraints, @@ -304,6 +382,15 @@ impl CreateTableBuilder { cluster_by: self.cluster_by, options: self.options, strict: self.strict, + copy_grants: self.copy_grants, + enable_schema_evolution: self.enable_schema_evolution, + change_tracking: self.change_tracking, + data_retention_time_in_days: self.data_retention_time_in_days, + 
max_data_extension_time_in_days: self.max_data_extension_time_in_days, + default_ddl_collation: self.default_ddl_collation, + with_aggregation_policy: self.with_aggregation_policy, + with_row_access_policy: self.with_row_access_policy, + with_tags: self.with_tags, }) } } @@ -322,6 +409,7 @@ impl TryFrom for CreateTableBuilder { global, if_not_exists, transient, + volatile, name, columns, constraints, @@ -348,6 +436,15 @@ impl TryFrom for CreateTableBuilder { cluster_by, options, strict, + copy_grants, + enable_schema_evolution, + change_tracking, + data_retention_time_in_days, + max_data_extension_time_in_days, + default_ddl_collation, + with_aggregation_policy, + with_row_access_policy, + with_tags, }) => Ok(Self { or_replace, temporary, @@ -381,6 +478,16 @@ impl TryFrom for CreateTableBuilder { cluster_by, options, strict, + copy_grants, + enable_schema_evolution, + change_tracking, + data_retention_time_in_days, + max_data_extension_time_in_days, + default_ddl_collation, + with_aggregation_policy, + with_row_access_policy, + with_tags, + volatile, }), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" @@ -393,7 +500,7 @@ impl TryFrom for CreateTableBuilder { #[derive(Default)] pub(crate) struct BigQueryTableConfiguration { pub partition_by: Option>, - pub cluster_by: Option>, + pub cluster_by: Option>>, pub options: Option>, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1747d677e0..49d6499c5f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6338,6 +6338,117 @@ impl Display for TableEngine { } } +/// Snowflake `WITH ROW ACCESS POLICY policy_name ON (identifier, ...)` +/// +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct RowAccessPolicy { + pub policy: ObjectName, + pub on: Vec, +} + +impl RowAccessPolicy { + pub fn new(policy: ObjectName, on: Vec) 
-> Self { + Self { policy, on } + } +} + +impl Display for RowAccessPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WITH ROW ACCESS POLICY {} ON ({})", + self.policy, + display_comma_separated(self.on.as_slice()) + ) + } +} + +/// Snowflake `WITH TAG ( tag_name = '', ...)` +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Tag { + pub key: Ident, + pub value: String, +} + +impl Tag { + pub fn new(key: Ident, value: String) -> Self { + Self { key, value } + } +} + +impl Display for Tag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}='{}'", self.key, self.value) + } +} + +/// Helper to indicate if a comment includes the `=` in the display form +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CommentDef { + /// Includes `=` when printing the comment, as `COMMENT = 'comment'` + /// Does not include `=` when printing the comment, as `COMMENT 'comment'` + WithEq(String), + WithoutEq(String), +} + +impl Display for CommentDef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CommentDef::WithEq(comment) | CommentDef::WithoutEq(comment) => write!(f, "{comment}"), + } + } +} + +/// Helper to indicate if a collection should be wrapped by a symbol in the display form +/// +/// [`Display`] is implemented for every [`Vec`] where `T: Display`. 
+/// The string output is a comma separated list for the vec items +/// +/// # Examples +/// ``` +/// # use sqlparser::ast::WrappedCollection; +/// let items = WrappedCollection::Parentheses(vec!["one", "two", "three"]); +/// assert_eq!("(one, two, three)", items.to_string()); +/// +/// let items = WrappedCollection::NoWrapping(vec!["one", "two", "three"]); +/// assert_eq!("one, two, three", items.to_string()); +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WrappedCollection { + /// Print the collection without wrapping symbols, as `item, item, item` + NoWrapping(T), + /// Wraps the collection in Parentheses, as `(item, item, item)` + Parentheses(T), +} + +impl Display for WrappedCollection> +where + T: Display, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + WrappedCollection::NoWrapping(inner) => { + write!(f, "{}", display_comma_separated(inner.as_slice())) + } + WrappedCollection::Parentheses(inner) => { + write!(f, "({})", display_comma_separated(inner.as_slice())) + } + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 894b004381..9f1d7f27b5 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -12,11 +12,14 @@ #[cfg(not(feature = "std"))] use crate::alloc::string::ToString; +use crate::ast::helpers::stmt_create_table::CreateTableBuilder; use crate::ast::helpers::stmt_data_loading::{ DataLoadingOption, DataLoadingOptionType, DataLoadingOptions, StageLoadSelectItem, StageParamsObject, }; -use crate::ast::{Ident, ObjectName, Statement}; +use crate::ast::{ + CommentDef, Ident, ObjectName, RowAccessPolicy, Statement, Tag, WrappedCollection, +}; use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -91,12 +94,36 @@ impl Dialect for 
SnowflakeDialect { // possibly CREATE STAGE //[ OR REPLACE ] let or_replace = parser.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); - //[ TEMPORARY ] - let temporary = parser.parse_keyword(Keyword::TEMPORARY); + // LOCAL | GLOBAL + let global = match parser.parse_one_of_keywords(&[Keyword::LOCAL, Keyword::GLOBAL]) { + Some(Keyword::LOCAL) => Some(false), + Some(Keyword::GLOBAL) => Some(true), + _ => None, + }; + + let mut temporary = false; + let mut volatile = false; + let mut transient = false; + + match parser.parse_one_of_keywords(&[ + Keyword::TEMP, + Keyword::TEMPORARY, + Keyword::VOLATILE, + Keyword::TRANSIENT, + ]) { + Some(Keyword::TEMP | Keyword::TEMPORARY) => temporary = true, + Some(Keyword::VOLATILE) => volatile = true, + Some(Keyword::TRANSIENT) => transient = true, + _ => {} + } if parser.parse_keyword(Keyword::STAGE) { // OK - this is CREATE STAGE statement return Some(parse_create_stage(or_replace, temporary, parser)); + } else if parser.parse_keyword(Keyword::TABLE) { + return Some(parse_create_table( + or_replace, global, temporary, volatile, transient, parser, + )); } else { // need to go back with the cursor let mut back = 1; @@ -120,6 +147,196 @@ impl Dialect for SnowflakeDialect { } } +/// Parse snowflake create table statement. +/// +pub fn parse_create_table( + or_replace: bool, + global: Option, + temporary: bool, + volatile: bool, + transient: bool, + parser: &mut Parser, +) -> Result { + let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = parser.parse_object_name(false)?; + + let mut builder = CreateTableBuilder::new(table_name) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .temporary(temporary) + .transient(transient) + .volatile(volatile) + .global(global) + .hive_formats(Some(Default::default())); + + // Snowflake does not enforce order of the parameters in the statement. The parser needs to + // parse the statement in a loop. 
+ // + // "CREATE TABLE x COPY GRANTS (c INT)" and "CREATE TABLE x (c INT) COPY GRANTS" are both + // accepted by Snowflake + + loop { + let next_token = parser.next_token(); + match &next_token.token { + Token::Word(word) => match word.keyword { + Keyword::COPY => { + parser.expect_keyword(Keyword::GRANTS)?; + builder = builder.copy_grants(true); + } + Keyword::COMMENT => { + parser.expect_token(&Token::Eq)?; + let next_token = parser.next_token(); + let comment = match next_token.token { + Token::SingleQuotedString(str) => Some(CommentDef::WithEq(str)), + _ => parser.expected("comment", next_token)?, + }; + builder = builder.comment(comment); + } + Keyword::AS => { + let query = parser.parse_boxed_query()?; + builder = builder.query(Some(query)); + break; + } + Keyword::CLONE => { + let clone = parser.parse_object_name(false).ok(); + builder = builder.clone_clause(clone); + break; + } + Keyword::LIKE => { + let like = parser.parse_object_name(false).ok(); + builder = builder.like(like); + break; + } + Keyword::CLUSTER => { + parser.expect_keyword(Keyword::BY)?; + parser.expect_token(&Token::LParen)?; + let cluster_by = Some(WrappedCollection::Parentheses( + parser.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + parser.expect_token(&Token::RParen)?; + + builder = builder.cluster_by(cluster_by) + } + Keyword::ENABLE_SCHEMA_EVOLUTION => { + parser.expect_token(&Token::Eq)?; + let enable_schema_evolution = + match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { + Some(Keyword::TRUE) => true, + Some(Keyword::FALSE) => false, + _ => { + return parser.expected("TRUE or FALSE", next_token); + } + }; + + builder = builder.enable_schema_evolution(Some(enable_schema_evolution)); + } + Keyword::CHANGE_TRACKING => { + parser.expect_token(&Token::Eq)?; + let change_tracking = + match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { + Some(Keyword::TRUE) => true, + Some(Keyword::FALSE) => false, + _ => { + return 
parser.expected("TRUE or FALSE", next_token); + } + }; + + builder = builder.change_tracking(Some(change_tracking)); + } + Keyword::DATA_RETENTION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + let data_retention_time_in_days = parser.parse_literal_uint()?; + builder = + builder.data_retention_time_in_days(Some(data_retention_time_in_days)); + } + Keyword::MAX_DATA_EXTENSION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + let max_data_extension_time_in_days = parser.parse_literal_uint()?; + builder = builder + .max_data_extension_time_in_days(Some(max_data_extension_time_in_days)); + } + Keyword::DEFAULT_DDL_COLLATION => { + parser.expect_token(&Token::Eq)?; + let default_ddl_collation = parser.parse_literal_string()?; + builder = builder.default_ddl_collation(Some(default_ddl_collation)); + } + // WITH is optional, we just verify that next token is one of the expected ones and + // fallback to the default match statement + Keyword::WITH => { + parser.expect_one_of_keywords(&[ + Keyword::AGGREGATION, + Keyword::TAG, + Keyword::ROW, + ])?; + parser.prev_token(); + } + Keyword::AGGREGATION => { + parser.expect_keyword(Keyword::POLICY)?; + let aggregation_policy = parser.parse_object_name(false)?; + builder = builder.with_aggregation_policy(Some(aggregation_policy)); + } + Keyword::ROW => { + parser.expect_keywords(&[Keyword::ACCESS, Keyword::POLICY])?; + let policy = parser.parse_object_name(false)?; + parser.expect_keyword(Keyword::ON)?; + parser.expect_token(&Token::LParen)?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier(false))?; + parser.expect_token(&Token::RParen)?; + + builder = + builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns))) + } + Keyword::TAG => { + fn parse_tag(parser: &mut Parser) -> Result { + let name = parser.parse_identifier(false)?; + parser.expect_token(&Token::Eq)?; + let value = parser.parse_literal_string()?; + + Ok(Tag::new(name, value)) + } + + 
parser.expect_token(&Token::LParen)?; + let tags = parser.parse_comma_separated(parse_tag)?; + parser.expect_token(&Token::RParen)?; + builder = builder.with_tags(Some(tags)); + } + _ => { + return parser.expected("end of statement", next_token); + } + }, + Token::LParen => { + parser.prev_token(); + let (columns, constraints) = parser.parse_columns()?; + builder = builder.columns(columns).constraints(constraints); + } + Token::EOF => { + if builder.columns.is_empty() { + return Err(ParserError::ParserError( + "unexpected end of input".to_string(), + )); + } + + break; + } + Token::SemiColon => { + if builder.columns.is_empty() { + return Err(ParserError::ParserError( + "unexpected end of input".to_string(), + )); + } + + parser.prev_token(); + break; + } + _ => { + return parser.expected("end of statement", next_token); + } + } + } + + Ok(builder.build()) +} + pub fn parse_create_stage( or_replace: bool, temporary: bool, diff --git a/src/keywords.rs b/src/keywords.rs index 1b204a8d5a..e75d45e441 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -70,11 +70,13 @@ define_keywords!( ABORT, ABS, ABSOLUTE, + ACCESS, ACTION, ADD, ADMIN, AFTER, AGAINST, + AGGREGATION, ALL, ALLOCATE, ALTER, @@ -138,6 +140,7 @@ define_keywords!( CENTURY, CHAIN, CHANGE, + CHANGE_TRACKING, CHANNEL, CHAR, CHARACTER, @@ -201,6 +204,7 @@ define_keywords!( CYCLE, DATA, DATABASE, + DATA_RETENTION_TIME_IN_DAYS, DATE, DATE32, DATETIME, @@ -214,6 +218,7 @@ define_keywords!( DECIMAL, DECLARE, DEFAULT, + DEFAULT_DDL_COLLATION, DEFERRABLE, DEFERRED, DEFINE, @@ -251,6 +256,7 @@ define_keywords!( ELSE, EMPTY, ENABLE, + ENABLE_SCHEMA_EVOLUTION, ENCODING, ENCRYPTION, END, @@ -330,6 +336,7 @@ define_keywords!( GLOBAL, GRANT, GRANTED, + GRANTS, GRAPHVIZ, GROUP, GROUPING, @@ -433,6 +440,7 @@ define_keywords!( MATERIALIZED, MAX, MAXVALUE, + MAX_DATA_EXTENSION_TIME_IN_DAYS, MEASURES, MEDIUMINT, MEMBER, @@ -539,6 +547,7 @@ define_keywords!( PIVOT, PLACING, PLANS, + POLICY, PORTION, POSITION, POSITION_REGEX, 
@@ -690,6 +699,7 @@ define_keywords!( TABLE, TABLES, TABLESAMPLE, + TAG, TARGET, TBLPROPERTIES, TEMP, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6406bd4e51..c591b8116f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5372,7 +5372,7 @@ impl<'a> Parser<'a> { let _ = self.consume_token(&Token::Eq); let next_token = self.next_token(); match next_token.token { - Token::SingleQuotedString(str) => Some(str), + Token::SingleQuotedString(str) => Some(CommentDef::WithoutEq(str)), _ => self.expected("comment", next_token)?, } } else { @@ -5423,7 +5423,9 @@ impl<'a> Parser<'a> { let mut cluster_by = None; if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(self.parse_comma_separated(|p| p.parse_identifier(false))?); + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); }; let mut options = None; @@ -7783,7 +7785,7 @@ impl<'a> Parser<'a> { /// This function can be used to reduce the stack size required in debug /// builds. Instead of `sizeof(Query)` only a pointer (`Box`) /// is used. 
- fn parse_boxed_query(&mut self) -> Result, ParserError> { + pub fn parse_boxed_query(&mut self) -> Result, ParserError> { self.parse_query().map(Box::new) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 3b6d6bfcb4..171439d198 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -442,7 +442,10 @@ fn parse_create_table_with_options() { assert_eq!( ( Some(Box::new(Expr::Identifier(Ident::new("_PARTITIONDATE")))), - Some(vec![Ident::new("userid"), Ident::new("age"),]), + Some(WrappedCollection::NoWrapping(vec![ + Ident::new("userid"), + Ident::new("age"), + ])), Some(vec![ SqlOption { name: Ident::new("partition_expiration_days"), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8fe7b862c4..f6518e2763 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3453,9 +3453,14 @@ fn parse_create_table_as_table() { #[test] fn parse_create_table_on_cluster() { + let generic = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + // Using single-quote literal to define current cluster let sql = "CREATE TABLE t ON CLUSTER '{cluster}' (a INT, b INT)"; - match verified_stmt(sql) { + match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. }) => { assert_eq!(on_cluster.unwrap(), "{cluster}".to_string()); } @@ -3464,7 +3469,7 @@ fn parse_create_table_on_cluster() { // Using explicitly declared cluster name let sql = "CREATE TABLE t ON CLUSTER my_cluster (a INT, b INT)"; - match verified_stmt(sql) { + match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { on_cluster, .. 
}) => { assert_eq!(on_cluster.unwrap(), "my_cluster".to_string()); } @@ -3517,8 +3522,13 @@ fn parse_create_table_with_on_delete_on_update_2in_any_order() -> Result<(), Par #[test] fn parse_create_table_with_options() { + let generic = TestedDialects { + dialects: vec![Box::new(GenericDialect {})], + options: None, + }; + let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; - match verified_stmt(sql) { + match generic.verified_stmt(sql) { Statement::CreateTable(CreateTable { with_options, .. }) => { assert_eq!( vec![ diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 93b3c044aa..5343fe5e0f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4136,3 +4136,26 @@ fn parse_at_time_zone() { expr ); } + +#[test] +fn parse_create_table_with_options() { + let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; + match pg().verified_stmt(sql) { + Statement::CreateTable(CreateTable { with_options, .. }) => { + assert_eq!( + vec![ + SqlOption { + name: "foo".into(), + value: Expr::Value(Value::SingleQuotedString("bar".into())), + }, + SqlOption { + name: "a".into(), + value: Expr::Value(number("123")), + }, + ], + with_options + ); + } + _ => unreachable!(), + } +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index a21e9d5d6b..f0a7c7735f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -40,6 +40,279 @@ fn test_snowflake_create_table() { } } +#[test] +fn test_snowflake_create_or_replace_table() { + let sql = "CREATE OR REPLACE TABLE my_table (a number)"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, or_replace, .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants() { + let sql = "CREATE OR REPLACE TABLE my_table (a number) COPY GRANTS"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants_at_end() { + let sql = "CREATE OR REPLACE TABLE my_table COPY GRANTS (a number) "; + let parsed = "CREATE OR REPLACE TABLE my_table (a number) COPY GRANTS"; + match snowflake().one_statement_parses_to(sql, parsed) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_or_replace_table_copy_grants_cta() { + let sql = "CREATE OR REPLACE TABLE my_table COPY GRANTS AS SELECT 1 AS a"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + or_replace, + copy_grants, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(or_replace); + assert!(copy_grants); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_enable_schema_evolution() { + let sql = "CREATE TABLE my_table (a number) ENABLE_SCHEMA_EVOLUTION=TRUE"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + enable_schema_evolution, + .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), enable_schema_evolution); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_change_tracking() { + let sql = "CREATE TABLE my_table (a number) CHANGE_TRACKING=TRUE"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + change_tracking, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), change_tracking); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_data_retention_time_in_days() { + let sql = "CREATE TABLE my_table (a number) DATA_RETENTION_TIME_IN_DAYS=5"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + data_retention_time_in_days, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(5), data_retention_time_in_days); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_max_data_extension_time_in_days() { + let sql = "CREATE TABLE my_table (a number) MAX_DATA_EXTENSION_TIME_IN_DAYS=5"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + max_data_extension_time_in_days, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(5), max_data_extension_time_in_days); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_aggregation_policy() { + match snowflake() + .verified_stmt("CREATE TABLE my_table (a number) WITH AGGREGATION POLICY policy_name") + { + Statement::CreateTable(CreateTable { + name, + with_aggregation_policy, + .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("policy_name".to_string()), + with_aggregation_policy.map(|name| name.to_string()) + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements("CREATE TABLE my_table (a number) AGGREGATION POLICY policy_name") + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, + with_aggregation_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("policy_name".to_string()), + with_aggregation_policy.map(|name| name.to_string()) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_row_access_policy() { + match snowflake().verified_stmt( + "CREATE TABLE my_table (a number, b number) WITH ROW ACCESS POLICY policy_name ON (a)", + ) { + Statement::CreateTable(CreateTable { + name, + with_row_access_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("WITH ROW ACCESS POLICY policy_name ON (a)".to_string()), + with_row_access_policy.map(|policy| policy.to_string()) + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements( + "CREATE TABLE my_table (a number, b number) ROW ACCESS POLICY policy_name ON (a)", + ) + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, + with_row_access_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some("WITH ROW ACCESS POLICY policy_name ON (a)".to_string()), + with_row_access_policy.map(|policy| policy.to_string()) + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_with_tag() { + match snowflake() + .verified_stmt("CREATE TABLE my_table (a number) WITH TAG (A='TAG A', B='TAG B')") + { + Statement::CreateTable(CreateTable { + name, with_tags, .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(vec![ + Tag::new("A".into(), "TAG A".to_string()), + Tag::new("B".into(), "TAG B".to_string()) + ]), + with_tags + ); + } + _ => unreachable!(), + } + + match snowflake() + .parse_sql_statements("CREATE TABLE my_table (a number) TAG (A='TAG A', B='TAG B')") + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { + name, with_tags, .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(vec![ + Tag::new("A".into(), "TAG A".to_string()), + Tag::new("B".into(), "TAG B".to_string()) + ]), + with_tags + ); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_default_ddl_collation() { + let sql = "CREATE TABLE my_table (a number) DEFAULT_DDL_COLLATION='de'"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + default_ddl_collation, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some("de".to_string()), default_ddl_collation); + } + _ => unreachable!(), + } +} + #[test] fn test_snowflake_create_transient_table() { let sql = "CREATE TRANSIENT TABLE CUSTOMER (id INT, name VARCHAR(255))"; @@ -54,6 +327,162 @@ fn test_snowflake_create_transient_table() { } } +#[test] +fn test_snowflake_create_table_column_comment() { + let sql = "CREATE TABLE my_table (a STRING COMMENT 'some comment')"; + match snowflake().verified_stmt(sql) { + Statement::CreateTable(CreateTable { name, columns, .. 
}) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + vec![ColumnDef { + name: "a".into(), + data_type: DataType::String(None), + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Comment("some comment".to_string()) + }], + collation: None + }], + columns + ) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_local_table() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. }) => { + assert_eq!("my_table", name.to_string()); + assert!(global.is_none()) + } + _ => unreachable!(), + } + + match snowflake().verified_stmt("CREATE LOCAL TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(false), global) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_global_table() { + match snowflake().verified_stmt("CREATE GLOBAL TABLE my_table (a INT)") { + Statement::CreateTable(CreateTable { name, global, .. 
}) => { + assert_eq!("my_table", name.to_string()); + assert_eq!(Some(true), global) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_invalid_local_global_table() { + assert_eq!( + snowflake().parse_sql_statements("CREATE LOCAL GLOBAL TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected an SQL statement, found: LOCAL".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE GLOBAL LOCAL TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected an SQL statement, found: GLOBAL".to_string() + )) + ); +} + +#[test] +fn test_snowflake_create_invalid_temporal_table() { + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP TEMPORARY TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected an object type after CREATE, found: TEMPORARY".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP VOLATILE TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected an object type after CREATE, found: VOLATILE".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TEMP TRANSIENT TABLE my_table (a INT)"), + Err(ParserError::ParserError( + "Expected an object type after CREATE, found: TRANSIENT".to_string() + )) + ); +} + +#[test] +fn test_snowflake_create_table_if_not_exists() { + match snowflake().verified_stmt("CREATE TABLE IF NOT EXISTS my_table (a INT)") { + Statement::CreateTable(CreateTable { + name, + if_not_exists, + .. + }) => { + assert_eq!("my_table", name.to_string()); + assert!(if_not_exists) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_cluster_by() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT) CLUSTER BY (a, b)") { + Statement::CreateTable(CreateTable { + name, cluster_by, .. 
+ }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!( + Some(WrappedCollection::Parentheses(vec![ + Ident::new("a"), + Ident::new("b"), + ])), + cluster_by + ) + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_comment() { + match snowflake().verified_stmt("CREATE TABLE my_table (a INT) COMMENT = 'some comment'") { + Statement::CreateTable(CreateTable { name, comment, .. }) => { + assert_eq!("my_table", name.to_string()); + assert_eq!("some comment", comment.unwrap().to_string()); + } + _ => unreachable!(), + } +} + +#[test] +fn test_snowflake_create_table_incomplete_statement() { + assert_eq!( + snowflake().parse_sql_statements("CREATE TABLE my_table"), + Err(ParserError::ParserError( + "unexpected end of input".to_string() + )) + ); + + assert_eq!( + snowflake().parse_sql_statements("CREATE TABLE my_table; (c int)"), + Err(ParserError::ParserError( + "unexpected end of input".to_string() + )) + ); +} + #[test] fn test_snowflake_single_line_tokenize() { let sql = "CREATE TABLE# this is a comment \ntable_1"; From deac26971084d0790e718a3352a43ecbbc868e64 Mon Sep 17 00:00:00 2001 From: Philip Cristiano Date: Mon, 17 Jun 2024 14:10:40 -0400 Subject: [PATCH 18/53] CreateIndex: Move Display fmt to struct (#1307) --- src/ast/dml.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 43 +------------------------------------------ 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 74bb5435c8..b35b2b970d 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -47,6 +47,49 @@ pub struct CreateIndex { pub nulls_distinct: Option, pub predicate: Option, } + +impl Display for CreateIndex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE {unique}INDEX {concurrently}{if_not_exists}", + unique = if self.unique { "UNIQUE " } else { "" }, + concurrently = if self.concurrently { + "CONCURRENTLY " + } else { + "" + }, + if_not_exists = if 
self.if_not_exists { + "IF NOT EXISTS " + } else { + "" + }, + )?; + if let Some(value) = &self.name { + write!(f, "{value} ")?; + } + write!(f, "ON {}", self.table_name)?; + if let Some(value) = &self.using { + write!(f, " USING {value} ")?; + } + write!(f, "({})", display_separated(&self.columns, ","))?; + if !self.include.is_empty() { + write!(f, " INCLUDE ({})", display_separated(&self.include, ","))?; + } + if let Some(value) = self.nulls_distinct { + if value { + write!(f, " NULLS DISTINCT")?; + } else { + write!(f, " NULLS NOT DISTINCT")?; + } + } + if let Some(predicate) = &self.predicate { + write!(f, " WHERE {predicate}")?; + } + Ok(()) + } +} + /// CREATE TABLE statement. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 49d6499c5f..6e306b1e37 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3383,48 +3383,7 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::CreateIndex(CreateIndex { - name, - table_name, - using, - columns, - unique, - concurrently, - if_not_exists, - include, - nulls_distinct, - predicate, - }) => { - write!( - f, - "CREATE {unique}INDEX {concurrently}{if_not_exists}", - unique = if *unique { "UNIQUE " } else { "" }, - concurrently = if *concurrently { "CONCURRENTLY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, - )?; - if let Some(value) = name { - write!(f, "{value} ")?; - } - write!(f, "ON {table_name}")?; - if let Some(value) = using { - write!(f, " USING {value} ")?; - } - write!(f, "({})", display_separated(columns, ","))?; - if !include.is_empty() { - write!(f, " INCLUDE ({})", display_separated(include, ","))?; - } - if let Some(value) = nulls_distinct { - if *value { - write!(f, " NULLS DISTINCT")?; - } else { - write!(f, " NULLS NOT DISTINCT")?; - } - } - if let Some(predicate) = predicate { - write!(f, " WHERE {predicate}")?; - } - Ok(()) - } + 
Statement::CreateIndex(create_index) => create_index.fmt(f), Statement::CreateExtension { name, if_not_exists, From 0330f9def5ebd6b7813dc4656f40edc717dbd0a3 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Mon, 17 Jun 2024 22:14:40 +0400 Subject: [PATCH 19/53] Support use of `BY NAME` quantifier across all set ops (#1309) Co-authored-by: Alexander Beedie Co-authored-by: Joey Hain --- README.md | 6 +++--- src/ast/data_type.rs | 4 ++-- src/ast/mod.rs | 6 +++--- src/parser/mod.rs | 15 +++------------ src/tokenizer.rs | 2 +- tests/sqlparser_common.rs | 6 ++++++ 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 512f5f6c06..3226b9549b 100644 --- a/README.md +++ b/README.md @@ -114,13 +114,12 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This parser is currently being used by the [DataFusion] query engine, -[LocustDB], [Ballista], [GlueSQL], [Opteryx], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. +This parser is currently being used by the [DataFusion] query engine, [LocustDB], +[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], and [ParadeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. - ## Design The core expression parser uses the [Pratt Parser] design, which is a top-down @@ -210,6 +209,7 @@ licensed as above, without any additional terms or conditions. 
[Ballista]: https://github.com/apache/arrow-ballista [GlueSQL]: https://github.com/gluesql/gluesql [Opteryx]: https://github.com/mabel-dev/opteryx +[Polars]: https://pola.rs/ [PRQL]: https://github.com/PRQL/prql [Qrlew]: https://github.com/Qrlew/qrlew [JumpWire]: https://github.com/extragoodlabs/jumpwire diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 7d0aec8fca..6b1a542f42 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -168,7 +168,7 @@ pub enum DataType { UnsignedInt(Option), /// Unsigned int4 with optional display width e.g. INT4 UNSIGNED or INT4(11) UNSIGNED UnsignedInt4(Option), - /// Unsigned integer with optional display width e.g. INTGER UNSIGNED or INTEGER(11) UNSIGNED + /// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED UnsignedInteger(Option), /// Unsigned integer type in [clickhouse] /// Note: UInt8 mean 8 bits in [clickhouse] @@ -699,7 +699,7 @@ pub enum CharacterLength { /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Miscrosoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) Max, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6e306b1e37..7af8efaec0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2265,7 +2265,7 @@ pub enum Statement { /// SET [ SESSION | LOCAL ] ROLE role_name /// ``` /// - /// Sets sesssion state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] + /// Sets session state. Examples: [ANSI][1], [Postgresql][2], [MySQL][3], and [Oracle][4] /// /// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#set-role-statement /// [2]: https://www.postgresql.org/docs/14/sql-set-role.html @@ -2283,7 +2283,7 @@ pub enum Statement { /// ``` /// /// Note: this is not a standard SQL statement, but it is supported by at - /// least MySQL and PostgreSQL. 
Not all MySQL-specific syntatic forms are + /// least MySQL and PostgreSQL. Not all MySQL-specific syntactic forms are /// supported yet. SetVariable { local: bool, @@ -4750,7 +4750,7 @@ impl fmt::Display for FunctionArguments { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionArgumentList { - /// `[ ALL | DISTINCT ] + /// `[ ALL | DISTINCT ]` pub duplicate_treatment: Option, /// The function arguments. pub args: Vec, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c591b8116f..e240441b98 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8138,7 +8138,7 @@ impl<'a> Parser<'a> { pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { - Some(SetOperator::Union) => { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { SetQuantifier::DistinctByName } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { @@ -8155,15 +8155,6 @@ impl<'a> Parser<'a> { SetQuantifier::None } } - Some(SetOperator::Except) | Some(SetOperator::Intersect) => { - if self.parse_keyword(Keyword::ALL) { - SetQuantifier::All - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } _ => SetQuantifier::None, } } @@ -8547,10 +8538,10 @@ impl<'a> Parser<'a> { }) } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { if self.parse_keyword(Keyword::SNAPSHOT) { - let snaphot_id = self.parse_value()?; + let snapshot_id = self.parse_value()?; return Ok(Statement::SetTransaction { modes: vec![], - snapshot: Some(snaphot_id), + snapshot: Some(snapshot_id), session: false, }); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index bcc5478bc7..4e64e07127 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -654,7 +654,7 @@ impl<'a> Tokenizer<'a> { Ok(()) } - // Tokenize the identifer or 
keywords in `ch` + // Tokenize the identifier or keywords in `ch` fn tokenize_identifier_or_keyword( &self, ch: impl IntoIterator, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f6518e2763..a86858129c 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6010,6 +6010,12 @@ fn parse_union_except_intersect() { verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); verified_stmt("(SELECT * FROM new EXCEPT SELECT * FROM old) UNION ALL (SELECT * FROM old EXCEPT SELECT * FROM new) ORDER BY 1"); verified_stmt("(SELECT * FROM new EXCEPT DISTINCT SELECT * FROM old) UNION DISTINCT (SELECT * FROM old EXCEPT DISTINCT SELECT * FROM new) ORDER BY 1"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y EXCEPT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT ALL BY NAME SELECT 9 AS y, 8 AS x"); + verified_stmt("SELECT 1 AS x, 2 AS y INTERSECT DISTINCT BY NAME SELECT 9 AS y, 8 AS x"); } #[test] From 345e2098fb3cdd720f99e8183720529c4fd0acc5 Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 18 Jun 2024 15:28:39 +0200 Subject: [PATCH 20/53] add support for update statements that contain tuple assignments (#1317) --- src/ast/mod.rs | 26 ++++++++++++++++++++++++-- src/parser/mod.rs | 16 ++++++++++++++-- tests/sqlparser_bigquery.rs | 4 ++-- tests/sqlparser_common.rs | 23 ++++++++++++++++------- tests/sqlparser_mysql.rs | 25 +++++++++++++++++++------ tests/sqlparser_postgres.rs | 10 +++++----- tests/sqlparser_sqlite.rs | 34 ++++++++++++++++++++++++++++++++++ 7 files changed, 114 insertions(+), 24 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7af8efaec0..769bda5989 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4553,13 +4553,35 @@ impl 
fmt::Display for GrantObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Assignment { - pub id: Vec, + pub target: AssignmentTarget, pub value: Expr, } impl fmt::Display for Assignment { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} = {}", display_separated(&self.id, "."), self.value) + write!(f, "{} = {}", self.target, self.value) + } +} + +/// Left-hand side of an assignment in an UPDATE statement, +/// e.g. `foo` in `foo = 5` (ColumnName assignment) or +/// `(a, b)` in `(a, b) = (1, 2)` (Tuple assignment). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AssignmentTarget { + /// A single column + ColumnName(ObjectName), + /// A tuple of columns + Tuple(Vec), +} + +impl fmt::Display for AssignmentTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AssignmentTarget::ColumnName(column) => write!(f, "{}", column), + AssignmentTarget::Tuple(columns) => write!(f, "({})", display_comma_separated(columns)), + } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e240441b98..62222c6fba 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9937,10 +9937,22 @@ impl<'a> Parser<'a> { /// Parse a `var = expr` assignment, used in an UPDATE statement pub fn parse_assignment(&mut self) -> Result { - let id = self.parse_identifiers()?; + let target = self.parse_assignment_target()?; self.expect_token(&Token::Eq)?; let value = self.parse_expr()?; - Ok(Assignment { id, value }) + Ok(Assignment { target, value }) + } + + /// Parse the left-hand side of an assignment, used in an UPDATE statement + pub fn parse_assignment_target(&mut self) -> Result { + if self.consume_token(&Token::LParen) { + let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; + 
self.expect_token(&Token::RParen)?; + Ok(AssignmentTarget::Tuple(columns)) + } else { + let column = self.parse_object_name(false)?; + Ok(AssignmentTarget::ColumnName(column)) + } } pub fn parse_function_args(&mut self) -> Result { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 171439d198..fb6e3b88ad 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1590,11 +1590,11 @@ fn parse_merge() { let update_action = MergeAction::Update { assignments: vec![ Assignment { - id: vec![Ident::new("a")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("a")])), value: Expr::Value(number("1")), }, Assignment { - id: vec![Ident::new("b")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("b")])), value: Expr::Value(number("2")), }, ], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a86858129c..15b3b69dd9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -296,15 +296,15 @@ fn parse_update() { assignments, vec![ Assignment { - id: vec!["a".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["a".into()])), value: Expr::Value(number("1")), }, Assignment { - id: vec!["b".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["b".into()])), value: Expr::Value(number("2")), }, Assignment { - id: vec!["c".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["c".into()])), value: Expr::Value(number("3")), }, ] @@ -363,7 +363,7 @@ fn parse_update_set_from() { joins: vec![], }, assignments: vec![Assignment { - id: vec![Ident::new("name")], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new("name")])), value: Expr::CompoundIdentifier(vec![Ident::new("t2"), Ident::new("name")]) }], from: Some(TableWithJoins { @@ -466,7 +466,10 @@ fn parse_update_with_table_alias() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("u"), Ident::new("username")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + 
Ident::new("u"), + Ident::new("username") + ])), value: Expr::Value(Value::SingleQuotedString("new_user".to_string())), }], assignments @@ -7702,14 +7705,20 @@ fn parse_merge() { action: MergeAction::Update { assignments: vec![ Assignment { - id: vec![Ident::new("dest"), Ident::new("F")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("F") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("F"), ]), }, Assignment { - id: vec![Ident::new("dest"), Ident::new("G")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("dest"), + Ident::new("G") + ])), value: Expr::CompoundIdentifier(vec![ Ident::new("stg"), Ident::new("G"), diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e65fc181b6..ff8a49de72 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1639,23 +1639,33 @@ fn parse_insert_with_on_duplicate_update() { assert_eq!( Some(OnInsert::DuplicateKeyUpdate(vec![ Assignment { - id: vec![Ident::new("description".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "description".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("description"))]), }, Assignment { - id: vec![Ident::new("perm_create".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_create".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_create"))]), }, Assignment { - id: vec![Ident::new("perm_read".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_read".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_read"))]), }, Assignment { - id: vec![Ident::new("perm_update".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_update".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_update"))]), }, Assignment { - id: 
vec![Ident::new("perm_delete".to_string())], + target: AssignmentTarget::ColumnName(ObjectName(vec![Ident::new( + "perm_delete".to_string() + )])), value: call("VALUES", [Expr::Identifier(Ident::new("perm_delete"))]), }, ])), @@ -1835,7 +1845,10 @@ fn parse_update_with_joins() { ); assert_eq!( vec![Assignment { - id: vec![Ident::new("o"), Ident::new("completed")], + target: AssignmentTarget::ColumnName(ObjectName(vec![ + Ident::new("o"), + Ident::new("completed") + ])), value: Expr::Value(Value::Boolean(true)) }], assignments diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 5343fe5e0f..fe735b8b2e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1557,7 +1557,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "dname".into()]) },], selection: None @@ -1588,14 +1588,14 @@ fn parse_pg_on_conflict() { OnConflictAction::DoUpdate(DoUpdate { assignments: vec![ Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::CompoundIdentifier(vec![ "EXCLUDED".into(), "dname".into() ]) }, Assignment { - id: vec!["area".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["area".into()])), value: Expr::CompoundIdentifier(vec!["EXCLUDED".into(), "area".into()]) }, ], @@ -1645,7 +1645,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: Some(Expr::BinaryOp { @@ -1682,7 +1682,7 @@ fn parse_pg_on_conflict() { assert_eq!( OnConflictAction::DoUpdate(DoUpdate { assignments: vec![Assignment { - id: 
vec!["dname".into()], + target: AssignmentTarget::ColumnName(ObjectName(vec!["dname".into()])), value: Expr::Value(Value::Placeholder("$1".to_string())) },], selection: Some(Expr::BinaryOp { diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 16ea9eb8cb..1181c480b9 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -373,6 +373,40 @@ fn parse_attach_database() { } } +#[test] +fn parse_update_tuple_row_values() { + // See https://github.com/sqlparser-rs/sqlparser-rs/issues/1311 + assert_eq!( + sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), + Statement::Update { + assignments: vec![Assignment { + target: AssignmentTarget::Tuple(vec![ + ObjectName(vec![Ident::new("a"),]), + ObjectName(vec![Ident::new("b"),]), + ]), + value: Expr::Tuple(vec![ + Expr::Value(Value::Number("1".parse().unwrap(), false)), + Expr::Value(Value::Number("2".parse().unwrap(), false)) + ]) + }], + selection: None, + table: TableWithJoins { + relation: TableFactor::Table { + name: ObjectName(vec![Ident::new("x")]), + alias: None, + args: None, + with_hints: vec![], + version: None, + partitions: vec![] + }, + joins: vec![], + }, + from: None, + returning: None + } + ); +} + #[test] fn parse_where_in_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN ()"; From 79af31b6727fbe60e21705f4bbf8dafc59516e42 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Tue, 18 Jun 2024 15:30:24 +0200 Subject: [PATCH 21/53] Return errors, not panic, when integers fail to parse in `AUTO_INCREMENT` and `TOP` (#1305) --- src/parser/mod.rs | 40 ++++++++++++++++++++++++--------------- tests/sqlparser_common.rs | 15 +++++++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 62222c6fba..67aebcb330 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -20,7 +20,10 @@ use alloc::{ vec, vec::Vec, }; -use core::fmt; +use core::{ + fmt::{self, Display}, + str::FromStr, +}; use log::debug; @@ -3260,6 
+3263,18 @@ impl<'a> Parser<'a> { } } + fn parse(s: String, loc: Location) -> Result + where + ::Err: Display, + { + s.parse::().map_err(|e| { + ParserError::ParserError(format!( + "Could not parse '{s}' as {}: {e}{loc}", + core::any::type_name::() + )) + }) + } + /// Parse a comma-separated list of 1+ SelectItem pub fn parse_projection(&mut self) -> Result, ParserError> { // BigQuery and Snowflake allow trailing commas, but only in project lists @@ -5281,7 +5296,7 @@ impl<'a> Parser<'a> { let _ = self.consume_token(&Token::Eq); let next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => Some(s.parse::().expect("literal int")), + Token::Number(s, _) => Some(Self::parse::(s, next_token.location)?), _ => self.expected("literal int", next_token)?, } } else { @@ -6725,10 +6740,7 @@ impl<'a> Parser<'a> { // The call to n.parse() returns a bigdecimal when the // bigdecimal feature is enabled, and is otherwise a no-op // (i.e., it returns the input string). - Token::Number(ref n, l) => match n.parse() { - Ok(n) => Ok(Value::Number(n, l)), - Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location), - }, + Token::Number(n, l) => Ok(Value::Number(Self::parse(n, location)?, l)), Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), Token::TripleSingleQuotedString(ref s) => { @@ -6820,9 +6832,7 @@ impl<'a> Parser<'a> { pub fn parse_literal_uint(&mut self) -> Result { let next_token = self.next_token(); match next_token.token { - Token::Number(s, _) => s.parse::().map_err(|e| { - ParserError::ParserError(format!("Could not parse '{s}' as u64: {e}")) - }), + Token::Number(s, _) => Self::parse::(s, next_token.location), _ => self.expected("literal int", next_token), } } @@ -9273,7 +9283,7 @@ impl<'a> Parser<'a> { return self.expected("literal number", next_token); }; self.expect_token(&Token::RBrace)?; - 
RepetitionQuantifier::AtMost(n.parse().expect("literal int")) + RepetitionQuantifier::AtMost(Self::parse(n, token.location)?) } Token::Number(n, _) if self.consume_token(&Token::Comma) => { let next_token = self.next_token(); @@ -9281,12 +9291,12 @@ impl<'a> Parser<'a> { Token::Number(m, _) => { self.expect_token(&Token::RBrace)?; RepetitionQuantifier::Range( - n.parse().expect("literal int"), - m.parse().expect("literal int"), + Self::parse(n, token.location)?, + Self::parse(m, token.location)?, ) } Token::RBrace => { - RepetitionQuantifier::AtLeast(n.parse().expect("literal int")) + RepetitionQuantifier::AtLeast(Self::parse(n, token.location)?) } _ => { return self.expected("} or upper bound", next_token); @@ -9295,7 +9305,7 @@ impl<'a> Parser<'a> { } Token::Number(n, _) => { self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Exactly(n.parse().expect("literal int")) + RepetitionQuantifier::Exactly(Self::parse(n, token.location)?) } _ => return self.expected("quantifier range", token), } @@ -10329,7 +10339,7 @@ impl<'a> Parser<'a> { } else { let next_token = self.next_token(); let quantity = match next_token.token { - Token::Number(s, _) => s.parse::().expect("literal int"), + Token::Number(s, _) => Self::parse::(s, next_token.location)?, _ => self.expected("literal int", next_token)?, }; Some(TopQuantity::Constant(quantity)) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 15b3b69dd9..a87883908b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10006,3 +10006,18 @@ fn parse_select_wildcard_with_except() { "sql parser error: Expected identifier, found: )" ); } + +#[test] +fn parse_auto_increment_too_large() { + let dialect = GenericDialect {}; + let u64_max = u64::MAX; + let sql = + format!("CREATE TABLE foo (bar INT NOT NULL AUTO_INCREMENT) AUTO_INCREMENT=1{u64_max}"); + + let res = Parser::new(&dialect) + .try_with_sql(&sql) + .expect("tokenize to work") + .parse_statements(); + + assert!(res.is_err(), 
"{res:?}"); +} From f16c1afed0fa273228e74a633f3885c9c6609911 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Sat, 22 Jun 2024 00:26:23 +0200 Subject: [PATCH 22/53] Improve error messages with additional colons (#1319) --- src/parser/mod.rs | 6 +- src/tokenizer.rs | 4 +- tests/sqlparser_bigquery.rs | 20 ++-- tests/sqlparser_common.rs | 168 +++++++++++++++++----------------- tests/sqlparser_databricks.rs | 2 +- tests/sqlparser_hive.rs | 8 +- tests/sqlparser_mssql.rs | 2 +- tests/sqlparser_mysql.rs | 2 +- tests/sqlparser_postgres.rs | 28 +++--- tests/sqlparser_snowflake.rs | 40 ++++---- tests/sqlparser_sqlite.rs | 8 +- 11 files changed, 144 insertions(+), 144 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 67aebcb330..27520a6c44 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3116,7 +3116,7 @@ impl<'a> Parser<'a> { /// Report `found` was encountered instead of `expected` pub fn expected(&self, expected: &str, found: TokenWithLocation) -> Result { parser_err!( - format!("Expected {expected}, found: {found}"), + format!("Expected: {expected}, found: {found}"), found.location ) } @@ -11581,7 +11581,7 @@ mod tests { assert_eq!( ast, Err(ParserError::TokenizerError( - "Unterminated string literal at Line: 1, Column 5".to_string() + "Unterminated string literal at Line: 1, Column: 5".to_string() )) ); } @@ -11593,7 +11593,7 @@ mod tests { assert_eq!( ast, Err(ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16" + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" .to_string() )) ); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 4e64e07127..b8336cec88 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -429,7 +429,7 @@ impl fmt::Display for Location { write!( f, // TODO: use standard compiler location syntax (::) - " at Line: {}, Column {}", + " at 
Line: {}, Column: {}", self.line, self.column, ) } @@ -1816,7 +1816,7 @@ mod tests { use std::error::Error; assert!(err.source().is_none()); } - assert_eq!(err.to_string(), "test at Line: 1, Column 1"); + assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); } #[test] diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index fb6e3b88ad..ec4ddca96f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -535,7 +535,7 @@ fn parse_invalid_brackets() { bigquery_and_generic() .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected (, found: >".to_string()) + ParserError::ParserError("Expected: (, found: >".to_string()) ); let sql = "CREATE TABLE table (x STRUCT>>)"; @@ -544,7 +544,7 @@ fn parse_invalid_brackets() { .parse_sql_statements(sql) .unwrap_err(), ParserError::ParserError( - "Expected ',' or ')' after column definition, found: >".to_string() + "Expected: ',' or ')' after column definition, found: >".to_string() ) ); } @@ -1753,11 +1753,11 @@ fn parse_merge_invalid_statements() { for (sql, err_msg) in [ ( "MERGE T USING U ON TRUE WHEN MATCHED BY TARGET AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN MATCHED BY SOURCE AND 1 THEN DELETE", - "Expected THEN, found: BY", + "Expected: THEN, found: BY", ), ( "MERGE T USING U ON TRUE WHEN NOT MATCHED BY SOURCE THEN INSERT(a) VALUES (b)", @@ -1898,13 +1898,13 @@ fn parse_big_query_declare() { let error_sql = "DECLARE x"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: EOF".to_owned()), + ParserError::ParserError("Expected: a data type name, found: EOF".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE x 42"; assert_eq!( - ParserError::ParserError("Expected a data type name, found: 42".to_owned()), + ParserError::ParserError("Expected: a data type name, found: 42".to_owned()), 
bigquery().parse_sql_statements(error_sql).unwrap_err() ); } @@ -2069,7 +2069,7 @@ fn test_bigquery_create_function() { "AS ((SELECT 1 FROM mytable)) ", "OPTIONS(a = [1, 2])", ), - "Expected end of statement, found: OPTIONS", + "Expected: end of statement, found: OPTIONS", ), ( concat!( @@ -2077,7 +2077,7 @@ fn test_bigquery_create_function() { "IMMUTABLE ", "AS ((SELECT 1 FROM mytable)) ", ), - "Expected AS, found: IMMUTABLE", + "Expected: AS, found: IMMUTABLE", ), ( concat!( @@ -2085,7 +2085,7 @@ fn test_bigquery_create_function() { "AS \"console.log('hello');\" ", "LANGUAGE js ", ), - "Expected end of statement, found: LANGUAGE", + "Expected: end of statement, found: LANGUAGE", ), ]; for (sql, error) in error_sqls { @@ -2116,7 +2116,7 @@ fn test_bigquery_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), bigquery().parse_sql_statements(error_sql).unwrap_err() ); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a87883908b..0149bad5d5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -115,7 +115,7 @@ fn parse_replace_into() { let sql = "REPLACE INTO public.customer (id, name, active) VALUES (1, 2, 3)"; assert_eq!( - ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column 9".to_string()), + ParserError::ParserError("Unsupported statement REPLACE at Line: 1, Column: 9".to_string()), Parser::parse_sql(&dialect, sql,).unwrap_err(), ) } @@ -199,7 +199,7 @@ fn parse_insert_default_values() { let insert_with_columns_and_default_values = "INSERT INTO test_table (test_col) DEFAULT VALUES"; assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: DEFAULT".to_string() ), 
parse_sql_statements(insert_with_columns_and_default_values).unwrap_err() ); @@ -207,20 +207,20 @@ fn parse_insert_default_values() { let insert_with_default_values_and_hive_after_columns = "INSERT INTO test_table DEFAULT VALUES (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), parse_sql_statements(insert_with_default_values_and_hive_after_columns).unwrap_err() ); let insert_with_default_values_and_hive_partition = "INSERT INTO test_table DEFAULT VALUES PARTITION (some_column)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: PARTITION".to_string()), + ParserError::ParserError("Expected: end of statement, found: PARTITION".to_string()), parse_sql_statements(insert_with_default_values_and_hive_partition).unwrap_err() ); let insert_with_default_values_and_values_list = "INSERT INTO test_table DEFAULT VALUES (1)"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), parse_sql_statements(insert_with_default_values_and_values_list).unwrap_err() ); } @@ -319,14 +319,14 @@ fn parse_update() { let sql = "UPDATE t WHERE 1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected SET, found: WHERE".to_string()), + ParserError::ParserError("Expected: SET, found: WHERE".to_string()), res.unwrap_err() ); let sql = "UPDATE t SET a = 1 extrabadstuff"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: extrabadstuff".to_string()), + ParserError::ParserError("Expected: end of statement, found: extrabadstuff".to_string()), res.unwrap_err() ); } @@ -577,7 +577,7 @@ fn parse_delete_without_from_error() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); 
assert_eq!( - ParserError::ParserError("Expected FROM, found: WHERE".to_string()), + ParserError::ParserError("Expected: FROM, found: WHERE".to_string()), res.unwrap_err() ); } @@ -892,7 +892,7 @@ fn parse_select_distinct_on() { fn parse_select_distinct_missing_paren() { let result = parse_sql_statements("SELECT DISTINCT (name, id FROM customer"); assert_eq!( - ParserError::ParserError("Expected ), found: FROM".to_string()), + ParserError::ParserError("Expected: ), found: FROM".to_string()), result.unwrap_err(), ); } @@ -936,7 +936,7 @@ fn parse_select_into() { let sql = "SELECT * INTO table0 asdf FROM table1"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: asdf".to_string()), + ParserError::ParserError("Expected: end of statement, found: asdf".to_string()), result.unwrap_err() ) } @@ -973,7 +973,7 @@ fn parse_select_wildcard() { let sql = "SELECT * + * FROM foo;"; let result = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: +".to_string()), + ParserError::ParserError("Expected: end of statement, found: +".to_string()), result.unwrap_err(), ); } @@ -1002,7 +1002,7 @@ fn parse_column_aliases() { assert_eq!(&Expr::Value(number("1")), right.as_ref()); assert_eq!(&Ident::new("newname"), alias); } else { - panic!("Expected ExprWithAlias") + panic!("Expected: ExprWithAlias") } // alias without AS is parsed correctly: @@ -1013,13 +1013,13 @@ fn parse_column_aliases() { fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after AS, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT 1 FROM foo AS"); assert_eq!( - ParserError::ParserError("Expected an identifier after AS, found: EOF".to_string()), + ParserError::ParserError("Expected: an identifier after 
AS, found: EOF".to_string()), res.unwrap_err() ); } @@ -1104,7 +1104,7 @@ fn parse_not() { fn parse_invalid_infix_not() { let res = parse_sql_statements("SELECT c FROM t WHERE c NOT ("); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err(), ); } @@ -1177,11 +1177,11 @@ fn parse_exponent_in_select() -> Result<(), ParserError> { let select = match select.pop().unwrap() { Statement::Query(inner) => *inner, - _ => panic!("Expected Query"), + _ => panic!("Expected: Query"), }; let select = match *select.body { SetExpr::Select(inner) => *inner, - _ => panic!("Expected SetExpr::Select"), + _ => panic!("Expected: SetExpr::Select"), }; assert_eq!( @@ -1810,7 +1810,7 @@ fn parse_in_error() { let sql = "SELECT * FROM customers WHERE segment in segment"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: segment".to_string()), + ParserError::ParserError("Expected: (, found: segment".to_string()), res.unwrap_err() ); } @@ -2023,14 +2023,14 @@ fn parse_tuple_invalid() { let sql = "select (1"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); let sql = "select (), 2"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ -2442,7 +2442,7 @@ fn parse_extract() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT EXTRACT(JIFFY FROM d)"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), 
res.unwrap_err() ); } @@ -2481,7 +2481,7 @@ fn parse_ceil_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT CEIL(d TO JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2508,7 +2508,7 @@ fn parse_floor_datetime() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements("SELECT FLOOR(d TO JIFFY) FROM df"); assert_eq!( - ParserError::ParserError("Expected date/time field, found: JIFFY".to_string()), + ParserError::ParserError("Expected: date/time field, found: JIFFY".to_string()), res.unwrap_err() ); } @@ -2709,7 +2709,7 @@ fn parse_window_function_null_treatment_arg() { let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected end of statement, found: NULLS".to_string()) + ParserError::ParserError("Expected: end of statement, found: NULLS".to_string()) ); let sql = "SELECT LAG(1 IGNORE NULLS) IGNORE NULLS OVER () FROM t1"; @@ -2717,7 +2717,7 @@ fn parse_window_function_null_treatment_arg() { all_dialects_where(|d| !d.supports_window_function_null_treatment_arg()) .parse_sql_statements(sql) .unwrap_err(), - ParserError::ParserError("Expected ), found: IGNORE".to_string()) + ParserError::ParserError("Expected: ), found: IGNORE".to_string()) ); } @@ -2907,13 +2907,13 @@ fn parse_create_table() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: GARBAGE")); + .contains("Expected: \',\' or \')\' after column definition, found: GARBAGE")); let res = parse_sql_statements("CREATE TABLE t (a int NOT NULL CONSTRAINT foo)"); assert!(res .unwrap_err() .to_string() - .contains("Expected constraint details after CONSTRAINT ")); + 
.contains("Expected: constraint details after CONSTRAINT ")); } #[test] @@ -3052,7 +3052,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: NOT")); + .contains("Expected: \',\' or \')\' after column definition, found: NOT")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3061,7 +3061,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: ENFORCED")); + .contains("Expected: \',\' or \')\' after column definition, found: ENFORCED")); let res = parse_sql_statements("CREATE TABLE t ( a int NOT NULL, @@ -3070,7 +3070,7 @@ fn parse_create_table_with_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected \',\' or \')\' after column definition, found: INITIALLY")); + .contains("Expected: \',\' or \')\' after column definition, found: INITIALLY")); } #[test] @@ -3161,7 +3161,7 @@ fn parse_create_table_column_constraint_characteristics() { assert!(res .unwrap_err() .to_string() - .contains("Expected one of DEFERRED or IMMEDIATE, found: BADVALUE")); + .contains("Expected: one of DEFERRED or IMMEDIATE, found: BADVALUE")); let res = parse_sql_statements( "CREATE TABLE t (a int NOT NULL UNIQUE INITIALLY IMMEDIATE DEFERRABLE INITIALLY DEFERRED)", @@ -3260,7 +3260,7 @@ fn parse_create_table_hive_array() { assert_eq!( dialects.parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected >, found: )".to_string()) + ParserError::ParserError("Expected: >, found: )".to_string()) ); } @@ -4035,7 +4035,7 @@ fn parse_alter_table_alter_column_type() { let res = dialect.parse_sql_statements(&format!("{alter_stmt} ALTER COLUMN is_active TYPE TEXT")); assert_eq!( - ParserError::ParserError("Expected SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN, found: 
TYPE".to_string()), + ParserError::ParserError("Expected: SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN, found: TYPE".to_string()), res.unwrap_err() ); @@ -4043,7 +4043,7 @@ fn parse_alter_table_alter_column_type() { "{alter_stmt} ALTER COLUMN is_active SET DATA TYPE TEXT USING 'text'" )); assert_eq!( - ParserError::ParserError("Expected end of statement, found: USING".to_string()), + ParserError::ParserError("Expected: end of statement, found: USING".to_string()), res.unwrap_err() ); } @@ -4082,7 +4082,7 @@ fn parse_alter_table_drop_constraint() { let res = parse_sql_statements(&format!("{alter_stmt} DROP CONSTRAINT is_active TEXT")); assert_eq!( - ParserError::ParserError("Expected end of statement, found: TEXT".to_string()), + ParserError::ParserError("Expected: end of statement, found: TEXT".to_string()), res.unwrap_err() ); } @@ -4091,14 +4091,14 @@ fn parse_alter_table_drop_constraint() { fn parse_bad_constraint() { let res = parse_sql_statements("ALTER TABLE tab ADD"); assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CREATE TABLE tab (foo int,"); assert_eq!( ParserError::ParserError( - "Expected column name or constraint definition, found: EOF".to_string() + "Expected: column name or constraint definition, found: EOF".to_string() ), res.unwrap_err() ); @@ -4440,7 +4440,7 @@ fn parse_window_clause() { let dialects = all_dialects_except(|d| d.is::() || d.is::()); let res = dialects.parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected (, found: window2".to_string()), + ParserError::ParserError("Expected: (, found: window2".to_string()), res.unwrap_err() ); } @@ -4851,13 +4851,13 @@ fn parse_interval() { let result = parse_sql_statements("SELECT INTERVAL '1' SECOND TO SECOND"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 
SECOND".to_string()), + ParserError::ParserError("Expected: end of statement, found: SECOND".to_string()), result.unwrap_err(), ); let result = parse_sql_statements("SELECT INTERVAL '10' HOUR (1) TO HOUR (2)"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: (".to_string()), + ParserError::ParserError("Expected: end of statement, found: (".to_string()), result.unwrap_err(), ); @@ -5198,13 +5198,13 @@ fn parse_table_function() { let res = parse_sql_statements("SELECT * FROM TABLE '1' AS a"); assert_eq!( - ParserError::ParserError("Expected (, found: \'1\'".to_string()), + ParserError::ParserError("Expected: (, found: \'1\'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT * FROM TABLE (FUN(a) AS a"); assert_eq!( - ParserError::ParserError("Expected ), found: AS".to_string()), + ParserError::ParserError("Expected: ), found: AS".to_string()), res.unwrap_err() ); } @@ -5752,7 +5752,7 @@ fn parse_natural_join() { let sql = "SELECT * FROM t1 natural"; assert_eq!( - ParserError::ParserError("Expected a join type after NATURAL, found: EOF".to_string()), + ParserError::ParserError("Expected: a join type after NATURAL, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); } @@ -5833,7 +5833,7 @@ fn parse_join_syntax_variants() { let res = parse_sql_statements("SELECT * FROM a OUTER JOIN b ON 1"); assert_eq!( - ParserError::ParserError("Expected APPLY, found: JOIN".to_string()), + ParserError::ParserError("Expected: APPLY, found: JOIN".to_string()), res.unwrap_err() ); } @@ -5871,7 +5871,7 @@ fn parse_ctes() { Expr::Subquery(ref subquery) => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()); } - _ => panic!("Expected subquery"), + _ => panic!("Expected: subquery"), } // CTE in a derived table let sql = &format!("SELECT * FROM ({with})"); @@ -5880,13 +5880,13 @@ fn parse_ctes() { TableFactor::Derived { subquery, .. 
} => { assert_ctes_in_select(&cte_sqls, subquery.as_ref()) } - _ => panic!("Expected derived table"), + _ => panic!("Expected: derived table"), } // CTE in a view let sql = &format!("CREATE VIEW v AS {with}"); match verified_stmt(sql) { Statement::CreateView { query, .. } => assert_ctes_in_select(&cte_sqls, &query), - _ => panic!("Expected CREATE VIEW"), + _ => panic!("Expected: CREATE VIEW"), } // CTE in a CTE... let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte"); @@ -6047,7 +6047,7 @@ fn parse_multiple_statements() { // Check that forgetting the semicolon results in an error: let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest)); assert_eq!( - ParserError::ParserError("Expected end of statement, found: ".to_string() + sql2_kw), + ParserError::ParserError("Expected: end of statement, found: ".to_string() + sql2_kw), res.unwrap_err() ); } @@ -6102,7 +6102,7 @@ fn parse_overlay() { "SELECT OVERLAY('abccccde' PLACING 'abc' FROM 3 FOR 12)", ); assert_eq!( - ParserError::ParserError("Expected PLACING, found: FROM".to_owned()), + ParserError::ParserError("Expected: PLACING, found: FROM".to_owned()), parse_sql_statements("SELECT OVERLAY('abccccde' FROM 3)").unwrap_err(), ); @@ -6151,7 +6151,7 @@ fn parse_trim() { ); assert_eq!( - ParserError::ParserError("Expected ), found: 'xyz'".to_owned()), + ParserError::ParserError("Expected: ), found: 'xyz'".to_owned()), parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); @@ -6173,7 +6173,7 @@ fn parse_trim() { options: None, }; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), all_expected_snowflake .parse_sql_statements("SELECT TRIM('xyz', 'a')") .unwrap_err() @@ -6210,7 +6210,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS ("); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: 
EOF".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: EOF".to_string() ), res.unwrap_err(), ); @@ -6219,7 +6219,7 @@ fn parse_exists_subquery() { .parse_sql_statements("SELECT EXISTS (NULL)"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: NULL".to_string() ), res.unwrap_err(), ); @@ -6581,7 +6581,7 @@ fn parse_drop_table() { let sql = "DROP TABLE"; assert_eq!( - ParserError::ParserError("Expected identifier, found: EOF".to_string()), + ParserError::ParserError("Expected: identifier, found: EOF".to_string()), parse_sql_statements(sql).unwrap_err(), ); @@ -6613,7 +6613,7 @@ fn parse_drop_view() { fn parse_invalid_subquery_without_parens() { let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: 1".to_string()), + ParserError::ParserError("Expected: end of statement, found: 1".to_string()), res.unwrap_err() ); } @@ -6826,7 +6826,7 @@ fn lateral_derived() { let sql = "SELECT * FROM LATERAL UNNEST ([10,20,30]) as numbers WITH OFFSET;"; let res = parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected end of statement, found: WITH".to_string()), + ParserError::ParserError("Expected: end of statement, found: WITH".to_string()), res.unwrap_err() ); @@ -6834,7 +6834,7 @@ fn lateral_derived() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: b".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: b".to_string() ), res.unwrap_err() ); @@ -6952,19 +6952,19 @@ fn parse_start_transaction() { let res = parse_sql_statements("START TRANSACTION ISOLATION LEVEL BAD"); assert_eq!( - ParserError::ParserError("Expected isolation level, found: BAD".to_string()), + 
ParserError::ParserError("Expected: isolation level, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION BAD"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: BAD".to_string()), + ParserError::ParserError("Expected: end of statement, found: BAD".to_string()), res.unwrap_err() ); let res = parse_sql_statements("START TRANSACTION READ ONLY,"); assert_eq!( - ParserError::ParserError("Expected transaction mode, found: EOF".to_string()), + ParserError::ParserError("Expected: transaction mode, found: EOF".to_string()), res.unwrap_err() ); } @@ -7050,8 +7050,8 @@ fn parse_set_variable() { } let error_sqls = [ - ("SET (a, b, c) = (1, 2, 3", "Expected ), found: EOF"), - ("SET (a, b, c) = 1, 2, 3", "Expected (, found: 1"), + ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), + ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), ]; for (sql, error) in error_sqls { assert_eq!( @@ -8051,19 +8051,19 @@ fn parse_offset_and_limit() { // Can't repeat OFFSET / LIMIT let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar LIMIT 2 LIMIT 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: LIMIT".to_string()), + ParserError::ParserError("Expected: end of statement, found: LIMIT".to_string()), res.unwrap_err() ); let res = parse_sql_statements("SELECT foo FROM bar OFFSET 2 LIMIT 2 OFFSET 2"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: OFFSET".to_string()), + ParserError::ParserError("Expected: end of statement, found: OFFSET".to_string()), res.unwrap_err() ); } @@ -8132,7 +8132,7 @@ fn parse_position_negative() { let sql = "SELECT POSITION(foo IN) from bar"; let res = 
parse_sql_statements(sql); assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_string()), + ParserError::ParserError("Expected: an expression:, found: )".to_string()), res.unwrap_err() ); } @@ -8190,7 +8190,7 @@ fn parse_is_boolean() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" .to_string() ), res.unwrap_err() @@ -8383,7 +8383,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8391,7 +8391,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8399,7 +8399,7 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE TABLE 'table_name' AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); @@ -8407,26 +8407,26 @@ fn parse_cache_table() { let res = parse_sql_statements("CACHE flag TABLE 'table_name' OPTIONS('K1'='V1') AS foo"); assert_eq!( ParserError::ParserError( - "Expected SELECT, VALUES, or a subquery in the query body, found: foo".to_string() + "Expected: SELECT, VALUES, or a subquery in the query body, found: foo".to_string() ), res.unwrap_err() ); let res = parse_sql_statements("CACHE 
'table_name'"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: OPTIONS".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: OPTIONS".to_string()), res.unwrap_err() ); let res = parse_sql_statements("CACHE flag 'table_name' OPTIONS('K1'='V1')"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); } @@ -8451,19 +8451,19 @@ fn parse_uncache_table() { let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected an `EOF`, found: foo".to_string()), + ParserError::ParserError("Expected: an `EOF`, found: foo".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE IF EXISTS 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected a `TABLE` keyword, found: IF".to_string()), + ParserError::ParserError("Expected: a `TABLE` keyword, found: IF".to_string()), res.unwrap_err() ); } @@ -8927,7 +8927,7 @@ fn parse_trailing_comma() { .parse_sql_statements("CREATE TABLE employees (name text, age int,)") .unwrap_err(), ParserError::ParserError( - "Expected column name or constraint definition, found: )".to_string() + "Expected: column name or constraint definition, found: )".to_string() ) ); } @@ -8955,7 +8955,7 @@ fn 
parse_projection_trailing_comma() { trailing_commas .parse_sql_statements("SELECT * FROM track ORDER BY milliseconds,") .unwrap_err(), - ParserError::ParserError("Expected an expression:, found: EOF".to_string()) + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()) ); assert_eq!( @@ -8963,7 +8963,7 @@ fn parse_projection_trailing_comma() { .parse_sql_statements("CREATE TABLE employees (name text, age int,)") .unwrap_err(), ParserError::ParserError( - "Expected column name or constraint definition, found: )".to_string() + "Expected: column name or constraint definition, found: )".to_string() ), ); } @@ -9962,14 +9962,14 @@ fn tests_select_values_without_parens_and_set_op() { assert_eq!(SetOperator::Union, op); match *left { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } match *right { SetExpr::Select(_) => {} - _ => panic!("Expected a SELECT statement"), + _ => panic!("Expected: a SELECT statement"), } } - _ => panic!("Expected a SET OPERATION"), + _ => panic!("Expected: a SET OPERATION"), } } @@ -10003,7 +10003,7 @@ fn parse_select_wildcard_with_except() { .parse_sql_statements("SELECT * EXCEPT () FROM employee_table") .unwrap_err() .to_string(), - "sql parser error: Expected identifier, found: )" + "sql parser error: Expected: identifier, found: )" ); } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 430647ded8..90056f0f72 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -64,7 +64,7 @@ fn test_databricks_exists() { let res = databricks().parse_sql_statements("SELECT EXISTS ("); assert_eq!( // TODO: improve this error message... 
- ParserError::ParserError("Expected an expression:, found: EOF".to_string()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_string()), res.unwrap_err(), ); } diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index b661b6cd3e..a5a6e24357 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -284,7 +284,7 @@ fn set_statement_with_minus() { assert_eq!( hive().parse_sql_statements("SET hive.tez.java.opts = -"), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )) ) } @@ -327,14 +327,14 @@ fn parse_create_function() { assert_eq!( unsupported_dialects.parse_sql_statements(sql).unwrap_err(), ParserError::ParserError( - "Expected an object type after CREATE, found: FUNCTION".to_string() + "Expected: an object type after CREATE, found: FUNCTION".to_string() ) ); let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR"; assert_eq!( hive().parse_sql_statements(sql).unwrap_err(), - ParserError::ParserError("Expected literal string, found: EOF".to_string()), + ParserError::ParserError("Expected: literal string, found: EOF".to_string()), ); } @@ -398,7 +398,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } hive().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 86d3990f61..f570de11de 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -481,7 +481,7 @@ fn parse_convert() { let error_sql = "SELECT CONVERT(INT, 'foo',) FROM T"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: )".to_owned()), + ParserError::ParserError("Expected: an expression:, found: )".to_owned()), 
ms().parse_sql_statements(error_sql).unwrap_err() ); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ff8a49de72..a25f4c2084 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2518,7 +2518,7 @@ fn parse_fulltext_expression() { } #[test] -#[should_panic = "Expected FULLTEXT or SPATIAL option without constraint name, found: cons"] +#[should_panic = "Expected: FULLTEXT or SPATIAL option without constraint name, found: cons"] fn parse_create_table_with_fulltext_definition_should_not_accept_constraint_name() { mysql_and_generic().verified_stmt("CREATE TABLE tb (c1 INT, CONSTRAINT cons FULLTEXT (c1))"); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe735b8b2e..63c53227ad 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -648,7 +648,7 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED ( INCREMENT 1 MINVALUE 1 )", ); assert_eq!( - ParserError::ParserError("Expected AS, found: (".to_string()), + ParserError::ParserError("Expected: AS, found: (".to_string()), res.unwrap_err() ); @@ -656,14 +656,14 @@ fn parse_alter_table_alter_column_add_generated() { "ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ( INCREMENT )", ); assert_eq!( - ParserError::ParserError("Expected a value, found: )".to_string()), + ParserError::ParserError("Expected: a value, found: )".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("ALTER TABLE t ALTER COLUMN id ADD GENERATED AS IDENTITY ("); assert_eq!( - ParserError::ParserError("Expected ), found: EOF".to_string()), + ParserError::ParserError("Expected: ), found: EOF".to_string()), res.unwrap_err() ); } @@ -733,25 +733,25 @@ fn parse_create_table_if_not_exists() { fn parse_bad_if_not_exists() { let res = pg().parse_sql_statements("CREATE TABLE NOT EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + 
ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF EXISTS uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXISTS".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXISTS".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: uk_cities".to_string()), + ParserError::ParserError("Expected: end of statement, found: uk_cities".to_string()), res.unwrap_err() ); let res = pg().parse_sql_statements("CREATE TABLE IF NOT uk_cities ()"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: NOT".to_string()), + ParserError::ParserError("Expected: end of statement, found: NOT".to_string()), res.unwrap_err() ); } @@ -1300,21 +1300,21 @@ fn parse_set() { assert_eq!( pg_and_generic().parse_sql_statements("SET"), Err(ParserError::ParserError( - "Expected identifier, found: EOF".to_string() + "Expected: identifier, found: EOF".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a b"), Err(ParserError::ParserError( - "Expected equals sign or TO, found: b".to_string() + "Expected: equals sign or TO, found: b".to_string() )), ); assert_eq!( pg_and_generic().parse_sql_statements("SET a ="), Err(ParserError::ParserError( - "Expected variable value, found: EOF".to_string() + "Expected: variable value, found: EOF".to_string() )), ); } @@ -2685,7 +2685,7 @@ fn parse_json_table_is_not_reserved() { name: ObjectName(name), .. 
} => assert_eq!("JSON_TABLE", name[0].value), - other => panic!("Expected JSON_TABLE to be parsed as a table name, but got {other:?}"), + other => panic!("Expected: JSON_TABLE to be parsed as a table name, but got {other:?}"), } } @@ -2874,7 +2874,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated encoded string literal at Line: 1, Column: 8" ); let sql = r"SELECT E'\u0001', E'\U0010FFFF', E'\xC', E'\x25', E'\2', E'\45', E'\445'"; @@ -2917,7 +2917,7 @@ fn parse_escaped_literal_string() { .parse_sql_statements(sql) .unwrap_err() .to_string(), - "sql parser error: Unterminated encoded string literal at Line: 1, Column 8" + "sql parser error: Unterminated encoded string literal at Line: 1, Column: 8" ); } } @@ -3455,7 +3455,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } pg().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index f0a7c7735f..160bbcbd59 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -385,14 +385,14 @@ fn test_snowflake_create_invalid_local_global_table() { assert_eq!( snowflake().parse_sql_statements("CREATE LOCAL GLOBAL TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an SQL statement, found: LOCAL".to_string() + "Expected: an SQL statement, found: LOCAL".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE GLOBAL LOCAL TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an SQL statement, found: GLOBAL".to_string() + "Expected: an SQL statement, found: GLOBAL".to_string() )) ); } @@ -402,21 +402,21 @@ fn 
test_snowflake_create_invalid_temporal_table() { assert_eq!( snowflake().parse_sql_statements("CREATE TEMP TEMPORARY TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: TEMPORARY".to_string() + "Expected: an object type after CREATE, found: TEMPORARY".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE TEMP VOLATILE TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: VOLATILE".to_string() + "Expected: an object type after CREATE, found: VOLATILE".to_string() )) ); assert_eq!( snowflake().parse_sql_statements("CREATE TEMP TRANSIENT TABLE my_table (a INT)"), Err(ParserError::ParserError( - "Expected an object type after CREATE, found: TRANSIENT".to_string() + "Expected: an object type after CREATE, found: TRANSIENT".to_string() )) ); } @@ -851,7 +851,7 @@ fn parse_semi_structured_data_traversal() { .parse_sql_statements("SELECT a:42") .unwrap_err() .to_string(), - "sql parser error: Expected variant object key name, found: 42" + "sql parser error: Expected: variant object key name, found: 42" ); } @@ -908,7 +908,7 @@ fn parse_delimited_identifiers() { assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); assert_eq!(&Ident::with_quote('"', "column alias"), alias); } - _ => panic!("Expected ExprWithAlias"), + _ => panic!("Expected: ExprWithAlias"), } snowflake().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); @@ -1034,7 +1034,7 @@ fn test_select_wildcard_with_exclude_and_rename() { .parse_sql_statements("SELECT * RENAME col_a AS col_b EXCLUDE col_z FROM data") .unwrap_err() .to_string(), - "sql parser error: Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" ); } @@ -1134,13 +1134,13 @@ fn parse_snowflake_declare_cursor() { let error_sql = "DECLARE c1 CURSOR SELECT id FROM invoices"; assert_eq!( - ParserError::ParserError("Expected FOR, found: SELECT".to_owned()), + 
ParserError::ParserError("Expected: FOR, found: SELECT".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE c1 CURSOR res"; assert_eq!( - ParserError::ParserError("Expected FOR, found: res".to_owned()), + ParserError::ParserError("Expected: FOR, found: res".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1188,13 +1188,13 @@ fn parse_snowflake_declare_result_set() { let error_sql = "DECLARE res RESULTSET DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE res RESULTSET :="; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1280,19 +1280,19 @@ fn parse_snowflake_declare_variable() { let error_sql = "DECLARE profit INT 2"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: 2".to_owned()), + ParserError::ParserError("Expected: end of statement, found: 2".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit INT DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); let error_sql = "DECLARE profit DEFAULT"; assert_eq!( - ParserError::ParserError("Expected an expression:, found: EOF".to_owned()), + ParserError::ParserError("Expected: an expression:, found: EOF".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1327,7 +1327,7 @@ fn parse_snowflake_declare_multi_statements() { let error_sql = "DECLARE profit DEFAULT 42 c1 
CURSOR FOR res;"; assert_eq!( - ParserError::ParserError("Expected end of statement, found: c1".to_owned()), + ParserError::ParserError("Expected: end of statement, found: c1".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -1902,7 +1902,7 @@ fn test_snowflake_trim() { // missing comma separation let error_sql = "SELECT TRIM('xyz' 'a')"; assert_eq!( - ParserError::ParserError("Expected ), found: 'a'".to_owned()), + ParserError::ParserError("Expected: ), found: 'a'".to_owned()), snowflake().parse_sql_statements(error_sql).unwrap_err() ); } @@ -2064,7 +2064,7 @@ fn test_select_wildcard_with_ilike_double_quote() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE "%id" FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: \"%id\"" + "sql parser error: Expected: ilike pattern, found: \"%id\"" ); } @@ -2073,7 +2073,7 @@ fn test_select_wildcard_with_ilike_number() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE 42 FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected ilike pattern, found: 42" + "sql parser error: Expected: ilike pattern, found: 42" ); } @@ -2082,7 +2082,7 @@ fn test_select_wildcard_with_ilike_replace() { let res = snowflake().parse_sql_statements(r#"SELECT * ILIKE '%id%' EXCLUDE col FROM tbl"#); assert_eq!( res.unwrap_err().to_string(), - "sql parser error: Expected end of statement, found: EXCLUDE" + "sql parser error: Expected: end of statement, found: EXCLUDE" ); } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 1181c480b9..e329abae75 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -428,7 +428,7 @@ fn invalid_empty_list() { let sql = "SELECT * FROM t1 WHERE a IN (,,)"; let sqlite = sqlite_with_options(ParserOptions::new().with_trailing_commas(true)); assert_eq!( - "sql parser error: Expected an expression:, found: ,", + "sql parser error: Expected: an expression:, 
found: ,", sqlite.parse_sql_statements(sql).unwrap_err().to_string() ); } @@ -452,17 +452,17 @@ fn parse_start_transaction_with_modifier() { }; let res = unsupported_dialects.parse_sql_statements("BEGIN DEFERRED"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: DEFERRED".to_string()), + ParserError::ParserError("Expected: end of statement, found: DEFERRED".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN IMMEDIATE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: IMMEDIATE".to_string()), + ParserError::ParserError("Expected: end of statement, found: IMMEDIATE".to_string()), res.unwrap_err(), ); let res = unsupported_dialects.parse_sql_statements("BEGIN EXCLUSIVE"); assert_eq!( - ParserError::ParserError("Expected end of statement, found: EXCLUSIVE".to_string()), + ParserError::ParserError("Expected: end of statement, found: EXCLUSIVE".to_string()), res.unwrap_err(), ); } From f3d2f78fb2b7ebdc539b0bec0be535b3d1b9d88f Mon Sep 17 00:00:00 2001 From: Bidaya0 Date: Sun, 23 Jun 2024 19:13:16 +0800 Subject: [PATCH 23/53] Support `TO` in `CREATE VIEW` clause for Clickhouse (#1313) Co-authored-by: Ifeanyi Ubah Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 12 ++++++++++-- src/parser/mod.rs | 9 +++++++++ tests/sqlparser_bigquery.rs | 1 + tests/sqlparser_clickhouse.rs | 15 +++++++++++++++ tests/sqlparser_common.rs | 14 ++++++++++++++ tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 7 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 769bda5989..70190b35b7 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2029,6 +2029,9 @@ pub enum Statement { if_not_exists: bool, /// if true, has SQLite `TEMP` or `TEMPORARY` clause temporary: bool, + /// if not None, has Clickhouse `TO` clause, specify the table into which to insert results + /// + to: Option, }, /// ```sql /// CREATE TABLE @@ -3329,15 +3332,20 @@ impl 
fmt::Display for Statement { with_no_schema_binding, if_not_exists, temporary, + to, } => { write!( f, - "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}", + "CREATE {or_replace}{materialized}{temporary}VIEW {if_not_exists}{name}{to}", or_replace = if *or_replace { "OR REPLACE " } else { "" }, materialized = if *materialized { "MATERIALIZED " } else { "" }, name = name, temporary = if *temporary { "TEMPORARY " } else { "" }, - if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" } + if_not_exists = if *if_not_exists { "IF NOT EXISTS " } else { "" }, + to = to + .as_ref() + .map(|to| format!(" TO {to}")) + .unwrap_or_default() )?; if let Some(comment) = comment { write!( diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 27520a6c44..c568640a99 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4172,6 +4172,14 @@ impl<'a> Parser<'a> { }; } + let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::TO) + { + Some(self.parse_object_name(false)?) + } else { + None + }; + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) && self.parse_keyword(Keyword::COMMENT) { @@ -4209,6 +4217,7 @@ impl<'a> Parser<'a> { with_no_schema_binding, if_not_exists, temporary, + to, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ec4ddca96f..88e2ef9129 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -315,6 +315,7 @@ fn parse_create_view_if_not_exists() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. 
} => { assert_eq!("mydataset.newview", name.to_string()); assert_eq!(Vec::::new(), columns); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index ed3b2de22d..5cd4832422 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -561,6 +561,21 @@ fn parse_select_star_except_no_parens() { ); } +#[test] +fn parse_create_materialized_view() { + // example sql + // https://clickhouse.com/docs/en/guides/developer/cascading-materialized-views + let sql = concat!( + "CREATE MATERIALIZED VIEW analytics.monthly_aggregated_data_mv ", + "TO analytics.monthly_aggregated_data ", + "AS SELECT toDate(toStartOfMonth(event_time)) ", + "AS month, domain_name, sumState(count_views) ", + "AS sumCountViews FROM analytics.hourly_data ", + "GROUP BY domain_name, month" + ); + clickhouse_and_generic().verified_stmt(sql); +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0149bad5d5..f7162ddef8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6279,6 +6279,7 @@ fn parse_create_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6291,6 +6292,7 @@ fn parse_create_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6335,6 +6337,7 @@ fn parse_create_view_with_columns() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!( @@ -6357,6 +6360,7 @@ fn parse_create_view_with_columns() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6378,6 +6382,7 @@ fn parse_create_view_temporary() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } 
=> { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6390,6 +6395,7 @@ fn parse_create_view_temporary() { assert!(!late_binding); assert!(!if_not_exists); assert!(temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6411,6 +6417,7 @@ fn parse_create_or_replace_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6423,6 +6430,7 @@ fn parse_create_or_replace_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6448,6 +6456,7 @@ fn parse_create_or_replace_materialized_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); @@ -6460,6 +6469,7 @@ fn parse_create_or_replace_materialized_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6481,6 +6491,7 @@ fn parse_create_materialized_view() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6493,6 +6504,7 @@ fn parse_create_materialized_view() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } @@ -6514,6 +6526,7 @@ fn parse_create_materialized_view_with_cluster_by() { with_no_schema_binding: late_binding, if_not_exists, temporary, + to, } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); @@ -6526,6 +6539,7 @@ fn parse_create_materialized_view_with_cluster_by() { assert!(!late_binding); assert!(!if_not_exists); assert!(!temporary); + assert!(to.is_none()) } _ => unreachable!(), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 160bbcbd59..b6be2c3f52 100644 --- 
a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -552,6 +552,7 @@ fn parse_sf_create_or_replace_with_comment_for_snowflake() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. } => { assert_eq!("v", name.to_string()); assert_eq!(columns, vec![]); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index e329abae75..3670b17841 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -171,6 +171,7 @@ fn parse_create_view_temporary_if_not_exists() { with_no_schema_binding: late_binding, if_not_exists, temporary, + .. } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!(Vec::::new(), columns); From 7a9793b72e268f6e7e830ec6f4e857878e0b6bc7 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Sun, 23 Jun 2024 13:14:57 +0200 Subject: [PATCH 24/53] Allow semi-colon at the end of UNCACHE statement (#1320) --- src/parser/mod.rs | 22 +++++++--------------- tests/sqlparser_common.rs | 6 +++--- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c568640a99..337c1dac51 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3611,21 +3611,13 @@ impl<'a> Parser<'a> { /// Parse a UNCACHE TABLE statement pub fn parse_uncache_table(&mut self) -> Result { - let has_table = self.parse_keyword(Keyword::TABLE); - if has_table { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::EOF { - Ok(Statement::UNCache { - table_name, - if_exists, - }) - } else { - self.expected("an `EOF`", self.peek_token()) - } - } else { - self.expected("a `TABLE` keyword", self.peek_token()) - } + self.expect_keyword(Keyword::TABLE)?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + Ok(Statement::UNCache { + table_name, + if_exists, 
+ }) } /// SQLite-specific `CREATE VIRTUAL TABLE` diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f7162ddef8..0f5afb3413 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8465,19 +8465,19 @@ fn parse_uncache_table() { let res = parse_sql_statements("UNCACHE TABLE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: an `EOF`, found: foo".to_string()), + ParserError::ParserError("Expected: end of statement, found: foo".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: a `TABLE` keyword, found: 'table_name'".to_string()), + ParserError::ParserError("Expected: TABLE, found: 'table_name'".to_string()), res.unwrap_err() ); let res = parse_sql_statements("UNCACHE IF EXISTS 'table_name' foo"); assert_eq!( - ParserError::ParserError("Expected: a `TABLE` keyword, found: IF".to_string()), + ParserError::ParserError("Expected: TABLE, found: IF".to_string()), res.unwrap_err() ); } From a685e1199355b0150fd5a4f6c7b938ecc07a6818 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 23 Jun 2024 19:36:05 +0800 Subject: [PATCH 25/53] Support parametric arguments to `FUNCTION` for ClickHouse dialect (#1315) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 12 ++++++++- src/ast/visitor.rs | 1 + src/parser/mod.rs | 33 ++++++++++++++++++++--- src/test_utils.rs | 1 + tests/sqlparser_clickhouse.rs | 50 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 14 ++++++++++ tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_postgres.rs | 7 +++++ tests/sqlparser_redshift.rs | 1 + tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 1 + 13 files changed, 119 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 70190b35b7..8182d11445 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4695,6 +4695,16 @@ impl fmt::Display for CloseCursor { 
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { pub name: ObjectName, + /// The parameters to the function, including any options specified within the + /// delimiting parentheses. + /// + /// Example: + /// ```plaintext + /// HISTOGRAM(0.5, 0.6)(x, y) + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/parametric-functions) + pub parameters: FunctionArguments, /// The arguments to the function, including any options specified within the /// delimiting parentheses. pub args: FunctionArguments, @@ -4723,7 +4733,7 @@ pub struct Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}{}", self.name, self.args)?; + write!(f, "{}{}{}", self.name, self.parameters, self.args)?; if !self.within_group.is_empty() { write!( diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 57dcca2e54..1b8a43802b 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -533,6 +533,7 @@ where /// null_treatment: None, /// filter: None, /// over: None, +/// parameters: FunctionArguments::None, /// within_group: vec![], /// }); /// } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 337c1dac51..5376099733 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -27,6 +27,7 @@ use core::{ use log::debug; +use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; @@ -146,8 +147,6 @@ mod recursion { pub struct DepthGuard {} } -use recursion::RecursionCounter; - #[derive(PartialEq, Eq)] pub enum IsOptional { Optional, @@ -1002,6 +1001,7 @@ impl<'a> Parser<'a> { { Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -1058,6 +1058,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(Expr::Function(Function { name: ObjectName(vec![w.to_ident()]), + parameters: FunctionArguments::None, args: 
FunctionArguments::Subquery(query), filter: None, null_treatment: None, @@ -1293,6 +1294,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; return Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(subquery), filter: None, null_treatment: None, @@ -1301,7 +1303,16 @@ impl<'a> Parser<'a> { })); } - let args = self.parse_function_argument_list()?; + let mut args = self.parse_function_argument_list()?; + let mut parameters = FunctionArguments::None; + // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` + // which (0.5, 0.6) is a parameter to the function. + if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + parameters = FunctionArguments::List(args); + args = self.parse_function_argument_list()?; + } let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { self.expect_token(&Token::LParen)?; @@ -1350,6 +1361,7 @@ impl<'a> Parser<'a> { Ok(Expr::Function(Function { name, + parameters, args: FunctionArguments::List(args), null_treatment, filter, @@ -1382,6 +1394,7 @@ impl<'a> Parser<'a> { }; Ok(Expr::Function(Function { name, + parameters: FunctionArguments::None, args, filter: None, over: None, @@ -6470,6 +6483,7 @@ impl<'a> Parser<'a> { } else { Ok(Statement::Call(Function { name: object_name, + parameters: FunctionArguments::None, args: FunctionArguments::None, over: None, filter: None, @@ -8092,7 +8106,7 @@ impl<'a> Parser<'a> { pub fn parse_query_body(&mut self, precedence: u8) -> Result { // We parse the expression using a Pratt parser, as in `parse_expr()`. // Start by parsing a restricted SELECT or a `(subquery)`: - let mut expr = if self.parse_keyword(Keyword::SELECT) { + let expr = if self.parse_keyword(Keyword::SELECT) { SetExpr::Select(self.parse_select().map(Box::new)?) 
} else if self.consume_token(&Token::LParen) { // CTEs are not allowed here, but the parser currently accepts them @@ -8111,6 +8125,17 @@ impl<'a> Parser<'a> { ); }; + self.parse_remaining_set_exprs(expr, precedence) + } + + /// Parse any extra set expressions that may be present in a query body + /// + /// (this is its own function to reduce required stack size in debug builds) + fn parse_remaining_set_exprs( + &mut self, + mut expr: SetExpr, + precedence: u8, + ) -> Result { loop { // The query can be optionally followed by a set operator: let op = self.parse_set_operator(&self.peek_token().token); diff --git a/src/test_utils.rs b/src/test_utils.rs index 9af9c80986..1a31d4611e 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -336,6 +336,7 @@ pub fn join(relation: TableFactor) -> Join { pub fn call(function: &str, args: impl IntoIterator) -> Expr { Expr::Function(Function { name: ObjectName(vec![Ident::new(function)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: args diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 5cd4832422..50d4faf5d2 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -183,6 +183,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -553,6 +554,55 @@ fn parse_select_star_except() { clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); } +#[test] +fn parse_select_parametric_function() { + match clickhouse_and_generic().verified_stmt("SELECT HISTOGRAM(0.5, 0.6)(x, y) FROM t") { + Statement::Query(query) => { + let projection: &Vec = query.body.as_select().unwrap().projection.as_ref(); + assert_eq!(projection.len(), 1); + match &projection[0] { + 
UnnamedExpr(Expr::Function(f)) => { + let args = match &f.args { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(args.args.len(), 2); + assert_eq!( + args.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("x")))) + ); + assert_eq!( + args.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Identifier(Ident::from("y")))) + ); + + let parameters = match f.parameters { + FunctionArguments::List(ref args) => args, + _ => unreachable!(), + }; + assert_eq!(parameters.args.len(), 2); + assert_eq!( + parameters.args[0], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.5".parse().unwrap(), + false + )))) + ); + assert_eq!( + parameters.args[1], + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(Value::Number( + "0.6".parse().unwrap(), + false + )))) + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_select_star_except_no_parens() { clickhouse().one_statement_parses_to( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 0f5afb3413..76e6a98bbd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1045,6 +1045,7 @@ fn parse_select_count_wildcard() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -1066,6 +1067,7 @@ fn parse_select_count_distinct() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { @@ -2151,6 +2153,7 @@ fn parse_select_having() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: 
ObjectName(vec![Ident::new("COUNT")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard)], @@ -2180,6 +2183,7 @@ fn parse_select_qualify() { Some(Expr::BinaryOp { left: Box::new(Expr::Function(Function { name: ObjectName(vec![Ident::new("ROW_NUMBER")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -2523,6 +2527,7 @@ fn parse_listagg() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("LISTAGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: Some(DuplicateTreatment::Distinct), args: vec![ @@ -4227,6 +4232,7 @@ fn parse_named_argument_function() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4265,6 +4271,7 @@ fn parse_named_argument_function_with_eq_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ @@ -4337,6 +4344,7 @@ fn parse_window_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("row_number")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -4465,6 +4473,7 @@ fn test_parse_named_window() { value: "MIN".to_string(), quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -4494,6 +4503,7 @@ fn test_parse_named_window() { value: "MAX".to_string(), 
quote_style: None, }]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -8089,6 +8099,7 @@ fn parse_time_functions() { let select = verified_only_select(&sql); let select_localtime_func_call_ast = Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], @@ -9017,6 +9028,7 @@ fn parse_call() { assert_eq!( verified_stmt("CALL my_procedure('a')"), Statement::Call(Function { + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value( @@ -9418,6 +9430,7 @@ fn test_selective_aggregation() { vec![ SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( @@ -9435,6 +9448,7 @@ fn test_selective_aggregation() { SelectItem::ExprWithAlias { expr: Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY_AGG")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 8d12945ddd..eaa1faa909 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -488,6 +488,7 @@ fn test_duckdb_named_argument_function_with_assignment_operator() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("FUN")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![ diff --git 
a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index a5a6e24357..53280d7d8e 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -381,6 +381,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f570de11de..5f03bb0939 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -354,6 +354,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 63c53227ad..197597e9bd 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2369,6 +2369,7 @@ fn parse_array_subquery_expr() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("ARRAY")]), + parameters: FunctionArguments::None, args: FunctionArguments::Subquery(Box::new(Query { with: None, body: Box::new(SetExpr::SetOperation { @@ -2729,6 +2730,7 @@ fn test_composite_value() { Ident::new("information_schema"), Ident::new("_pg_expandarray") ]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Array( @@ -2955,6 +2957,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("CURRENT_CATALOG")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2966,6 +2969,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: 
ObjectName(vec![Ident::new("CURRENT_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2977,6 +2981,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("SESSION_USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -2988,6 +2993,7 @@ fn parse_current_functions() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::new("USER")]), + parameters: FunctionArguments::None, args: FunctionArguments::None, null_treatment: None, filter: None, @@ -3438,6 +3444,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 0a5710ff44..938e6e8872 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -136,6 +136,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index b6be2c3f52..5e8fef0c52 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -892,6 +892,7 @@ fn parse_delimited_identifiers() { assert_eq!( &Expr::Function(Function { name: ObjectName(vec![Ident::with_quote('"', "myfun")]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![], diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 3670b17841..dd1e77d5df 100644 --- a/tests/sqlparser_sqlite.rs +++ 
b/tests/sqlparser_sqlite.rs @@ -335,6 +335,7 @@ fn parse_window_function_with_filter() { select.projection, vec![SelectItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new(func_name)]), + parameters: FunctionArguments::None, args: FunctionArguments::List(FunctionArgumentList { duplicate_treatment: None, args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( From f5ccef6ea9234dc2b9fcd15dfda2551aced19309 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 27 Jun 2024 15:56:21 +0400 Subject: [PATCH 26/53] Fix Snowflake `SELECT *` wildcard `REPLACE ... RENAME` order (#1321) --- src/ast/query.rs | 13 ++++++------ src/parser/mod.rs | 11 +++++----- tests/sqlparser_snowflake.rs | 39 ++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index fcd5b970d1..0fde3e6b73 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -547,19 +547,20 @@ impl fmt::Display for IdentWithAlias { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WildcardAdditionalOptions { /// `[ILIKE...]`. - /// Snowflake syntax: + /// Snowflake syntax: pub opt_ilike: Option, /// `[EXCLUDE...]`. pub opt_exclude: Option, /// `[EXCEPT...]`. /// Clickhouse syntax: pub opt_except: Option, - /// `[RENAME ...]`. - pub opt_rename: Option, /// `[REPLACE]` /// BigQuery syntax: /// Clickhouse syntax: + /// Snowflake syntax: pub opt_replace: Option, + /// `[RENAME ...]`. 
+ pub opt_rename: Option, } impl fmt::Display for WildcardAdditionalOptions { @@ -573,12 +574,12 @@ impl fmt::Display for WildcardAdditionalOptions { if let Some(except) = &self.opt_except { write!(f, " {except}")?; } - if let Some(rename) = &self.opt_rename { - write!(f, " {rename}")?; - } if let Some(replace) = &self.opt_replace { write!(f, " {replace}")?; } + if let Some(rename) = &self.opt_rename { + write!(f, " {rename}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5376099733..33095c4286 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10175,15 +10175,14 @@ impl<'a> Parser<'a> { } else { None }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { - self.parse_optional_select_item_rename()? + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_replace()? } else { None }; - - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { - self.parse_optional_select_item_replace()? + let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_rename()? 
} else { None }; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5e8fef0c52..2f4ed1316f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1016,6 +1016,44 @@ fn test_select_wildcard_with_rename() { assert_eq!(expected, select.projection[0]); } +#[test] +fn test_select_wildcard_with_replace_and_rename() { + let select = snowflake_and_generic().verified_only_select( + "SELECT * REPLACE (col_z || col_z AS col_z) RENAME (col_z AS col_zz) FROM data", + ); + let expected = SelectItem::Wildcard(WildcardAdditionalOptions { + opt_replace: Some(ReplaceSelectItem { + items: vec![Box::new(ReplaceSelectElement { + expr: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col_z"))), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Identifier(Ident::new("col_z"))), + }, + column_name: Ident::new("col_z"), + as_keyword: true, + })], + }), + opt_rename: Some(RenameSelectItem::Multiple(vec![IdentWithAlias { + ident: Ident::new("col_z"), + alias: Ident::new("col_zz"), + }])), + ..Default::default() + }); + assert_eq!(expected, select.projection[0]); + + // rename cannot precede replace + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters + assert_eq!( + snowflake_and_generic() + .parse_sql_statements( + "SELECT * RENAME (col_z AS col_zz) REPLACE (col_z || col_z AS col_z) FROM data" + ) + .unwrap_err() + .to_string(), + "sql parser error: Expected: end of statement, found: REPLACE" + ); +} + #[test] fn test_select_wildcard_with_exclude_and_rename() { let select = snowflake_and_generic() @@ -1031,6 +1069,7 @@ fn test_select_wildcard_with_exclude_and_rename() { assert_eq!(expected, select.projection[0]); // rename cannot precede exclude + // https://docs.snowflake.com/en/sql-reference/sql/select#parameters assert_eq!( snowflake_and_generic() .parse_sql_statements("SELECT * RENAME col_a AS col_b EXCLUDE col_z FROM data") From f9ab8dcc27fd2d55030b9c5fa71e41d5c08dd601 Mon Sep 17 00:00:00 
2001 From: gstvg <28798827+gstvg@users.noreply.github.com> Date: Thu, 27 Jun 2024 08:58:11 -0300 Subject: [PATCH 27/53] Support for DuckDB Union datatype (#1322) --- src/ast/data_type.rs | 9 +++- src/ast/mod.rs | 17 +++++++ src/parser/mod.rs | 31 +++++++++++++ tests/sqlparser_duckdb.rs | 95 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 151 insertions(+), 1 deletion(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 6b1a542f42..e6477f56bf 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -20,7 +20,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{display_comma_separated, ObjectName, StructField}; +use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField}; use super::{value::escape_single_quote_string, ColumnDef}; @@ -303,6 +303,10 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// Union + /// + /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html + Union(Vec), /// Nullable - special marker NULL represents in ClickHouse as a data type. 
/// /// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nullable @@ -516,6 +520,9 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + DataType::Union(fields) => { + write!(f, "UNION({})", display_comma_separated(fields)) + } // ClickHouse DataType::Nullable(data_type) => { write!(f, "Nullable({})", data_type) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 8182d11445..9ed837825d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -294,6 +294,23 @@ impl fmt::Display for StructField { } } +/// A field definition within a union +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct UnionField { + pub field_name: Ident, + pub field_type: DataType, +} + +impl fmt::Display for UnionField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {}", self.field_name, self.field_type) + } +} + /// A dictionary field within a dictionary. /// /// [duckdb]: https://duckdb.org/docs/sql/data_types/struct#creating-structs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 33095c4286..f583049608 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2246,6 +2246,32 @@ impl<'a> Parser<'a> { )) } + /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs. 
+ /// + /// Syntax: + /// + /// ```sql + /// UNION(field_name field_type[,...]) + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/union.html + fn parse_union_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::UNION)?; + + self.expect_token(&Token::LParen)?; + + let fields = self.parse_comma_separated(|p| { + Ok(UnionField { + field_name: p.parse_identifier(false)?, + field_type: p.parse_data_type()?, + }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(fields) + } + /// DuckDB specific: Parse a duckdb dictionary [1] /// /// Syntax: @@ -7136,6 +7162,11 @@ impl<'a> Parser<'a> { trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + let fields = self.parse_union_type_def()?; + Ok(DataType::Union(fields)) + } Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { Ok(self.parse_sub_type(DataType::Nullable)?) 
} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index eaa1faa909..253318b327 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -545,3 +545,98 @@ fn test_array_index() { expr ); } + +#[test] +fn test_duckdb_union_datatype() { + let sql = "CREATE TABLE tbl1 (one UNION(a INT), two UNION(a INT, b INT), nested UNION(a UNION(b INT)))"; + let stmt = duckdb_and_generic().verified_stmt(sql); + assert_eq!( + Statement::CreateTable(CreateTable { + or_replace: Default::default(), + temporary: Default::default(), + external: Default::default(), + global: Default::default(), + if_not_exists: Default::default(), + transient: Default::default(), + volatile: Default::default(), + name: ObjectName(vec!["tbl1".into()]), + columns: vec![ + ColumnDef { + name: "one".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "two".into(), + data_type: DataType::Union(vec![ + UnionField { + field_name: "a".into(), + field_type: DataType::Int(None) + }, + UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + } + ]), + collation: Default::default(), + options: Default::default() + }, + ColumnDef { + name: "nested".into(), + data_type: DataType::Union(vec![UnionField { + field_name: "a".into(), + field_type: DataType::Union(vec![UnionField { + field_name: "b".into(), + field_type: DataType::Int(None) + }]) + }]), + collation: Default::default(), + options: Default::default() + } + ], + constraints: Default::default(), + hive_distribution: HiveDistributionStyle::NONE, + hive_formats: Some(HiveFormat { + row_format: Default::default(), + serde_properties: Default::default(), + storage: Default::default(), + location: Default::default() + }), + table_properties: Default::default(), + with_options: Default::default(), + file_format: Default::default(), + location: 
Default::default(), + query: Default::default(), + without_rowid: Default::default(), + like: Default::default(), + clone: Default::default(), + engine: Default::default(), + comment: Default::default(), + auto_increment_offset: Default::default(), + default_charset: Default::default(), + collation: Default::default(), + on_commit: Default::default(), + on_cluster: Default::default(), + primary_key: Default::default(), + order_by: Default::default(), + partition_by: Default::default(), + cluster_by: Default::default(), + options: Default::default(), + strict: Default::default(), + copy_grants: Default::default(), + enable_schema_evolution: Default::default(), + change_tracking: Default::default(), + data_retention_time_in_days: Default::default(), + max_data_extension_time_in_days: Default::default(), + default_ddl_collation: Default::default(), + with_aggregation_policy: Default::default(), + with_row_access_policy: Default::default(), + with_tags: Default::default() + }), + stmt + ); +} From 376889ae5de7b4e738dd097ce08b0867475aacbb Mon Sep 17 00:00:00 2001 From: Emil Sivervik Date: Sun, 30 Jun 2024 13:03:08 +0200 Subject: [PATCH 28/53] chore(docs): refine docs (#1326) --- src/parser/mod.rs | 105 +++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f583049608..8696629760 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -71,11 +71,11 @@ mod recursion { use super::ParserError; /// Tracks remaining recursion depth. This value is decremented on - /// each call to `try_decrease()`, when it reaches 0 an error will + /// each call to [`RecursionCounter::try_decrease()`], when it reaches 0 an error will /// be returned. 
/// - /// Note: Uses an Rc and Cell in order to satisfy the Rust - /// borrow checker so the automatic DepthGuard decrement a + /// Note: Uses an [`std::rc::Rc`] and [`std::cell::Cell`] in order to satisfy the Rust + /// borrow checker so the automatic [`DepthGuard`] decrement a /// reference to the counter. pub(crate) struct RecursionCounter { remaining_depth: Rc>, @@ -92,7 +92,7 @@ mod recursion { /// Decreases the remaining depth by 1. /// - /// Returns `Err` if the remaining depth falls to 0. + /// Returns [`Err`] if the remaining depth falls to 0. /// /// Returns a [`DepthGuard`] which will adds 1 to the /// remaining depth upon drop; @@ -131,7 +131,7 @@ mod recursion { /// Implementation [`RecursionCounter`] if std is NOT available (and does not /// guard against stack overflow). /// - /// Has the same API as the std RecursionCounter implementation + /// Has the same API as the std [`RecursionCounter`] implementation /// but does not actually limit stack depth. pub(crate) struct RecursionCounter {} @@ -270,17 +270,17 @@ enum ParserState { pub struct Parser<'a> { tokens: Vec, - /// The index of the first unprocessed token in `self.tokens` + /// The index of the first unprocessed token in [`Parser::tokens`]. index: usize, /// The current state of the parser. state: ParserState, - /// The current dialect to use + /// The current dialect to use. dialect: &'a dyn Dialect, /// Additional options that allow you to mix & match behavior /// otherwise constrained to certain dialects (e.g. trailing - /// commas) and/or format of parse (e.g. unescaping) + /// commas) and/or format of parse (e.g. unescaping). options: ParserOptions, - /// ensure the stack does not overflow by limiting recursion depth + /// Ensure the stack does not overflow by limiting recursion depth. recursion_counter: RecursionCounter, } @@ -313,7 +313,6 @@ impl<'a> Parser<'a> { /// Specify the maximum recursion limit while parsing. 
/// - /// /// [`Parser`] prevents stack overflows by returning /// [`ParserError::RecursionLimitExceeded`] if the parser exceeds /// this depth while processing the query. @@ -338,7 +337,6 @@ impl<'a> Parser<'a> { /// Specify additional parser options /// - /// /// [`Parser`] supports additional options ([`ParserOptions`]) /// that allow you to mix & match behavior otherwise constrained /// to certain dialects (e.g. trailing commas). @@ -824,7 +822,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a new expression including wildcard & qualified wildcard + /// Parse a new expression including wildcard & qualified wildcard. pub fn parse_wildcard_expr(&mut self) -> Result { let index = self.index; @@ -867,13 +865,13 @@ impl<'a> Parser<'a> { self.parse_expr() } - /// Parse a new expression + /// Parse a new expression. pub fn parse_expr(&mut self) -> Result { let _guard = self.recursion_counter.try_decrease()?; self.parse_subexpr(0) } - /// Parse tokens until the precedence changes + /// Parse tokens until the precedence changes. pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; @@ -908,8 +906,7 @@ impl<'a> Parser<'a> { Ok(expr) } - /// Get the precedence of the next token - /// With AND, OR, and XOR + /// Get the precedence of the next token, with AND, OR, and XOR. pub fn get_next_interval_precedence(&self) -> Result { let token = self.peek_token(); @@ -944,7 +941,7 @@ impl<'a> Parser<'a> { Ok(Statement::ReleaseSavepoint { name }) } - /// Parse an expression prefix + /// Parse an expression prefix. pub fn parse_prefix(&mut self) -> Result { // allow the dialect to override prefix parsing if let Some(prefix) = self.dialect.parse_prefix(self) { @@ -1456,8 +1453,7 @@ impl<'a> Parser<'a> { } } - /// parse a group by expr. a group by expr can be one of group sets, roll up, cube, or simple - /// expr. + /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. 
fn parse_group_by_expr(&mut self) -> Result { if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { @@ -1484,7 +1480,7 @@ impl<'a> Parser<'a> { } } - /// parse a tuple with `(` and `)`. + /// Parse a tuple with `(` and `)`. /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. /// If `allow_empty` is true, then an empty tuple is allowed. fn parse_tuple( @@ -1953,13 +1949,11 @@ impl<'a> Parser<'a> { } } - /// Parses fulltext expressions [(1)] + /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] /// /// # Errors /// This method will raise an error if the column list is empty or with invalid identifiers, /// the match expression is not a literal string, or if the search modifier is not valid. - /// - /// [(1)]: Expr::MatchAgainst pub fn parse_match_against(&mut self) -> Result { let columns = self.parse_parenthesized_column_list(Mandatory, false)?; @@ -2004,17 +1998,19 @@ impl<'a> Parser<'a> { }) } - /// Parse an INTERVAL expression. + /// Parse an `INTERVAL` expression. /// /// Some syntactically valid intervals: /// - /// 1. `INTERVAL '1' DAY` - /// 2. `INTERVAL '1-1' YEAR TO MONTH` - /// 3. `INTERVAL '1' SECOND` - /// 4. `INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5)` - /// 5. `INTERVAL '1.1' SECOND (2, 2)` - /// 6. `INTERVAL '1:1' HOUR (5) TO MINUTE (5)` - /// 7. (MySql and BigQuey only):`INTERVAL 1 DAY` + /// ```sql + /// 1. INTERVAL '1' DAY + /// 2. INTERVAL '1-1' YEAR TO MONTH + /// 3. INTERVAL '1' SECOND + /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) + /// 5. INTERVAL '1.1' SECOND (2, 2) + /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) + /// 7. (MySql & BigQuey only): INTERVAL 1 DAY + /// ``` /// /// Note that we do not currently attempt to parse the quoted value. pub fn parse_interval(&mut self) -> Result { @@ -2210,15 +2206,15 @@ impl<'a> Parser<'a> { )) } - /// Parse a field definition in a struct [1] or tuple [2]. 
+ /// Parse a field definition in a [struct] or [tuple]. /// Syntax: /// /// ```sql /// [field_name] field_type /// ``` /// - /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type - /// [2]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { @@ -2272,7 +2268,7 @@ impl<'a> Parser<'a> { Ok(fields) } - /// DuckDB specific: Parse a duckdb dictionary [1] + /// DuckDB specific: Parse a duckdb [dictionary] /// /// Syntax: /// @@ -2280,7 +2276,7 @@ impl<'a> Parser<'a> { /// {'field_name': expr1[, ... ]} /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_struct_literal(&mut self) -> Result { self.expect_token(&Token::LBrace)?; @@ -2291,13 +2287,15 @@ impl<'a> Parser<'a> { Ok(Expr::Dictionary(fields)) } - /// Parse a field for a duckdb dictionary [1] + /// Parse a field for a duckdb [dictionary] + /// /// Syntax + /// /// ```sql /// 'name': expr /// ``` /// - /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs fn parse_duckdb_dictionary_field(&mut self) -> Result { let key = self.parse_identifier(false)?; @@ -2311,13 +2309,15 @@ impl<'a> Parser<'a> { }) } - /// Parse clickhouse map [1] + /// Parse clickhouse [map] + /// /// Syntax + /// /// ```sql /// Map(key_data_type, value_data_type) /// ``` /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/map + /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map fn parse_click_house_map_def(&mut self) -> 
Result<(DataType, DataType), ParserError> { self.expect_keyword(Keyword::MAP)?; self.expect_token(&Token::LParen)?; @@ -2329,13 +2329,15 @@ impl<'a> Parser<'a> { Ok((key_data_type, value_data_type)) } - /// Parse clickhouse tuple [1] + /// Parse clickhouse [tuple] + /// /// Syntax + /// /// ```sql /// Tuple([field_name] field_type, ...) /// ``` /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { self.expect_keyword(Keyword::TUPLE)?; self.expect_token(&Token::LParen)?; @@ -2649,7 +2651,7 @@ impl<'a> Parser<'a> { } } - /// parse the ESCAPE CHAR portion of LIKE, ILIKE, and SIMILAR TO + /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { Ok(Some(self.parse_literal_string()?)) @@ -2836,7 +2838,7 @@ impl<'a> Parser<'a> { }) } - /// Parses the parens following the `[ NOT ] IN` operator + /// Parses the parens following the `[ NOT ] IN` operator. pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { // BigQuery allows `IN UNNEST(array_expression)` // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators @@ -2873,7 +2875,7 @@ impl<'a> Parser<'a> { Ok(in_op) } - /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed. pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { // Stop parsing subexpressions for and on tokens with // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. @@ -2888,7 +2890,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a postgresql casting style which is in the form of `expr::datatype` + /// Parse a postgresql casting style which is in the form of `expr::datatype`. 
pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { kind: CastKind::DoubleColon, @@ -2898,7 +2900,7 @@ impl<'a> Parser<'a> { }) } - // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference + // Use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference // higher number = higher precedence // // NOTE: The pg documentation is incomplete, e.g. the AT TIME ZONE operator @@ -3217,7 +3219,7 @@ impl<'a> Parser<'a> { /// If the current token is one of the given `keywords`, consume the token /// and return the keyword that matches. Otherwise, no tokens are consumed - /// and returns `None`. + /// and returns [`None`]. #[must_use] pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { match self.peek_token().token { @@ -3393,8 +3395,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(f) } - /// Run a parser method `f`, reverting back to the current position - /// if unsuccessful. + /// Run a parser method `f`, reverting back to the current position if unsuccessful. #[must_use] fn maybe_parse(&mut self, mut f: F) -> Option where @@ -3409,8 +3410,8 @@ impl<'a> Parser<'a> { } } - /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed - /// and results in a `ParserError` if both `ALL` and `DISTINCT` are found. + /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed + /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let loc = self.peek_token().location; let all = self.parse_keyword(Keyword::ALL); From 0b1a413e64006286308500731323d50617dc6ed8 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 30 Jun 2024 19:06:20 +0800 Subject: [PATCH 29/53] Fix a few typos in comment lines (#1316) Co-authored-by: Andrew Lamb --- src/parser/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8696629760..563fd86bc2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -108,7 +108,7 @@ mod recursion { } } - /// Guard that increass the remaining depth by 1 on drop + /// Guard that increases the remaining depth by 1 on drop pub struct DepthGuard { remaining_depth: Rc>, } @@ -194,7 +194,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; /// nested such that the following declaration is possible: /// `ARRAY>` /// But the tokenizer recognizes the `>>` as a ShiftRight token. -/// We work-around that limitation when parsing a data type by accepting +/// We work around that limitation when parsing a data type by accepting /// either a `>` or `>>` token in such cases, remembering which variant we /// matched. /// In the latter case having matched a `>>`, the parent type will not look to @@ -1075,7 +1075,7 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } - // Here `w` is a word, check if it's a part of a multi-part + // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { Token::LParen | Token::Period => { @@ -2009,7 +2009,7 @@ impl<'a> Parser<'a> { /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) /// 5. INTERVAL '1.1' SECOND (2, 2) /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) - /// 7. (MySql & BigQuey only): INTERVAL 1 DAY + /// 7. 
(MySql & BigQuery only): INTERVAL 1 DAY /// ``` /// /// Note that we do not currently attempt to parse the quoted value. @@ -2749,7 +2749,7 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double quoted + // path segments in SF dot notation can be unquoted or double-quoted quote_style: quote_style @ (Some('"') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. @@ -2948,7 +2948,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::NOT => match self.peek_nth_token(1).token { // The precedence of NOT varies depending on keyword that // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise it + // it takes on the precedence of those tokens. Otherwise, it // is not an infix operator, and therefore has zero // precedence. Token::Word(w) if w.keyword == Keyword::IN => Ok(Self::BETWEEN_PREC), @@ -3251,7 +3251,7 @@ impl<'a> Parser<'a> { } /// If the current token is the `expected` keyword, consume the token. - /// Otherwise return an error. + /// Otherwise, return an error. pub fn expect_keyword(&mut self, expected: Keyword) -> Result<(), ParserError> { if self.parse_keyword(expected) { Ok(()) @@ -4508,7 +4508,7 @@ impl<'a> Parser<'a> { self.peek_token(), ); }; - // Many dialects support the non standard `IF EXISTS` clause and allow + // Many dialects support the non-standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -4822,7 +4822,7 @@ impl<'a> Parser<'a> { continue; } _ => { - // Put back the semi-colon, this is the end of the DECLARE statement. + // Put back the semicolon, this is the end of the DECLARE statement. 
self.prev_token(); } } @@ -7278,7 +7278,7 @@ impl<'a> Parser<'a> { // ignore the and treat the multiple strings as // a single ." Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - // Support for MySql dialect double quoted string, `AS "HOUR"` for example + // Support for MySql dialect double-quoted string, `AS "HOUR"` for example Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))), _ => { if after_as { From 44d7a20f641c9cd8f0c3e08f7d77f02534452ce8 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 30 Jun 2024 19:33:43 +0800 Subject: [PATCH 30/53] Support `GROUP BY WITH MODIFIER` for ClickHouse (#1323) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 4 +-- src/ast/query.rs | 56 +++++++++++++++++++++++++++------ src/keywords.rs | 1 + src/parser/mod.rs | 37 +++++++++++++++++++--- tests/sqlparser_clickhouse.rs | 57 ++++++++++++++++++++++++++++++++- tests/sqlparser_common.rs | 53 +++++++++++++++++-------------- tests/sqlparser_duckdb.rs | 4 +-- tests/sqlparser_mssql.rs | 4 +-- tests/sqlparser_mysql.rs | 16 +++++----- tests/sqlparser_postgres.rs | 59 ++++++++++++++++++++--------------- 10 files changed, 215 insertions(+), 76 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9ed837825d..c7f461418c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,8 +43,8 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - GroupByExpr, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, JoinOperator, - JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, + GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, + JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, 
NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, diff --git a/src/ast/query.rs b/src/ast/query.rs index 0fde3e6b73..d00a0dfcc5 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -299,10 +299,10 @@ impl fmt::Display for Select { write!(f, " WHERE {selection}")?; } match &self.group_by { - GroupByExpr::All => write!(f, " GROUP BY ALL")?, - GroupByExpr::Expressions(exprs) => { + GroupByExpr::All(_) => write!(f, " {}", self.group_by)?, + GroupByExpr::Expressions(exprs, _) => { if !exprs.is_empty() { - write!(f, " GROUP BY {}", display_comma_separated(exprs))?; + write!(f, " {}", self.group_by)? } } } @@ -1866,27 +1866,65 @@ impl fmt::Display for SelectInto { } } +/// ClickHouse supports GROUP BY WITH modifiers(includes ROLLUP|CUBE|TOTALS). +/// e.g. GROUP BY year WITH ROLLUP WITH TOTALS +/// +/// [ClickHouse]: +#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GroupByWithModifier { + Rollup, + Cube, + Totals, +} + +impl fmt::Display for GroupByWithModifier { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GroupByWithModifier::Rollup => write!(f, "WITH ROLLUP"), + GroupByWithModifier::Cube => write!(f, "WITH CUBE"), + GroupByWithModifier::Totals => write!(f, "WITH TOTALS"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GroupByExpr { - /// ALL syntax of [Snowflake], and [DuckDB] + /// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse]. 
/// /// [Snowflake]: /// [DuckDB]: - All, + /// [ClickHouse]: + /// + /// ClickHouse also supports WITH modifiers after GROUP BY ALL and expressions. + /// + /// [ClickHouse]: + All(Vec), /// Expressions - Expressions(Vec), + Expressions(Vec, Vec), } impl fmt::Display for GroupByExpr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - GroupByExpr::All => write!(f, "GROUP BY ALL"), - GroupByExpr::Expressions(col_names) => { + GroupByExpr::All(modifiers) => { + write!(f, "GROUP BY ALL")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) + } + GroupByExpr::Expressions(col_names, modifiers) => { let col_names = display_comma_separated(col_names); - write!(f, "GROUP BY ({col_names})") + write!(f, "GROUP BY {col_names}")?; + if !modifiers.is_empty() { + write!(f, " {}", display_separated(modifiers, " "))?; + } + Ok(()) } } } diff --git a/src/keywords.rs b/src/keywords.rs index e75d45e441..5db55e9da3 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -721,6 +721,7 @@ define_keywords!( TINYINT, TO, TOP, + TOTALS, TRAILING, TRANSACTION, TRANSIENT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 563fd86bc2..4e9c3836b3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8319,13 +8319,42 @@ impl<'a> Parser<'a> { }; let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { - if self.parse_keyword(Keyword::ALL) { - GroupByExpr::All + let expressions = if self.parse_keyword(Keyword::ALL) { + None } else { - GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) + Some(self.parse_comma_separated(Parser::parse_group_by_expr)?) 
+ }; + + let mut modifiers = vec![]; + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + loop { + if !self.parse_keyword(Keyword::WITH) { + break; + } + let keyword = self.expect_one_of_keywords(&[ + Keyword::ROLLUP, + Keyword::CUBE, + Keyword::TOTALS, + ])?; + modifiers.push(match keyword { + Keyword::ROLLUP => GroupByWithModifier::Rollup, + Keyword::CUBE => GroupByWithModifier::Cube, + Keyword::TOTALS => GroupByWithModifier::Totals, + _ => { + return parser_err!( + "BUG: expected to match GroupBy modifier keyword", + self.peek_token().location + ) + } + }); + } + } + match expressions { + None => GroupByExpr::All(modifiers), + Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), } } else { - GroupByExpr::Expressions(vec![]) + GroupByExpr::Expressions(vec![], vec![]) }; let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 50d4faf5d2..0c188a24bf 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -88,7 +88,7 @@ fn parse_map_access_expr() { right: Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))), }), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -626,6 +626,61 @@ fn parse_create_materialized_view() { clickhouse_and_generic().verified_stmt(sql); } +#[test] +fn parse_group_by_with_modifier() { + let clauses = ["x", "a, b", "ALL"]; + let modifiers = [ + "WITH ROLLUP", + "WITH CUBE", + "WITH TOTALS", + "WITH ROLLUP WITH CUBE", + ]; + let expected_modifiers = [ + vec![GroupByWithModifier::Rollup], + vec![GroupByWithModifier::Cube], + vec![GroupByWithModifier::Totals], + vec![GroupByWithModifier::Rollup, GroupByWithModifier::Cube], + ]; + for clause in &clauses { + for (modifier, expected_modifier) in modifiers.iter().zip(expected_modifiers.iter()) { + let sql = format!("SELECT * FROM 
t GROUP BY {clause} {modifier}"); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + let group_by = &query.body.as_select().unwrap().group_by; + if clause == &"ALL" { + assert_eq!(group_by, &GroupByExpr::All(expected_modifier.to_vec())); + } else { + assert_eq!( + group_by, + &GroupByExpr::Expressions( + clause + .split(", ") + .map(|c| Identifier(Ident::new(c))) + .collect(), + expected_modifier.to_vec() + ) + ); + } + } + _ => unreachable!(), + } + } + } + + // invalid cases + let invalid_cases = [ + "SELECT * FROM t GROUP BY x WITH", + "SELECT * FROM t GROUP BY x WITH ROLLUP CUBE", + "SELECT * FROM t GROUP BY x WITH WITH ROLLUP", + "SELECT * FROM t GROUP BY WITH ROLLUP", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: one of ROLLUP or CUBE or TOTALS, found: WITH"); + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 76e6a98bbd..ac2133946e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -392,9 +392,10 @@ fn parse_update_set_from() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![Expr::Identifier(Ident::new( - "id" - ))]), + group_by: GroupByExpr::Expressions( + vec![Expr::Identifier(Ident::new("id"))], + vec![] + ), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2119,10 +2120,13 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("lname")), - Expr::Identifier(Ident::new("fname")), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("lname")), + Expr::Identifier(Ident::new("fname")), + ], + vec![] + ), select.group_by ); @@ -2137,7 +2141,7 @@ fn parse_select_group_by() 
{ fn parse_select_group_by_all() { let sql = "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL"; let select = verified_only_select(sql); - assert_eq!(GroupByExpr::All, select.group_by); + assert_eq!(GroupByExpr::All(vec![]), select.group_by); one_statement_parses_to( "SELECT id, fname, lname, SUM(order) FROM customer GROUP BY ALL", @@ -4545,7 +4549,7 @@ fn test_parse_named_window() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -4974,7 +4978,7 @@ fn parse_interval_and_or_xor() { }), }), }), - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -6908,7 +6912,7 @@ fn lateral_function() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -7627,7 +7631,7 @@ fn parse_merge() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9133,7 +9137,7 @@ fn parse_unload() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9276,7 +9280,7 @@ fn parse_connect_by() { into: None, lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9364,7 +9368,7 @@ fn parse_connect_by() { op: BinaryOperator::NotEq, right: Box::new(Expr::Value(number("42"))), }), - group_by: GroupByExpr::Expressions(vec![]), + 
group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -9484,15 +9488,18 @@ fn test_group_by_grouping_sets() { all_dialects_where(|d| d.supports_group_by_expr()) .verified_only_select(sql) .group_by, - GroupByExpr::Expressions(vec![Expr::GroupingSets(vec![ - vec![ - Expr::Identifier(Ident::new("city")), - Expr::Identifier(Ident::new("car_model")) - ], - vec![Expr::Identifier(Ident::new("city")),], - vec![Expr::Identifier(Ident::new("car_model"))], + GroupByExpr::Expressions( + vec![Expr::GroupingSets(vec![ + vec![ + Expr::Identifier(Ident::new("city")), + Expr::Identifier(Ident::new("car_model")) + ], + vec![Expr::Identifier(Ident::new("city")),], + vec![Expr::Identifier(Ident::new("car_model"))], + vec![] + ])], vec![] - ])]) + ) ); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 253318b327..948e150c9e 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -171,7 +171,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -209,7 +209,7 @@ fn test_select_union_by_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 5f03bb0939..993850299d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -111,7 +111,7 @@ fn parse_create_procedure() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -528,7 +528,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], 
selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a25f4c2084..4c18d4a755 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -907,7 +907,7 @@ fn parse_escaped_quote_identifiers_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -954,7 +954,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -998,7 +998,7 @@ fn parse_escaped_backticks_with_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1042,7 +1042,7 @@ fn parse_escaped_backticks_with_no_escape() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1703,7 +1703,7 @@ fn parse_select_with_numeric_prefix_column_name() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -1756,7 +1756,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), 
cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2255,7 +2255,7 @@ fn parse_substring_in_select() { }], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2559,7 +2559,7 @@ fn parse_hex_string_introducer() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 197597e9bd..2606fb96e9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1075,7 +1075,7 @@ fn parse_copy_to() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), having: None, named_window: vec![], window_before_qualify: false, @@ -2383,7 +2383,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -2402,7 +2402,7 @@ fn parse_array_subquery_expr() { from: vec![], lateral_views: vec![], selection: None, - group_by: GroupByExpr::Expressions(vec![]), + group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -3711,14 +3711,17 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::GroupingSets(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - vec![], - ]), - ]), + GroupByExpr::Expressions( + vec![ 
+ Expr::Identifier(Ident::new("size")), + Expr::GroupingSets(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + vec![], + ]), + ], + vec![] + ), select.group_by ); } @@ -3729,13 +3732,16 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Rollup(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Rollup(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } @@ -3746,13 +3752,16 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size")), - Expr::Cube(vec![ - vec![Expr::Identifier(Ident::new("brand"))], - vec![Expr::Identifier(Ident::new("size"))], - ]), - ]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size")), + Expr::Cube(vec![ + vec![Expr::Identifier(Ident::new("brand"))], + vec![Expr::Identifier(Ident::new("size"))], + ]), + ], + vec![] + ), select.group_by ); } From 700bd03d6f4aa97c5b0901fd399dd3c10114a760 Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 7 Jul 2024 19:17:43 +0800 Subject: [PATCH 31/53] Support `SETTINGS` pairs for ClickHouse dialect (#1327) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 2 +- src/ast/query.rs | 21 +++++++++++++++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 18 +++++++++++++++++ tests/sqlparser_clickhouse.rs | 38 ++++++++++++++++++++++++++++++++++- tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 15 ++++++++++++++ tests/sqlparser_postgres.rs | 11 +++++++--- 9 files 
changed, 111 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c7f461418c..c904d4bc9b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -48,7 +48,7 @@ pub use self::query::{ MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, SymbolDefinition, Table, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, }; diff --git a/src/ast/query.rs b/src/ast/query.rs index d00a0dfcc5..241e45a9c1 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -50,6 +50,10 @@ pub struct Query { /// `FOR JSON { AUTO | PATH } [ , INCLUDE_NULL_VALUES ]` /// (MSSQL-specific) pub for_clause: Option, + /// ClickHouse syntax: `SELECT * FROM t SETTINGS key1 = value1, key2 = value2` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) + pub settings: Option>, } impl fmt::Display for Query { @@ -70,6 +74,9 @@ impl fmt::Display for Query { if !self.limit_by.is_empty() { write!(f, " BY {}", display_separated(&self.limit_by, ", "))?; } + if let Some(ref settings) = self.settings { + write!(f, " SETTINGS {}", display_comma_separated(settings))?; + } if let Some(ref fetch) = self.fetch { write!(f, " {fetch}")?; } @@ -828,6 +835,20 @@ impl fmt::Display for ConnectBy { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Setting { + pub key: Ident, + pub value: Value, +} + +impl fmt::Display for Setting { + fn fmt(&self, f: 
&mut fmt::Formatter) -> fmt::Result { + write!(f, "{} = {}", self.key, self.value) + } +} + /// An expression optionally followed by an alias. /// /// Example: diff --git a/src/keywords.rs b/src/keywords.rs index 5db55e9da3..cbba92c5be 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -650,6 +650,7 @@ define_keywords!( SESSION_USER, SET, SETS, + SETTINGS, SHARE, SHOW, SIMILAR, @@ -850,6 +851,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for ClickHouse SELECT * FROM t SETTINGS ... + Keyword::SETTINGS, // for Snowflake START WITH .. CONNECT BY Keyword::START, Keyword::CONNECT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4e9c3836b3..7614307bf6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7871,6 +7871,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { @@ -7883,6 +7884,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }) } else { let body = self.parse_boxed_query_body(0)?; @@ -7928,6 +7930,20 @@ impl<'a> Parser<'a> { vec![] }; + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) 
} else { @@ -7955,6 +7971,7 @@ impl<'a> Parser<'a> { fetch, locks, for_clause, + settings, }) } } @@ -9091,6 +9108,7 @@ impl<'a> Parser<'a> { fetch: None, locks: vec![], for_clause: None, + settings: None, }), alias, }) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 0c188a24bf..b3e03c4abb 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -21,8 +21,8 @@ use test_utils::*; use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess}; use sqlparser::ast::SelectItem::UnnamedExpr; use sqlparser::ast::TableFactor::Table; +use sqlparser::ast::Value::Number; use sqlparser::ast::*; - use sqlparser::dialect::ClickHouseDialect; use sqlparser::dialect::GenericDialect; @@ -549,6 +549,42 @@ fn parse_limit_by() { ); } +#[test] +fn parse_settings_in_query() { + match clickhouse_and_generic() + .verified_stmt(r#"SELECT * FROM t SETTINGS max_threads = 1, max_block_size = 10000"#) + { + Statement::Query(query) => { + assert_eq!( + query.settings, + Some(vec![ + Setting { + key: Ident::new("max_threads"), + value: Number("1".parse().unwrap(), false) + }, + Setting { + key: Ident::new("max_block_size"), + value: Number("10000".parse().unwrap(), false) + }, + ]) + ); + } + _ => unreachable!(), + } + + let invalid_cases = vec![ + "SELECT * FROM t SETTINGS a", + "SELECT * FROM t SETTINGS a=", + "SELECT * FROM t SETTINGS a=1, b", + "SELECT * FROM t SETTINGS a=1, b=", + "SELECT * FROM t SETTINGS a=1, b=c", + ]; + for sql in invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: SETTINGS key = value, found: "); + } +} #[test] fn parse_select_star_except() { clickhouse().verified_stmt("SELECT * EXCEPT (prev_status) FROM anomalies"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac2133946e..609d2600d0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -413,6 +413,7 @@ fn parse_update_set_from() { fetch: None, locks: vec![], for_clause: 
None, + settings: None, }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -3427,6 +3428,7 @@ fn parse_create_table_as_table() { fetch: None, locks: vec![], for_clause: None, + settings: None, }); match verified_stmt(sql1) { @@ -3452,6 +3454,7 @@ fn parse_create_table_as_table() { fetch: None, locks: vec![], for_clause: None, + settings: None, }); match verified_stmt(sql2) { @@ -4996,6 +4999,7 @@ fn parse_interval_and_or_xor() { fetch: None, locks: vec![], for_clause: None, + settings: None, }))]; assert_eq!(actual_ast, expected_ast); @@ -7649,6 +7653,7 @@ fn parse_merge() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), alias: Some(TableAlias { name: Ident { @@ -9156,6 +9161,7 @@ fn parse_unload() { locks: vec![], for_clause: None, order_by: vec![], + settings: None, }), to: Ident { value: "s3://...".to_string(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 993850299d..84ab474b08 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,6 +103,7 @@ fn parse_create_procedure() { locks: vec![], for_clause: None, order_by: vec![], + settings: None, body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, @@ -546,6 +547,7 @@ fn parse_substring_in_select() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), query ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 4c18d4a755..cf9b717be3 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -925,6 +925,7 @@ fn parse_escaped_quote_identifiers_with_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -972,6 +973,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1016,6 +1018,7 @@ fn parse_escaped_backticks_with_escape() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1060,6 +1063,7 @@ fn parse_escaped_backticks_with_no_escape() { 
fetch: None, locks: vec![], for_clause: None, + settings: None, })) ); } @@ -1264,6 +1268,7 @@ fn parse_simple_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1306,6 +1311,7 @@ fn parse_ignore_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1348,6 +1354,7 @@ fn parse_priority_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1387,6 +1394,7 @@ fn parse_priority_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1434,6 +1442,7 @@ fn parse_insert_as() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1493,6 +1502,7 @@ fn parse_insert_as() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1536,6 +1546,7 @@ fn parse_replace_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1573,6 +1584,7 @@ fn parse_empty_row_insert() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -1633,6 +1645,7 @@ fn parse_insert_with_on_duplicate_update() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), source ); @@ -2273,6 +2286,7 @@ fn parse_substring_in_select() { fetch: None, locks: vec![], for_clause: None, + settings: None, }), query ); @@ -2578,6 +2592,7 @@ fn parse_hex_string_introducer() { fetch: None, locks: vec![], for_clause: None, + settings: None, })) ) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2606fb96e9..243116a3f2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1093,6 +1093,7 @@ fn parse_copy_to() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), to: true, target: CopyTarget::File { @@ -2421,6 +2422,7 @@ fn parse_array_subquery_expr() { fetch: None, locks: vec![], for_clause: None, + settings: None, })), filter: None, null_treatment: None, @@ -3941,7 
+3943,8 @@ fn test_simple_postgres_insert_with_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], @@ -4008,7 +4011,8 @@ fn test_simple_postgres_insert_with_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], @@ -4071,7 +4075,8 @@ fn test_simple_insert_with_quoted_alias() { offset: None, fetch: None, locks: vec![], - for_clause: None + for_clause: None, + settings: None, })), partitioned: None, after_columns: vec![], From 0884dd920d2a2bbd5c8c67cbf9ed812ce8a1dd5d Mon Sep 17 00:00:00 2001 From: hulk Date: Sun, 7 Jul 2024 20:03:23 +0800 Subject: [PATCH 32/53] Support `PREWHERE` condition for ClickHouse dialect (#1328) --- src/ast/query.rs | 8 ++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 9 +++++++ tests/sqlparser_clickhouse.rs | 51 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 8 ++++++ tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 8 ++++++ tests/sqlparser_postgres.rs | 3 +++ 9 files changed, 94 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index 241e45a9c1..7d2626b2da 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -247,6 +247,11 @@ pub struct Select { pub from: Vec, /// LATERAL VIEWs pub lateral_views: Vec, + /// ClickHouse syntax: `PREWHERE a = 1 WHERE b = 2`, + /// and it can be used together with WHERE selection. 
+ /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/prewhere) + pub prewhere: Option, /// WHERE pub selection: Option, /// GROUP BY @@ -302,6 +307,9 @@ impl fmt::Display for Select { write!(f, "{lv}")?; } } + if let Some(ref prewhere) = self.prewhere { + write!(f, " PREWHERE {prewhere}")?; + } if let Some(ref selection) = self.selection { write!(f, " WHERE {selection}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index cbba92c5be..eb69a209b1 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -558,6 +558,7 @@ define_keywords!( PRECISION, PREPARE, PRESERVE, + PREWHERE, PRIMARY, PRIOR, PRIVILEGES, @@ -851,6 +852,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FOR, // for MYSQL PARTITION SELECTION Keyword::PARTITION, + // for Clickhouse PREWHERE + Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, // for Snowflake START WITH .. CONNECT BY diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7614307bf6..a81d53e7cc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8329,6 +8329,14 @@ impl<'a> Parser<'a> { } } + let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PREWHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { Some(self.parse_expr()?) 
} else { @@ -8440,6 +8448,7 @@ impl<'a> Parser<'a> { into, from, lateral_views, + prewhere, selection, group_by, cluster_by, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index b3e03c4abb..29a5b15aa8 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -63,6 +63,7 @@ fn parse_map_access_expr() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(BinaryOp { left: Box::new(BinaryOp { left: Box::new(Identifier(Ident::new("id"))), @@ -717,6 +718,56 @@ fn parse_group_by_with_modifier() { } } +#[test] +fn test_prewhere() { + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }) + ); + let selection = query.as_ref().body.as_select().unwrap().selection.as_ref(); + assert_eq!( + selection, + Some(&BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }) + ); + } + _ => unreachable!(), + } + + match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 AND y = 2") { + Statement::Query(query) => { + let prewhere = query.body.as_select().unwrap().prewhere.as_ref(); + assert_eq!( + prewhere, + Some(&BinaryOp { + left: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("x"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("1".parse().unwrap(), false))), + }), + op: BinaryOperator::And, + right: Box::new(BinaryOp { + left: Box::new(Identifier(Ident::new("y"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Number("2".parse().unwrap(), false))), + }), + }) + ); + } + _ => unreachable!(), + } +} + 
fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 609d2600d0..256680b3e1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -391,6 +391,7 @@ fn parse_update_set_from() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions( vec![Expr::Identifier(Ident::new("id"))], @@ -4551,6 +4552,7 @@ fn test_parse_named_window() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -4932,6 +4934,7 @@ fn parse_interval_and_or_xor() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident { @@ -6915,6 +6918,7 @@ fn lateral_function() { }], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -7634,6 +7638,7 @@ fn parse_merge() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9141,6 +9146,7 @@ fn parse_unload() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9285,6 +9291,7 @@ fn parse_connect_by() { }], into: None, lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -9369,6 +9376,7 @@ fn parse_connect_by() { }], into: None, lateral_views: vec![], + prewhere: None, selection: Some(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("employee_id"))), op: BinaryOperator::NotEq, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 948e150c9e..400daa8a89 100644 --- 
a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -170,6 +170,7 @@ fn test_select_union_by_name() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -208,6 +209,7 @@ fn test_select_union_by_name() { joins: vec![], }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 84ab474b08..e0e0f7c700 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -111,6 +111,7 @@ fn parse_create_procedure() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -528,6 +529,7 @@ fn parse_substring_in_select() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index cf9b717be3..a5fa752001 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -906,6 +906,7 @@ fn parse_escaped_quote_identifiers_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -954,6 +955,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -999,6 +1001,7 @@ fn parse_escaped_backticks_with_escape() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1044,6 +1047,7 @@ fn parse_escaped_backticks_with_no_escape() { into: None, from: vec![], lateral_views: vec![], + 
prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1715,6 +1719,7 @@ fn parse_select_with_numeric_prefix_column_name() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -1768,6 +1773,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2267,6 +2273,7 @@ fn parse_substring_in_select() { joins: vec![] }], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2572,6 +2579,7 @@ fn parse_hex_string_introducer() { })], from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 243116a3f2..2d3097cf9d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1074,6 +1074,7 @@ fn parse_copy_to() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), having: None, @@ -2383,6 +2384,7 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], @@ -2402,6 +2404,7 @@ fn parse_array_subquery_expr() { into: None, from: vec![], lateral_views: vec![], + prewhere: None, selection: None, group_by: GroupByExpr::Expressions(vec![], vec![]), cluster_by: vec![], From f77192d4ec19c47c90654aa6514a7e63b0d67a0b Mon Sep 17 00:00:00 2001 From: Mohamed Abdeen <83442793+MohamedAbdeen21@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:31:33 +0300 Subject: [PATCH 33/53] Re-enable trailing 
commas in DCL (#1318) --- src/parser/mod.rs | 39 +++++++++++++++++++++++++++++---------- tests/sqlparser_common.rs | 12 ++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a81d53e7cc..1dc6bff5ef 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -46,6 +46,9 @@ pub enum ParserError { RecursionLimitExceeded, } +// avoid clippy type_complexity warnings +type ParsedAction = (Keyword, Option>); + // Use `Parser::expected` instead, if possible macro_rules! parser_err { ($MSG:expr, $loc:expr) => { @@ -3334,6 +3337,29 @@ impl<'a> Parser<'a> { ret } + pub fn parse_actions_list(&mut self) -> Result, ParserError> { + let mut values = vec![]; + loop { + values.push(self.parse_grant_permission()?); + if !self.consume_token(&Token::Comma) { + break; + } else if self.options.trailing_commas { + match self.peek_token().token { + Token::Word(kw) if kw.keyword == Keyword::ON => { + break; + } + Token::RParen + | Token::SemiColon + | Token::EOF + | Token::RBracket + | Token::RBrace => break, + _ => continue, + } + } + } + Ok(values) + } + /// Parse a comma-separated list of 1+ items accepted by `F` pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> where @@ -3347,9 +3373,7 @@ impl<'a> Parser<'a> { } else if self.options.trailing_commas { match self.peek_token().token { Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS - .iter() - .any(|d| kw.keyword == *d) => + if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => { break; } @@ -9680,11 +9704,8 @@ impl<'a> Parser<'a> { with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), } } else { - let old_value = self.options.trailing_commas; - self.options.trailing_commas = false; - let (actions, err): (Vec<_>, Vec<_>) = self - .parse_comma_separated(Parser::parse_grant_permission)? + .parse_actions_list()? 
.into_iter() .map(|(kw, columns)| match kw { Keyword::DELETE => Ok(Action::Delete), @@ -9706,8 +9727,6 @@ impl<'a> Parser<'a> { }) .partition(Result::is_ok); - self.options.trailing_commas = old_value; - if !err.is_empty() { let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); return Err(ParserError::ParserError(format!( @@ -9753,7 +9772,7 @@ impl<'a> Parser<'a> { Ok((privileges, objects)) } - pub fn parse_grant_permission(&mut self) -> Result<(Keyword, Option>), ParserError> { + pub fn parse_grant_permission(&mut self) -> Result { if let Some(kw) = self.parse_one_of_keywords(&[ Keyword::CONNECT, Keyword::CREATE, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 256680b3e1..132874aa9d 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8942,6 +8942,11 @@ fn parse_trailing_comma() { "CREATE TABLE employees (name TEXT, age INT)", ); + trailing_commas.one_statement_parses_to( + "GRANT USAGE, SELECT, INSERT, ON p TO u", + "GRANT USAGE, SELECT, INSERT ON p TO u", + ); + trailing_commas.verified_stmt("SELECT album_id, name FROM track"); trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); @@ -8961,6 +8966,13 @@ fn parse_trailing_comma() { ParserError::ParserError("Expected an expression, found: from".to_string()) ); + assert_eq!( + trailing_commas + .parse_sql_statements("REVOKE USAGE, SELECT, ON p TO u") + .unwrap_err(), + ParserError::ParserError("Expected a privilege keyword, found: ON".to_string()) + ); + assert_eq!( trailing_commas .parse_sql_statements("CREATE TABLE employees (name text, age int,)") From 66b4ec8486a18d2f542d6b83450d421ceca6572c Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Mon, 8 Jul 2024 11:32:45 +0100 Subject: [PATCH 34/53] Fix typo in `sqlparser-derive` README (#1310) --- derive/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/derive/README.md b/derive/README.md index ad4978a89c..ffb5d266ef 100644 --- a/derive/README.md +++ 
b/derive/README.md @@ -97,7 +97,7 @@ impl Visit for TableFactor { match self { Self::Table { name, alias } => { visitor.pre_visit_relation(name)?; - alias.visit(name)?; + name.visit(visitor)?; visitor.post_visit_relation(name)?; alias.visit(visitor)?; } From 17e5c0c1b6c3c52e5ffd0d2caa4aad7bd7d35958 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Jul 2024 07:37:00 -0400 Subject: [PATCH 35/53] Fix CI error message in CI (#1333) --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 132874aa9d..2b208016a3 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8970,7 +8970,7 @@ fn parse_trailing_comma() { trailing_commas .parse_sql_statements("REVOKE USAGE, SELECT, ON p TO u") .unwrap_err(), - ParserError::ParserError("Expected a privilege keyword, found: ON".to_string()) + ParserError::ParserError("Expected: a privilege keyword, found: ON".to_string()) ); assert_eq!( From bbee052890bb3eb64fe3e9fc20ad70ca06df3c5f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Jul 2024 14:38:59 -0400 Subject: [PATCH 36/53] Add stale PR github workflow (#1331) --- .github/workflows/stale.yml | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 0000000000..2312526824 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: "Close stale PRs" +on: + schedule: + - cron: "30 1 * * *" + +jobs: + close-stale-prs: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v9 + with: + stale-pr-message: "Thank you for your contribution. Unfortunately, this pull request is stale because it has been open 60 days with no activity. Please remove the stale label or comment or this will be closed in 7 days." + days-before-pr-stale: 60 + days-before-pr-close: 7 + # do not close stale issues + days-before-issue-stale: -1 + days-before-issue-close: -1 + repo-token: ${{ secrets.GITHUB_TOKEN }} From 9f60eb1571c4513140cb9a95bd107e26fcf6c7be Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Tue, 9 Jul 2024 13:46:49 +0200 Subject: [PATCH 37/53] Support `DROP PROCEDURE` statement (#1324) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 26 +++++++++ src/parser/mod.rs | 24 ++++++++- tests/sqlparser_postgres.rs | 102 ++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c904d4bc9b..beee9f4bc9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2215,6 +2215,16 @@ pub enum Statement { option: Option, }, /// ```sql + /// DROP PROCEDURE + /// ``` + DropProcedure { + if_exists: bool, + /// One or more function to drop + proc_desc: Vec, + /// `CASCADE` or `RESTRICT` + option: Option, + }, + /// ```sql /// DROP SECRET /// ``` DropSecret { @@ -3644,6 +3654,22 @@ impl fmt::Display for Statement { } Ok(()) } + 
Statement::DropProcedure { + if_exists, + proc_desc, + option, + } => { + write!( + f, + "DROP PROCEDURE{} {}", + if *if_exists { " IF EXISTS" } else { "" }, + display_comma_separated(proc_desc), + )?; + if let Some(op) = option { + write!(f, " {op}")?; + } + Ok(()) + } Statement::DropSecret { if_exists, temporary, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1dc6bff5ef..a88cfcb9c5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4524,11 +4524,13 @@ impl<'a> Parser<'a> { ObjectType::Stage } else if self.parse_keyword(Keyword::FUNCTION) { return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::PROCEDURE) { + return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); } else { return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, STAGE or SEQUENCE after DROP", + "TABLE, VIEW, INDEX, ROLE, SCHEMA, FUNCTION, PROCEDURE, STAGE or SEQUENCE after DROP", self.peek_token(), ); }; @@ -4580,6 +4582,26 @@ impl<'a> Parser<'a> { }) } + /// ```sql + /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] 
+ /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_procedure(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let proc_desc = self.parse_comma_separated(Parser::parse_drop_function_desc)?; + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + Some(_) => unreachable!(), // parse_one_of_keywords does not return other keywords + None => None, + }; + Ok(Statement::DropProcedure { + if_exists, + proc_desc, + option, + }) + } + fn parse_drop_function_desc(&mut self) -> Result { let name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2d3097cf9d..2da82c1225 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3629,6 +3629,108 @@ fn parse_drop_function() { ); } +#[test] +fn parse_drop_procedure() { + let sql = "DROP PROCEDURE IF EXISTS test_proc"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: None + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc(a INTEGER, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number("1".parse().unwrap(), false))), + } + ]), + }], + option: None + } + ); + + let sql = "DROP PROCEDURE IF EXISTS test_proc1(a INTEGER, IN b 
INTEGER = 1), test_proc2(a VARCHAR, IN b INTEGER = 1)"; + assert_eq!( + pg().verified_stmt(sql), + Statement::DropProcedure { + if_exists: true, + proc_desc: vec![ + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc1".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Integer(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + }, + DropFunctionDesc { + name: ObjectName(vec![Ident { + value: "test_proc2".to_string(), + quote_style: None + }]), + args: Some(vec![ + OperateFunctionArg::with_name("a", DataType::Varchar(None)), + OperateFunctionArg { + mode: Some(ArgMode::In), + name: Some("b".into()), + data_type: DataType::Integer(None), + default_expr: Some(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + ))), + } + ]), + } + ], + option: None + } + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc DROP"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: DROP".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("DROP PROCEDURE testproc SET NULL"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, found: SET".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_dollar_quoted_string() { let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$"; From 07278952f9ba9c717652ae463febf14db13777ce Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 9 Jul 2024 19:49:04 +0800 Subject: [PATCH 38/53] Add support of FORMAT clause for ClickHouse parser (#1335) --- src/ast/mod.rs | 16 ++++++++-------- src/ast/query.rs | 28 ++++++++++++++++++++++++++++ src/keywords.rs | 2 ++ src/parser/mod.rs | 16 ++++++++++++++++ tests/sqlparser_clickhouse.rs | 32 ++++++++++++++++++++++++++++++++ 
tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_mysql.rs | 15 +++++++++++++++ tests/sqlparser_postgres.rs | 5 +++++ 9 files changed, 114 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index beee9f4bc9..58f0944110 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,14 +43,14 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, JoinConstraint, - JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, LockClause, LockType, - MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, - NonBlock, Offset, OffsetRows, OrderByExpr, PivotValueSource, Query, RenameSelectItem, - RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, - Values, WildcardAdditionalOptions, With, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, + JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, + LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, + NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, + PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, + ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, + SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, + TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, }; pub use 
self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 7d2626b2da..70c781409b 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -54,6 +54,11 @@ pub struct Query { /// /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) pub settings: Option>, + /// `SELECT * FROM t FORMAT JSONCompact` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) + /// (ClickHouse-specific) + pub format_clause: Option, } impl fmt::Display for Query { @@ -86,6 +91,9 @@ impl fmt::Display for Query { if let Some(ref for_clause) = self.for_clause { write!(f, " {}", for_clause)?; } + if let Some(ref format) = self.format_clause { + write!(f, " {}", format)?; + } Ok(()) } } @@ -1959,6 +1967,26 @@ impl fmt::Display for GroupByExpr { } } +/// FORMAT identifier or FORMAT NULL clause, specific to ClickHouse. +/// +/// [ClickHouse]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FormatClause { + Identifier(Ident), + Null, +} + +impl fmt::Display for FormatClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Null => write!(f, "FORMAT NULL"), + } + } +} + /// FOR XML or FOR JSON clause, specific to MSSQL /// (formats the output of a query as XML or JSON) #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index eb69a209b1..edd3271f39 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -856,6 +856,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, + // for ClickHouse SELECT * FROM t FORMAT... 
+ Keyword::FORMAT, // for Snowflake START WITH .. CONNECT BY Keyword::START, Keyword::CONNECT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a88cfcb9c5..aada0bc56d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7918,6 +7918,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }) } else if self.parse_keyword(Keyword::UPDATE) { Ok(Query { @@ -7931,6 +7932,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }) } else { let body = self.parse_boxed_query_body(0)?; @@ -8006,6 +8008,18 @@ impl<'a> Parser<'a> { locks.push(self.parse_lock()?); } } + let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::FORMAT) + { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier(false)?; + Some(FormatClause::Identifier(ident)) + } + } else { + None + }; Ok(Query { with, @@ -8018,6 +8032,7 @@ impl<'a> Parser<'a> { locks, for_clause, settings, + format_clause, }) } } @@ -9164,6 +9179,7 @@ impl<'a> Parser<'a> { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias, }) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 29a5b15aa8..f6b787f5c2 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -768,6 +768,38 @@ fn test_prewhere() { } } +#[test] +fn test_query_with_format_clause() { + let format_options = vec!["TabSeparated", "JSONCompact", "NULL"]; + for format in &format_options { + let sql = format!("SELECT * FROM t FORMAT {}", format); + match clickhouse_and_generic().verified_stmt(&sql) { + Statement::Query(query) => { + if *format == "NULL" { + assert_eq!(query.format_clause, Some(FormatClause::Null)); + } else { + assert_eq!( + query.format_clause, + Some(FormatClause::Identifier(Ident::new(*format))) + ); + } + } + _ => unreachable!(), + } + } + + let invalid_cases = 
[ + "SELECT * FROM t FORMAT", + "SELECT * FROM t FORMAT TabSeparated JSONCompact", + "SELECT * FROM t FORMAT TabSeparated TabSeparated", + ]; + for sql in &invalid_cases { + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("Expected: FORMAT {identifier}, found: "); + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2b208016a3..86357234c8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -415,6 +415,7 @@ fn parse_update_set_from() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident::new("t2"), @@ -3430,6 +3431,7 @@ fn parse_create_table_as_table() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }); match verified_stmt(sql1) { @@ -3456,6 +3458,7 @@ fn parse_create_table_as_table() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }); match verified_stmt(sql2) { @@ -5003,6 +5006,7 @@ fn parse_interval_and_or_xor() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }))]; assert_eq!(actual_ast, expected_ast); @@ -7659,6 +7663,7 @@ fn parse_merge() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), alias: Some(TableAlias { name: Ident { @@ -9180,6 +9185,7 @@ fn parse_unload() { for_clause: None, order_by: vec![], settings: None, + format_clause: None, }), to: Ident { value: "s3://...".to_string(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index e0e0f7c700..6968347ece 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -104,6 +104,7 @@ fn parse_create_procedure() { for_clause: None, order_by: vec![], settings: None, + format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { distinct: None, top: None, @@ -550,6 +551,7 @@ fn parse_substring_in_select() { locks: vec![], for_clause: None, 
settings: None, + format_clause: None, }), query ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a5fa752001..74def31bf2 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -927,6 +927,7 @@ fn parse_escaped_quote_identifiers_with_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -976,6 +977,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1022,6 +1024,7 @@ fn parse_escaped_backticks_with_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1068,6 +1071,7 @@ fn parse_escaped_backticks_with_no_escape() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ); } @@ -1273,6 +1277,7 @@ fn parse_simple_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1316,6 +1321,7 @@ fn parse_ignore_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1359,6 +1365,7 @@ fn parse_priority_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1399,6 +1406,7 @@ fn parse_priority_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1447,6 +1455,7 @@ fn parse_insert_as() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1507,6 +1516,7 @@ fn parse_insert_as() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1551,6 +1561,7 @@ fn parse_replace_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1589,6 +1600,7 @@ fn parse_empty_row_insert() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), source ); @@ -1650,6 +1662,7 @@ fn parse_insert_with_on_duplicate_update() { locks: vec![], for_clause: 
None, settings: None, + format_clause: None, })), source ); @@ -2294,6 +2307,7 @@ fn parse_substring_in_select() { locks: vec![], for_clause: None, settings: None, + format_clause: None, }), query ); @@ -2601,6 +2615,7 @@ fn parse_hex_string_introducer() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })) ) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2da82c1225..74f70a6e57 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1095,6 +1095,7 @@ fn parse_copy_to() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), to: true, target: CopyTarget::File { @@ -2426,6 +2427,7 @@ fn parse_array_subquery_expr() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), filter: None, null_treatment: None, @@ -4050,6 +4052,7 @@ fn test_simple_postgres_insert_with_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -4118,6 +4121,7 @@ fn test_simple_postgres_insert_with_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], @@ -4182,6 +4186,7 @@ fn test_simple_insert_with_quoted_alias() { locks: vec![], for_clause: None, settings: None, + format_clause: None, })), partitioned: None, after_columns: vec![], From 32b8276b328ad014cdfbeb85d1618bb0b25c7130 Mon Sep 17 00:00:00 2001 From: gai takano Date: Tue, 9 Jul 2024 20:49:48 +0900 Subject: [PATCH 39/53] Postgres: support for `OWNER TO` clause (#1314) Co-authored-by: Andrew Lamb --- src/ast/ddl.rs | 29 +++++++++++++++ src/ast/mod.rs | 4 +-- src/keywords.rs | 1 + src/parser/mod.rs | 19 ++++++++++ tests/sqlparser_postgres.rs | 72 +++++++++++++++++++++++++++++++++++++ 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 9c30999ab1..1ed3857d78 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -157,6 
+157,32 @@ pub enum AlterTableOperation { SwapWith { table_name: ObjectName }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' SetTblProperties { table_properties: Vec }, + + /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + /// + /// Note: this is PostgreSQL-specific + OwnerTo { new_owner: Owner }, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Owner { + Ident(Ident), + CurrentRole, + CurrentUser, + SessionUser, +} + +impl fmt::Display for Owner { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Owner::Ident(ident) => write!(f, "{}", ident), + Owner::CurrentRole => write!(f, "CURRENT_ROLE"), + Owner::CurrentUser => write!(f, "CURRENT_USER"), + Owner::SessionUser => write!(f, "SESSION_USER"), + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -322,6 +348,9 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::SwapWith { table_name } => { write!(f, "SWAP WITH {table_name}") } + AlterTableOperation::OwnerTo { new_owner } => { + write!(f, "OWNER TO {new_owner}") + } AlterTableOperation::SetTblProperties { table_properties } => { write!( f, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 58f0944110..b8d72e2338 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -34,8 +34,8 @@ pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue} pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs, - GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Partition, ProcedureParam, - ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, + GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition, + ProcedureParam, 
ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeRepresentation, ViewColumnDef, }; pub use self::dml::{CreateIndex, CreateTable, Delete, Insert}; diff --git a/src/keywords.rs b/src/keywords.rs index edd3271f39..7146c4efe1 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -527,6 +527,7 @@ define_keywords!( OVERLAY, OVERWRITE, OWNED, + OWNER, PARALLEL, PARAMETER, PARQUET, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index aada0bc56d..87166f5038 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6447,6 +6447,25 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::WITH)?; let table_name = self.parse_object_name(false)?; AlterTableOperation::SwapWith { table_name } + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) + { + let new_owner = match self.parse_one_of_keywords( &[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + Some(Keyword::CURRENT_USER) => Owner::CurrentUser, + Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, + Some(Keyword::SESSION_USER) => Owner::SessionUser, + Some(_) => unreachable!(), + None => { + match self.parse_identifier(false) { + Ok(ident) => Owner::Ident(ident), + Err(e) => { + return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. 
{e}"))) + } + } + }, + }; + + AlterTableOperation::OwnerTo { new_owner } } else { let options: Vec = self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 74f70a6e57..9af4f4d6c0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -713,6 +713,78 @@ fn parse_alter_table_add_columns() { } } +#[test] +fn parse_alter_table_owner_to() { + struct TestCase { + sql: &'static str, + expected_owner: Owner, + } + + let test_cases = vec![ + TestCase { + sql: "ALTER TABLE tab OWNER TO new_owner", + expected_owner: Owner::Ident(Ident::new("new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO postgres", + expected_owner: Owner::Ident(Ident::new("postgres".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CREATE", // treats CREATE as an identifier + expected_owner: Owner::Ident(Ident::new("CREATE".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO \"new_owner\"", + expected_owner: Owner::Ident(Ident::with_quote('\"', "new_owner".to_string())), + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_USER", + expected_owner: Owner::CurrentUser, + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO CURRENT_ROLE", + expected_owner: Owner::CurrentRole, + }, + TestCase { + sql: "ALTER TABLE tab OWNER TO SESSION_USER", + expected_owner: Owner::SessionUser, + }, + ]; + + for case in test_cases { + match pg_and_generic().verified_stmt(case.sql) { + Statement::AlterTable { + name, + if_exists: _, + only: _, + operations, + location: _, + } => { + assert_eq!(name.to_string(), "tab"); + assert_eq!( + operations, + vec![AlterTableOperation::OwnerTo { + new_owner: case.expected_owner.clone() + }] + ); + } + _ => unreachable!("Expected an AlterTable statement"), + } + } + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO CREATE FOO"); + assert_eq!( + ParserError::ParserError("Expected: end of statement, 
found: FOO".to_string()), + res.unwrap_err() + ); + + let res = pg().parse_sql_statements("ALTER TABLE tab OWNER TO 4"); + assert_eq!( + ParserError::ParserError("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. sql parser error: Expected: identifier, found: 4".to_string()), + res.unwrap_err() + ); +} + #[test] fn parse_create_table_if_not_exists() { let sql = "CREATE TABLE IF NOT EXISTS uk_cities ()"; From 4e956a172344952f1162405db74599391bc25860 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 08:58:02 -0400 Subject: [PATCH 40/53] Add CHANGELOG for 0.48.0 (#1334) --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18df2e33ad..ed5c9ecb41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,43 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.48.0] 2024-07-09 + +Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +### Fixed +* Fix CI error message in CI (#1333) - Thanks @alamb +* Fix typo in sqlparser-derive README (#1310) - Thanks @leoyvens +* Re-enable trailing commas in DCL (#1318) - Thanks @MohamedAbdeen21 +* Fix a few typos in comment lines (#1316) - Thanks @git-hulk +* Fix Snowflake `SELECT * wildcard REPLACE ... 
RENAME` order (#1321) - Thanks @alexander-beedie +* Allow semi-colon at the end of UNCACHE statement (#1320) - Thanks @LorrensP-2158466 +* Return errors, not panic, when integers fail to parse in `AUTO_INCREMENT` and `TOP` (#1305) - Thanks @eejbyfeldt + +### Added +* Support `OWNER TO` clause in Postgres (#1314) - Thanks @gainings +* Support `FORMAT` clause for ClickHouse (#1335) - Thanks @git-hulk +* Support `DROP PROCEDURE` statement (#1324) - Thanks @LorrensP-2158466 +* Support `PREWHERE` condition for ClickHouse dialect (#1328) - Thanks @git-hulk +* Support `SETTINGS` pairs for ClickHouse dialect (#1327) - Thanks @git-hulk +* Support `GROUP BY WITH MODIFIER` for ClickHouse dialect (#1323) - Thanks @git-hulk +* Support DuckDB Union datatype (#1322) - Thanks @gstvg +* Support parametric arguments to `FUNCTION` for ClickHouse dialect (#1315) - Thanks @git-hulk +* Support `TO` in `CREATE VIEW` clause for Clickhouse (#1313) - Thanks @Bidaya0 +* Support `UPDATE` statements that contain tuple assignments (#1317) - Thanks @lovasoa +* Support `BY NAME` quantifier across all set ops (#1309) - Thanks @alexander-beedie +* Support SnowFlake exclusive `CREATE TABLE` options (#1233) - Thanks @balliegojr +* Support ClickHouse `CREATE TABLE` with primary key and parametrised table engine (#1289) - Thanks @7phs +* Support custom operators in Postgres (#1302) - Thanks @lovasoa +* Support ClickHouse data types (#1285) - Thanks @7phs + +### Changed +* Add stale PR github workflow (#1331) - Thanks @alamb +* Refine docs (#1326) - Thanks @emilsivervik +* Improve error messages with additional colons (#1319) - Thanks @LorrensP-2158466 +* Move Display fmt to struct for `CreateIndex` (#1307) - Thanks @philipcristiano +* Enhancing Trailing Comma Option (#1212) - Thanks @MohamedAbdeen21 +* Encapsulate `CreateTable`, `CreateIndex` into specific structs (#1291) - Thanks @philipcristiano ## [0.47.0] 2024-06-01 From 285f49258967df22a455febe22773d158dd2476f Mon Sep 17 00:00:00 2001 From: Andrew
Lamb Date: Tue, 9 Jul 2024 08:58:59 -0400 Subject: [PATCH 41/53] chore: Release sqlparser version 0.48.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8d015968b8..b0bee003e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.47.0" +version = "0.48.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From 9108bffc9a021aa1f5137381c8f3aec47e71e319 Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 10 Jul 2024 05:43:22 +0800 Subject: [PATCH 42/53] Add support of table function WITH ORDINALITY modifier for Postgres Parser (#1337) --- src/ast/query.rs | 14 ++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 5 ++++ src/test_utils.rs | 2 ++ tests/sqlparser_bigquery.rs | 8 ++++++- tests/sqlparser_clickhouse.rs | 2 ++ tests/sqlparser_common.rs | 43 +++++++++++++++++++++++++++++++++++ tests/sqlparser_databricks.rs | 3 ++- tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 3 +++ tests/sqlparser_mysql.rs | 5 ++++ tests/sqlparser_postgres.rs | 37 +++++++++++++++++++++++++++++- tests/sqlparser_redshift.rs | 3 +++ tests/sqlparser_snowflake.rs | 1 + tests/sqlparser_sqlite.rs | 3 ++- 16 files changed, 129 insertions(+), 4 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 70c781409b..608ac2e960 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -913,6 +913,10 @@ pub enum TableFactor { /// Optional version qualifier to facilitate table time-travel, as /// supported by BigQuery and MSSQL. version: Option, + /// Optional table function modifier to generate the ordinality for column.
+ /// For example, `SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t(a, b);` + /// [WITH ORDINALITY](https://www.postgresql.org/docs/current/functions-srf.html), supported by Postgres. + with_ordinality: bool, /// [Partition selection](https://dev.mysql.com/doc/refman/8.0/en/partitioning-selection.html), supported by MySQL. partitions: Vec, }, @@ -948,6 +952,7 @@ pub enum TableFactor { array_exprs: Vec, with_offset: bool, with_offset_alias: Option, + with_ordinality: bool, }, /// The `JSON_TABLE` table-valued function. /// Part of the SQL standard, but implemented only by MySQL, Oracle, and DB2. @@ -1293,6 +1298,7 @@ impl fmt::Display for TableFactor { with_hints, version, partitions, + with_ordinality, } => { write!(f, "{name}")?; if !partitions.is_empty() { @@ -1301,6 +1307,9 @@ impl fmt::Display for TableFactor { if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; } + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } if let Some(alias) = alias { write!(f, " AS {alias}")?; } @@ -1354,9 +1363,14 @@ impl fmt::Display for TableFactor { array_exprs, with_offset, with_offset_alias, + with_ordinality, } => { write!(f, "UNNEST({})", display_comma_separated(array_exprs))?; + if *with_ordinality { + write!(f, " WITH ORDINALITY")?; + } + if let Some(alias) = alias { write!(f, " AS {alias}")?; } diff --git a/src/keywords.rs b/src/keywords.rs index 7146c4efe1..a53eaccbad 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -518,6 +518,7 @@ define_keywords!( OR, ORC, ORDER, + ORDINALITY, OUT, OUTER, OUTPUTFORMAT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 87166f5038..e89eba9b19 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9209,6 +9209,7 @@ impl<'a> Parser<'a> { let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = match 
self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { Ok(Some(alias)) => Some(alias), Ok(None) => None, @@ -9235,6 +9236,7 @@ impl<'a> Parser<'a> { array_exprs, with_offset, with_offset_alias, + with_ordinality, }) } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; @@ -9273,6 +9275,8 @@ impl<'a> Parser<'a> { None }; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; // MSSQL-specific table hints: @@ -9294,6 +9298,7 @@ impl<'a> Parser<'a> { with_hints, version, partitions, + with_ordinality, }; while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { diff --git a/src/test_utils.rs b/src/test_utils.rs index 1a31d4611e..1f5300be10 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -309,6 +309,7 @@ pub fn table(name: impl Into) -> TableFactor { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } @@ -323,6 +324,7 @@ pub fn table_with_alias(name: impl Into, alias: impl Into) -> Ta with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 88e2ef9129..089a41889c 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -224,6 +224,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -1353,6 +1354,7 @@ fn parse_table_identifiers() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1525,6 +1527,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -1551,7 +1554,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - 
with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -1620,6 +1624,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, table ); @@ -1634,6 +1639,7 @@ fn parse_merge() { with_hints: Default::default(), version: Default::default(), partitions: Default::default(), + with_ordinality: false, }, source ); diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index f6b787f5c2..99db3d10cb 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -59,6 +59,7 @@ fn parse_map_access_expr() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -162,6 +163,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 86357234c8..1adda149eb 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -359,6 +359,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -387,6 +388,7 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -463,6 +465,7 @@ fn parse_update_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -530,6 +533,7 @@ fn parse_select_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }] @@ -566,6 +570,7 @@ fn parse_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, 
from[0].relation ); @@ -612,6 +617,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -623,6 +629,7 @@ fn parse_delete_statement_for_multi_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].joins[0].relation ); @@ -648,6 +655,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation ); @@ -659,6 +667,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[1].relation ); @@ -670,6 +679,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].relation ); @@ -681,6 +691,7 @@ fn parse_delete_statement_for_multi_tables_with_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, using[0].joins[0].relation ); @@ -711,6 +722,7 @@ fn parse_where_delete_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -755,6 +767,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, from[0].relation, ); @@ -770,6 +783,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }]), @@ -4551,6 +4565,7 @@ fn test_parse_named_window() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -4933,6 +4948,7 @@ fn parse_interval_and_or_xor() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -5286,6 +5302,7 @@ fn parse_unnest_in_from_clause() { array_exprs: 
vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5303,6 +5320,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5320,6 +5338,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: true, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5340,6 +5359,7 @@ fn parse_unnest_in_from_clause() { array_exprs: vec![Expr::Identifier(Ident::new("expr"))], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5364,6 +5384,7 @@ fn parse_unnest_in_from_clause() { )], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5394,6 +5415,7 @@ fn parse_unnest_in_from_clause() { ], with_offset: false, with_offset_alias: None, + with_ordinality: false, }, joins: vec![], }], @@ -5503,6 +5525,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5514,6 +5537,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }, @@ -5533,6 +5557,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5542,6 +5567,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5554,6 +5580,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -5563,6 +5590,7 @@ fn parse_implicit_join() { with_hints: vec![], 
version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5585,6 +5613,7 @@ fn parse_cross_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::CrossJoin, }, @@ -5607,6 +5636,7 @@ fn parse_joins_on() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -5678,6 +5708,7 @@ fn parse_joins_using() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } @@ -5741,6 +5772,7 @@ fn parse_natural_join() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: f(JoinConstraint::Natural), } @@ -6008,6 +6040,7 @@ fn parse_derived_tables() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -6905,6 +6938,7 @@ fn lateral_function() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Function { @@ -7613,6 +7647,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, } ); assert_eq!(table, table_no_into); @@ -7638,6 +7673,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -8700,6 +8736,7 @@ fn parse_pivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), aggregate_functions: vec![ expected_function("a", None), @@ -8769,6 +8806,7 @@ fn parse_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "quantity".to_string(), @@ -8835,6 +8873,7 @@ fn 
parse_pivot_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }), value: Ident { value: "population".to_string(), @@ -9159,6 +9198,7 @@ fn parse_unload() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9304,6 +9344,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9389,6 +9430,7 @@ fn parse_connect_by() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -9548,6 +9590,7 @@ fn test_match_recognize() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }; fn check(options: &str, expect: TableFactor) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 90056f0f72..280b97b497 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -177,7 +177,8 @@ fn test_values_clause() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }), query .body diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 400daa8a89..0e61b86c93 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -166,6 +166,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], @@ -205,6 +206,7 @@ fn test_select_union_by_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], }], diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 53280d7d8e..5f0b9f5750 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -359,6 +359,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git 
a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6968347ece..26bece81d7 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -64,6 +64,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], + with_ordinality: false, }, joins: vec![] },] @@ -335,6 +336,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -526,6 +528,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 74def31bf2..ec094bcd68 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1728,6 +1728,7 @@ fn parse_select_with_numeric_prefix_column_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], @@ -1782,6 +1783,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], @@ -1847,6 +1849,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -1859,6 +1862,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier(vec![ @@ -2282,6 +2286,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9af4f4d6c0..164bb72c79 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3501,6 +3501,7 @@ fn 
parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); @@ -4054,7 +4055,8 @@ fn parse_join_constraint_unnest_alias() { Ident::new("a") ])], with_offset: false, - with_offset_alias: None + with_offset_alias: None, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier("c1".into())), @@ -4362,3 +4364,36 @@ fn parse_create_table_with_options() { _ => unreachable!(), } } + +#[test] +fn test_table_function_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM generate_series(1, 10) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::Table { + ref name, + with_ordinality: true, + .. + } => { + assert_eq!("generate_series", name.to_string().as_str()); + } + _ => panic!("Expecting TableFactor::Table with ordinality"), + } +} + +#[test] +fn test_table_unnest_with_ordinality() { + let from = pg_and_generic() + .verified_only_select("SELECT * FROM UNNEST([10, 20, 30]) WITH ORDINALITY AS t") + .from; + assert_eq!(1, from.len()); + match from[0].relation { + TableFactor::UNNEST { + with_ordinality: true, + .. 
+ } => {} + _ => panic!("Expecting TableFactor::UNNEST with ordinality"), + } +} diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 938e6e8872..440116e026 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -48,6 +48,7 @@ fn test_square_brackets_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -94,6 +95,7 @@ fn test_double_quotes_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], + with_ordinality: false, }, joins: vec![], } @@ -114,6 +116,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2f4ed1316f..7a2288cbbc 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -870,6 +870,7 @@ fn parse_delimited_identifiers() { args, with_hints, version, + with_ordinality: _, partitions: _, } => { assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index dd1e77d5df..629ab5fc2d 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -399,7 +399,8 @@ fn parse_update_tuple_row_values() { args: None, with_hints: vec![], version: None, - partitions: vec![] + partitions: vec![], + with_ordinality: false, }, joins: vec![], }, From 993216f3ac279e1e86a16de8696e60dc78d5a418 Mon Sep 17 00:00:00 2001 From: hulk Date: Sat, 13 Jul 2024 17:46:26 +0800 Subject: [PATCH 43/53] Enable PARTITION BY feature for PostgreSQL while parsing the create table statement (#1338) --- src/ast/helpers/stmt_create_table.rs | 4 +- src/parser/mod.rs | 59 +++++++++++++++------------- tests/sqlparser_postgres.rs | 44 +++++++++++++++++++++ 3 files changed, 77 insertions(+), 30 deletions(-) diff --git 
a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index d862a36aef..92c75e6a44 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -496,9 +496,9 @@ impl TryFrom for CreateTableBuilder { } } -/// Helper return type when parsing configuration for a BigQuery `CREATE TABLE` statement. +/// Helper return type when parsing configuration for a `CREATE TABLE` statement. #[derive(Default)] -pub(crate) struct BigQueryTableConfiguration { +pub(crate) struct CreateTableConfiguration { pub partition_by: Option>, pub cluster_by: Option>>, pub options: Option>, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e89eba9b19..4d2319a082 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -31,7 +31,7 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::stmt_create_table::{BigQueryTableConfiguration, CreateTableBuilder}; +use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}; use crate::ast::*; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; @@ -5416,11 +5416,7 @@ impl<'a> Parser<'a> { None }; - let big_query_config = if dialect_of!(self is BigQueryDialect | GenericDialect) { - self.parse_optional_big_query_create_table_config()? 
- } else { - Default::default() - }; + let create_table_config = self.parse_optional_create_table_config()?; // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { @@ -5505,39 +5501,46 @@ impl<'a> Parser<'a> { .collation(collation) .on_commit(on_commit) .on_cluster(on_cluster) - .partition_by(big_query_config.partition_by) - .cluster_by(big_query_config.cluster_by) - .options(big_query_config.options) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) .primary_key(primary_key) .strict(strict) .build()) } - /// Parse configuration like partitioning, clustering information during big-query table creation. - /// - fn parse_optional_big_query_create_table_config( + /// Parse configuration like partitioning, clustering information during the table creation. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( &mut self, - ) -> Result { - let mut partition_by = None; - if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - partition_by = Some(Box::new(self.parse_expr()?)); + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None }; let mut cluster_by = None; - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - )); - }; - let mut options = None; - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if 
self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; + + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } - Ok(BigQueryTableConfiguration { + Ok(CreateTableConfiguration { partition_by, cluster_by, options, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 164bb72c79..ed17e9d8f2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4039,6 +4039,50 @@ fn parse_create_table_with_alias() { } } +#[test] +fn parse_create_table_with_partition_by() { + let sql = "CREATE TABLE t1 (a INT, b TEXT) PARTITION BY RANGE(a)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("t1", create_table.name.to_string()); + assert_eq!( + vec![ + ColumnDef { + name: "a".into(), + data_type: DataType::Int(None), + collation: None, + options: vec![] + }, + ColumnDef { + name: "b".into(), + data_type: DataType::Text, + collation: None, + options: vec![] + } + ], + create_table.columns + ); + match *create_table.partition_by.unwrap() { + Expr::Function(f) => { + assert_eq!("RANGE", f.name.to_string()); + assert_eq!( + FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier(Ident::new("a")) + ))], + }), + f.args + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + #[test] fn parse_join_constraint_unnest_alias() { assert_eq!( From 20f7ac59e38d52e293476b7ad844e7f744a16c43 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 16 Jul 2024 01:54:44 +0800 Subject: [PATCH 44/53] Fix AS query clause should be after the create table options (#1339) --- src/ast/dml.rs | 6 +++--- src/parser/mod.rs | 14 +++++++------- 
tests/sqlparser_clickhouse.rs | 24 ++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index b35b2b970d..0ebbaa3e91 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -418,9 +418,6 @@ impl Display for CreateTable { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } - if let Some(query) = &self.query { - write!(f, " AS {query}")?; - } if let Some(default_charset) = &self.default_charset { write!(f, " DEFAULT CHARSET={default_charset}")?; } @@ -440,6 +437,9 @@ impl Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(query) = &self.query { + write!(f, " AS {query}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d2319a082..d00f28a555 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5418,13 +5418,6 @@ impl<'a> Parser<'a> { let create_table_config = self.parse_optional_create_table_config()?; - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_boxed_query()?) - } else { - None - }; - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { self.expect_token(&Token::Eq)?; let next_token = self.next_token(); @@ -5477,6 +5470,13 @@ impl<'a> Parser<'a> { None }; + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_boxed_query()?) 
+ } else { + None + }; + Ok(CreateTableBuilder::new(table_name) .temporary(temporary) .columns(columns) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 99db3d10cb..752940551f 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -802,6 +802,30 @@ fn test_query_with_format_clause() { } } +#[test] +fn parse_create_table_on_commit_and_as_query() { + let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + on_commit, + query, + .. + }) => { + assert_eq!(name.to_string(), "test"); + assert_eq!(on_commit, Some(OnCommit::PreserveRows)); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![UnnamedExpr(Expr::Value(Value::Number( + "1".parse().unwrap(), + false + )))] + ); + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index ec094bcd68..c2ce407a78 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -812,6 +812,33 @@ fn parse_create_table_collate() { } } +#[test] +fn parse_create_table_both_options_and_as_query() { + let sql = "CREATE TABLE foo (id INT(11)) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb4_0900_ai_ci AS SELECT 1"; + match mysql_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + name, + collation, + query, + .. 
+ }) => { + assert_eq!(name.to_string(), "foo"); + assert_eq!(collation, Some("utf8mb4_0900_ai_ci".to_string())); + assert_eq!( + query.unwrap().body.as_select().unwrap().projection, + vec![SelectItem::UnnamedExpr(Expr::Value(number("1")))] + ); + } + _ => unreachable!(), + } + + let sql = r"CREATE TABLE foo (id INT(11)) ENGINE=InnoDB AS SELECT 1 DEFAULT CHARSET=utf8mb3"; + assert!(matches!( + mysql_and_generic().parse_sql_statements(sql), + Err(ParserError::ParserError(_)) + )); +} + #[test] fn parse_create_table_comment_character_set() { let sql = "CREATE TABLE foo (s TEXT CHARACTER SET utf8mb4 COMMENT 'comment')"; From 845a1aaddd371a586c41ab9b68ad21a4bbc3884f Mon Sep 17 00:00:00 2001 From: Nick Presta Date: Sat, 20 Jul 2024 06:51:12 -0400 Subject: [PATCH 45/53] [ClickHouse] Add support for WITH FILL to OrderByExpr (#1330) Co-authored-by: Andrew Lamb --- src/ast/mod.rs | 17 ++-- src/ast/query.rs | 91 +++++++++++++++++- src/keywords.rs | 3 + src/parser/mod.rs | 84 ++++++++++++++++- tests/sqlparser_clickhouse.rs | 169 ++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 35 +++++-- tests/sqlparser_mssql.rs | 4 +- tests/sqlparser_mysql.rs | 31 ++++--- tests/sqlparser_postgres.rs | 10 +- 9 files changed, 397 insertions(+), 47 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b8d72e2338..2a519fc7c1 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -43,14 +43,15 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Join, - JoinConstraint, JoinOperator, JsonTableColumn, JsonTableColumnErrorHandling, LateralView, - LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, - NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderByExpr, 
- PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, - ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, - SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableFactor, TableVersion, - TableWithJoins, Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate, + InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn, + JsonTableColumnErrorHandling, LateralView, LockClause, LockType, MatchRecognizePattern, + MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, + OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, + RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, + SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, + TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, + Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 608ac2e960..9786042667 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -33,7 +33,7 @@ pub struct Query { /// SELECT or UNION / EXCEPT / INTERSECT pub body: Box, /// ORDER BY - pub order_by: Vec, + pub order_by: Option, /// `LIMIT { | ALL }` pub limit: Option, @@ -67,8 +67,17 @@ impl fmt::Display for Query { write!(f, "{with} ")?; } write!(f, "{}", self.body)?; - if !self.order_by.is_empty() { - write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; + if let Some(ref order_by) = self.order_by { + write!(f, " ORDER BY")?; + if !order_by.exprs.is_empty() { + write!(f, " {}", display_comma_separated(&order_by.exprs))?; + } + if let Some(ref interpolate) = order_by.interpolate { + match 
&interpolate.exprs { + Some(exprs) => write!(f, " INTERPOLATE ({})", display_comma_separated(exprs))?, + None => write!(f, " INTERPOLATE")?, + } + } } if let Some(ref limit) = self.limit { write!(f, " LIMIT {limit}")?; @@ -1668,6 +1677,18 @@ pub enum JoinConstraint { None, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OrderBy { + pub exprs: Vec, + /// Optional: `INTERPOLATE` + /// Supported by [ClickHouse syntax] + /// + /// [ClickHouse syntax]: + pub interpolate: Option, +} + /// An `ORDER BY` expression #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1678,6 +1699,9 @@ pub struct OrderByExpr { pub asc: Option, /// Optional `NULLS FIRST` or `NULLS LAST` pub nulls_first: Option, + /// Optional: `WITH FILL` + /// Supported by [ClickHouse syntax]: + pub with_fill: Option, } impl fmt::Display for OrderByExpr { @@ -1693,6 +1717,67 @@ impl fmt::Display for OrderByExpr { Some(false) => write!(f, " NULLS LAST")?, None => (), } + if let Some(ref with_fill) = self.with_fill { + write!(f, " {}", with_fill)? + } + Ok(()) + } +} + +/// ClickHouse `WITH FILL` modifier for `ORDER BY` clause. 
+/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WithFill { + pub from: Option, + pub to: Option, + pub step: Option, +} + +impl fmt::Display for WithFill { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WITH FILL")?; + if let Some(ref from) = self.from { + write!(f, " FROM {}", from)?; + } + if let Some(ref to) = self.to { + write!(f, " TO {}", to)?; + } + if let Some(ref step) = self.step { + write!(f, " STEP {}", step)?; + } + Ok(()) + } +} + +/// ClickHouse `INTERPOLATE` clause for use in `ORDER BY` clause when using `WITH FILL` modifier. +/// Supported by [ClickHouse syntax] +/// +/// [ClickHouse syntax]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct InterpolateExpr { + pub column: Ident, + pub expr: Option, +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Interpolate { + pub exprs: Option>, +} + +impl fmt::Display for InterpolateExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.column)?; + if let Some(ref expr) = self.expr { + write!(f, " AS {}", expr)?; + } Ok(()) } } diff --git a/src/keywords.rs b/src/keywords.rs index a53eaccbad..2b6900fba3 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -297,6 +297,7 @@ define_keywords!( FILE, FILES, FILE_FORMAT, + FILL, FILTER, FIRST, FIRST_VALUE, @@ -382,6 +383,7 @@ define_keywords!( INT64, INT8, INTEGER, + INTERPOLATE, INTERSECT, INTERSECTION, INTERVAL, @@ -682,6 +684,7 @@ define_keywords!( STDDEV_SAMP, STDIN, STDOUT, + STEP, 
STORAGE_INTEGRATION, STORED, STRICT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d00f28a555..fb15275e9e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7934,7 +7934,7 @@ impl<'a> Parser<'a> { body: self.parse_insert_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], @@ -7948,7 +7948,7 @@ impl<'a> Parser<'a> { body: self.parse_update_setexpr_boxed()?, limit: None, limit_by: vec![], - order_by: vec![], + order_by: None, offset: None, fetch: None, locks: vec![], @@ -7960,9 +7960,19 @@ impl<'a> Parser<'a> { let body = self.parse_boxed_query_body(0)?; let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? + let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + self.parse_interpolations()? + } else { + None + }; + + Some(OrderBy { + exprs: order_by_exprs, + interpolate, + }) } else { - vec![] + None }; let mut limit = None; @@ -9193,7 +9203,7 @@ impl<'a> Parser<'a> { subquery: Box::new(Query { with: None, body: Box::new(values), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -10519,13 +10529,77 @@ impl<'a> Parser<'a> { None }; + let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) + { + Some(self.parse_with_fill()?) + } else { + None + }; + Ok(OrderByExpr { expr, asc, nulls_first, + with_fill, }) } + // Parse a WITH FILL clause (ClickHouse dialect) + // that follow the WITH FILL keywords in a ORDER BY clause + pub fn parse_with_fill(&mut self) -> Result { + let from = if self.parse_keyword(Keyword::FROM) { + Some(self.parse_expr()?) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + let step = if self.parse_keyword(Keyword::STEP) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(WithFill { from, to, step }) + } + + // Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect) + // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + pub fn parse_interpolations(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::INTERPOLATE) { + return Ok(None); + } + + if self.consume_token(&Token::LParen) { + let interpolations = self.parse_comma_separated0(|p| p.parse_interpolation())?; + self.expect_token(&Token::RParen)?; + // INTERPOLATE () and INTERPOLATE ( ... ) variants + return Ok(Some(Interpolate { + exprs: Some(interpolations), + })); + } + + // INTERPOLATE + Ok(Some(Interpolate { exprs: None })) + } + + // Parse a INTERPOLATE expression (ClickHouse dialect) + pub fn parse_interpolation(&mut self) -> Result { + let column = self.parse_identifier(false)?; + let expr = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(InterpolateExpr { column, expr }) + } + /// Parse a TOP clause, MSSQL equivalent of LIMIT, /// that follows after `SELECT [DISTINCT]`. 
pub fn parse_top(&mut self) -> Result { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 752940551f..10d7d66ffc 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -720,6 +720,175 @@ fn parse_group_by_with_modifier() { } } +#[test] +fn parse_select_order_by_with_fill_interpolate() { + let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ + ORDER BY \ + fname ASC NULLS FIRST WITH FILL FROM 10 TO 20 STEP 2, \ + lname DESC NULLS LAST WITH FILL FROM 30 TO 40 STEP 3 \ + INTERPOLATE (col1 AS col1 + 1) \ + LIMIT 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + OrderBy { + exprs: vec![ + OrderByExpr { + expr: Expr::Identifier(Ident::new("fname")), + asc: Some(true), + nulls_first: Some(true), + with_fill: Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + }, + OrderByExpr { + expr: Expr::Identifier(Ident::new("lname")), + asc: Some(false), + nulls_first: Some(false), + with_fill: Some(WithFill { + from: Some(Expr::Value(number("30"))), + to: Some(Expr::Value(number("40"))), + step: Some(Expr::Value(number("3"))), + }), + }, + ], + interpolate: Some(Interpolate { + exprs: Some(vec![InterpolateExpr { + column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }]) + }) + }, + select.order_by.expect("ORDER BY expected") + ); + assert_eq!(Some(Expr::Value(number("2"))), select.limit); +} + +#[test] +fn parse_select_order_by_with_fill_interpolate_multi_interpolates() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1) INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn 
parse_select_order_by_with_fill_interpolate_multi_with_fill_interpolates() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname WITH FILL INTERPOLATE (col1 AS col1 + 1), \ + lname WITH FILL INTERPOLATE (col2 AS col2 + 2)"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY only accepts a single INTERPOLATE clause"); +} + +#[test] +fn parse_select_order_by_interpolate_not_last() { + let sql = "SELECT id, fname, lname FROM customer \ + ORDER BY \ + fname INTERPOLATE (col2 AS col2 + 2), + lname"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("ORDER BY INTERPOLATE must be in the last position"); +} + +#[test] +fn parse_with_fill() { + let sql = "SELECT fname FROM customer ORDER BY fname \ + WITH FILL FROM 10 TO 20 STEP 2"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(WithFill { + from: Some(Expr::Value(number("10"))), + to: Some(Expr::Value(number("20"))), + step: Some(Expr::Value(number("2"))), + }), + select.order_by.expect("ORDER BY expected").exprs[0].with_fill + ); +} + +#[test] +fn parse_with_fill_missing_single_argument() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_with_fill_multiple_incomplete_arguments() { + let sql = "SELECT id, fname, lname FROM customer ORDER BY \ + fname WITH FILL FROM TO 20, lname WITH FILL FROM TO STEP 1"; + clickhouse_and_generic() + .parse_sql_statements(sql) + .expect_err("WITH FILL requires expressions for all arguments"); +} + +#[test] +fn parse_interpolate_body_with_columns() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL \ + INTERPOLATE (col1 AS col1 + 1, col2 AS col3, col4 AS col4 + 4)"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![ + InterpolateExpr { + 
column: Ident::new("col1"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col1"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1"))), + }), + }, + InterpolateExpr { + column: Ident::new("col2"), + expr: Some(Expr::Identifier(Ident::new("col3"))), + }, + InterpolateExpr { + column: Ident::new("col4"), + expr: Some(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col4"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("4"))), + }), + }, + ]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_without_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { exprs: None }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + +#[test] +fn parse_interpolate_with_empty_body() { + let sql = "SELECT fname FROM customer ORDER BY fname WITH FILL INTERPOLATE ()"; + let select = clickhouse().verified_query(sql); + assert_eq!( + Some(Interpolate { + exprs: Some(vec![]) + }), + select.order_by.expect("ORDER BY expected").interpolate + ); +} + #[test] fn test_prewhere() { match clickhouse_and_generic().verified_stmt("SELECT * FROM t PREWHERE x = 1 WHERE y = 2") { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1adda149eb..125e5f1f80 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -409,7 +409,7 @@ fn parse_update_set_from() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2065,19 +2065,22 @@ fn parse_select_order_by() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: 
Expr::Identifier(Ident::new("id")), asc: None, nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); } chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); @@ -2097,14 +2100,16 @@ fn parse_select_order_by_limit() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: None, + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expected").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2120,14 +2125,16 @@ fn parse_select_order_by_nulls_order() { expr: Expr::Identifier(Ident::new("lname")), asc: Some(true), nulls_first: Some(true), + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("fname")), asc: Some(false), nulls_first: Some(false), + with_fill: None, }, ], - select.order_by + select.order_by.expect("ORDER BY expeccted").exprs ); assert_eq!(Some(Expr::Value(number("2"))), select.limit); } @@ -2219,6 +2226,7 @@ fn parse_select_qualify() { expr: Expr::Identifier(Ident::new("o")), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -2579,6 +2587,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident { @@ -2587,6 +2596,7 @@ fn parse_listagg() { }), asc: None, nulls_first: None, + with_fill: None, }, ] }), @@ -3437,7 +3447,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -3464,7 +3474,7 @@ fn parse_create_table_as_table() { table_name: Some("old_table".to_string()), schema_name: Some("schema_name".to_string()), }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4384,6 +4394,7 @@ fn 
parse_window_functions() { expr: Expr::Identifier(Ident::new("dt")), asc: Some(false), nulls_first: None, + with_fill: None, }], window_frame: None, })), @@ -4593,6 +4604,7 @@ fn test_parse_named_window() { }), asc: None, nulls_first: None, + with_fill: None, }], window_frame: None, }), @@ -5014,7 +5026,7 @@ fn parse_interval_and_or_xor() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -7300,11 +7312,13 @@ fn parse_create_index() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7334,11 +7348,13 @@ fn test_create_index_with_using_function() { expr: Expr::Identifier(Ident::new("name")), asc: None, nulls_first: None, + with_fill: None, }, OrderByExpr { expr: Expr::Identifier(Ident::new("age")), asc: Some(false), nulls_first: None, + with_fill: None, }, ]; match verified_stmt(sql) { @@ -7691,7 +7707,7 @@ fn parse_merge() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -9223,7 +9239,7 @@ fn parse_unload() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, }), @@ -9622,6 +9638,7 @@ fn test_match_recognize() { expr: Expr::Identifier(Ident::new("price_date")), asc: None, nulls_first: None, + with_fill: None, }], measures: vec![ Measure { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 26bece81d7..3e8b6afbfd 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -103,7 +103,7 @@ fn parse_create_procedure() { fetch: None, locks: vec![], for_clause: None, - order_by: vec![], + order_by: None, settings: None, format_clause: None, body: Box::new(SetExpr::Select(Box::new(Select { @@ -546,7 +546,7 @@ 
fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c2ce407a78..b0b29f3471 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -946,7 +946,7 @@ fn parse_escaped_quote_identifiers_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -996,7 +996,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1043,7 +1043,7 @@ fn parse_escaped_backticks_with_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1090,7 +1090,7 @@ fn parse_escaped_backticks_with_no_escape() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1296,7 +1296,7 @@ fn parse_simple_insert() { ] ] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1340,7 +1340,7 @@ fn parse_ignore_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1384,7 +1384,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1425,7 +1425,7 @@ fn parse_priority_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1474,7 +1474,7 @@ fn parse_insert_as() { "2024-01-01".to_string() ))]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1535,7 +1535,7 @@ fn parse_insert_as() { 
Expr::Value(Value::SingleQuotedString("2024-01-01".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1580,7 +1580,7 @@ fn parse_replace_insert() { Expr::Value(number("1")) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1619,7 +1619,7 @@ fn parse_empty_row_insert() { explicit_row: false, rows: vec![vec![], vec![]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1681,7 +1681,7 @@ fn parse_insert_with_on_duplicate_update() { Expr::Value(Value::Boolean(true)), ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -1946,6 +1946,7 @@ fn parse_delete_with_order_by() { }), asc: Some(false), nulls_first: None, + with_fill: None, }], order_by ); @@ -2331,7 +2332,7 @@ fn parse_substring_in_select() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2639,7 +2640,7 @@ fn parse_hex_string_introducer() { into: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ed17e9d8f2..5ac421da02 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1159,7 +1159,7 @@ fn parse_copy_to() { value_table_mode: None, connect_by: None, }))), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -2491,7 +2491,7 @@ fn parse_array_subquery_expr() { connect_by: None, }))), }), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4162,7 +4162,7 @@ fn test_simple_postgres_insert_with_alias() { Expr::Value(Value::Number("123".to_string(), false)) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4231,7 +4231,7 @@ fn test_simple_postgres_insert_with_alias() { )) ]] 
})), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, @@ -4296,7 +4296,7 @@ fn test_simple_insert_with_quoted_alias() { Expr::Value(Value::SingleQuotedString("0123".to_string())) ]] })), - order_by: vec![], + order_by: None, limit: None, limit_by: vec![], offset: None, From 028ada8350d3b2ada4aa67f5e828b318565590f2 Mon Sep 17 00:00:00 2001 From: Ifeanyi Ubah Date: Sat, 20 Jul 2024 12:55:24 +0200 Subject: [PATCH 46/53] Support subquery expression in SET expressions (#1343) --- src/parser/mod.rs | 42 +++++++++++++++++++++++++-------------- tests/sqlparser_common.rs | 30 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fb15275e9e..132e4f04ec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1208,20 +1208,18 @@ impl<'a> Parser<'a> { Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = - if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::Subquery(self.parse_boxed_query()?) - } else if let Some(lambda) = self.try_parse_lambda() { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? 
{ + expr + } else if let Some(lambda) = self.try_parse_lambda() { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; if !self.consume_token(&Token::Period) { Ok(expr) @@ -1263,6 +1261,18 @@ impl<'a> Parser<'a> { } } + fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_none() + { + return Ok(None); + } + self.prev_token(); + + Ok(Some(Expr::Subquery(self.parse_boxed_query()?))) + } + fn try_parse_lambda(&mut self) -> Option { if !self.dialect.supports_lambda_functions() { return None; @@ -8709,7 +8719,9 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { - let value = if let Ok(expr) = self.parse_expr() { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { expr } else { self.expected("variable value", self.peek_token())? 
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 125e5f1f80..b1afdf28b6 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7135,9 +7135,39 @@ fn parse_set_variable() { _ => unreachable!(), } + // Subquery expression + for (sql, canonical) in [ + ( + "SET (a) = (SELECT 22 FROM tbl1)", + "SET (a) = ((SELECT 22 FROM tbl1))", + ), + ( + "SET (a) = (SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + "SET (a) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)))", + ), + ( + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), SELECT 33 FROM tbl3)", + "SET (a, b) = ((SELECT 22 FROM tbl1, (SELECT 1 FROM tbl2)), (SELECT 33 FROM tbl3))", + ), + ] { + multi_variable_dialects.one_statement_parses_to(sql, canonical); + } + let error_sqls = [ ("SET (a, b, c) = (1, 2, 3", "Expected: ), found: EOF"), ("SET (a, b, c) = 1, 2, 3", "Expected: (, found: 1"), + ( + "SET (a) = ((SELECT 22 FROM tbl1)", + "Expected: ), found: EOF", + ), + ( + "SET (a) = ((SELECT 22 FROM tbl1) (SELECT 22 FROM tbl1))", + "Expected: ), found: (", + ), ]; for (sql, error) in error_sqls { assert_eq!( From 71dc96658655e25288acdb9dc1d5c9d0f245016a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Sun, 21 Jul 2024 14:02:12 +0400 Subject: [PATCH 47/53] Fix quoted identifier regression edge-case with "from" in SELECT (#1346) --- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 132e4f04ec..175b027656 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10331,7 +10331,7 @@ impl<'a> Parser<'a> { Expr::Wildcard => Ok(SelectItem::Wildcard( self.parse_wildcard_additional_options()?, )), - Expr::Identifier(v) if v.value.to_lowercase() == "from" => { + Expr::Identifier(v) if v.value.to_lowercase() == "from" && 
v.quote_style.is_none() => { parser_err!( format!("Expected an expression, found: {}", v), self.peek_token().location diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b1afdf28b6..dbadb4813e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9005,7 +9005,7 @@ fn parse_non_latin_identifiers() { #[test] fn parse_trailing_comma() { - // At the moment, Duck DB is the only dialect that allows + // At the moment, DuckDB is the only dialect that allows // trailing commas anywhere in the query let trailing_commas = TestedDialects { dialects: vec![Box::new(DuckDbDialect {})], @@ -9038,11 +9038,16 @@ fn parse_trailing_comma() { ); trailing_commas.verified_stmt("SELECT album_id, name FROM track"); - trailing_commas.verified_stmt("SELECT * FROM track ORDER BY milliseconds"); - trailing_commas.verified_stmt("SELECT DISTINCT ON (album_id) name FROM track"); + // check quoted "from" identifier edge-case + trailing_commas.one_statement_parses_to( + r#"SELECT "from", FROM "from""#, + r#"SELECT "from" FROM "from""#, + ); + trailing_commas.verified_stmt(r#"SELECT "from" FROM "from""#); + // doesn't allow any trailing commas let trailing_commas = TestedDialects { dialects: vec![Box::new(GenericDialect {})], From 48ea5640a221b91a93fad769f96cd2aa37932436 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Sun, 21 Jul 2024 20:18:50 +0800 Subject: [PATCH 48/53] Support Map literal syntax for DuckDB and Generic (#1344) --- src/ast/mod.rs | 42 ++++++++++++++ src/dialect/duckdb.rs | 7 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 5 ++ src/parser/mod.rs | 44 ++++++++++++++ tests/sqlparser_common.rs | 95 +++++++++++++++++++++++++++++++ tests/sqlparser_custom_dialect.rs | 22 +++++++ 7 files changed, 219 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2a519fc7c1..cdc2e2049d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -329,6 +329,37 @@ impl fmt::Display for DictionaryField { } } +/// Represents a Map expression. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Map { + pub entries: Vec, +} + +impl Display for Map { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MAP {{{}}}", display_comma_separated(&self.entries)) + } +} + +/// A map field within a map. +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapEntry { + pub key: Box, + pub value: Box, +} + +impl fmt::Display for MapEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -764,6 +795,14 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), + /// `DuckDB` specific `Map` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: Map {key1: value1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps + Map(Map), /// An access of nested data using subscript syntax, for example `array[2]`. Subscript { expr: Box, @@ -1331,6 +1370,9 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } + Expr::Map(map) => { + write!(f, "{map}") + } Expr::Subscript { expr, subscript: key, diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index c6edeac141..1fc211685a 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -48,4 +48,11 @@ impl Dialect for DuckDbDialect { fn supports_dictionary_syntax(&self) -> bool { true } + + // DuckDB uses this syntax for `MAP`s. 
+ // + // https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 33391d4797..8d762d7804 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -70,4 +70,8 @@ impl Dialect for GenericDialect { fn supports_select_wildcard_except(&self) -> bool { true } + + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b223ead479..3ff7bb2a5f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -215,6 +215,11 @@ pub trait Dialect: Debug + Any { fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining object using the + /// syntax like `Map {1: 10, 2: 20}`. + fn support_map_literal_syntax(&self) -> bool { + false + } /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 175b027656..878cabfcc8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,6 +1078,9 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + self.parse_duckdb_map_literal() + } // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -2322,6 +2325,47 @@ impl<'a> Parser<'a> { }) } + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... 
]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Map(Map { entries: fields })) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + /// Parse clickhouse [map] /// /// Syntax diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dbadb4813e..ac5098f58a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10077,6 +10077,101 @@ fn test_dictionary_syntax() { ) } +#[test] +fn test_map_syntax() { + fn check(sql: &str, expect: Expr) { + assert_eq!( + all_dialects_where(|d| d.support_map_literal_syntax()).verified_expr(sql), + expect + ); + } + + check( + "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Edmonton".to_owned(), + ))), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString( + "Manitoba".to_owned(), + ))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Winnipeg".to_owned(), + ))), + }, + ], + }), + ); + + fn number_expr(s: &str) -> Expr { + Expr::Value(number(s)) + } + + check( + "MAP {1: 10.0, 2: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(number_expr("10.0")), + }, + MapEntry { + key: Box::new(number_expr("2")), 
+ value: Box::new(number_expr("20.0")), + }, + ], + }), + ); + + check( + "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], + named: false, + })), + value: Box::new(Expr::Value(number("10.0"))), + }, + MapEntry { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], + named: false, + })), + value: Box::new(Expr::Value(number("20.0"))), + }, + ], + }), + ); + + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::Subscript { + expr: Box::new(Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapEntry { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + subscript: Box::new(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + }), + }, + ); +} + #[test] fn parse_within_group() { verified_expr("PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sales_amount)"); diff --git a/tests/sqlparser_custom_dialect.rs b/tests/sqlparser_custom_dialect.rs index 5165913821..5b29047a45 100644 --- a/tests/sqlparser_custom_dialect.rs +++ b/tests/sqlparser_custom_dialect.rs @@ -125,6 +125,28 @@ fn custom_statement_parser() -> Result<(), ParserError> { Ok(()) } +#[test] +fn test_map_syntax_not_support_default() -> Result<(), ParserError> { + #[derive(Debug)] + struct MyDialect {} + + impl Dialect for MyDialect { + fn is_identifier_start(&self, ch: char) -> bool { + is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + is_identifier_part(ch) + } + } + + let dialect = MyDialect {}; + let sql = "SELECT MAP {1: 2}"; + let ast = Parser::parse_sql(&dialect, sql); + assert!(ast.is_err()); + Ok(()) +} + fn is_identifier_start(ch: char) -> bool { ch.is_ascii_lowercase() 
|| ch.is_ascii_uppercase() || ch == '_' } From b27abf00e2e67b28b25afc9da7c2ddd2a104c449 Mon Sep 17 00:00:00 2001 From: hulk Date: Tue, 23 Jul 2024 03:50:24 +0800 Subject: [PATCH 49/53] Allow to use `()` as the GROUP BY nothing (#1347) --- src/parser/mod.rs | 5 +++++ tests/sqlparser_common.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 878cabfcc8..11fa9e4a97 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1487,6 +1487,11 @@ impl<'a> Parser<'a> { let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allow to use empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. Please refer to GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) } else { self.parse_expr() } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ac5098f58a..dd3ed05155 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -42,6 +42,7 @@ mod test_utils; #[cfg(test)] use pretty_assertions::assert_eq; +use sqlparser::ast::Expr::Identifier; use sqlparser::test_utils::all_dialects_except; #[test] @@ -10278,3 +10279,30 @@ fn parse_auto_increment_too_large() { assert!(res.is_err(), "{res:?}"); } + +#[test] +fn test_group_by_nothing() { + let Select { group_by, .. } = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT count(1) FROM t GROUP BY ()"); + { + std::assert_eq!( + GroupByExpr::Expressions(vec![Expr::Tuple(vec![])], vec![]), + group_by + ); + } + + let Select { group_by, .. 
} = all_dialects_where(|d| d.supports_group_by_expr()) + .verified_only_select("SELECT name, count(1) FROM t GROUP BY name, ()"); + { + std::assert_eq!( + GroupByExpr::Expressions( + vec![ + Identifier(Ident::new("name".to_string())), + Expr::Tuple(vec![]) + ], + vec![] + ), + group_by + ); + } +} From 390d4d3554580f618c6d8edd177b875b849f326f Mon Sep 17 00:00:00 2001 From: hulk Date: Wed, 24 Jul 2024 00:41:07 +0800 Subject: [PATCH 50/53] Add support of MATERIALIZED/ALIAS/EPHERMERAL default column options for ClickHouse (#1348) --- src/ast/ddl.rs | 21 ++++++++ src/keywords.rs | 2 + src/parser/mod.rs | 18 +++++++ tests/sqlparser_clickhouse.rs | 96 +++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 1ed3857d78..5cc671cf5e 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -923,6 +923,18 @@ pub enum ColumnOption { NotNull, /// `DEFAULT ` Default(Expr), + + /// ClickHouse supports `MATERIALIZE`, `EPHEMERAL` and `ALIAS` expr to generate default values. 
+ /// Syntax: `b INT MATERIALIZED (a + 1)` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/create/table#default_values) + + /// `MATERIALIZED ` + Materialized(Expr), + /// `EPHEMERAL []` + Ephemeral(Option), + /// `ALIAS ` + Alias(Expr), + /// `{ PRIMARY KEY | UNIQUE } []` Unique { is_primary: bool, characteristics, @@ -978,6 +990,15 @@ impl fmt::Display for ColumnOption { Null => write!(f, "NULL"), NotNull => write!(f, "NOT NULL"), Default(expr) => write!(f, "DEFAULT {expr}"), + Materialized(expr) => write!(f, "MATERIALIZED {expr}"), + Ephemeral(expr) => { + if let Some(e) = expr { + write!(f, "EPHEMERAL {e}") + } else { + write!(f, "EPHEMERAL") + } + } + Alias(expr) => write!(f, "ALIAS {expr}"), Unique { is_primary, characteristics, diff --git a/src/keywords.rs b/src/keywords.rs index 2b6900fba3..e59e493396 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -77,6 +77,7 @@ define_keywords!( AFTER, AGAINST, AGGREGATION, + ALIAS, ALL, ALLOCATE, ALTER, @@ -267,6 +268,7 @@ define_keywords!( ENFORCED, ENGINE, ENUM, + EPHEMERAL, EPOCH, EQUALS, ERROR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 11fa9e4a97..f8267a7cb7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5748,6 +5748,24 @@ impl<'a> Parser<'a> { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::MATERIALIZED) + { + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::ALIAS) + { + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::EPHEMERAL) + { + // The expression is optional for the EPHEMERAL syntax, so we need to check + // if the column definition has remaining tokens before parsing the expression.
+ if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; Ok(Some(ColumnOption::Unique { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 10d7d66ffc..6fdadc3661 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -493,6 +493,102 @@ fn parse_create_table_with_primary_key() { .expect_err("ORDER BY supports one expression with tuple"); } +#[test] +fn parse_create_table_with_variant_default_expressions() { + let sql = concat!( + "CREATE TABLE table (", + "a DATETIME MATERIALIZED now(),", + " b DATETIME EPHEMERAL now(),", + " c DATETIME EPHEMERAL,", + " d STRING ALIAS toString(c)", + ") ENGINE=MergeTree" + ); + match clickhouse_and_generic().verified_stmt(sql) { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + assert_eq!( + columns, + vec![ + ColumnDef { + name: Ident::new("a"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Materialized(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + }, + ColumnDef { + name: Ident::new("b"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(Some(Expr::Function(Function { + name: ObjectName(vec![Ident::new("now")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + }], + }, + ColumnDef { + name: Ident::new("c"), + data_type: DataType::Datetime(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Ephemeral(None) + }], + }, + ColumnDef { + name: Ident::new("d"), + data_type: DataType::String(None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Alias(Expr::Function(Function { + name: ObjectName(vec![Ident::new("toString")]), + args: FunctionArguments::List(FunctionArgumentList { + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Identifier(Ident::new("c")) + ))], + duplicate_treatment: None, + clauses: vec![], + }), + parameters: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + })) + }], + } + ] + ) + } + _ => unreachable!(), + } +} + #[test] fn parse_create_view_with_fields_data_types() { match clickhouse().verified_stmt(r#"CREATE VIEW v 
 (i "int", f "String") AS SELECT * FROM t"#) { From 1e82a145adcc090b2768814f19f23fd4d80267a5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 12:56:55 -0400 Subject: [PATCH 51/53] Add CHANGELOG for 0.49.0 (#1350) --- CHANGELOG.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5c9ecb41..cf2d1321bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,27 @@ changes that break via addition as "Added". ## [Unreleased] Check https://github.com/sqlparser-rs/sqlparser-rs/commits/main for undocumented changes. +## [0.49.0] 2024-07-23 +As always, huge props to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! + +We are in the process of moving sqlparser to be governed as part of the Apache +DataFusion project: https://github.com/sqlparser-rs/sqlparser-rs/issues/1294 + +### Fixed +* Fix quoted identifier regression edge-case with "from" in SELECT (#1346) - Thanks @alexander-beedie +* Fix `AS` query clause should be after the create table options (#1339) - Thanks @git-hulk + +### Added + +* Support `MATERIALIZED`/`ALIAS`/`EPHEMERAL` default column options for ClickHouse (#1348) - Thanks @git-hulk +* Support `()` as the `GROUP BY` nothing (#1347) - Thanks @git-hulk +* Support Map literal syntax for DuckDB and Generic (#1344) - Thanks @goldmedal +* Support subquery expression in `SET` expressions (#1343) - Thanks @iffyio +* Support `WITH FILL` for ClickHouse (#1330) - Thanks @nickpresta +* Support `PARTITION BY` for PostgreSQL in `CREATE TABLE` statement (#1338) - Thanks @git-hulk +* Support of table function `WITH ORDINALITY` modifier for Postgres (#1337) - Thanks @git-hulk + + ## [0.48.0] 2024-07-09 Huge shout out to @iffyio @jmhain and @lovasoa for their help reviewing and merging PRs! 
From 6c64d43e1bbf4ebc78754c63560894f0d867bdac Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 23 Jul 2024 13:11:16 -0400 Subject: [PATCH 52/53] chore: Release sqlparser version 0.49.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b0bee003e3..4c510a8c6f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.48.0" +version = "0.49.0" authors = ["Andy Grove "] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser/" From b93b5f2394c191bf5cffb1a90f04953cc7036ad7 Mon Sep 17 00:00:00 2001 From: Jesse Bakker Date: Mon, 29 Jul 2024 09:34:58 +0200 Subject: [PATCH 53/53] Parse SETTINGS clause for ClickHouse table-valued functions --- src/ast/mod.rs | 4 +- src/ast/query.rs | 17 ++++++- src/parser/mod.rs | 94 ++++++++++++++++++++++++----------- tests/sqlparser_clickhouse.rs | 32 ++++++++++++ 4 files changed, 113 insertions(+), 34 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cdc2e2049d..6d40995fdf 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -50,8 +50,8 @@ pub use self::query::{ OffsetRows, OrderBy, OrderByExpr, PivotValueSource, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, - TableAlias, TableFactor, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, - Values, WildcardAdditionalOptions, With, WithFill, + TableAlias, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins, Top, TopQuantity, + ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, diff --git a/src/ast/query.rs b/src/ast/query.rs index 
9786042667..83d14ecb91 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -899,6 +899,14 @@ impl fmt::Display for ExprWithAlias { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableFunctionArgs { + pub args: Vec, + pub settings: Option>, +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -916,7 +924,7 @@ pub enum TableFactor { /// This field's value is `Some(v)`, where `v` is a (possibly empty) /// vector of arguments, in the case of a table-valued function call, /// whereas it's `None` in the case of a regular table name. - args: Option>, + args: Option, /// MSSQL-specific `WITH (...)` hints such as NOLOCK. with_hints: Vec, /// Optional version qualifier to facilitate table time-travel, as @@ -1314,7 +1322,12 @@ impl fmt::Display for TableFactor { write!(f, "PARTITION ({})", display_comma_separated(partitions))?; } if let Some(args) = args { - write!(f, "({})", display_comma_separated(args))?; + write!(f, "(")?; + write!(f, "{}", display_comma_separated(&args.args))?; + if let Some(ref settings) = args.settings { + write!(f, ", SETTINGS {}", display_comma_separated(&settings))?; + } + write!(f, ")")?; } if *with_ordinality { write!(f, " WITH ORDINALITY")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f8267a7cb7..e927b5a576 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3419,6 +3419,25 @@ impl<'a> Parser<'a> { Ok(values) } + fn parse_comma_separated_end(&mut self) -> Option { + if !self.consume_token(&Token::Comma) { + Some(Token::Comma) + } else if self.options.trailing_commas { + let token = self.peek_token().token; + match token { + Token::Word(ref kw) if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => { + Some(token) + 
} + Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { + Some(token) + } + _ => None, + } + } else { + None + } + } + /// Parse a comma-separated list of 1+ items accepted by `F` pub fn parse_comma_separated(&mut self, mut f: F) -> Result, ParserError> where @@ -3427,22 +3446,8 @@ impl<'a> Parser<'a> { let mut values = vec![]; loop { values.push(f(self)?); - if !self.consume_token(&Token::Comma) { + if self.parse_comma_separated_end().is_some() { break; - } else if self.options.trailing_commas { - match self.peek_token().token { - Token::Word(kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - break; - } - Token::RParen - | Token::SemiColon - | Token::EOF - | Token::RBracket - | Token::RBrace => break, - _ => continue, - } } } Ok(values) @@ -8087,19 +8092,7 @@ impl<'a> Parser<'a> { vec![] }; - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) - { - let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - p.expect_token(&Token::Eq)?; - let value = p.parse_value()?; - Ok(Setting { key, value }) - })?; - Some(key_values) - } else { - None - }; + let settings = self.parse_settings()?; let fetch = if self.parse_keyword(Keyword::FETCH) { Some(self.parse_fetch()?) 
@@ -8146,6 +8139,23 @@ impl<'a> Parser<'a> { } } + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause pub fn parse_for_clause(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::XML) { @@ -9360,9 +9370,9 @@ impl<'a> Parser<'a> { // Parse potential version qualifier let version = self.parse_table_version()?; - // Postgres, MSSQL: table-valued functions: + // Postgres, MSSQL, ClickHouse: table-valued functions: let args = if self.consume_token(&Token::LParen) { - Some(self.parse_optional_args()?) + Some(self.parse_table_function_args()?) } else { None }; @@ -10305,6 +10315,30 @@ impl<'a> Parser<'a> { } } + fn parse_table_function_args(&mut self) -> Result { + { + let settings = self.parse_settings()?; + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings, + }); + } + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? { + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.parse_comma_separated_end().is_some() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + /// Parses a potentially empty list of arguments to a window function /// (including the closing parenthesis). 
/// diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 6fdadc3661..8344ec83d7 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1091,6 +1091,38 @@ fn parse_create_table_on_commit_and_as_query() { } } +#[test] +fn parse_select_table_function_settings() { + let sql = r#"SELECT * FROM table_function(arg, SETTINGS setting = 3)"#; + match clickhouse_and_generic().verified_stmt(sql) { + Statement::Query(q) => { + let from = &q.body.as_select().unwrap().from; + assert_eq!(from.len(), 1); + assert_eq!(from[0].joins, vec![]); + match &from[0].relation { + Table { args, .. } => { + let args = args.as_ref().unwrap(); + assert_eq!( + args.args, + vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( + Expr::Identifier("arg".into()) + ))] + ); + assert_eq!( + args.settings, + Some(vec![Setting { + key: "setting".into(), + value: Value::Number("3".into(), false) + }]) + ) + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} + fn clickhouse() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {})],