diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs
index 5cc671cf5..af679d469 100644
--- a/src/ast/ddl.rs
+++ b/src/ast/ddl.rs
@@ -1296,20 +1296,50 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef {
     }
 }
 
-/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL
+/// PARTITION statement used in ALTER TABLE et al. such as in Hive and ClickHouse SQL.
+/// For example, ClickHouse's OPTIMIZE TABLE supports syntax like PARTITION ID 'partition_id' and PARTITION expr.
+/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-pub struct Partition {
-    pub partitions: Vec<Expr>,
+pub enum Partition {
+    /// `PARTITION ID <partition_id>`
+    Identifier(Ident),
+    /// `PARTITION <expr>`
+    Expr(Expr),
+    /// `PARTITION (<expr> [, ...])`
+    Partitions(Vec<Expr>),
 }
 
 impl fmt::Display for Partition {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(
-            f,
-            "PARTITION ({})",
-            display_comma_separated(&self.partitions)
-        )
+        match self {
+            Partition::Identifier(id) => write!(f, "PARTITION ID {id}"),
+            Partition::Expr(expr) => write!(f, "PARTITION {expr}"),
+            Partition::Partitions(partitions) => {
+                write!(f, "PARTITION ({})", display_comma_separated(partitions))
+            }
+        }
+    }
+}
+
+/// DEDUPLICATE statement used in OPTIMIZE TABLE et al. such as in ClickHouse SQL
+/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum Deduplicate {
+    /// `DEDUPLICATE` without a `BY` clause
+    All,
+    /// `DEDUPLICATE BY <expr>`
+    ByExpression(Expr),
+}
+
+impl fmt::Display for Deduplicate {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Deduplicate::All => write!(f, "DEDUPLICATE"),
+            Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"),
+        }
     }
 }
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 70f96c5c5..6444556ef 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -33,7 +33,7 @@ pub use self::data_type::{
 pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
 pub use self::ddl::{
     AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
-    ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs,
+    ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs,
     GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition,
     ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef,
     UserDefinedTypeRepresentation, ViewColumnDef,
@@ -2831,6 +2831,18 @@ pub enum Statement {
         to: Ident,
         with: Vec<SqlOption>,
     },
+    /// ```sql
+    /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// ```
+    ///
+    /// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
+    OptimizeTable {
+        name: ObjectName,
+        on_cluster: Option<Ident>,
+        partition: Option<Partition>,
+        include_final: bool,
+        deduplicate: Option<Deduplicate>,
+    },
 }
 
 impl fmt::Display for Statement {
@@ -4283,6 +4295,28 @@ impl fmt::Display for Statement {
 
                 Ok(())
             }
+            Statement::OptimizeTable {
+                name,
+                on_cluster,
+                partition,
+                include_final,
+                deduplicate,
+            } => {
+                write!(f, "OPTIMIZE TABLE {name}")?;
+                if let Some(on_cluster) = on_cluster {
+                    write!(f, " ON CLUSTER {on_cluster}")?;
+                }
+                if let Some(partition) = partition {
+                    write!(f, " {partition}")?;
+                }
+                if *include_final {
+                    write!(f, " FINAL")?;
+                }
+                if let Some(deduplicate) = deduplicate {
+                    write!(f, " {deduplicate}")?;
+                }
+                Ok(())
+            }
         }
     }
 }
diff --git a/src/keywords.rs b/src/keywords.rs
index ee2bd6173..49bd969af 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -218,6 +218,7 @@ define_keywords!(
     DECADE,
     DECIMAL,
     DECLARE,
+    DEDUPLICATE,
     DEFAULT,
     DEFAULT_DDL_COLLATION,
     DEFERRABLE,
@@ -301,6 +302,7 @@ define_keywords!(
     FILE_FORMAT,
     FILL,
     FILTER,
+    FINAL,
     FIRST,
     FIRST_VALUE,
     FIXEDSTRING,
@@ -354,6 +356,7 @@ define_keywords!(
     HOSTS,
     HOUR,
     HOURS,
+    ID,
     IDENTITY,
     IF,
     IGNORE,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 725e24bfb..67d58ea75 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -551,6 +551,10 @@ impl<'a> Parser<'a> {
                 Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
                     Ok(self.parse_load()?)
                 }
+                // `OPTIMIZE` is clickhouse specific https://clickhouse.com/docs/en/sql-reference/statements/optimize
+                Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
+                    Ok(self.parse_optimize_table()?)
+                }
                 _ => self.expected("an SQL statement", next_token),
             },
             Token::LParen => {
@@ -6270,7 +6274,7 @@ impl<'a> Parser<'a> {
         self.expect_token(&Token::LParen)?;
         let partitions = self.parse_comma_separated(Parser::parse_expr)?;
         self.expect_token(&Token::RParen)?;
-        Ok(Partition { partitions })
+        Ok(Partition::Partitions(partitions))
     }
 
     pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
@@ -11165,6 +11169,46 @@ impl<'a> Parser<'a> {
         Ok(Statement::Load { extension_name })
     }
 
+    /// ```sql
+    /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
+    /// ```
+    /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
+    pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
+        self.expect_keyword(Keyword::TABLE)?;
+        let name = self.parse_object_name(false)?;
+        let on_cluster = self.parse_optional_on_cluster()?;
+
+        let partition = if self.parse_keyword(Keyword::PARTITION) {
+            // `PARTITION ID ...` is tried first; anything else is parsed as `PARTITION expr`.
+            if self.parse_keyword(Keyword::ID) {
+                Some(Partition::Identifier(self.parse_identifier(false)?))
+            } else {
+                Some(Partition::Expr(self.parse_expr()?))
+            }
+        } else {
+            None
+        };
+
+        let include_final = self.parse_keyword(Keyword::FINAL);
+        let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
+            if self.parse_keyword(Keyword::BY) {
+                Some(Deduplicate::ByExpression(self.parse_expr()?))
+            } else {
+                Some(Deduplicate::All)
+            }
+        } else {
+            None
+        };
+
+        Ok(Statement::OptimizeTable {
+            name,
+            on_cluster,
+            partition,
+            include_final,
+            deduplicate,
+        })
+    }
+
     /// ```sql
     /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] <sequence_name>
     /// ```
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 6fdadc366..5263be29e 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number;
 use sqlparser::ast::*;
 use sqlparser::dialect::ClickHouseDialect;
 use sqlparser::dialect::GenericDialect;
+use sqlparser::parser::ParserError::ParserError;
 
 #[test]
 fn parse_map_access_expr() {
@@ -221,6 +222,65 @@ fn parse_create_table() {
     );
 }
 
+#[test]
+fn parse_optimize_table() {
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE db.t0");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster'");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 DEDUPLICATE BY id");
+    clickhouse_and_generic().verified_stmt("OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id");
+    clickhouse_and_generic()
+        .verified_stmt("OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id");
+    match clickhouse_and_generic().verified_stmt(
+        "OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id",
+    ) {
+        Statement::OptimizeTable {
+            name,
+            on_cluster,
+            partition,
+            include_final,
+            deduplicate,
+            ..
+        } => {
+            assert_eq!(name.to_string(), "t0");
+            assert_eq!(on_cluster, Some(Ident::new("cluster")));
+            assert_eq!(
+                partition,
+                Some(Partition::Identifier(Ident::with_quote('\'', "2024-07")))
+            );
+            assert!(include_final);
+            assert_eq!(
+                deduplicate,
+                Some(Deduplicate::ByExpression(Identifier(Ident::new("id"))))
+            );
+        }
+        _ => unreachable!(),
+    }
+
+    // negative cases
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 DEDUPLICATE BY")
+            .unwrap_err(),
+        ParserError("Expected: an expression:, found: EOF".to_string())
+    );
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION")
+            .unwrap_err(),
+        ParserError("Expected: an expression:, found: EOF".to_string())
+    );
+    assert_eq!(
+        clickhouse_and_generic()
+            .parse_sql_statements("OPTIMIZE TABLE t0 PARTITION ID")
+            .unwrap_err(),
+        ParserError("Expected: identifier, found: EOF".to_string())
+    );
+}
+
 fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
     ColumnDef {
         name,