Skip to content

Add support of parsing OPTIMIZE TABLE statement for ClickHouse #1359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions src/ast/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1296,20 +1296,45 @@ impl fmt::Display for UserDefinedTypeCompositeAttributeDef {
}
}

/// PARTITION statement used in ALTER TABLE et al. such as in Hive SQL
/// PARTITION statement used in ALTER TABLE et al. such as in Hive and ClickHouse SQL.
/// For example, ClickHouse's OPTIMIZE TABLE supports syntax like PARTITION ID 'partition_id' and PARTITION expr.
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct Partition {
pub partitions: Vec<Expr>,
pub enum Partition {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed the type of Partition from struct to enum to support more parsing states.

Identifier(Ident),
Expr(Expr),
Partitions(Vec<Expr>),
}

impl fmt::Display for Partition {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"PARTITION ({})",
display_comma_separated(&self.partitions)
)
match self {
Partition::Identifier(id) => write!(f, "PARTITION ID {id}"),
Partition::Expr(expr) => write!(f, "PARTITION {expr}"),
Partition::Partitions(partitions) => {
write!(f, "PARTITION ({})", display_comma_separated(partitions))
}
}
}
}

/// The `DEDUPLICATE` clause that may follow an `OPTIMIZE TABLE` statement in ClickHouse SQL.
///
/// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Deduplicate {
    /// A bare `DEDUPLICATE` with no `BY` expression.
    All,
    /// `DEDUPLICATE BY <expr>`.
    ByExpression(Expr),
}

impl fmt::Display for Deduplicate {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Deduplicate::All => write!(f, "DEDUPLICATE"),
Deduplicate::ByExpression(expr) => write!(f, "DEDUPLICATE BY {expr}"),
}
}
}
36 changes: 35 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub use self::data_type::{
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
ColumnOptionDef, ConstraintCharacteristics, DeferrableInitial, GeneratedAs,
ColumnOptionDef, ConstraintCharacteristics, Deduplicate, DeferrableInitial, GeneratedAs,
GeneratedExpressionMode, IndexOption, IndexType, KeyOrIndexDisplay, Owner, Partition,
ProcedureParam, ReferentialAction, TableConstraint, UserDefinedTypeCompositeAttributeDef,
UserDefinedTypeRepresentation, ViewColumnDef,
Expand Down Expand Up @@ -2831,6 +2831,18 @@ pub enum Statement {
to: Ident,
with: Vec<SqlOption>,
},
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
///
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
OptimizeTable {
name: ObjectName,
on_cluster: Option<Ident>,
partition: Option<Partition>,
include_final: bool,
deduplicate: Option<Deduplicate>,
},
}

impl fmt::Display for Statement {
Expand Down Expand Up @@ -4283,6 +4295,28 @@ impl fmt::Display for Statement {

Ok(())
}
// Renders `OPTIMIZE TABLE <name>` followed by each optional clause that is present,
// in the order: ON CLUSTER, PARTITION, FINAL, DEDUPLICATE.
Statement::OptimizeTable {
    name,
    on_cluster,
    partition,
    include_final,
    deduplicate,
} => {
    write!(f, "OPTIMIZE TABLE {name}")?;
    if let Some(on_cluster) = on_cluster {
        write!(f, " ON CLUSTER {on_cluster}")?;
    }
    // `Partition`'s own `Display` output is written as-is after a separating space.
    if let Some(partition) = partition {
        write!(f, " {partition}")?;
    }
    if *include_final {
        write!(f, " FINAL")?;
    }
    if let Some(deduplicate) = deduplicate {
        write!(f, " {deduplicate}")?;
    }
    Ok(())
}
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ define_keywords!(
DECADE,
DECIMAL,
DECLARE,
DEDUPLICATE,
DEFAULT,
DEFAULT_DDL_COLLATION,
DEFERRABLE,
Expand Down Expand Up @@ -301,6 +302,7 @@ define_keywords!(
FILE_FORMAT,
FILL,
FILTER,
FINAL,
FIRST,
FIRST_VALUE,
FIXEDSTRING,
Expand Down Expand Up @@ -354,6 +356,7 @@ define_keywords!(
HOSTS,
HOUR,
HOURS,
ID,
IDENTITY,
IF,
IGNORE,
Expand Down
45 changes: 44 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,10 @@ impl<'a> Parser<'a> {
Keyword::LOAD if dialect_of!(self is DuckDbDialect | GenericDialect) => {
Ok(self.parse_load()?)
}
// `OPTIMIZE` is ClickHouse-specific: https://clickhouse.com/docs/en/sql-reference/statements/optimize
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => {
Ok(self.parse_optimize_table()?)
}
_ => self.expected("an SQL statement", next_token),
},
Token::LParen => {
Expand Down Expand Up @@ -6270,7 +6274,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::LParen)?;
let partitions = self.parse_comma_separated(Parser::parse_expr)?;
self.expect_token(&Token::RParen)?;
Ok(Partition { partitions })
Ok(Partition::Partitions(partitions))
}

pub fn parse_alter_table_operation(&mut self) -> Result<AlterTableOperation, ParserError> {
Expand Down Expand Up @@ -11165,6 +11169,45 @@ impl<'a> Parser<'a> {
Ok(Statement::Load { extension_name })
}

/// Parses the remainder of a ClickHouse `OPTIMIZE TABLE` statement.
///
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
///
/// Parsing starts at the `TABLE` keyword, so the leading `OPTIMIZE` keyword must
/// already have been consumed by the caller.
///
/// # Errors
///
/// Returns a `ParserError` if `TABLE` is missing or if any of the nested
/// name / identifier / expression parsers fail.
///
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
    self.expect_keyword(Keyword::TABLE)?;
    let name = self.parse_object_name(false)?;
    let on_cluster = self.parse_optional_on_cluster()?;

    // `PARTITION ID <ident>` is tried before falling back to `PARTITION <expr>`.
    let partition = if !self.parse_keyword(Keyword::PARTITION) {
        None
    } else if self.parse_keyword(Keyword::ID) {
        Some(Partition::Identifier(self.parse_identifier(false)?))
    } else {
        Some(Partition::Expr(self.parse_expr()?))
    };

    let include_final = self.parse_keyword(Keyword::FINAL);

    // `DEDUPLICATE` alone means "all"; an optional `BY <expr>` narrows it.
    let deduplicate = if !self.parse_keyword(Keyword::DEDUPLICATE) {
        None
    } else if self.parse_keyword(Keyword::BY) {
        Some(Deduplicate::ByExpression(self.parse_expr()?))
    } else {
        Some(Deduplicate::All)
    };

    Ok(Statement::OptimizeTable {
        name,
        on_cluster,
        partition,
        include_final,
        deduplicate,
    })
}

/// ```sql
/// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] <sequence_name>
/// ```
Expand Down
60 changes: 60 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use sqlparser::ast::Value::Number;
use sqlparser::ast::*;
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::ParserError::ParserError;

#[test]
fn parse_map_access_expr() {
Expand Down Expand Up @@ -221,6 +222,65 @@ fn parse_create_table() {
);
}

#[test]
fn parse_optimize_table() {
    // Statements that must parse and serialize back to the exact same text.
    for sql in [
        "OPTIMIZE TABLE t0",
        "OPTIMIZE TABLE db.t0",
        "OPTIMIZE TABLE t0 ON CLUSTER 'cluster'",
        "OPTIMIZE TABLE t0 ON CLUSTER 'cluster' FINAL",
        "OPTIMIZE TABLE t0 FINAL DEDUPLICATE",
        "OPTIMIZE TABLE t0 DEDUPLICATE",
        "OPTIMIZE TABLE t0 DEDUPLICATE BY id",
        "OPTIMIZE TABLE t0 FINAL DEDUPLICATE BY id",
        "OPTIMIZE TABLE t0 PARTITION tuple('2023-04-22') DEDUPLICATE BY id",
    ] {
        clickhouse_and_generic().verified_stmt(sql);
    }

    // A statement using every optional clause: check each parsed field.
    let full_stmt = clickhouse_and_generic().verified_stmt(
        "OPTIMIZE TABLE t0 ON CLUSTER cluster PARTITION ID '2024-07' FINAL DEDUPLICATE BY id",
    );
    match full_stmt {
        Statement::OptimizeTable {
            name,
            on_cluster,
            partition,
            include_final,
            deduplicate,
            ..
        } => {
            assert_eq!(name.to_string(), "t0");
            assert_eq!(on_cluster, Some(Ident::new("cluster")));
            assert_eq!(
                partition,
                Some(Partition::Identifier(Ident::with_quote('\'', "2024-07")))
            );
            assert!(include_final);
            assert_eq!(
                deduplicate,
                Some(Deduplicate::ByExpression(Identifier(Ident::new("id"))))
            );
        }
        _ => unreachable!(),
    }

    // Negative cases: a clause keyword with its operand missing.
    let rejected = [
        (
            "OPTIMIZE TABLE t0 DEDUPLICATE BY",
            "Expected: an expression:, found: EOF",
        ),
        (
            "OPTIMIZE TABLE t0 PARTITION",
            "Expected: an expression:, found: EOF",
        ),
        (
            "OPTIMIZE TABLE t0 PARTITION ID",
            "Expected: identifier, found: EOF",
        ),
    ];
    for (sql, message) in rejected {
        assert_eq!(
            clickhouse_and_generic()
                .parse_sql_statements(sql)
                .unwrap_err(),
            ParserError(message.to_string())
        );
    }
}

fn column_def(name: Ident, data_type: DataType) -> ColumnDef {
ColumnDef {
name,
Expand Down
Loading