From c00b4fab9628c50987dae7705936885eec7808fc Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Tue, 7 Jan 2025 10:45:20 +0100 Subject: [PATCH 1/8] chore: Rebase --- src/ast/dml.rs | 28 +++++++-- src/ast/query.rs | 15 ++++- src/ast/spans.rs | 2 + src/dialect/clickhouse.rs | 7 +++ src/keywords.rs | 2 +- src/parser/mod.rs | 103 +++++++++++++++++++++++++--------- tests/sqlparser_clickhouse.rs | 26 ++++++++- tests/sqlparser_postgres.rs | 10 +++- 8 files changed, 155 insertions(+), 38 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d68a2277e..fb913500b 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -33,10 +33,10 @@ pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr, - FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, - InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, - OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, SqliteOnConflict, TableEngine, - TableObject, TableWithJoins, Tag, WrappedCollection, + FileFormat, FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, + HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, + OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, + SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection, }; /// CREATE INDEX statement. @@ -497,6 +497,20 @@ pub struct Insert { pub priority: Option, /// Only for mysql pub insert_alias: Option, + /// Settings used in together with a specified `FORMAT`. 
+ /// + /// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'` + /// + /// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into) + /// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats) + pub settings: Option>, + /// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`, + /// `JSON`, `JSONAsString`, `LineAsString` and more. + /// + /// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, {"foo": 3}` + /// + /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) + pub format_clause: Option, } impl Display for Insert { @@ -545,11 +559,17 @@ impl Display for Insert { write!(f, "({}) ", display_comma_separated(&self.after_columns))?; } + if let Some(settings) = &self.settings { + write!(f, "SETTINGS {} ", display_comma_separated(settings))?; + } + if let Some(source) = &self.source { write!(f, "{source}")?; } else if !self.assignments.is_empty() { write!(f, "SET ")?; write!(f, "{}", display_comma_separated(&self.assignments))?; + } else if let Some(format_clause) = &self.format_clause { + write!(f, "{format_clause}")?; } else if self.source.is_none() && self.columns.is_empty() { write!(f, "DEFAULT VALUES")?; } diff --git a/src/ast/query.rs b/src/ast/query.rs index 2f0663a5f..9977fa06d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2467,14 +2467,25 @@ impl fmt::Display for GroupByExpr { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { - Identifier(Ident), + Identifier { + ident: Ident, + expr: Option>, + }, Null, } impl fmt::Display for FormatClause { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Identifier { ident, expr } => { + write!(f, 
"FORMAT {}", ident)?; + + if let Some(exprs) = expr { + write!(f, " {}", display_comma_separated(exprs))?; + } + + Ok(()) + } FormatClause::Null => write!(f, "FORMAT NULL"), } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 8a27c4ac1..19f6074b3 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1156,6 +1156,8 @@ impl Spanned for Insert { priority: _, // todo, mysql specific insert_alias: _, // todo, mysql specific assignments, + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific } = self; union_spans( diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 267f766f7..22304b78c 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -54,4 +54,11 @@ impl Dialect for ClickHouseDialect { fn supports_insert_table_function(&self) -> bool { true } + // ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting + // with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected. + // + // [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats) + fn supports_dictionary_syntax(&self) -> bool { + true + } } diff --git a/src/keywords.rs b/src/keywords.rs index bd538ec69..066c76d01 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -951,7 +951,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, - // for ClickHouse SELECT * FROM t FORMAT... + // for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT... Keyword::FORMAT, // for Snowflake START WITH .. 
CONNECT BY Keyword::START, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b6e3fd1c4..acfb8552b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9702,12 +9702,7 @@ impl<'a> Parser<'a> { let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) && self.parse_keyword(Keyword::FORMAT) { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) - } else { - let ident = self.parse_identifier()?; - Some(FormatClause::Identifier(ident)) - } + Some(self.parse_format_clause(false)?) } else { None }; @@ -12033,35 +12028,56 @@ impl<'a> Parser<'a> { let is_mysql = dialect_of!(self is MySqlDialect); - let (columns, partitioned, after_columns, source, assignments) = - if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None, vec![]) - } else { - let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let (columns, partitioned, after_columns, source, assignments) = if self + .parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) + { + (vec![], None, vec![], None, vec![]) + } else { + let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { + let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - (columns, partitioned, after_columns) + let partitioned = self.parse_insert_partition()?; + // Hive allows you to specify columns after partitions as well if you want. + let after_columns = if dialect_of!(self is HiveDialect) { + self.parse_parenthesized_column_list(Optional, false)? 
} else { - Default::default() + vec![] }; + (columns, partitioned, after_columns) + } else { + Default::default() + }; - let (source, assignments) = - if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) { - (None, self.parse_comma_separated(Parser::parse_assignment)?) - } else { - (Some(self.parse_query()?), vec![]) - }; + let (source, assignments) = if self.peek_keyword(Keyword::FORMAT) + || self.peek_keyword(Keyword::SETTINGS) + { + (None, vec![]) + } else if self.dialect.supports_insert_set() && self.parse_keyword(Keyword::SET) { + (None, self.parse_comma_separated(Parser::parse_assignment)?) + } else { + (Some(self.parse_query()?), vec![]) + }; + + (columns, partitioned, after_columns, source, assignments) + }; + + let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect) + { + // Settings always comes before `FORMAT` for ClickHouse: + // + let settings = self.parse_settings()?; - (columns, partitioned, after_columns, source, assignments) + let format = if self.parse_keyword(Keyword::FORMAT) { + Some(self.parse_format_clause(true)?) + } else { + None }; + (format, settings) + } else { + (None, None) + }; + let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::AS) { @@ -12146,10 +12162,41 @@ impl<'a> Parser<'a> { replace_into, priority, insert_alias, + settings, + format_clause, })) } } + // Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and + // `INSERT` and also when using the CLI for pipes. It may or may not take an additional + // expression after the format so we try to parse the expression but allow failure. + // + // Since we know we never take an additional expression in `SELECT` context we never only try + // to parse if `can_have_expression` is true. 
+ // + // + pub fn parse_format_clause( + &mut self, + can_have_expression: bool, + ) -> Result { + if self.parse_keyword(Keyword::NULL) { + Ok(FormatClause::Null) + } else { + let ident = self.parse_identifier()?; + let expr = if can_have_expression { + match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) { + Ok(expr) => Some(expr), + _ => None, + } + } else { + None + }; + + Ok(FormatClause::Identifier { ident, expr }) + } + } + /// Returns true if the immediate tokens look like the /// beginning of a subquery. `(SELECT ...` fn peek_subquery_start(&mut self) -> bool { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 4fa657baa..d1ac4c033 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1384,7 +1384,10 @@ fn test_query_with_format_clause() { } else { assert_eq!( query.format_clause, - Some(FormatClause::Identifier(Ident::new(*format))) + Some(FormatClause::Identifier { + ident: Ident::new(*format), + expr: None + }) ); } } @@ -1404,6 +1407,27 @@ fn test_query_with_format_clause() { } } +#[test] +fn test_insert_query_with_format_clause() { + let cases = [ + r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#, + r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#, + r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#, + r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#, + r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#, + r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#, + r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#, + r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love banana", "I love orange""#, + r#"INSERT INTO tbl (foo) SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, + r#"INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format', format_template_row = 
'/some/path/row.format' FORMAT Template"#, + r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#, + ]; + + for sql in &cases { + clickhouse_and_generic().verified_stmt(sql); + } +} + #[test] fn parse_create_table_on_commit_and_as_query() { let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS SELECT 1"#; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ce31a0628..864fb5eb3 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() { returning: None, replace_into: false, priority: None, - insert_alias: None + insert_alias: None, + settings: None, + format_clause: None, }) ) } @@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() { returning: None, replace_into: false, priority: None, - insert_alias: None + insert_alias: None, + settings: None, + format_clause: None, }) ) } @@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() { replace_into: false, priority: None, insert_alias: None, + settings: None, + format_clause: None, }) ) } From 41aa1694c289f30ada5b85115d1a4c44769e0c66 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Tue, 7 Jan 2025 10:46:59 +0100 Subject: [PATCH 2/8] doc: Remove keyword documentation --- src/keywords.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/keywords.rs b/src/keywords.rs index 066c76d01..8c8860e12 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -949,9 +949,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PARTITION, // for Clickhouse PREWHERE Keyword::PREWHERE, - // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, - // for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT... Keyword::FORMAT, // for Snowflake START WITH .. 
CONNECT BY Keyword::START, From b0c3285fd4d166b31ffb12e4c35523b0ce2c643c Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Tue, 7 Jan 2025 14:26:19 +0100 Subject: [PATCH 3/8] docs: Simplify docs --- src/ast/dml.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index fb913500b..9d4f15505 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -497,12 +497,11 @@ pub struct Insert { pub priority: Option, /// Only for mysql pub insert_alias: Option, - /// Settings used in together with a specified `FORMAT`. + /// Settings used for ClickHouse. /// /// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset = '/some/path/resultset.format'` /// /// [ClickHouse `INSERT INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into) - /// [ClickHouse Formats](https://clickhouse.com/docs/en/interfaces/formats) pub settings: Option>, /// Format for `INSERT` statement when not using standard SQL format. Can be e.g. `CSV`, /// `JSON`, `JSONAsString`, `LineAsString` and more. 
From 8679c2421d51f7b5c00b58055154124b5d3e4c72 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Tue, 7 Jan 2025 14:26:43 +0100 Subject: [PATCH 4/8] test: Dedupe test cases for case insensitivity --- tests/sqlparser_clickhouse.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index d1ac4c033..e9cfc46db 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1413,7 +1413,6 @@ fn test_insert_query_with_format_clause() { r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#, r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#, r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#, - r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#, r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#, r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#, r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#, From 346d9120a9afb19b1f675d8c93b853120c53cb7f Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Tue, 7 Jan 2025 14:32:36 +0100 Subject: [PATCH 5/8] fix: Remove `source` check in `columns` branch If `source` is not `None` we will land in the first if branch so no need to check if it's `None` again. 
--- src/ast/dml.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 9d4f15505..7d21c4366 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -569,7 +569,7 @@ impl Display for Insert { write!(f, "{}", display_comma_separated(&self.assignments))?; } else if let Some(format_clause) = &self.format_clause { write!(f, "{format_clause}")?; - } else if self.source.is_none() && self.columns.is_empty() { + } else if self.columns.is_empty() { write!(f, "DEFAULT VALUES")?; } From b8699fbe4b46b6768ee6584b42e451a36ccf0c9b Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Fri, 10 Jan 2025 09:19:52 +0100 Subject: [PATCH 6/8] fix: Use separate types for `FormatClause` and `InputFormatClause` --- src/ast/dml.rs | 6 +++--- src/ast/mod.rs | 8 +++---- src/ast/query.rs | 36 ++++++++++++++++++++----------- src/parser/mod.rs | 40 +++++++++++++---------------------- tests/sqlparser_clickhouse.rs | 5 +---- 5 files changed, 47 insertions(+), 48 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 7d21c4366..de555c109 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -32,8 +32,8 @@ use sqlparser_derive::{Visit, VisitMut}; pub use super::ddl::{ColumnDef, TableConstraint}; use super::{ - display_comma_separated, display_separated, Assignment, ClusteredBy, CommentDef, Expr, - FileFormat, FormatClause, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, + display_comma_separated, display_separated, query::InputFormatClause, Assignment, ClusteredBy, + CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, Setting, SqlOption, SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, WrappedCollection, @@ -509,7 +509,7 @@ pub struct Insert { /// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, "bar": 2}, 
{"foo": 3}` /// /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) - pub format_clause: Option, + pub format_clause: Option, } impl Display for Insert { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5ab2fc939..1f8df3529 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -61,10 +61,10 @@ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, ForJson, ForXml, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, Interpolate, - InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn, - JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, LateralView, - LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, + InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, + JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, + LateralView, LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, diff --git a/src/ast/query.rs b/src/ast/query.rs index 9977fa06d..e7020ae23 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2467,27 +2467,39 @@ impl fmt::Display for GroupByExpr { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { - Identifier { - ident: Ident, - expr: Option>, - }, + Identifier(Ident), Null, } impl fmt::Display for FormatClause { 
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - FormatClause::Identifier { ident, expr } => { - write!(f, "FORMAT {}", ident)?; + FormatClause::Identifier(ident) => write!(f, "FORMAT {}", ident), + FormatClause::Null => write!(f, "FORMAT NULL"), + } + } +} - if let Some(exprs) = expr { - write!(f, " {}", display_comma_separated(exprs))?; - } +/// FORMAT identifier in input context, specific to ClickHouse. +/// +/// [ClickHouse]: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct InputFormatClause { + pub ident: Ident, + pub values: Vec, +} - Ok(()) - } - FormatClause::Null => write!(f, "FORMAT NULL"), +impl fmt::Display for InputFormatClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "FORMAT {}", self.ident)?; + + if !self.values.is_empty() { + write!(f, " {}", display_comma_separated(self.values.as_slice()))?; } + + Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index acfb8552b..b07b92b4f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9702,7 +9702,12 @@ impl<'a> Parser<'a> { let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) && self.parse_keyword(Keyword::FORMAT) { - Some(self.parse_format_clause(false)?) + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier()?; + Some(FormatClause::Identifier(ident)) + } } else { None }; @@ -12068,7 +12073,7 @@ impl<'a> Parser<'a> { let settings = self.parse_settings()?; let format = if self.parse_keyword(Keyword::FORMAT) { - Some(self.parse_format_clause(true)?) + Some(self.parse_input_format_clause()?) } else { None }; @@ -12169,32 +12174,17 @@ impl<'a> Parser<'a> { } // Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and - // `INSERT` and also when using the CLI for pipes. 
It may or may not take an additional - // expression after the format so we try to parse the expression but allow failure. - // - // Since we know we never take an additional expression in `SELECT` context we never only try - // to parse if `can_have_expression` is true. + // `INSERT` and also when using the CLI for pipes. For `INSERT` it can take an optional values + // list which we try to parse here. // // - pub fn parse_format_clause( - &mut self, - can_have_expression: bool, - ) -> Result { - if self.parse_keyword(Keyword::NULL) { - Ok(FormatClause::Null) - } else { - let ident = self.parse_identifier()?; - let expr = if can_have_expression { - match self.try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) { - Ok(expr) => Some(expr), - _ => None, - } - } else { - None - }; + pub fn parse_input_format_clause(&mut self) -> Result { + let ident = self.parse_identifier()?; + let values = self + .try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) + .unwrap_or_default(); - Ok(FormatClause::Identifier { ident, expr }) - } + Ok(InputFormatClause { ident, values }) } /// Returns true if the immediate tokens look like the diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index e9cfc46db..d604e5dc8 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1384,10 +1384,7 @@ fn test_query_with_format_clause() { } else { assert_eq!( query.format_clause, - Some(FormatClause::Identifier { - ident: Ident::new(*format), - expr: None - }) + Some(FormatClause::Identifier(Ident::new(*format))) ); } } From 2943608ee3c2a15a0cf8d2f241a3590e3cadf2a9 Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Fri, 10 Jan 2025 17:52:24 +0100 Subject: [PATCH 7/8] chore: Fix rebase --- src/dialect/clickhouse.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 22304b78c..5a0737770 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -54,6 +54,7 @@ impl 
Dialect for ClickHouseDialect { fn supports_insert_table_function(&self) -> bool { true } + // ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting // with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected. // From 3845c7f6757f6ca5f944b7a50cf98ef478d17d3e Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Fri, 10 Jan 2025 18:04:19 +0100 Subject: [PATCH 8/8] fix: Add new trait methods for insert format, use `maybe_parse` --- src/dialect/clickhouse.rs | 4 ++++ src/dialect/mod.rs | 5 +++++ src/parser/mod.rs | 11 ++++------- tests/sqlparser_clickhouse.rs | 2 +- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 5a0737770..884dfcbcb 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -55,6 +55,10 @@ impl Dialect for ClickHouseDialect { true } + fn supports_insert_format(&self) -> bool { + true + } + // ClickHouse uses this for some FORMAT expressions in `INSERT` context, e.g. when inserting // with FORMAT JSONEachRow a raw JSON key-value expression is valid and expected. // diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index a682e4f63..32b0ed482 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -797,6 +797,11 @@ pub trait Dialect: Debug + Any { fn supports_insert_table_function(&self) -> bool { false } + + /// Does the dialect support insert formats, e.g. `INSERT INTO ... 
FORMAT <format>` + fn supports_insert_format(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b07b92b4f..c17402515 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12066,8 +12066,7 @@ impl<'a> Parser<'a> { (columns, partitioned, after_columns, source, assignments) }; - let (format_clause, settings) = if dialect_of!(self is ClickHouseDialect | GenericDialect) - { + let (format_clause, settings) = if self.dialect.supports_insert_format() { // Settings always comes before `FORMAT` for ClickHouse: // let settings = self.parse_settings()?; @@ -12080,7 +12079,7 @@ impl<'a> Parser<'a> { (format, settings) } else { - (None, None) + Default::default() }; let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) @@ -12173,15 +12172,13 @@ impl<'a> Parser<'a> { } } - // Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and - // `INSERT` and also when using the CLI for pipes. For `INSERT` it can take an optional values - // list which we try to parse here. + // Parses input format clause used for [ClickHouse]. // // pub fn parse_input_format_clause(&mut self) -> Result<InputFormatClause, ParserError> { let ident = self.parse_identifier()?; let values = self - .try_parse(|p| p.parse_comma_separated(|p| p.parse_expr())) + .maybe_parse(|p| p.parse_comma_separated(|p| p.parse_expr()))? .unwrap_or_default(); Ok(InputFormatClause { ident, values }) diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index d604e5dc8..fed4308fc 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1420,7 +1420,7 @@ fn test_insert_query_with_format_clause() { ]; for sql in &cases { - clickhouse_and_generic().verified_stmt(sql); + clickhouse().verified_stmt(sql); } }