From 478ee0215ca7cfd062beb6ffe4b5f1202c123ba5 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 17 Jul 2024 23:36:22 +0800 Subject: [PATCH 1/5] support map literal syntax for duckdb and generic --- src/ast/mod.rs | 42 ++++++++++++++++++++++++++++++++++++++++ src/dialect/duckdb.rs | 7 +++++++ src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 5 +++++ src/parser/mod.rs | 44 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b8d72e233..835f635da 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -328,6 +328,37 @@ impl fmt::Display for DictionaryField { } } +/// Represents a Map expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Map { + pub fields: Vec, +} + +impl Display for Map { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MAP {{{}}}", display_comma_separated(&self.fields)) + } +} + +/// A map field within a map. +/// +/// [duckdb]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MapField { + pub key: Box, + pub value: Box, +} + +impl fmt::Display for MapField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}: {}", self.key, self.value) + } +} + /// Options for `CAST` / `TRY_CAST` /// BigQuery: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -763,6 +794,14 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), + /// `DuckDB` specific `Map` literal expression [1] + /// + /// Syntax: + /// ```sql + /// syntax: Map {key1: value1[, ... ]} + /// ``` + /// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps + Map(Map), /// An access of nested data using subscript syntax, for example `array[2]`. Subscript { expr: Box, @@ -1330,6 +1369,9 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } + Expr::Map(map) => { + write!(f, "{map}") + } Expr::Subscript { expr, subscript: key, diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index c6edeac14..1fc211685 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -48,4 +48,11 @@ impl Dialect for DuckDbDialect { fn supports_dictionary_syntax(&self) -> bool { true } + + // DuckDB uses this syntax for `MAP`s. + // + // https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 33391d479..8d762d780 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -70,4 +70,8 @@ impl Dialect for GenericDialect { fn supports_select_wildcard_except(&self) -> bool { true } + + fn support_map_literal_syntax(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b223ead47..3ff7bb2a5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -215,6 +215,11 @@ pub trait Dialect: Debug + Any { fn supports_dictionary_syntax(&self) -> bool { false } + /// Returns true if the dialect supports defining object using the + /// syntax like `Map {1: 10, 2: 20}`. + fn support_map_literal_syntax(&self) -> bool { + false + } /// Returns true if the dialect supports lambda functions, for example: /// /// ```sql diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4d2319a08..e44592247 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,6 +1078,9 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } + Keyword::MAP if self.peek_token() == Token::LBrace => { + self.parse_duckdb_map_literal() + } // Here `w` is a word, check if it's a part of a multipart // identifier, a function call, or a simple identifier: _ => match self.peek_token().token { @@ -2312,6 +2315,47 @@ impl<'a> Parser<'a> { }) } + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... ]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_map_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Map(Map { fields })) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapField { + key: Box::new(key), + value: Box::new(value), + }) + } + /// Parse clickhouse [map] /// /// Syntax From 7b50f09253e9a03e0ff3157a9f5b33abc227e6e5 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Wed, 17 Jul 2024 23:36:32 +0800 Subject: [PATCH 2/5] add test case for map literal --- tests/sqlparser_common.rs | 95 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1adda149e..f3cf34ba4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10025,6 +10025,101 @@ fn test_dictionary_syntax() { ) } +#[test] +fn test_map_syntax() { + fn check(sql: &str, expect: Expr) { + assert_eq!( + all_dialects_where(|d| d.support_map_literal_syntax()).verified_expr(sql), + expect + ); + } + + check( + "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", + Expr::Map(Map { + fields: vec![ + MapField { + key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Edmonton".to_owned(), + ))), + }, + MapField { + key: Box::new(Expr::Value(Value::SingleQuotedString( + "Manitoba".to_owned(), + ))), + value: Box::new(Expr::Value(Value::SingleQuotedString( + "Winnipeg".to_owned(), + ))), + }, + ], + }), + ); + + fn number_expr(s: &str) -> Expr { + Expr::Value(number(s)) + } + + check( + "MAP {1: 10.0, 2: 20.0}", + Expr::Map(Map { + fields: vec![ + MapField { + key: Box::new(number_expr("1")), + value: Box::new(number_expr("10.0")), + }, + MapField { + key: Box::new(number_expr("2")), + value: Box::new(number_expr("20.0")), + }, + ], + }), + ); + + check( + "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", + Expr::Map(Map { + fields: vec![ + MapField { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], + named: false, + })), + value: Box::new(Expr::Value(number("10.0"))), + }, + MapField { + key: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], + named: false, + })), + value: Box::new(Expr::Value(number("20.0"))), + }, + ], + }), + ); + + check( + "MAP {'a': 10, 'b': 20}['a']", + Expr::Subscript { + expr: Box::new(Expr::Map(Map { + fields: vec![ + MapField { + key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), + value: Box::new(number_expr("10")), + }, + MapField { + key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), + value: Box::new(number_expr("20")), + }, + ], + })), + subscript: Box::new(Subscript::Index { + index: Expr::Value(Value::SingleQuotedString("a".to_owned())), + }), + }, + ); +} + #[test] fn parse_within_group() { verified_expr("PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sales_amount)"); From 501c399156316257c50477fc3559fdbcaee84953 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 20 Jul 2024 17:40:29 +0800 Subject: [PATCH 3/5] add dialect check when parsing --- src/parser/mod.rs | 2 +- tests/sqlparser_custom_dialect.rs | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e44592247..2d8ce04d8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,7 +1078,7 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } - Keyword::MAP if self.peek_token() == Token::LBrace => { + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.supports_dictionary_syntax() => { self.parse_duckdb_map_literal() } // Here `w` is a word, check if it's a part of a multipart diff --git a/tests/sqlparser_custom_dialect.rs b/tests/sqlparser_custom_dialect.rs index 516591382..5b29047a4 100644 --- a/tests/sqlparser_custom_dialect.rs +++ b/tests/sqlparser_custom_dialect.rs @@ -125,6 +125,28 @@ fn custom_statement_parser() -> Result<(), ParserError> { Ok(()) } +#[test] +fn test_map_syntax_not_support_default() -> Result<(), ParserError> { + #[derive(Debug)] + struct MyDialect {} + + impl Dialect for MyDialect { + fn is_identifier_start(&self, ch: char) -> bool { + is_identifier_start(ch) + } + + fn is_identifier_part(&self, ch: char) -> bool { + is_identifier_part(ch) + } + } + + let dialect = MyDialect {}; + let sql = "SELECT MAP {1: 2}"; + let ast = Parser::parse_sql(&dialect, sql); + assert!(ast.is_err()); + Ok(()) +} + fn is_identifier_start(ch: char) -> bool { ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' } From cdf21ffcd9689734d2af52589e1b7f3d3e4285e4 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sat, 20 Jul 2024 17:41:59 +0800 Subject: [PATCH 4/5] fix wrong check --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2d8ce04d8..48b210f38 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1078,7 +1078,7 @@ impl<'a> Parser<'a> { let expr = self.parse_subexpr(Self::PLUS_MINUS_PREC)?; Ok(Expr::Prior(Box::new(expr))) } - Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.supports_dictionary_syntax() => { + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { self.parse_duckdb_map_literal() } // Here `w` is a word, check if it's a part of a multipart From bf95e3a8035c8d2ade545a4a7294fe2ce66d779b Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Sun, 21 Jul 2024 19:08:45 +0800 Subject: [PATCH 5/5] rename fields to entries --- src/ast/mod.rs | 8 ++++---- src/parser/mod.rs | 6 +++--- tests/sqlparser_common.rs | 24 ++++++++++++------------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 835f635da..467821541 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -333,12 +333,12 @@ impl fmt::Display for DictionaryField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Map { - pub fields: Vec, + pub entries: Vec, } impl Display for Map { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "MAP {{{}}}", display_comma_separated(&self.fields)) + write!(f, "MAP {{{}}}", display_comma_separated(&self.entries)) } } @@ -348,12 +348,12 @@ impl Display for Map { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MapField { +pub struct MapEntry { pub key: Box, pub value: Box, } -impl fmt::Display for MapField { +impl fmt::Display for MapEntry { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}: {}", self.key, self.value) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48b210f38..37db5637a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2331,7 +2331,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RBrace)?; - Ok(Expr::Map(Map { fields })) + Ok(Expr::Map(Map { entries: fields })) } /// Parse a field for a duckdb [map] @@ -2343,14 +2343,14 @@ impl<'a> Parser<'a> { /// ``` /// /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps - fn parse_duckdb_map_field(&mut self) -> Result { + fn parse_duckdb_map_field(&mut self) -> Result { let key = self.parse_expr()?; self.expect_token(&Token::Colon)?; let value = self.parse_expr()?; - Ok(MapField { + Ok(MapEntry { key: Box::new(key), value: Box::new(value), }) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f3cf34ba4..83b99eff1 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -10037,14 +10037,14 @@ fn test_map_syntax() { check( "MAP {'Alberta': 'Edmonton', 'Manitoba': 'Winnipeg'}", Expr::Map(Map { - fields: vec![ - MapField { + entries: vec![ + MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("Alberta".to_owned()))), value: Box::new(Expr::Value(Value::SingleQuotedString( "Edmonton".to_owned(), ))), }, - MapField { + MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString( "Manitoba".to_owned(), ))), @@ -10063,12 +10063,12 @@ fn test_map_syntax() { check( "MAP {1: 10.0, 2: 20.0}", Expr::Map(Map { - fields: vec![ - MapField { + entries: vec![ + MapEntry { key: Box::new(number_expr("1")), value: Box::new(number_expr("10.0")), }, - MapField { + MapEntry { key: Box::new(number_expr("2")), value: Box::new(number_expr("20.0")), }, @@ -10079,15 +10079,15 @@ fn test_map_syntax() { check( "MAP {[1, 2, 3]: 10.0, [4, 5, 6]: 20.0}", Expr::Map(Map { - fields: vec![ - MapField { + entries: vec![ + MapEntry { key: Box::new(Expr::Array(Array { elem: vec![number_expr("1"), number_expr("2"), number_expr("3")], named: false, })), value: Box::new(Expr::Value(number("10.0"))), }, - MapField { + MapEntry { key: Box::new(Expr::Array(Array { elem: vec![number_expr("4"), number_expr("5"), number_expr("6")], named: false, @@ -10102,12 +10102,12 @@ fn test_map_syntax() { "MAP {'a': 10, 'b': 20}['a']", Expr::Subscript { expr: Box::new(Expr::Map(Map { - fields: vec![ - MapField { + entries: vec![ + MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))), value: Box::new(number_expr("10")), }, - MapField { + MapEntry { key: Box::new(Expr::Value(Value::SingleQuotedString("b".to_owned()))), value: Box::new(number_expr("20")), },