diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2d79f7d6b..e59e5b5ca 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -83,7 +83,7 @@ pub use self::trigger::{ pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - TrimWhereField, Value, + NormalizationForm, TrimWhereField, Value, }; use crate::ast::helpers::stmt_data_loading::{ @@ -653,6 +653,12 @@ pub enum Expr { IsDistinctFrom(Box, Box), /// `IS NOT DISTINCT FROM` operator IsNotDistinctFrom(Box, Box), + /// ` IS [ NOT ] [ form ] NORMALIZED` + IsNormalized { + expr: Box, + form: Option, + negated: bool, + }, /// `[ NOT ] IN (val1, val2, ...)` InList { expr: Box, @@ -1118,7 +1124,7 @@ impl fmt::Display for LambdaFunction { /// `OneOrManyWithParens` implements `Deref` and `IntoIterator`, /// so you can call slice methods on it and iterate over items /// # Examples -/// Acessing as a slice: +/// Accessing as a slice: /// ``` /// # use sqlparser::ast::OneOrManyWithParens; /// let one = OneOrManyWithParens::One("a"); @@ -1419,6 +1425,24 @@ impl fmt::Display for Expr { if *regexp { "REGEXP" } else { "RLIKE" }, pattern ), + Expr::IsNormalized { + expr, + form, + negated, + } => { + let not_ = if *negated { "NOT " } else { "" }; + if form.is_none() { + write!(f, "{} IS {}NORMALIZED", expr, not_) + } else { + write!( + f, + "{} IS {}{} NORMALIZED", + expr, + not_, + form.as_ref().unwrap() + ) + } + } Expr::SimilarTo { negated, expr, @@ -7749,7 +7773,7 @@ where /// ```sql /// EXPLAIN (ANALYZE, VERBOSE TRUE, FORMAT TEXT) SELECT * FROM my_table; /// -/// VACCUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table; +/// VACUUM (VERBOSE, ANALYZE ON, PARALLEL 10) my_table; /// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/query.rs b/src/ast/query.rs index e7020ae23..9bcdc2e74 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2821,10 +2821,10 @@ impl fmt::Display for ValueTableMode { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum UpdateTableFromKind { - /// Update Statment where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake) + /// Update Statement where the 'FROM' clause is before the 'SET' keyword (Supported by Snowflake) /// For Example: `UPDATE FROM t1 SET t1.name='aaa'` BeforeSet(TableWithJoins), - /// Update Statment where the 'FROM' clause is after the 'SET' keyword (Which is the standard way) + /// Update Statement where the 'FROM' clause is after the 'SET' keyword (Which is the standard way) /// For Example: `UPDATE SET t1.name='aaa' FROM t1` AfterSet(TableWithJoins), } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 183bebf8c..fca2865bd 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1324,6 +1324,12 @@ impl Spanned for Expr { escape_char: _, any: _, } => expr.span().union(&pattern.span()), + Expr::RLike { .. } => Span::empty(), + Expr::IsNormalized { + expr, + form: _, + negated: _, + } => expr.span(), Expr::SimilarTo { negated: _, expr, @@ -1359,7 +1365,6 @@ impl Spanned for Expr { Expr::Array(array) => array.span(), Expr::MatchAgainst { .. } => Span::empty(), Expr::JsonAccess { value, path } => value.span().union(&path.span()), - Expr::RLike { .. } => Span::empty(), Expr::AnyOp { left, compare_op: _, diff --git a/src/ast/value.rs b/src/ast/value.rs index 45cc06a07..1b16646be 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -270,6 +270,35 @@ impl fmt::Display for DateTimeField { } } +#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The Unicode Standard defines four normalization forms, which are intended to eliminate +/// certain distinctions between visually or functionally identical characters. +/// +/// See [Unicode Normalization Forms](https://unicode.org/reports/tr15/) for details. +pub enum NormalizationForm { + /// Canonical Decomposition, followed by Canonical Composition. + NFC, + /// Canonical Decomposition. + NFD, + /// Compatibility Decomposition, followed by Canonical Composition. + NFKC, + /// Compatibility Decomposition. + NFKD, +} + +impl fmt::Display for NormalizationForm { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + NormalizationForm::NFC => write!(f, "NFC"), + NormalizationForm::NFD => write!(f, "NFD"), + NormalizationForm::NFKC => write!(f, "NFKC"), + NormalizationForm::NFKD => write!(f, "NFKD"), + } + } +} + pub struct EscapeQuotedString<'a> { string: &'a str, quote: char, diff --git a/src/keywords.rs b/src/keywords.rs index eb9e3ea6f..6a5677b54 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -529,6 +529,10 @@ define_keywords!( NESTED, NEW, NEXT, + NFC, + NFD, + NFKC, + NFKD, NO, NOBYPASSRLS, NOCREATEDB, @@ -539,6 +543,7 @@ define_keywords!( NOORDER, NOREPLICATION, NORMALIZE, + NORMALIZED, NOSCAN, NOSUPERUSER, NOT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ac764a535..786a0701a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3183,9 +3183,11 @@ impl<'a> Parser<'a> { { let expr2 = self.parse_expr()?; Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) + } else if let Ok(is_normalized) = self.parse_unicode_is_normalized(expr) { + Ok(is_normalized) } else { self.expected( - "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS", + "[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS", self.peek_token(), ) } @@ -3850,7 +3852,7 @@ impl<'a> Parser<'a> { /// If the current token is the `expected` keyword, consume the token. /// Otherwise, return an error. /// - // todo deprecate infavor of expected_keyword_is + // todo deprecate in favor of expected_keyword_is pub fn expect_keyword(&mut self, expected: Keyword) -> Result { if self.parse_keyword(expected) { Ok(self.get_current_token().clone()) @@ -8452,6 +8454,33 @@ impl<'a> Parser<'a> { } } + /// Parse a literal unicode normalization clause + pub fn parse_unicode_is_normalized(&mut self, expr: Expr) -> Result { + let neg = self.parse_keyword(Keyword::NOT); + let normalized_form = self.maybe_parse(|parser| { + match parser.parse_one_of_keywords(&[ + Keyword::NFC, + Keyword::NFD, + Keyword::NFKC, + Keyword::NFKD, + ]) { + Some(Keyword::NFC) => Ok(NormalizationForm::NFC), + Some(Keyword::NFD) => Ok(NormalizationForm::NFD), + Some(Keyword::NFKC) => Ok(NormalizationForm::NFKC), + Some(Keyword::NFKD) => Ok(NormalizationForm::NFKD), + _ => parser.expected("unicode normalization form", parser.peek_token()), + } + })?; + if self.parse_keyword(Keyword::NORMALIZED) { + return Ok(Expr::IsNormalized { + expr: Box::new(expr), + form: normalized_form, + negated: neg, + }); + } + self.expected("unicode normalization form", self.peek_token()) + } + pub fn parse_enum_values(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let values = self.parse_comma_separated(|parser| { @@ -8957,7 +8986,7 @@ impl<'a> Parser<'a> { } } - /// Parse a table object for insetion + /// Parse a table object for insertion /// e.g. `some_database.some_table` or `FUNCTION some_table_func(...)` pub fn parse_table_object(&mut self) -> Result { if self.dialect.supports_insert_table_function() && self.parse_keyword(Keyword::FUNCTION) { @@ -11867,7 +11896,7 @@ impl<'a> Parser<'a> { } else { let mut name = self.parse_grantee_name()?; if self.consume_token(&Token::Colon) { - // Redshift supports namespace prefix for extenrnal users and groups: + // Redshift supports namespace prefix for external users and groups: // : or : // https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-access-control-native-idp.html let ident = self.parse_identifier()?; @@ -12863,7 +12892,7 @@ impl<'a> Parser<'a> { Ok(WithFill { from, to, step }) } - // Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect) + // Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect) // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier pub fn parse_interpolations(&mut self) -> Result, ParserError> { if !self.parse_keyword(Keyword::INTERPOLATE) { @@ -14372,7 +14401,7 @@ mod tests { assert_eq!( ast, Err(ParserError::ParserError( - "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" + "Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: a at Line: 1, Column: 16" .to_string() )) ); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 07a30bc08..bbc252429 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -4579,7 +4579,7 @@ fn run_explain_analyze( expected_verbose: bool, expected_analyze: bool, expected_format: Option, - exepcted_options: Option>, + expected_options: Option>, ) { match dialect.verified_stmt(query) { Statement::Explain { @@ -4595,7 +4595,7 @@ fn run_explain_analyze( assert_eq!(verbose, expected_verbose); assert_eq!(analyze, expected_analyze); assert_eq!(format, expected_format); - assert_eq!(options, exepcted_options); + assert_eq!(options, expected_options); assert!(!query_plan); assert!(!estimate); assert_eq!("SELECT sqrt(id) FROM foo", statement.to_string()); @@ -9296,6 +9296,46 @@ fn parse_is_boolean() { verified_expr(sql) ); + let sql = "a IS NORMALIZED"; + assert_eq!( + IsNormalized { + expr: Box::new(Identifier(Ident::new("a"))), + form: None, + negated: false, + }, + verified_expr(sql) + ); + + let sql = "a IS NOT NORMALIZED"; + assert_eq!( + IsNormalized { + expr: Box::new(Identifier(Ident::new("a"))), + form: None, + negated: true, + }, + verified_expr(sql) + ); + + let sql = "a IS NFKC NORMALIZED"; + assert_eq!( + IsNormalized { + expr: Box::new(Identifier(Ident::new("a"))), + form: Some(NormalizationForm::NFKC), + negated: false, + }, + verified_expr(sql) + ); + + let sql = "a IS NOT NFKD NORMALIZED"; + assert_eq!( + IsNormalized { + expr: Box::new(Identifier(Ident::new("a"))), + form: Some(NormalizationForm::NFKD), + negated: true, + }, + verified_expr(sql) + ); + let sql = "a IS UNKNOWN"; assert_eq!( IsUnknown(Box::new(Identifier(Ident::new("a")))), @@ -9314,6 +9354,12 @@ fn parse_is_boolean() { verified_stmt("SELECT f FROM foo WHERE field IS FALSE"); verified_stmt("SELECT f FROM foo WHERE field IS NOT FALSE"); + verified_stmt("SELECT f FROM foo WHERE field IS NORMALIZED"); + verified_stmt("SELECT f FROM foo WHERE field IS NFC NORMALIZED"); + verified_stmt("SELECT f FROM foo WHERE field IS NFD NORMALIZED"); + verified_stmt("SELECT f FROM foo WHERE field IS NOT NORMALIZED"); + verified_stmt("SELECT f FROM foo WHERE field IS NOT NFKC NORMALIZED"); + verified_stmt("SELECT f FROM foo WHERE field IS UNKNOWN"); verified_stmt("SELECT f FROM foo WHERE field IS NOT UNKNOWN"); @@ -9321,7 +9367,37 @@ fn parse_is_boolean() { let res = parse_sql_statements(sql); assert_eq!( ParserError::ParserError( - "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: 0" + "Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: 0" + .to_string() + ), + res.unwrap_err() + ); + + let sql = "SELECT s, s IS XYZ NORMALIZED FROM foo"; + let res = parse_sql_statements(sql); + assert_eq!( + ParserError::ParserError( + "Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: XYZ" + .to_string() + ), + res.unwrap_err() + ); + + let sql = "SELECT s, s IS NFKC FROM foo"; + let res = parse_sql_statements(sql); + assert_eq!( + ParserError::ParserError( + "Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: FROM" + .to_string() + ), + res.unwrap_err() + ); + + let sql = "SELECT s, s IS TRIM(' NFKC ') FROM foo"; + let res = parse_sql_statements(sql); + assert_eq!( + ParserError::ParserError( + "Expected: [NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS, found: TRIM" .to_string() ), res.unwrap_err() @@ -12982,7 +13058,7 @@ fn test_trailing_commas_in_from() { let sql = "SELECT a FROM b, WHERE c = 1"; let _ = dialects.parse_sql_statements(sql).unwrap(); - // nasted + // nested let sql = "SELECT 1, 2 FROM (SELECT * FROM t,),"; let _ = dialects.parse_sql_statements(sql).unwrap(); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index dcf3f57fe..e93ac5695 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2572,7 +2572,7 @@ fn parse_kill() { } #[test] -fn parse_table_colum_option_on_update() { +fn parse_table_column_option_on_update() { let sql1 = "CREATE TABLE foo (`modification_time` DATETIME ON UPDATE CURRENT_TIMESTAMP())"; match mysql().verified_stmt(sql1) { Statement::CreateTable(CreateTable { name, columns, .. }) => {