From 10f82242d0f86dd65d72f3c616b18f14cdc2aa1b Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 21 Jan 2025 11:44:39 +0100 Subject: [PATCH 1/4] National strings: check if dialect supports backslash escape --- src/tokenizer.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 39ca84c9f..9a5d77838 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -971,7 +971,8 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // N'...' - a - let s = self.tokenize_single_quoted_string(chars, '\'', true)?; + let backslash_escape = self.dialect.supports_string_literal_backslash_escape(); + let s = self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } _ => { @@ -3543,4 +3544,17 @@ mod tests { ]; compare(expected, tokens); } + + #[test] + fn test_national_strings() { + let dialect = PostgreSqlDialect {}; + let sql = "select n'''''\\'"; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + let expected = vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::NationalStringLiteral("''\\".to_string()), + ]; + compare(expected, tokens); + } } From 78c4580a71c1c8f0182b952996fe331c8003da90 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Tue, 21 Jan 2025 11:47:50 +0100 Subject: [PATCH 2/4] Format code --- src/tokenizer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9a5d77838..c03d36b15 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -971,8 +971,10 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // N'...' 
- a - let backslash_escape = self.dialect.supports_string_literal_backslash_escape(); - let s = self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; + let backslash_escape = + self.dialect.supports_string_literal_backslash_escape(); + let s = + self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } _ => { From da2ce86489e5107e864f6743a39c2a8ba0fb8a01 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 23 Jan 2025 15:18:53 +0100 Subject: [PATCH 3/4] Use `all_dialects_where` to test backslash escape for every dialect --- src/test_utils.rs | 14 +++++++++++++- src/tokenizer.rs | 36 ++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 914be7d9f..961668f67 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -33,7 +33,7 @@ use core::fmt::Debug; use crate::dialect::*; use crate::parser::{Parser, ParserError}; -use crate::tokenizer::Tokenizer; +use crate::tokenizer::{Tokenizer, Token}; use crate::{ast::*, parser::ParserOptions}; #[cfg(test)] @@ -237,6 +237,18 @@ impl TestedDialects { pub fn verified_expr(&self, sql: &str) -> Expr { self.expr_parses_to(sql, sql) } + + /// Check that the tokenizer returns the expected tokens for the given SQL. + pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) { + self.dialects.iter().for_each(|dialect| { + let mut tokenizer = Tokenizer::new(&**dialect, sql); + if let Some(options) = &self.options { + tokenizer = tokenizer.with_unescape(options.unescape); + } + let tokens = tokenizer.tokenize().unwrap(); + assert_eq!(expected, tokens); + }); + } } /// Returns all available dialects. 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c03d36b15..13d4c46d7 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2159,6 +2159,7 @@ mod tests { BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, }; use core::fmt::Debug; + use crate::test_utils::all_dialects_where; #[test] fn tokenizer_error_impl() { @@ -3548,15 +3549,30 @@ mod tests { } #[test] - fn test_national_strings() { - let dialect = PostgreSqlDialect {}; - let sql = "select n'''''\\'"; - let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); - let expected = vec![ - Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), - Token::NationalStringLiteral("''\\".to_string()), - ]; - compare(expected, tokens); + fn test_national_strings_backslash_escape_not_supported() { + all_dialects_where( + |dialect| !dialect.supports_string_literal_backslash_escape(), + ).tokenizes_to( + "select n'''''\\'", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::NationalStringLiteral("''\\".to_string()), + ], + ); + } + + #[test] + fn test_national_strings_backslash_escape_supported() { + all_dialects_where( + |dialect| dialect.supports_string_literal_backslash_escape(), + ).tokenizes_to( + "select n'''''\\''", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::NationalStringLiteral("'''".to_string()), + ], + ); } } From 761f5098a608cb815448254dfd6d257754fd6717 Mon Sep 17 00:00:00 2001 From: Hans Ott Date: Thu, 23 Jan 2025 15:20:01 +0100 Subject: [PATCH 4/4] Format code --- src/test_utils.rs | 2 +- src/tokenizer.rs | 40 +++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/test_utils.rs b/src/test_utils.rs index 961668f67..51e4fd748 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -33,7 +33,7 @@ use core::fmt::Debug; use crate::dialect::*; use crate::parser::{Parser, ParserError}; -use 
crate::tokenizer::{Tokenizer, Token}; +use crate::tokenizer::{Token, Tokenizer}; use crate::{ast::*, parser::ParserOptions}; #[cfg(test)] diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 13d4c46d7..08e233b66 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -2158,8 +2158,8 @@ mod tests { use crate::dialect::{ BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, }; - use core::fmt::Debug; use crate::test_utils::all_dialects_where; + use core::fmt::Debug; #[test] fn tokenizer_error_impl() { @@ -3550,29 +3550,27 @@ mod tests { #[test] fn test_national_strings_backslash_escape_not_supported() { - all_dialects_where( - |dialect| !dialect.supports_string_literal_backslash_escape(), - ).tokenizes_to( - "select n'''''\\'", - vec![ - Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), - Token::NationalStringLiteral("''\\".to_string()), - ], - ); + all_dialects_where(|dialect| !dialect.supports_string_literal_backslash_escape()) + .tokenizes_to( + "select n'''''\\'", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::NationalStringLiteral("''\\".to_string()), + ], + ); } #[test] fn test_national_strings_backslash_escape_supported() { - all_dialects_where( - |dialect| dialect.supports_string_literal_backslash_escape(), - ).tokenizes_to( - "select n'''''\\''", - vec![ - Token::make_keyword("select"), - Token::Whitespace(Whitespace::Space), - Token::NationalStringLiteral("'''".to_string()), - ], - ); + all_dialects_where(|dialect| dialect.supports_string_literal_backslash_escape()) + .tokenizes_to( + "select n'''''\\''", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::NationalStringLiteral("'''".to_string()), + ], + ); } }