From ffa1c8f8531706776981708b82dd5a112bad888c Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 29 May 2019 15:46:21 -0400 Subject: [PATCH] Parse column constraints in any order CREATE TABLE t (a INT NOT NULL DEFAULT 1 PRIMARY KEY) is as valid as CREATE TABLE t (a INT DEFAULT 1 PRIMARY KEY NOT NULL). --- src/sqlast/ddl.rs | 120 +++++++++++++++++- src/sqlast/mod.rs | 34 +----- src/sqlparser.rs | 89 ++++++++------ tests/sqlparser_common.rs | 150 +++++++++++++++++------ tests/sqlparser_postgres.rs | 238 ++++++++++++++++++++---------------- 5 files changed, 412 insertions(+), 219 deletions(-) diff --git a/src/sqlast/ddl.rs b/src/sqlast/ddl.rs index 38cd5adfb..266a42550 100644 --- a/src/sqlast/ddl.rs +++ b/src/sqlast/ddl.rs @@ -1,6 +1,6 @@ //! AST types specific to CREATE/ALTER variants of `SQLStatement` //! (commonly referred to as Data Definition Language, or DDL) -use super::{ASTNode, SQLIdent, SQLObjectName}; +use super::{ASTNode, SQLIdent, SQLObjectName, SQLType}; /// An `ALTER TABLE` (`SQLStatement::SQLAlterTable`) operation #[derive(Debug, Clone, PartialEq, Hash)] @@ -48,11 +48,6 @@ pub enum TableConstraint { impl ToString for TableConstraint { fn to_string(&self) -> String { - fn format_constraint_name(name: &Option) -> String { - name.as_ref() - .map(|name| format!("CONSTRAINT {} ", name)) - .unwrap_or_default() - } match self { TableConstraint::Unique { name, @@ -84,3 +79,116 @@ impl ToString for TableConstraint { } } } + +/// SQL column definition +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct SQLColumnDef { + pub name: SQLIdent, + pub data_type: SQLType, + pub collation: Option, + pub options: Vec, +} + +impl ToString for SQLColumnDef { + fn to_string(&self) -> String { + format!( + "{} {}{}", + self.name, + self.data_type.to_string(), + self.options + .iter() + .map(|c| format!(" {}", c.to_string())) + .collect::>() + .join("") + ) + } +} + +/// An optionally-named `ColumnOption`: `[ CONSTRAINT <name> ] <column-option>`. 
+/// +/// Note that implementations are substantially more permissive than the ANSI +/// specification on what order column options can be presented in, and whether +/// they are allowed to be named. The specification distinguishes between +/// constraints (NOT NULL, UNIQUE, PRIMARY KEY, and CHECK), which can be named +/// and can appear in any order, and other options (DEFAULT, GENERATED), which +/// cannot be named and must appear in a fixed order. PostgreSQL, however, +/// allows preceding any option with `CONSTRAINT <name>`, even those that are +/// not really constraints, like NULL and DEFAULT. MSSQL is less permissive, +/// allowing DEFAULT, UNIQUE, PRIMARY KEY and CHECK to be named, but not NULL or +/// NOT NULL constraints (the last of which is in violation of the spec). +/// +/// For maximum flexibility, we don't distinguish between constraint and +/// non-constraint options, lumping them all together under the umbrella of +/// "column options," and we allow any column option to be named. +#[derive(Debug, Clone, PartialEq, Hash)] +pub struct ColumnOptionDef { + pub name: Option, + pub option: ColumnOption, +} + +impl ToString for ColumnOptionDef { + fn to_string(&self) -> String { + format!( + "{}{}", + format_constraint_name(&self.name), + self.option.to_string() + ) + } +} + +/// `ColumnOption`s are modifiers that follow a column definition in a `CREATE +/// TABLE` statement. +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum ColumnOption { + /// `NULL` + Null, + /// `NOT NULL` + NotNull, + /// `DEFAULT <expr>` + Default(ASTNode), + /// `{ PRIMARY KEY | UNIQUE }` + Unique { + is_primary: bool, + }, + /// A referential integrity constraint (`[FOREIGN KEY REFERENCES + /// <foreign_table> (<referred_columns>)`). 
+ ForeignKey { + foreign_table: SQLObjectName, + referred_columns: Vec, + }, + // `CHECK (<expr>)` + Check(ASTNode), +} + +impl ToString for ColumnOption { + fn to_string(&self) -> String { + use ColumnOption::*; + match self { + Null => "NULL".to_string(), + NotNull => "NOT NULL".to_string(), + Default(expr) => format!("DEFAULT {}", expr.to_string()), + Unique { is_primary } => { + if *is_primary { + "PRIMARY KEY".to_string() + } else { + "UNIQUE".to_string() + } + } + ForeignKey { + foreign_table, + referred_columns, + } => format!( + "REFERENCES {} ({})", + foreign_table.to_string(), + referred_columns.join(", ") + ), + Check(expr) => format!("CHECK ({})", expr.to_string(),), + } + } +} + +fn format_constraint_name(name: &Option) -> String { + name.as_ref() + .map(|name| format!("CONSTRAINT {} ", name)) + .unwrap_or_default() +} diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 9431cce16..c112a153a 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -22,7 +22,9 @@ mod value; use std::ops::Deref; -pub use self::ddl::{AlterTableOperation, TableConstraint}; +pub use self::ddl::{ + AlterTableOperation, ColumnOption, ColumnOptionDef, SQLColumnDef, TableConstraint, +}; pub use self::query::{ Cte, Fetch, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, SQLSelectItem, SQLSetExpr, SQLSetOperator, SQLValues, TableAlias, TableFactor, @@ -580,36 +582,6 @@ impl ToString for SQLAssignment { } } -/// SQL column definition -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct SQLColumnDef { - pub name: SQLIdent, - pub data_type: SQLType, - pub is_primary: bool, - pub is_unique: bool, - pub default: Option, - pub allow_null: bool, -} - -impl ToString for SQLColumnDef { - fn to_string(&self) -> String { - let mut s = format!("{} {}", self.name, self.data_type.to_string()); - if self.is_primary { - s += " PRIMARY KEY"; - } - if self.is_unique { - s += " UNIQUE"; - } - if let Some(ref default) = self.default { - s += &format!(" DEFAULT {}", 
default.to_string()); - } - if !self.allow_null { - s += " NOT NULL"; - } - s - } -} - /// SQL function #[derive(Debug, Clone, PartialEq, Hash)] pub struct SQLFunction { diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 35a7145a3..ec05e99ba 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -156,29 +156,6 @@ impl Parser { Ok(expr) } - /// Parse expression for DEFAULT clause in CREATE TABLE - pub fn parse_default_expr(&mut self, precedence: u8) -> Result { - debug!("parsing expr"); - let mut expr = self.parse_prefix()?; - debug!("prefix: {:?}", expr); - loop { - // stop parsing on `NULL` | `NOT NULL` - match self.peek_token() { - Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break, - _ => {} - } - - let next_precedence = self.get_next_precedence()?; - debug!("next precedence: {:?}", next_precedence); - if precedence >= next_precedence { - break; - } - - expr = self.parse_infix(expr, next_precedence)?; - } - Ok(expr) - } - /// Parse an expression prefix pub fn parse_prefix(&mut self) -> Result { let tok = self @@ -897,29 +874,24 @@ impl Parser { } else if let Some(Token::SQLWord(column_name)) = self.peek_token() { self.next_token(); let data_type = self.parse_data_type()?; - let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); - let is_unique = self.parse_keyword("UNIQUE"); - let default = if self.parse_keyword("DEFAULT") { - let expr = self.parse_default_expr(0)?; - Some(expr) + let collation = if self.parse_keyword("COLLATE") { + Some(self.parse_object_name()?) 
} else { None }; - let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { - false - } else { - let _ = self.parse_keyword("NULL"); - true - }; - debug!("default: {:?}", default); + let mut options = vec![]; + loop { + match self.peek_token() { + None | Some(Token::Comma) | Some(Token::RParen) => break, + _ => options.push(self.parse_column_option_def()?), + } + } columns.push(SQLColumnDef { name: column_name.as_sql_ident(), data_type, - allow_null, - is_primary, - is_unique, - default, + collation, + options, }); } else { return self.expected("column name or constraint definition", self.peek_token()); @@ -936,6 +908,45 @@ impl Parser { Ok((columns, constraints)) } + pub fn parse_column_option_def(&mut self) -> Result { + let name = if self.parse_keyword("CONSTRAINT") { + Some(self.parse_identifier()?) + } else { + None + }; + + let option = if self.parse_keywords(vec!["NOT", "NULL"]) { + ColumnOption::NotNull + } else if self.parse_keyword("NULL") { + ColumnOption::Null + } else if self.parse_keyword("DEFAULT") { + ColumnOption::Default(self.parse_expr()?) 
+ } else if self.parse_keywords(vec!["PRIMARY", "KEY"]) { + ColumnOption::Unique { is_primary: true } + } else if self.parse_keyword("UNIQUE") { + ColumnOption::Unique { is_primary: false } + } else if self.parse_keyword("REFERENCES") { + let foreign_table = self.parse_object_name()?; + let referred_columns = self.parse_parenthesized_column_list(Mandatory)?; + ColumnOption::ForeignKey { + foreign_table, + referred_columns, + } + } else if self.parse_keyword("CHECK") { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + ColumnOption::Check(expr) + } else { + return parser_err!(format!( + "Unexpected token in column definition: {:?}", + self.peek_token() + )); + }; + + Ok(ColumnOptionDef { name, option }) + } + pub fn parse_optional_table_constraint( &mut self, ) -> Result, ParserError> { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 697cbc088..d84a609ed 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -831,13 +831,17 @@ fn parse_create_table() { let sql = "CREATE TABLE uk_cities (\ name VARCHAR(100) NOT NULL,\ lat DOUBLE NULL,\ - lng DOUBLE NULL)"; + lng DOUBLE, + constrained INT NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), + ref INT REFERENCES othertable (a, b))"; let ast = one_statement_parses_to( sql, "CREATE TABLE uk_cities (\ name character varying(100) NOT NULL, \ - lat double, \ - lng double)", + lat double NULL, \ + lng double, \ + constrained int NULL CONSTRAINT pkey PRIMARY KEY NOT NULL UNIQUE CHECK (constrained > 0), \ + ref int REFERENCES othertable (a, b))", ); match ast { SQLStatement::SQLCreateTable { @@ -850,28 +854,85 @@ fn parse_create_table() { location: None, } => { assert_eq!("uk_cities", name.to_string()); - assert_eq!(3, columns.len()); + assert_eq!( + columns, + vec![ + SQLColumnDef { + name: "name".into(), + data_type: SQLType::Varchar(Some(100)), + collation: None, + options: vec![ColumnOptionDef { + 
name: None, + option: ColumnOption::NotNull + }], + }, + SQLColumnDef { + name: "lat".into(), + data_type: SQLType::Double, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Null + }], + }, + SQLColumnDef { + name: "lng".into(), + data_type: SQLType::Double, + collation: None, + options: vec![], + }, + SQLColumnDef { + name: "constrained".into(), + data_type: SQLType::Int, + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Null + }, + ColumnOptionDef { + name: Some("pkey".into()), + option: ColumnOption::Unique { is_primary: true } + }, + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull + }, + ColumnOptionDef { + name: None, + option: ColumnOption::Unique { is_primary: false }, + }, + ColumnOptionDef { + name: None, + option: ColumnOption::Check(verified_expr("constrained > 0")), + } + ], + }, + SQLColumnDef { + name: "ref".into(), + data_type: SQLType::Int, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::ForeignKey { + foreign_table: SQLObjectName(vec!["othertable".into()]), + referred_columns: vec!["a".into(), "b".into(),], + } + }] + } + ] + ); assert!(constraints.is_empty()); - - let c_name = &columns[0]; - assert_eq!("name", c_name.name); - assert_eq!(SQLType::Varchar(Some(100)), c_name.data_type); - assert_eq!(false, c_name.allow_null); - - let c_lat = &columns[1]; - assert_eq!("lat", c_lat.name); - assert_eq!(SQLType::Double, c_lat.data_type); - assert_eq!(true, c_lat.allow_null); - - let c_lng = &columns[2]; - assert_eq!("lng", c_lng.name); - assert_eq!(SQLType::Double, c_lng.data_type); - assert_eq!(true, c_lng.allow_null); - assert_eq!(with_options, vec![]); } _ => unreachable!(), } + + let res = parse_sql_statements("CREATE TABLE t (a int NOT NULL GARBAGE)"); + assert!(res + .unwrap_err() + .to_string() + .contains("Unexpected token in column definition")); } #[test] @@ -908,13 +969,13 @@ fn 
parse_create_external_table() { let sql = "CREATE EXTERNAL TABLE uk_cities (\ name VARCHAR(100) NOT NULL,\ lat DOUBLE NULL,\ - lng DOUBLE NULL)\ + lng DOUBLE)\ STORED AS TEXTFILE LOCATION '/tmp/example.csv"; let ast = one_statement_parses_to( sql, "CREATE EXTERNAL TABLE uk_cities (\ name character varying(100) NOT NULL, \ - lat double, \ + lat double NULL, \ lng double) \ STORED AS TEXTFILE LOCATION '/tmp/example.csv'", ); @@ -929,24 +990,37 @@ fn parse_create_external_table() { location, } => { assert_eq!("uk_cities", name.to_string()); - assert_eq!(3, columns.len()); + assert_eq!( + columns, + vec![ + SQLColumnDef { + name: "name".into(), + data_type: SQLType::Varchar(Some(100)), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull + }], + }, + SQLColumnDef { + name: "lat".into(), + data_type: SQLType::Double, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Null + }], + }, + SQLColumnDef { + name: "lng".into(), + data_type: SQLType::Double, + collation: None, + options: vec![], + }, + ] + ); assert!(constraints.is_empty()); - let c_name = &columns[0]; - assert_eq!("name", c_name.name); - assert_eq!(SQLType::Varchar(Some(100)), c_name.data_type); - assert_eq!(false, c_name.allow_null); - - let c_lat = &columns[1]; - assert_eq!("lat", c_lat.name); - assert_eq!(SQLType::Double, c_lat.data_type); - assert_eq!(true, c_lat.allow_null); - - let c_lng = &columns[2]; - assert_eq!("lng", c_lng.name); - assert_eq!(SQLType::Double, c_lng.data_type); - assert_eq!(true, c_lng.allow_null); - assert!(external); assert_eq!(FileFormat::TEXTFILE, file_format.unwrap()); assert_eq!("/tmp/example.csv", location.unwrap()); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f8753429c..4529b591b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -9,10 +9,10 @@ use sqlparser::test_utils::*; #[test] fn parse_create_table_with_defaults() { let sql = 
"CREATE TABLE public.customer ( - customer_id integer DEFAULT nextval(public.customer_customer_id_seq) NOT NULL, + customer_id integer DEFAULT nextval(public.customer_customer_id_seq), store_id smallint NOT NULL, first_name character varying(45) NOT NULL, - last_name character varying(45) NOT NULL, + last_name character varying(45) COLLATE \"es_ES\" NOT NULL, email character varying(50), address_id smallint NOT NULL, activebool boolean DEFAULT true NOT NULL, @@ -31,24 +31,123 @@ fn parse_create_table_with_defaults() { location: None, } => { assert_eq!("public.customer", name.to_string()); - assert_eq!(10, columns.len()); + assert_eq!( + columns, + vec![ + SQLColumnDef { + name: "customer_id".into(), + data_type: SQLType::Int, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Default( + pg().verified_expr("nextval(public.customer_customer_id_seq)") + ) + }], + }, + SQLColumnDef { + name: "store_id".into(), + data_type: SQLType::SmallInt, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + }], + }, + SQLColumnDef { + name: "first_name".into(), + data_type: SQLType::Varchar(Some(45)), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + }], + }, + SQLColumnDef { + name: "last_name".into(), + data_type: SQLType::Varchar(Some(45)), + collation: Some(SQLObjectName(vec!["\"es_ES\"".into()])), + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + }], + }, + SQLColumnDef { + name: "email".into(), + data_type: SQLType::Varchar(Some(50)), + collation: None, + options: vec![], + }, + SQLColumnDef { + name: "address_id".into(), + data_type: SQLType::SmallInt, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull + }], + }, + SQLColumnDef { + name: "activebool".into(), + data_type: SQLType::Boolean, + collation: None, + options: vec![ + ColumnOptionDef { + name: None, 
+ option: ColumnOption::Default(ASTNode::SQLValue(Value::Boolean( + true + ))), + }, + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + } + ], + }, + SQLColumnDef { + name: "create_date".into(), + data_type: SQLType::Date, + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Default( + pg().verified_expr("CAST(now() AS text)") + ) + }, + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + } + ], + }, + SQLColumnDef { + name: "last_update".into(), + data_type: SQLType::Timestamp, + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Default(pg().verified_expr("now()")), + }, + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + } + ], + }, + SQLColumnDef { + name: "active".into(), + data_type: SQLType::Int, + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::NotNull + }], + }, + ] + ); assert!(constraints.is_empty()); - - let c_name = &columns[0]; - assert_eq!("customer_id", c_name.name); - assert_eq!(SQLType::Int, c_name.data_type); - assert_eq!(false, c_name.allow_null); - - let c_lat = &columns[1]; - assert_eq!("store_id", c_lat.name); - assert_eq!(SQLType::SmallInt, c_lat.data_type); - assert_eq!(false, c_lat.allow_null); - - let c_lng = &columns[2]; - assert_eq!("first_name", c_lng.name); - assert_eq!(SQLType::Varchar(Some(45)), c_lng.data_type); - assert_eq!(false, c_lng.allow_null); - assert_eq!( with_options, vec![ @@ -87,61 +186,20 @@ fn parse_create_table_from_pg_dump() { release_year public.year, active integer )"; - match pg().one_statement_parses_to(sql, "") { - SQLStatement::SQLCreateTable { - name, - columns, - constraints, - with_options, - external: false, - file_format: None, - location: None, - } => { - assert_eq!("public.customer", name.to_string()); - assert!(constraints.is_empty()); - - let c_customer_id = &columns[0]; - assert_eq!("customer_id", c_customer_id.name); - 
assert_eq!(SQLType::Int, c_customer_id.data_type); - assert_eq!(false, c_customer_id.allow_null); - - let c_store_id = &columns[1]; - assert_eq!("store_id", c_store_id.name); - assert_eq!(SQLType::SmallInt, c_store_id.data_type); - assert_eq!(false, c_store_id.allow_null); - - let c_first_name = &columns[2]; - assert_eq!("first_name", c_first_name.name); - assert_eq!(SQLType::Varchar(Some(45)), c_first_name.data_type); - assert_eq!(false, c_first_name.allow_null); - - let c_create_date1 = &columns[8]; - assert_eq!( - Some(ASTNode::SQLCast { - expr: Box::new(ASTNode::SQLCast { - expr: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "now".to_string() - ))), - data_type: SQLType::Text - }), - data_type: SQLType::Date - }), - c_create_date1.default - ); - - let c_release_year = &columns[10]; - assert_eq!( - SQLType::Custom(SQLObjectName(vec![ - "public".to_string(), - "year".to_string() - ])), - c_release_year.data_type - ); - - assert_eq!(with_options, vec![]); - } - _ => unreachable!(), - } + pg().one_statement_parses_to(sql, "CREATE TABLE public.customer (\ + customer_id int DEFAULT nextval(CAST('public.customer_customer_id_seq' AS regclass)) NOT NULL, \ + store_id smallint NOT NULL, \ + first_name character varying(45) NOT NULL, \ + last_name character varying(45) NOT NULL, \ + info text[], \ + address_id smallint NOT NULL, \ + activebool boolean DEFAULT true NOT NULL, \ + create_date date DEFAULT CAST(now() AS date) NOT NULL, \ + create_date1 date DEFAULT CAST(CAST('now' AS text) AS date) NOT NULL, \ + last_update timestamp DEFAULT now(), \ + release_year public.year, \ + active int\ + )"); } #[test] @@ -153,37 +211,7 @@ fn parse_create_table_with_inherit() { value text[], \ use_metric boolean DEFAULT true\ )"; - match pg().verified_stmt(sql) { - SQLStatement::SQLCreateTable { - name, - columns, - constraints, - with_options, - external: false, - file_format: None, - location: None, - } => { - assert_eq!("bazaar.settings", name.to_string()); - 
assert!(constraints.is_empty()); - - let c_name = &columns[0]; - assert_eq!("settings_id", c_name.name); - assert_eq!(SQLType::Uuid, c_name.data_type); - assert_eq!(false, c_name.allow_null); - assert_eq!(true, c_name.is_primary); - assert_eq!(false, c_name.is_unique); - - let c_name = &columns[1]; - assert_eq!("user_id", c_name.name); - assert_eq!(SQLType::Uuid, c_name.data_type); - assert_eq!(true, c_name.allow_null); - assert_eq!(false, c_name.is_primary); - assert_eq!(true, c_name.is_unique); - - assert_eq!(with_options, vec![]); - } - _ => unreachable!(), - } + pg().verified_stmt(sql); } #[test]