Skip to content

Commit d4a7ade

Browse files
committed
Add support for table sample, initial commit
1 parent d0fcc06 commit d4a7ade

20 files changed

+506
-458
lines changed

src/ast/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@ pub use self::query::{
6969
OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, RenameSelectItem,
7070
RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select,
7171
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table,
72-
TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableVersion, TableWithJoins,
73-
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
72+
TableAlias, TableAliasColumnDef, TableFactor, TableFunctionArgs, TableSample,
73+
TableSampleBernoulli, TableSampleBucket, TableSampleImplicit, TableSampleSystem,
74+
TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, ValueTableMode, Values,
75+
WildcardAdditionalOptions, With, WithFill,
7476
};
7577

7678
pub use self::trigger::{

src/ast/query.rs

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,12 @@ pub enum TableFactor {
10021002
partitions: Vec<Ident>,
10031003
/// Optional PartiQL JsonPath: <https://partiql.org/dql/from.html>
10041004
json_path: Option<JsonPath>,
1005+
/// Optional table sample modifier
1006+
/// See: <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#sample-clause>
1007+
sample: Option<TableSample>,
1008+
/// Position of the table sample modifier in the table factor. Default is after the table alias
1009+
/// e.g. `SELECT * FROM tbl t TABLESAMPLE (10 ROWS)`. See `Dialect::supports_table_sample_before_alias`.
1010+
sample_before_alias: bool,
10051011
},
10061012
Derived {
10071013
lateral: bool,
@@ -1146,6 +1152,121 @@ pub enum TableFactor {
11461152
},
11471153
}
11481154

1155+
/// The table sample modifier options
1156+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1157+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1158+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1159+
pub enum TableSample {
1160+
Bernoulli(TableSampleBernoulli),
1161+
System(TableSampleSystem),
1162+
Bucket(TableSampleBucket),
1163+
Implicit(TableSampleImplicit),
1164+
}
1165+
1166+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1167+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1168+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1169+
pub struct TableSampleBernoulli {
1170+
pub probability: Option<Expr>,
1171+
pub value: Option<Expr>,
1172+
pub unit: Option<TableSampleUnit>,
1173+
}
1174+
1175+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1176+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1177+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1178+
pub struct TableSampleSystem {
1179+
pub probability: Expr,
1180+
pub seed: Option<Expr>,
1181+
}
1182+
/// Unit keyword that may follow a sample size in a `TABLESAMPLE` clause.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum TableSampleUnit {
    /// Rendered as `ROWS`.
    Rows,
    /// Rendered as `PERCENT`.
    Percent,
}
1190+
1191+
impl fmt::Display for TableSampleUnit {
1192+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1193+
match self {
1194+
TableSampleUnit::Percent => write!(f, "PERCENT"),
1195+
TableSampleUnit::Rows => write!(f, "ROWS"),
1196+
}
1197+
}
1198+
}
1199+
1200+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1201+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1202+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1203+
pub struct TableSampleBucket {
1204+
pub bucket: Value,
1205+
pub total: Value,
1206+
pub on: Option<Expr>,
1207+
}
1208+
1209+
impl fmt::Display for TableSampleBucket {
1210+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1211+
write!(f, "BUCKET {} OUT OF {}", self.bucket, self.total)?;
1212+
if let Some(on) = &self.on {
1213+
write!(f, " ON {}", on)?;
1214+
}
1215+
Ok(())
1216+
}
1217+
}
1218+
1219+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
1220+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
1221+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
1222+
pub struct TableSampleImplicit {
1223+
pub value: Value,
1224+
pub unit: Option<TableSampleUnit>,
1225+
}
1226+
1227+
impl fmt::Display for TableSampleImplicit {
1228+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1229+
write!(f, "{}", self.value)?;
1230+
if let Some(unit) = &self.unit {
1231+
write!(f, " {}", unit)?;
1232+
}
1233+
Ok(())
1234+
}
1235+
}
1236+
1237+
impl fmt::Display for TableSample {
1238+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1239+
write!(f, " TABLESAMPLE")?;
1240+
match self {
1241+
TableSample::Bernoulli(sample) => {
1242+
write!(f, " BERNOULLI (")?;
1243+
if let Some(probability) = &sample.probability {
1244+
write!(f, "{})", probability)?;
1245+
} else if let Some(value) = &sample.value {
1246+
write!(f, "{}", value)?;
1247+
if let Some(unit) = &sample.unit {
1248+
write!(f, " {}", unit)?;
1249+
}
1250+
write!(f, ")")?;
1251+
}
1252+
}
1253+
TableSample::System(sample) => {
1254+
write!(f, " SYSTEM ({})", sample.probability)?;
1255+
if let Some(seed) = &sample.seed {
1256+
write!(f, " SEED ({})", seed)?;
1257+
}
1258+
}
1259+
TableSample::Bucket(sample) => {
1260+
write!(f, " ({})", sample)?;
1261+
}
1262+
TableSample::Implicit(sample) => {
1263+
write!(f, " ({})", sample)?;
1264+
}
1265+
}
1266+
Ok(())
1267+
}
1268+
}
1269+
11491270
/// The source of values in a `PIVOT` operation.
11501271
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
11511272
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1404,6 +1525,8 @@ impl fmt::Display for TableFactor {
14041525
partitions,
14051526
with_ordinality,
14061527
json_path,
1528+
sample,
1529+
sample_before_alias,
14071530
} => {
14081531
write!(f, "{name}")?;
14091532
if let Some(json_path) = json_path {
@@ -1426,6 +1549,9 @@ impl fmt::Display for TableFactor {
14261549
if *with_ordinality {
14271550
write!(f, " WITH ORDINALITY")?;
14281551
}
1552+
if let (Some(sample), true) = (sample, sample_before_alias) {
1553+
write!(f, "{sample}")?;
1554+
}
14291555
if let Some(alias) = alias {
14301556
write!(f, " AS {alias}")?;
14311557
}
@@ -1435,6 +1561,9 @@ impl fmt::Display for TableFactor {
14351561
if let Some(version) = version {
14361562
write!(f, "{version}")?;
14371563
}
1564+
if let (Some(sample), false) = (sample, sample_before_alias) {
1565+
write!(f, "{sample}")?;
1566+
}
14381567
Ok(())
14391568
}
14401569
TableFactor::Derived {

src/ast/spans.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1681,6 +1681,8 @@ impl Spanned for TableFactor {
16811681
with_ordinality: _,
16821682
partitions: _,
16831683
json_path: _,
1684+
sample: _,
1685+
sample_before_alias: _,
16841686
} => union_spans(
16851687
name.0
16861688
.iter()

src/dialect/hive.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,9 @@ impl Dialect for HiveDialect {
6161
fn supports_load_data(&self) -> bool {
6262
true
6363
}
64+
65+
/// See Hive <https://cwiki.apache.org/confluence/display/hive/languagemanual+sampling>
66+
fn supports_table_sample_before_alias(&self) -> bool {
67+
true
68+
}
6469
}

src/dialect/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,13 @@ pub trait Dialect: Debug + Any {
707707
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
708708
keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
709709
}
710+
711+
/// Returns true if the dialect supports the `TABLESAMPLE` option
712+
/// before the table alias option.
713+
/// <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#_7_6_table_reference>
714+
fn supports_table_sample_before_alias(&self) -> bool {
715+
false
716+
}
710717
}
711718

712719
/// This represents the operators for which precedence must be defined

src/keywords.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ define_keywords!(
120120
BEGIN,
121121
BEGIN_FRAME,
122122
BEGIN_PARTITION,
123+
BERNOULLI,
123124
BETWEEN,
124125
BIGDECIMAL,
125126
BIGINT,
@@ -128,12 +129,14 @@ define_keywords!(
128129
BINDING,
129130
BIT,
130131
BLOB,
132+
BLOCK,
131133
BLOOMFILTER,
132134
BOOL,
133135
BOOLEAN,
134136
BOTH,
135137
BROWSE,
136138
BTREE,
139+
BUCKET,
137140
BUCKETS,
138141
BY,
139142
BYPASSRLS,
@@ -679,6 +682,7 @@ define_keywords!(
679682
RUN,
680683
SAFE,
681684
SAFE_CAST,
685+
SAMPLE,
682686
SAVEPOINT,
683687
SCHEMA,
684688
SCHEMAS,
@@ -689,6 +693,7 @@ define_keywords!(
689693
SECONDARY,
690694
SECRET,
691695
SECURITY,
696+
SEED,
692697
SELECT,
693698
SEMI,
694699
SENSITIVE,
@@ -931,6 +936,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
931936
Keyword::CONNECT,
932937
// Reserved for snowflake MATCH_RECOGNIZE
933938
Keyword::MATCH_RECOGNIZE,
939+
// Reserved for Snowflake table sample
940+
Keyword::SAMPLE,
941+
Keyword::TABLESAMPLE,
934942
];
935943

936944
/// Can't be used as a column alias, so that `SELECT <expr> alias`

0 commit comments

Comments
 (0)