Skip to content

Commit d083d97

Browse files
authored
[DataFrame] - Add DataFrame::distinct binding (#34)
* feat: add DataFrame::distinct binding
* fix: fmt
* fix: python linter
1 parent 8a2c905 commit d083d97

File tree

3 files changed

+32
-1
lines changed

3 files changed

+32
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
target
22
Cargo.lock
3+
/venv
34
.idea
45

56
# Byte-compiled / optimized / DLL files

datafusion/tests/test_dataframe.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,30 @@ def test_join():
156156
assert table.to_pydict() == expected
157157

158158

159+
# Checks DataFrame.distinct(): a frame in which each (a, b) row appears twice
# must, after distinct() and an ascending sort on "a", collect to the same
# result as a frame built from the three unique rows sorted the same way.
def test_distinct():
160+
ctx = SessionContext()
161+
162+
# Input batch: rows (1, 4), (2, 5), (3, 6), each present twice.
batch = pa.RecordBatch.from_arrays(
163+
[pa.array([1, 2, 3, 1, 2, 3]), pa.array([4, 5, 6, 4, 5, 6])],
164+
names=["a", "b"],
165+
)
166+
df_a = (
167+
ctx.create_dataframe([[batch]])
168+
.distinct()
169+
.sort(column("a").sort(ascending=True))
170+
)
171+
172+
# Expected batch: the same three rows, each present once.
batch = pa.RecordBatch.from_arrays(
173+
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
174+
names=["a", "b"],
175+
)
176+
df_b = ctx.create_dataframe([[batch]]).sort(
177+
column("a").sort(ascending=True)
178+
)
179+
180+
# Both frames are sorted on "a" before collecting, so the comparison does not
# rely on the row order distinct() happens to produce.
assert df_a.collect() == df_b.collect()
181+
182+
159183
def test_window_lead(df):
160184
df = df.select(
161185
column("a"),

src/dataframe.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,12 @@ impl PyDataFrame {
129129
Ok(pretty::print_batches(&batches)?)
130130
}
131131

132+
/// Filter out duplicate rows.
///
/// Calls `distinct()` on the wrapped `self.df` and wraps the result in a new
/// `PyDataFrame` via `Self::new`. `self` is only borrowed, so the receiver
/// remains usable after the call.
///
/// # Errors
///
/// An error returned by the underlying `distinct()` call is propagated with `?`.
133+
fn distinct(&self) -> PyResult<Self> {
134+
let df = self.df.distinct()?;
135+
Ok(Self::new(df))
136+
}
137+
132138
fn join(
133139
&self,
134140
right: PyDataFrame,
@@ -147,7 +153,7 @@ impl PyDataFrame {
147153
"The join type {} does not exist or is not implemented",
148154
how
149155
))
150-
.into())
156+
.into());
151157
}
152158
};
153159

0 commit comments

Comments
 (0)