Skip to content

Commit df86058

Browse files
authored
TST: Refactor more slow tests (#53820)
1 parent 4823492 commit df86058

File tree

8 files changed

+116
-77
lines changed

8 files changed

+116
-77
lines changed

pandas/core/algorithms.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,9 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
450450
unique1d = unique
451451

452452

453+
_MINIMUM_COMP_ARR_LEN = 1_000_000
454+
455+
453456
def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
454457
"""
455458
Compute the isin boolean array.
@@ -518,7 +521,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
518521
# Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
519522
# in1d is faster for small sizes
520523
if (
521-
len(comps_array) > 1_000_000
524+
len(comps_array) > _MINIMUM_COMP_ARR_LEN
522525
and len(values) <= 26
523526
and comps_array.dtype != object
524527
):

pandas/io/formats/csvs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
from pandas.io.formats.format import DataFrameFormatter
4646

4747

48+
_DEFAULT_CHUNKSIZE_CELLS = 100_000
49+
50+
4851
class CSVFormatter:
4952
cols: np.ndarray
5053

@@ -163,7 +166,7 @@ def _initialize_columns(self, cols: Sequence[Hashable] | None) -> np.ndarray:
163166

164167
def _initialize_chunksize(self, chunksize: int | None) -> int:
165168
if chunksize is None:
166-
return (100000 // (len(self.cols) or 1)) or 1
169+
return (_DEFAULT_CHUNKSIZE_CELLS // (len(self.cols) or 1)) or 1
167170
return int(chunksize)
168171

169172
@property

pandas/tests/frame/methods/test_to_csv.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -752,13 +752,16 @@ def test_to_csv_chunking(self, chunksize):
752752
tm.assert_frame_equal(rs, aa)
753753

754754
@pytest.mark.slow
755-
def test_to_csv_wide_frame_formatting(self):
755+
def test_to_csv_wide_frame_formatting(self, monkeypatch):
756756
# Issue #8621
757-
df = DataFrame(np.random.randn(1, 100010), columns=None, index=None)
757+
chunksize = 100
758+
df = DataFrame(np.random.randn(1, chunksize + 10), columns=None, index=None)
758759
with tm.ensure_clean() as filename:
759-
df.to_csv(filename, header=False, index=False)
760+
with monkeypatch.context() as m:
761+
m.setattr("pandas.io.formats.csvs._DEFAULT_CHUNKSIZE_CELLS", chunksize)
762+
df.to_csv(filename, header=False, index=False)
760763
rs = read_csv(filename, header=None)
761-
tm.assert_frame_equal(rs, df)
764+
tm.assert_frame_equal(rs, df)
762765

763766
def test_to_csv_bug(self):
764767
f1 = StringIO("a,1.0\nb,2.0")

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,11 @@ def test_indexer_caching():
7070
# GH5727
7171
# make sure that indexers are in the _internal_names_set
7272
n = 1000001
73-
arrays = (range(n), range(n))
74-
index = MultiIndex.from_tuples(zip(*arrays))
73+
index = MultiIndex.from_arrays([np.arange(n), np.arange(n)])
7574
s = Series(np.zeros(n), index=index)
7675
str(s)
7776

7877
# setitem
7978
expected = Series(np.ones(n), index=index)
80-
s = Series(np.zeros(n), index=index)
8179
s[s == 0] = 1
8280
tm.assert_series_equal(s, expected)

pandas/tests/io/parser/test_index_col.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,16 +228,18 @@ def test_header_with_index_col(all_parsers):
228228

229229

230230
@pytest.mark.slow
231-
def test_index_col_large_csv(all_parsers):
231+
def test_index_col_large_csv(all_parsers, monkeypatch):
232232
# https://github.com/pandas-dev/pandas/issues/37094
233233
parser = all_parsers
234234

235-
N = 1_000_001
236-
df = DataFrame({"a": range(N), "b": np.random.randn(N)})
235+
ARR_LEN = 100
236+
df = DataFrame({"a": range(ARR_LEN + 1), "b": np.random.randn(ARR_LEN + 1)})
237237

238238
with tm.ensure_clean() as path:
239239
df.to_csv(path, index=False)
240-
result = parser.read_csv(path, index_col=[0])
240+
with monkeypatch.context() as m:
241+
m.setattr("pandas.core.algorithms._MINIMUM_COMP_ARR_LEN", ARR_LEN)
242+
result = parser.read_csv(path, index_col=[0])
241243

242244
tm.assert_frame_equal(result, df.set_index("a"))
243245

pandas/tests/plotting/frame/test_frame.py

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -423,44 +423,60 @@ def test_line_area_stacked(self, kind):
423423
df2 = df.set_index(df.index + 1)
424424
_check_plot_works(df2.plot, kind=kind, logx=True, stacked=True)
425425

426-
def test_line_area_nan_df(self):
426+
@pytest.mark.parametrize(
427+
"idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
428+
)
429+
def test_line_area_nan_df(self, idx):
427430
values1 = [1, 2, np.nan, 3]
428431
values2 = [3, np.nan, 2, 1]
429-
df = DataFrame({"a": values1, "b": values2})
430-
tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4))
431-
432-
for d in [df, tdf]:
433-
ax = _check_plot_works(d.plot)
434-
masked1 = ax.lines[0].get_ydata()
435-
masked2 = ax.lines[1].get_ydata()
436-
# remove nan for comparison purpose
437-
438-
exp = np.array([1, 2, 3], dtype=np.float64)
439-
tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp)
440-
441-
exp = np.array([3, 2, 1], dtype=np.float64)
442-
tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp)
443-
tm.assert_numpy_array_equal(
444-
masked1.mask, np.array([False, False, True, False])
445-
)
446-
tm.assert_numpy_array_equal(
447-
masked2.mask, np.array([False, True, False, False])
448-
)
432+
df = DataFrame({"a": values1, "b": values2}, index=idx)
449433

450-
expected1 = np.array([1, 2, 0, 3], dtype=np.float64)
451-
expected2 = np.array([3, 0, 2, 1], dtype=np.float64)
434+
ax = _check_plot_works(df.plot)
435+
masked1 = ax.lines[0].get_ydata()
436+
masked2 = ax.lines[1].get_ydata()
437+
# remove nan for comparison purpose
452438

453-
ax = _check_plot_works(d.plot, stacked=True)
454-
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
455-
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
439+
exp = np.array([1, 2, 3], dtype=np.float64)
440+
tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp)
456441

457-
ax = _check_plot_works(d.plot.area)
458-
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
459-
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
442+
exp = np.array([3, 2, 1], dtype=np.float64)
443+
tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp)
444+
tm.assert_numpy_array_equal(masked1.mask, np.array([False, False, True, False]))
445+
tm.assert_numpy_array_equal(masked2.mask, np.array([False, True, False, False]))
446+
447+
@pytest.mark.parametrize(
448+
"idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
449+
)
450+
def test_line_area_nan_df_stacked(self, idx):
451+
values1 = [1, 2, np.nan, 3]
452+
values2 = [3, np.nan, 2, 1]
453+
df = DataFrame({"a": values1, "b": values2}, index=idx)
454+
455+
expected1 = np.array([1, 2, 0, 3], dtype=np.float64)
456+
expected2 = np.array([3, 0, 2, 1], dtype=np.float64)
457+
458+
ax = _check_plot_works(df.plot, stacked=True)
459+
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
460+
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
461+
462+
@pytest.mark.parametrize(
463+
"idx", [range(4), date_range("2023-01-1", freq="D", periods=4)]
464+
)
465+
@pytest.mark.parametrize("kwargs", [{}, {"stacked": False}])
466+
def test_line_area_nan_df_stacked_area(self, idx, kwargs):
467+
values1 = [1, 2, np.nan, 3]
468+
values2 = [3, np.nan, 2, 1]
469+
df = DataFrame({"a": values1, "b": values2}, index=idx)
470+
471+
expected1 = np.array([1, 2, 0, 3], dtype=np.float64)
472+
expected2 = np.array([3, 0, 2, 1], dtype=np.float64)
460473

461-
ax = _check_plot_works(d.plot.area, stacked=False)
462-
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
474+
ax = _check_plot_works(df.plot.area, **kwargs)
475+
tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1)
476+
if kwargs:
463477
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2)
478+
else:
479+
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2)
464480

465481
def test_line_lim(self):
466482
df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"])
@@ -1537,27 +1553,31 @@ def test_errorbar_with_integer_column_names(self):
15371553
_check_has_errorbars(ax, xerr=0, yerr=1)
15381554

15391555
@pytest.mark.slow
1540-
def test_errorbar_with_partial_columns(self):
1556+
@pytest.mark.parametrize("kind", ["line", "bar"])
1557+
def test_errorbar_with_partial_columns_kind(self, kind):
15411558
df = DataFrame(np.abs(np.random.randn(10, 3)))
15421559
df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2])
1543-
kinds = ["line", "bar"]
1544-
for kind in kinds:
1545-
ax = _check_plot_works(df.plot, yerr=df_err, kind=kind)
1546-
_check_has_errorbars(ax, xerr=0, yerr=2)
1560+
ax = _check_plot_works(df.plot, yerr=df_err, kind=kind)
1561+
_check_has_errorbars(ax, xerr=0, yerr=2)
15471562

1563+
@pytest.mark.slow
1564+
def test_errorbar_with_partial_columns_dti(self):
1565+
df = DataFrame(np.abs(np.random.randn(10, 3)))
1566+
df_err = DataFrame(np.abs(np.random.randn(10, 2)), columns=[0, 2])
15481567
ix = date_range("1/1/2000", periods=10, freq="M")
15491568
df.set_index(ix, inplace=True)
15501569
df_err.set_index(ix, inplace=True)
15511570
ax = _check_plot_works(df.plot, yerr=df_err, kind="line")
15521571
_check_has_errorbars(ax, xerr=0, yerr=2)
15531572

1573+
@pytest.mark.slow
1574+
@pytest.mark.parametrize("err_box", [lambda x: x, DataFrame])
1575+
def test_errorbar_with_partial_columns_box(self, err_box):
15541576
d = {"x": np.arange(12), "y": np.arange(12, 0, -1)}
15551577
df = DataFrame(d)
1556-
d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4}
1557-
df_err = DataFrame(d_err)
1558-
for err in [d_err, df_err]:
1559-
ax = _check_plot_works(df.plot, yerr=err)
1560-
_check_has_errorbars(ax, xerr=0, yerr=1)
1578+
err = err_box({"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4})
1579+
ax = _check_plot_works(df.plot, yerr=err)
1580+
_check_has_errorbars(ax, xerr=0, yerr=1)
15611581

15621582
@pytest.mark.parametrize("kind", ["line", "bar", "barh"])
15631583
def test_errorbar_timeseries(self, kind):

pandas/tests/plotting/frame/test_frame_subplots.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -245,15 +245,11 @@ def test_subplots_layout_single_column(
245245
assert axes.shape == expected_shape
246246

247247
@pytest.mark.slow
248-
def test_subplots_warnings(self):
248+
@pytest.mark.parametrize("idx", [range(5), date_range("1/1/2000", periods=5)])
249+
def test_subplots_warnings(self, idx):
249250
# GH 9464
250251
with tm.assert_produces_warning(None):
251-
df = DataFrame(np.random.randn(100, 4))
252-
df.plot(subplots=True, layout=(3, 2))
253-
254-
df = DataFrame(
255-
np.random.randn(100, 4), index=date_range("1/1/2000", periods=100)
256-
)
252+
df = DataFrame(np.random.randn(5, 4), index=idx)
257253
df.plot(subplots=True, layout=(3, 2))
258254

259255
def test_subplots_multiple_axes(self):

pandas/tests/plotting/test_misc.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -218,44 +218,58 @@ def test_andrews_curves_handle(self):
218218
_check_colors(handles, linecolors=colors)
219219

220220
@pytest.mark.slow
221-
def test_parallel_coordinates(self, iris):
222-
from matplotlib import cm
223-
221+
@pytest.mark.parametrize(
222+
"color",
223+
[("#556270", "#4ECDC4", "#C7F464"), ["dodgerblue", "aquamarine", "seagreen"]],
224+
)
225+
def test_parallel_coordinates_colors(self, iris, color):
224226
from pandas.plotting import parallel_coordinates
225227

226228
df = iris
227229

228-
ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name")
229-
nlines = len(ax.get_lines())
230-
nxticks = len(ax.xaxis.get_ticklabels())
231-
232-
rgba = ("#556270", "#4ECDC4", "#C7F464")
233230
ax = _check_plot_works(
234-
parallel_coordinates, frame=df, class_column="Name", color=rgba
231+
parallel_coordinates, frame=df, class_column="Name", color=color
235232
)
236-
_check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10])
233+
_check_colors(ax.get_lines()[:10], linecolors=color, mapping=df["Name"][:10])
237234

238-
cnames = ["dodgerblue", "aquamarine", "seagreen"]
239-
ax = _check_plot_works(
240-
parallel_coordinates, frame=df, class_column="Name", color=cnames
241-
)
242-
_check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10])
235+
@pytest.mark.slow
236+
def test_parallel_coordinates_cmap(self, iris):
237+
from matplotlib import cm
238+
239+
from pandas.plotting import parallel_coordinates
240+
241+
df = iris
243242

244243
ax = _check_plot_works(
245244
parallel_coordinates, frame=df, class_column="Name", colormap=cm.jet
246245
)
247246
cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
248247
_check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10])
249248

249+
@pytest.mark.slow
250+
def test_parallel_coordinates_line_diff(self, iris):
251+
from pandas.plotting import parallel_coordinates
252+
253+
df = iris
254+
255+
ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name")
256+
nlines = len(ax.get_lines())
257+
nxticks = len(ax.xaxis.get_ticklabels())
258+
250259
ax = _check_plot_works(
251260
parallel_coordinates, frame=df, class_column="Name", axvlines=False
252261
)
253262
assert len(ax.get_lines()) == (nlines - nxticks)
254263

264+
@pytest.mark.slow
265+
def test_parallel_coordinates_handles(self, iris):
266+
from pandas.plotting import parallel_coordinates
267+
268+
df = iris
255269
colors = ["b", "g", "r"]
256270
df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors})
257271
ax = parallel_coordinates(df, "Name", color=colors)
258-
handles, labels = ax.get_legend_handles_labels()
272+
handles, _ = ax.get_legend_handles_labels()
259273
_check_colors(handles, linecolors=colors)
260274

261275
# not sure if this is indicative of a problem

0 commit comments

Comments
 (0)