diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 52f7701724f18..0210ec53125b8 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -36,6 +36,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 9169eb86895fb..391fa377f3c6f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -778,6 +778,11 @@ def __init__(self, data, x, y, **kwargs): x = self.data.columns[x] if is_integer(y) and not self.data.columns.holds_integer(): y = self.data.columns[y] + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires x column to be numeric') + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + ' requires y column to be numeric') + self.x = x self.y = y diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index e40ec5a1faea8..ba674e10be384 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -915,6 +915,24 @@ def test_plot_scatter(self): axes = df.plot(x='x', y='y', kind='scatter', subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + @slow + def test_plot_scatter_with_categorical_data(self): + # GH 16199 + df = pd.DataFrame({'x': [1, 2, 3, 4], + 'y': pd.Categorical(['a', 'b', 'a', 'c'])}) + + with pytest.raises(ValueError) as ve: + df.plot(x='x', y='y', kind='scatter') + ve.match('requires y column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='x', kind='scatter') + ve.match('requires x column to be numeric') + + with pytest.raises(ValueError) as ve: + df.plot(x='y', y='y', kind='scatter') + ve.match('requires x column to be numeric') + @slow def test_plot_scatter_with_c(self): df = DataFrame(randn(6, 4),