From f07232c6d1dae9b1fc8b3d9faef0424e8a2acfa8 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 16 Dec 2020 23:49:50 +0100 Subject: [PATCH 1/2] BUG: Regression in logical ops raising ValueError with Categorical columns with unused categories --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/ops/__init__.py | 4 +++- pandas/tests/frame/test_logical_ops.py | 23 ++++++++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e2521cedb64cc..372dfa0f8ad42 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -859,7 +859,7 @@ Other - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) - Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) - Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`) - +- Fixed regression in logical operators raising ``ValueError`` when columns of :class:`DataFrame` are a :class:`CategoricalIndex` with unused categories (:issue:`38367`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index d8b5dba424cbf..bf23757e4af88 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -309,7 +309,9 @@ def should_reindex_frame_op( if fill_value is None and level is None and axis is default_axis: # TODO: any other cases we should handle here? - cols = left.columns.intersection(right.columns) + # Only doing unique because of CategoricalIndex. Can be removed after + # GH#38140 is merged + cols = left.columns.intersection(right.columns).unique() # Intersection is always unique so we have to check the unique columns left_uniques = left.columns.unique() diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index efabc666993ee..dca12c632a418 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import CategoricalIndex, DataFrame, Interval, Series, isnull import pandas._testing as tm @@ -162,3 +162,24 @@ def test_logical_with_nas(self): result = d["a"].fillna(False, downcast=False) | d["b"] expected = Series([True, True]) tm.assert_series_equal(result, expected) + + def test_logical_ops_categorical_columns(self): + # GH#38367 + intervals = [Interval(1, 2), Interval(3, 4)] + data = DataFrame( + [[1, np.nan], [2, np.nan]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + mask = DataFrame( + [[False, False], [False, False]], columns=data.columns, dtype=bool + ) + result = mask | isnull(data) + expected = DataFrame( + [[False, True], [False, True]], + columns=CategoricalIndex( + intervals, categories=intervals + [Interval(5, 6)] + ), + ) + tm.assert_frame_equal(result, expected) From 356bf8f19c7cff9f0db3a5740e570d4cba1d7c1f Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 18 Dec 2020 23:56:35 +0100 Subject: [PATCH 2/2] Move intersection --- pandas/core/ops/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index bf23757e4af88..7b14a5c636abe 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -309,13 +309,11 @@ def should_reindex_frame_op( if fill_value is None and level is None and axis is default_axis: # TODO: any other cases we should handle here? - # Only doing unique because of CategoricalIndex. Can be removed after - # GH#38140 is merged - cols = left.columns.intersection(right.columns).unique() # Intersection is always unique so we have to check the unique columns left_uniques = left.columns.unique() right_uniques = right.columns.unique() + cols = left_uniques.intersection(right_uniques) if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)): # TODO: is there a shortcut available when len(cols) == 0? return True