From a020c1081f86948a3f9332ad05ab35d8b6bf46d3 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <emailformattr@gmail.com>
Date: Sun, 19 Mar 2017 17:58:08 -0700
Subject: [PATCH] PERF: Improve drop_duplicates for bool columns (#12963)

Add a whatsnew entry.

Add a bool dtype label in factorize and reorganize the branch logic.
---
 asv_bench/benchmarks/reindex.py | 5 +++++
 doc/source/whatsnew/v0.20.0.txt | 1 +
 pandas/core/algorithms.py       | 5 +++++
 3 files changed, 11 insertions(+)

diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py
index 6fe6c32a96df9..537d275e7c727 100644
--- a/asv_bench/benchmarks/reindex.py
+++ b/asv_bench/benchmarks/reindex.py
@@ -132,6 +132,9 @@ def setup(self):
         self.K = 10000
         self.key1 = np.random.randint(0, self.K, size=self.N)
         self.df_int = DataFrame({'key1': self.key1})
+        self.df_bool = DataFrame({i: np.random.randint(0, 2, size=self.K,
+                                                       dtype=bool)
+                                  for i in range(10)})
 
     def time_frame_drop_dups(self):
         self.df.drop_duplicates(['key1', 'key2'])
@@ -154,6 +157,8 @@ def time_series_drop_dups_string(self):
     def time_frame_drop_dups_int(self):
         self.df_int.drop_duplicates()
 
+    def time_frame_drop_dups_bool(self):
+        self.df_bool.drop_duplicates()
 
 #----------------------------------------------------------------------
 # blog "pandas escaped the zoo"
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 680aefc4041fb..02e80dd77aa0a 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -788,6 +788,7 @@ Performance Improvements
 - Improved performance of ``.rank()`` for categorical data (:issue:`15498`)
 - Improved performance when using ``.unstack()`` (:issue:`15503`)
 - Improved performance of merge/join on ``category`` columns (:issue:`10409`)
+- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`)
 
 
 .. _whatsnew_0200.bug_fixes:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 6937675603c10..b6f496f417a74 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -19,6 +19,7 @@
                                  is_period_dtype,
                                  is_period_arraylike,
                                  is_float_dtype,
+                                 is_bool_dtype,
                                  needs_i8_conversion,
                                  is_categorical,
                                  is_datetime64_dtype,
@@ -341,6 +342,10 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
             # numpy dtype
             dtype = values.dtype
             vals = values.view(np.int64)
+    elif is_bool_dtype(values):
+        dtype = bool
+        # view bool values as uint8 to avoid the object path
+        vals = np.asarray(values).view('uint8')
     else:
         vals = np.asarray(values)