From 7d789249de675505c88216bf86026b6a4a3fdc4c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 2 Nov 2022 14:39:02 +0000
Subject: [PATCH 1/3] use default_index more

---
 pandas/core/frame.py                  | 2 +-
 pandas/core/groupby/generic.py        | 5 +++--
 pandas/core/internals/construction.py | 2 +-
 pandas/core/reshape/encoding.py       | 7 +++++--
 pandas/core/reshape/merge.py          | 3 ++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 044c40c58b85c..75f51051761f8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4012,7 +4012,7 @@ def igetitem(obj, i: int):
             # Using self.iloc[:, i] = ... may set values inplace, which
             #  by convention we do not do in __setitem__
             try:
-                self.columns = Index(range(len(self.columns)))
+                self.columns = default_index(len(self.columns))
                 for i, iloc in enumerate(ilocs):
                     self[iloc] = igetitem(value, i)
             finally:
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index cea9aaf70ccd0..e4e20ef98224c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -95,6 +95,7 @@
     Index,
     MultiIndex,
     all_indexes_same,
+    default_index,
 )
 from pandas.core.indexes.category import CategoricalIndex
 from pandas.core.series import Series
@@ -1159,7 +1160,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
 
         if not self.as_index:
             self._insert_inaxis_grouper_inplace(result)
-            result.index = Index(range(len(result)))
+            result.index = default_index(len(result))
 
         return result
 
@@ -1778,7 +1779,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
         )
 
         if not self.as_index:
-            results.index = Index(range(len(results)))
+            results.index = default_index(len(results))
             self._insert_inaxis_grouper_inplace(results)
 
         return results
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 28eab57ac7bde..356621a47c5d5 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -311,7 +311,7 @@ def ndarray_to_mgr(
             values = [values]
 
         if columns is None:
-            columns = Index(range(len(values)))
+            columns = default_index(len(values))
         else:
             columns = ensure_index(columns)
 
diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py
index a39e3c1f10956..ec077caeef69e 100644
--- a/pandas/core/reshape/encoding.py
+++ b/pandas/core/reshape/encoding.py
@@ -21,7 +21,10 @@
 from pandas.core.arrays import SparseArray
 from pandas.core.arrays.categorical import factorize_from_iterable
 from pandas.core.frame import DataFrame
-from pandas.core.indexes.api import Index
+from pandas.core.indexes.api import (
+    Index,
+    default_index,
+)
 from pandas.core.series import Series
 
 
@@ -249,7 +252,7 @@ def get_empty_frame(data) -> DataFrame:
         if isinstance(data, Series):
             index = data.index
         else:
-            index = Index(range(len(data)))
+            index = default_index(len(data))
         return DataFrame(index=index)
 
     # if all NaN
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index f4332f2c7eb1b..2de399d8648b8 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -84,6 +84,7 @@
 import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.frame import _merge_doc
+from pandas.core.indexes.api import default_index
 from pandas.core.sorting import is_int64_overflow_possible
 
 if TYPE_CHECKING:
@@ -1060,7 +1061,7 @@ def _get_join_info(
                 else:
                     join_index = self.left.index.take(left_indexer)
             else:
-                join_index = Index(np.arange(len(left_indexer)))
+                join_index = default_index(len(left_indexer))
 
         if len(join_index) == 0:
             join_index = join_index.astype(object)

From c944f27ceb023451459188667e73f73015127aab Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Wed, 2 Nov 2022 15:13:00 +0000
Subject: [PATCH 2/3] don't change for columns

---
 pandas/core/frame.py                  | 2 +-
 pandas/core/internals/construction.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 75f51051761f8..044c40c58b85c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4012,7 +4012,7 @@ def igetitem(obj, i: int):
             # Using self.iloc[:, i] = ... may set values inplace, which
             #  by convention we do not do in __setitem__
             try:
-                self.columns = default_index(len(self.columns))
+                self.columns = Index(range(len(self.columns)))
                 for i, iloc in enumerate(ilocs):
                     self[iloc] = igetitem(value, i)
             finally:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 356621a47c5d5..28eab57ac7bde 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -311,7 +311,7 @@ def ndarray_to_mgr(
             values = [values]
 
         if columns is None:
-            columns = default_index(len(values))
+            columns = Index(range(len(values)))
         else:
             columns = ensure_index(columns)
 

From 6e6149afe89d5b5d88d76c2c300c1f56af8c833b Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Thu, 3 Nov 2022 16:14:44 +0000
Subject: [PATCH 3/3] :memo: add whatsnew note

---
 doc/source/whatsnew/v2.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 47b7de04d7f95..f3396e5224222 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -336,6 +336,7 @@ Performance improvements
 - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
 - Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
+- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.bug_fixes: