From 22dd26500005e37f4078509a510a48edc1120ab7 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Dec 2020 13:35:39 -0800 Subject: [PATCH 1/7] BUG: require arraylike in infer_dtype_from_array --- pandas/core/dtypes/cast.py | 4 ++-- pandas/core/missing.py | 4 ++-- pandas/tests/dtypes/cast/test_infer_dtype.py | 21 +++++++++++++++++--- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3274725016b40..32f4e8a62de69 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -712,7 +712,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: If False, scalar/array belongs to pandas extension types is inferred as object """ - if is_scalar(val): + if not is_list_like(val): return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) @@ -853,7 +853,7 @@ def infer_dtype_from_array( return arr.dtype, arr if not is_list_like(arr): - arr = [arr] + raise TypeError("'arr' must be list-like") if pandas_dtype and is_extension_array_dtype(arr): return arr.dtype, arr diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 445c1efae22e4..1120416eebeb9 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -10,7 +10,7 @@ from pandas._typing import ArrayLike, Axis, DtypeObj from pandas.compat._optional import import_optional_dependency -from pandas.core.dtypes.cast import infer_dtype_from_array +from pandas.core.dtypes.cast import infer_dtype_from from pandas.core.dtypes.common import ( ensure_float64, is_integer_dtype, @@ -40,7 +40,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: # When called from Block.replace/replace_list, values_to_mask is a scalar # known to be holdable by arr. # When called from Series._single_replace, values_to_mask is tuple or list - dtype, values_to_mask = infer_dtype_from_array(values_to_mask) + dtype, values_to_mask = infer_dtype_from(values_to_mask) values_to_mask = np.array(values_to_mask, dtype=dtype) na_mask = isna(values_to_mask) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 65da8985843f9..96747cef57e22 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -137,12 +137,29 @@ def test_infer_dtype_from_scalar_errors(): @pytest.mark.parametrize( - "arr, expected, pandas_dtype", + "value, expected, pandas_dtype", [ ("foo", np.object_, False), (b"foo", np.object_, False), (1, np.int_, False), (1.5, np.float_, False), + (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False), + (Timestamp("20160101"), np.dtype("M8[ns]"), False), + (Timestamp("20160101", tz="UTC"), np.object_, False), + (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]", True), + ], +) +def test_infer_dtype_from_scalar(value, expected, pandas_dtype): + dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + with pytest.raises(TypeError, match="must be list-like"): + infer_dtype_from_array(value, pandas_dtype=pandas_dtype) + + +@pytest.mark.parametrize( + "arr, expected, pandas_dtype", + [ ([1], np.int_, False), (np.array([1], dtype=np.int64), np.int64, False), ([np.nan, 1, ""], np.object_, False), @@ -151,8 +168,6 @@ def test_infer_dtype_from_scalar_errors(): (Categorical([1, 2, 3]), np.int64, False), (Categorical(list("aabc")), "category", True), (Categorical([1, 2, 3]), "category", True), - (Timestamp("20160101"), np.object_, False), - (np.datetime64("2016-01-01"), np.dtype("=M8[D]"), False), (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False), ( date_range("20160101", periods=3, tz="US/Eastern"), From fbb6f103a41fec23b988e983234adc646476a431 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 08:36:38 -0800 Subject: [PATCH 2/7] 32bit compat --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 40cd4bc11d605..f7ab95673dc4e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -768,7 +768,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, if isinstance(val, np.integer): dtype = np.dtype(type(val)) else: - dtype = np.dtype(np.int64) + dtype = np.dtype(np.intp) try: np.array(val, dtype=dtype) From eb1df23a9d963aa377085456ffbd2796467b5333 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 17:43:55 -0800 Subject: [PATCH 3/7] troubleshoot 32bit builds --- pandas/tests/dtypes/cast/test_infer_dtype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index 96747cef57e22..3f11cd1ad5d2c 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -141,7 +141,7 @@ def test_infer_dtype_from_scalar_errors(): [ ("foo", np.object_, False), (b"foo", np.object_, False), - (1, np.int_, False), + (1, np.int64, False), (1.5, np.float_, False), (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False), (Timestamp("20160101"), np.dtype("M8[ns]"), False), From 6f7732ec3073b5a475740a308996a0c1fae89a84 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 10:31:28 -0800 Subject: [PATCH 4/7] troubleshoot 32 bit builds --- pandas/core/dtypes/cast.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0295e8a19a99d..1b622a85f6a9f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -674,8 +674,14 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: If False, scalar/array belongs to pandas extension types is inferred as object """ - if not is_list_like(val): + if not is_scalar(val): return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + elif not is_list_like(val): + v1 = infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + v2 = infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + if v1[1] != v2[1]: + # Troubleshooting 32 bit builds + raise ValueError(val, pandas_dtype, v1[1], v2[1]) return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) @@ -734,7 +740,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, if isinstance(val, np.integer): dtype = np.dtype(type(val)) else: - dtype = np.dtype(np.intp) + dtype = np.dtype(np.int64) try: np.array(val, dtype=dtype) From 6342f17a9b62cedc2e047a3161f54988c5c84ac9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 12:28:12 -0800 Subject: [PATCH 5/7] troubleshoot 32bit --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 1b622a85f6a9f..7f2606ac7b8f9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -678,7 +678,7 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) elif not is_list_like(val): v1 = infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) - v2 = infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + v2 = infer_dtype_from_array([val], pandas_dtype=pandas_dtype) if v1[1] != v2[1]: # Troubleshooting 32 bit builds raise ValueError(val, pandas_dtype, v1[1], v2[1]) From 171e1c6fbf23a201e8652f7dc0fc1d0f03796f75 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 14:33:50 -0800 Subject: [PATCH 6/7] keep trying --- pandas/core/dtypes/cast.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7f2606ac7b8f9..e320014e67a7e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -679,9 +679,9 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: elif not is_list_like(val): v1 = infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) v2 = infer_dtype_from_array([val], pandas_dtype=pandas_dtype) - if v1[1] != v2[1]: + if v1[0] != v2[0]: # Troubleshooting 32 bit builds - raise ValueError(val, pandas_dtype, v1[1], v2[1]) + raise ValueError(val, pandas_dtype, v1[0], v2[0]) return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) From 372e5b0c4e176ef31b92a79636acdfdc39f3114a Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Dec 2020 17:08:35 -0800 Subject: [PATCH 7/7] keep trying --- pandas/core/dtypes/cast.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e320014e67a7e..8c795c2aad224 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -674,14 +674,8 @@ def infer_dtype_from(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, Any]: If False, scalar/array belongs to pandas extension types is inferred as object """ - if not is_scalar(val): + if not is_list_like(val): return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) - elif not is_list_like(val): - v1 = infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) - v2 = infer_dtype_from_array([val], pandas_dtype=pandas_dtype) - if v1[0] != v2[0]: - # Troubleshooting 32 bit builds - raise ValueError(val, pandas_dtype, v1[0], v2[0]) return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)