diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 59e0605cc5a91..408113e9bc417 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -137,3 +137,12 @@ def test_update_datetime_tz(self): result.update(result) expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) + + def test_update_with_different_dtype(self): + # GH#3217 + df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) + df["c"] = np.nan + df["c"].update(Series(["foo"], index=[0])) + + expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 4a66073d4f7a5..49181f0fdee7e 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -457,3 +457,19 @@ def test_cache_updating2(self): tm.assert_frame_equal(df, expected) expected = Series([0, 0, 0, 2, 0], name="f") tm.assert_series_equal(df.f, expected) + + def test_iloc_setitem_chained_assignment(self): + # GH#3970 + with option_context("chained_assignment", None): + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) + df["cc"] = 0.0 + + ck = [True] * len(df) + + df["bb"].iloc[0] = 0.13 + + # TODO: unused + df_tmp = df.iloc[ck] # noqa + + df["bb"].iloc[0] = 0.15 + assert df["bb"].iloc[0] == 0.15 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 28a1098c10d9f..a177c6e18f4cc 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,6 +1,3 @@ -import numpy as np -import pytest - import pandas as pd from pandas import ( DataFrame, @@ -55,28 +52,6 @@ def test_indexing_fast_xs(self): expected = df.iloc[4:] tm.assert_frame_equal(result, expected) - def test_setitem_with_expansion(self): - # indexing - setting an element - df = DataFrame( - data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), - columns=["time"], - ) - df["new_col"] = ["new", "old"] - df.time = df.set_index("time").index.tz_localize("UTC") - v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") - - # trying to set a single element on a part of a different timezone - # this converts to object - df2 = df.copy() - df2.loc[df2.new_col == "new", "time"] = v - - expected = Series([v[0], df.loc[1, "time"]], name="time") - tm.assert_series_equal(df2.time, expected) - - v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s") - df.loc[df.new_col == "new", "time"] = v - tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v) - def test_consistency_with_tz_aware_scalar(self): # xef gh-12938 # various ways of indexing the same tz-aware scalar @@ -163,48 +138,6 @@ def test_indexing_with_datetimeindex_tz(self): expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("to_period", [True, False]) - def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): - # GH 11497 - - idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") - if to_period: - idx = idx.to_period("D") - ser = Series([0.1, 0.2], index=idx, name="s") - - keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] - if to_period: - keys = [x.to_period("D") for x in keys] - result = ser.loc[keys] - exp = Series([0.1, 0.2], index=idx, name="s") - if not to_period: - exp.index = exp.index._with_freq(None) - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [ - Timestamp("2011-01-02"), - Timestamp("2011-01-02"), - Timestamp("2011-01-01"), - ] - if to_period: - keys = [x.to_period("D") for x in keys] - exp = Series( - [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s" - ) - result = ser.loc[keys] - tm.assert_series_equal(result, exp, check_index_type=True) - - keys = [ - Timestamp("2011-01-03"), - Timestamp("2011-01-02"), - Timestamp("2011-01-03"), - ] - if to_period: - keys = [x.to_period("D") for x in keys] - - with pytest.raises(KeyError, match="with any missing labels"): - ser.loc[keys] - def test_nanosecond_getitem_setitem_with_tz(self): # GH 11679 data = ["2016-06-28 08:30:00.123456789"] @@ -219,24 +152,6 @@ def test_nanosecond_getitem_setitem_with_tz(self): expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) - def test_loc_setitem_with_expansion_and_existing_dst(self): - # GH 18308 - start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") - end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") - ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") - idx = pd.date_range(start, end, closed="left", freq="H") - assert ts not in idx # i.e. result.loc setitem is with-expansion - - result = DataFrame(index=idx, columns=["value"]) - result.loc[ts, "value"] = 12 - expected = DataFrame( - [np.nan] * len(idx) + [12], - index=idx.append(pd.DatetimeIndex([ts])), - columns=["value"], - dtype=object, - ) - tm.assert_frame_equal(result, expected) - def test_getitem_millisecond_resolution(self, frame_or_series): # GH#33589 diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 21ea6fbd2e3c6..a84be049ebff4 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -50,7 +50,7 @@ def check(self, result, original, indexer, getitem): tm.makePeriodIndex, ], ) - def test_scalar_non_numeric(self, index_func, frame_or_series): + def test_scalar_non_numeric(self, index_func, frame_or_series, indexer_sl): # GH 4892 # float_indexers should raise exceptions @@ -61,10 +61,7 @@ def test_scalar_non_numeric(self, index_func, frame_or_series): # getting with pytest.raises(KeyError, match="^3.0$"): - s[3.0] - - with pytest.raises(KeyError, match="^3.0$"): - s.loc[3.0] + indexer_sl(s)[3.0] # contains assert 3.0 not in s @@ -88,11 +85,7 @@ def test_scalar_non_numeric(self, index_func, frame_or_series): else: s2 = s.copy() - s2.loc[3.0] = 10 - assert s2.index.is_object() - - s2 = s.copy() - s2[3.0] = 0 + indexer_sl(s2)[3.0] = 10 assert s2.index.is_object() @pytest.mark.parametrize( @@ -114,7 +107,7 @@ def test_scalar_non_numeric_series_fallback(self, index_func): with pytest.raises(KeyError, match="^3.0$"): s[3.0] - def test_scalar_with_mixed(self): + def test_scalar_with_mixed(self, indexer_sl): s2 = Series([1, 2, 3], index=["a", "b", "c"]) s3 = Series([1, 2, 3], index=["a", "b", 1.5]) @@ -122,36 +115,36 @@ def test_scalar_with_mixed(self): # lookup in a pure string index with an invalid indexer with pytest.raises(KeyError, match="^1.0$"): - s2[1.0] + indexer_sl(s2)[1.0] with pytest.raises(KeyError, match=r"^1\.0$"): - s2.loc[1.0] + indexer_sl(s2)[1.0] - result = s2.loc["b"] + result = indexer_sl(s2)["b"] expected = 2 assert result == expected # mixed index so we have label # indexing with pytest.raises(KeyError, match="^1.0$"): - s3[1.0] + indexer_sl(s3)[1.0] - result = s3[1] - expected = 2 - assert result == expected + if indexer_sl is not tm.loc: + # __getitem__ falls back to positional + result = s3[1] + expected = 2 + assert result == expected with pytest.raises(KeyError, match=r"^1\.0$"): - s3.loc[1.0] + indexer_sl(s3)[1.0] - result = s3.loc[1.5] + result = indexer_sl(s3)[1.5] expected = 3 assert result == expected - @pytest.mark.parametrize( - "idxr,getitem", [(lambda x: x.loc, False), (lambda x: x, True)] - ) @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) - def test_scalar_integer(self, index_func, frame_or_series, idxr, getitem): + def test_scalar_integer(self, index_func, frame_or_series, indexer_sl): + getitem = indexer_sl is not tm.loc # test how scalar float indexers work on int indexes @@ -161,7 +154,7 @@ def test_scalar_integer(self, index_func, frame_or_series, idxr, getitem): # coerce to equal int - result = idxr(obj)[3.0] + result = indexer_sl(obj)[3.0] self.check(result, obj, 3, getitem) if isinstance(obj, Series): @@ -178,12 +171,12 @@ def compare(x, y): expected = Series(100.0, index=range(len(obj)), name=3) s2 = obj.copy() - idxr(s2)[3.0] = 100 + indexer_sl(s2)[3.0] = 100 - result = idxr(s2)[3.0] + result = indexer_sl(s2)[3.0] compare(result, expected) - result = idxr(s2)[3] + result = indexer_sl(s2)[3] compare(result, expected) @pytest.mark.parametrize("index_func", [tm.makeIntIndex, tm.makeRangeIndex]) @@ -204,7 +197,8 @@ def test_scalar_float(self, frame_or_series): # assert all operations except for iloc are ok indexer = index[3] - for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: + for idxr in [tm.loc, tm.setitem]: + getitem = idxr is not tm.loc # getting result = idxr(s)[indexer] @@ -242,7 +236,7 @@ def test_scalar_float(self, frame_or_series): ], ) @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) - def test_slice_non_numeric(self, index_func, idx, frame_or_series): + def test_slice_non_numeric(self, index_func, idx, frame_or_series, indexer_sli): # GH 4892 # float_indexers should raise exceptions @@ -252,38 +246,28 @@ def test_slice_non_numeric(self, index_func, idx, frame_or_series): s = gen_obj(frame_or_series, index) # getitem - msg = ( - "cannot do positional indexing " - fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " - "type float" - ) + if indexer_sli is tm.iloc: + msg = ( + "cannot do positional indexing " + fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of " + "type float" + ) + else: + msg = ( + "cannot do slice indexing " + fr"on {type(index).__name__} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of type (float|int)" + ) with pytest.raises(TypeError, match=msg): - s.iloc[idx] - - msg = ( - "cannot do (slice|positional) indexing " - fr"on {type(index).__name__} with these indexers " - r"\[(3|4)(\.0)?\] " - r"of type (float|int)" - ) - for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: - with pytest.raises(TypeError, match=msg): - idxr(s)[idx] + indexer_sli(s)[idx] # setitem - msg = "slice indices must be integers or None or have an __index__ method" + if indexer_sli is tm.iloc: + # otherwise we keep the same message as above + msg = "slice indices must be integers or None or have an __index__ method" with pytest.raises(TypeError, match=msg): - s.iloc[idx] = 0 - - msg = ( - "cannot do (slice|positional) indexing " - fr"on {type(index).__name__} with these indexers " - r"\[(3|4)(\.0)?\] " - r"of type (float|int)" - ) - for idxr in [lambda x: x.loc, lambda x: x]: - with pytest.raises(TypeError, match=msg): - idxr(s)[idx] = 0 + indexer_sli(s)[idx] = 0 def test_slice_integer(self): @@ -469,25 +453,24 @@ def test_float_slice_getitem_with_integer_index_raises(self, idx, index_func): s[idx] @pytest.mark.parametrize("idx", [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]) - def test_slice_float(self, idx, frame_or_series): + def test_slice_float(self, idx, frame_or_series, indexer_sl): # same as above, but for floats index = Index(np.arange(5.0)) + 0.1 s = gen_obj(frame_or_series, index) expected = s.iloc[3:4] - for idxr in [lambda x: x.loc, lambda x: x]: - # getitem - result = idxr(s)[idx] - assert isinstance(result, type(s)) - tm.assert_equal(result, expected) + # getitem + result = indexer_sl(s)[idx] + assert isinstance(result, type(s)) + tm.assert_equal(result, expected) - # setitem - s2 = s.copy() - idxr(s2)[idx] = 0 - result = idxr(s2)[idx].values.ravel() - assert (result == 0).all() + # setitem + s2 = s.copy() + indexer_sl(s2)[idx] = 0 + result = indexer_sl(s2)[idx].values.ravel() + assert (result == 0).all() def test_floating_index_doc_example(self): @@ -564,19 +547,6 @@ def test_floating_misc(self, indexer_sl): result = indexer_sl(s)[[2.5]] tm.assert_series_equal(result, Series([1], index=[2.5])) - def test_floating_tuples(self): - # see gh-13509 - s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo") - - result = s[0.0] - assert result == (1, 1) - - expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo") - s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo") - - result = s[0.0] - tm.assert_series_equal(result, expected) - def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index ser = { diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2c0b88159c8f2..696693ec158ca 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1006,6 +1006,40 @@ def test_iloc_getitem_float_duplicates(self): expect = df.iloc[[1, -1], 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) + def test_iloc_setitem_custom_object(self): + # iloc with an object + class TO: + def __init__(self, value): + self.value = value + + def __str__(self) -> str: + return f"[{self.value}]" + + __repr__ = __str__ + + def __eq__(self, other) -> bool: + return self.value == other.value + + def view(self): + return self + + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = TO(2) + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = TO(2) + + tm.assert_frame_equal(result, df) + + # remains object dtype even after setting it back + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = np.nan + result = DataFrame(index=[0, 1], columns=[0]) + + tm.assert_frame_equal(result, df) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 34f7ec9418028..f55a0ae2c199b 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -101,12 +101,12 @@ def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): idxr = indexer_sli(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) - if indexer_sli.__name__ == "iloc": + if indexer_sli is tm.iloc: err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}" elif ( isinstance(index, pd.IntervalIndex) - and indexer_sli.__name__ == "setitem" + and indexer_sli is tm.setitem and obj.ndim == 1 ): err = AttributeError @@ -138,17 +138,6 @@ def test_inf_upcast(self): expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) - def test_loc_setitem_with_expasnion_inf_upcast_empty(self): - # Test with np.inf in columns - df = DataFrame() - df.loc[0, 0] = 1 - df.loc[1, 1] = 2 - df.loc[0, np.inf] = 3 - - result = df.columns - expected = pd.Float64Index([0, 1, np.inf]) - tm.assert_index_equal(result, expected) - def test_setitem_dtype_upcast(self): # GH3216 @@ -308,12 +297,11 @@ def test_dups_fancy_indexing3(self): result = df.loc[[1, 2], ["a", "b"]] tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("case", [tm.getitem, tm.loc]) - def test_duplicate_int_indexing(self, case): + def test_duplicate_int_indexing(self, indexer_sl): # GH 17347 s = Series(range(3), index=[1, 1, 3]) expected = s[1] - result = case(s)[[1]] + result = indexer_sl(s)[[1]] tm.assert_series_equal(result, expected) def test_indexing_mixed_frame_bug(self): @@ -499,40 +487,6 @@ def test_setitem_list(self): tm.assert_frame_equal(result, df) - def test_iloc_setitem_custom_object(self): - # iloc with an object - class TO: - def __init__(self, value): - self.value = value - - def __str__(self) -> str: - return f"[{self.value}]" - - __repr__ = __str__ - - def __eq__(self, other) -> bool: - return self.value == other.value - - def view(self): - return self - - df = DataFrame(index=[0, 1], columns=[0]) - df.iloc[1, 0] = TO(1) - df.iloc[1, 0] = TO(2) - - result = DataFrame(index=[0, 1], columns=[0]) - result.iloc[1, 0] = TO(2) - - tm.assert_frame_equal(result, df) - - # remains object dtype even after setting it back - df = DataFrame(index=[0, 1], columns=[0]) - df.iloc[1, 0] = TO(1) - df.iloc[1, 0] = np.nan - result = DataFrame(index=[0, 1], columns=[0]) - - tm.assert_frame_equal(result, df) - def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object @@ -748,7 +702,7 @@ def test_slice_with_zero_step_raises(self, indexer_sl): with pytest.raises(ValueError, match="slice step cannot be zero"): indexer_sl(ser)[::0] - def test_indexing_assignment_dict_already_exists(self): + def test_loc_setitem_indexing_assignment_dict_already_exists(self): index = Index([-5, 0, 5], name="z") df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8]}, index=index) expected = df.copy() @@ -763,7 +717,7 @@ def test_indexing_assignment_dict_already_exists(self): expected = DataFrame({"x": [1, 2, 9], "y": [2.0, 2.0, 99.0]}, index=index) tm.assert_frame_equal(df, expected) - def test_indexing_dtypes_on_empty(self): + def test_iloc_getitem_indexing_dtypes_on_empty(self): # Check that .iloc returns correct dtypes GH9983 df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]}) df2 = df.iloc[[], :] @@ -772,7 +726,7 @@ def test_indexing_dtypes_on_empty(self): tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0]) @pytest.mark.parametrize("size", [5, 999999, 1000000]) - def test_range_in_series_indexing(self, size): + def test_loc_range_in_series_indexing(self, size): # range can cause an indexing error # GH 11652 s = Series(index=range(size), dtype=np.float64) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fd22b684aba07..3726bbecde827 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1230,6 +1230,39 @@ def test_loc_setitem_unsorted_multiindex_columns(self, key): expected = expected.sort_index(1) tm.assert_frame_equal(df, expected) + def test_loc_setitem_uint_drop(self, any_int_dtype): + # see GH#18311 + # assigning series.loc[0] = 4 changed series.dtype to int + series = Series([1, 2, 3], dtype=any_int_dtype) + series.loc[0] = 4 + expected = Series([4, 2, 3], dtype=any_int_dtype) + tm.assert_series_equal(series, expected) + + def test_loc_setitem_td64_non_nano(self): + # GH#14155 + ser = Series(10 * [np.timedelta64(10, "m")]) + ser.loc[[1, 2, 3]] = np.timedelta64(20, "m") + expected = Series(10 * [np.timedelta64(10, "m")]) + expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m")) + tm.assert_series_equal(ser, expected) + + def test_loc_setitem_2d_to_1d_raises(self): + data = np.random.randn(2, 2) + ser = Series(range(2)) + + msg = "|".join( + [ + r"shape mismatch: value array of shape \(2,2\)", + r"cannot reshape array of size 4 into shape \(2,\)", + ] + ) + with pytest.raises(ValueError, match=msg): + ser.loc[range(2)] = data + + msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" + with pytest.raises(ValueError, match=msg): + ser.loc[:] = data + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1454,6 +1487,57 @@ def test_loc_setitem_categorical_column_retains_dtype(self, ordered): expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)}) tm.assert_frame_equal(result, expected) + def test_loc_setitem_with_expansion_and_existing_dst(self): + # GH#18308 + start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") + end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") + ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") + idx = pd.date_range(start, end, closed="left", freq="H") + assert ts not in idx # i.e. result.loc setitem is with-expansion + + result = DataFrame(index=idx, columns=["value"]) + result.loc[ts, "value"] = 12 + expected = DataFrame( + [np.nan] * len(idx) + [12], + index=idx.append(pd.DatetimeIndex([ts])), + columns=["value"], + dtype=object, + ) + tm.assert_frame_equal(result, expected) + + def test_setitem_with_expansion(self): + # indexing - setting an element + df = DataFrame( + data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), + columns=["time"], + ) + df["new_col"] = ["new", "old"] + df.time = df.set_index("time").index.tz_localize("UTC") + v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") + + # trying to set a single element on a part of a different timezone + # this converts to object + df2 = df.copy() + df2.loc[df2.new_col == "new", "time"] = v + + expected = Series([v[0], df.loc[1, "time"]], name="time") + tm.assert_series_equal(df2.time, expected) + + v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s") + df.loc[df.new_col == "new", "time"] = v + tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v) + + def test_loc_setitem_with_expansion_inf_upcast_empty(self): + # Test with np.inf in columns + df = DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): @@ -1657,6 +1741,35 @@ def test_loc_getitem_partial_slice_non_monotonicity( class TestLabelSlicing: + def test_loc_getitem_slicing_datetimes_frame(self): + # GH#7523 + + # unique + df_unique = DataFrame( + np.arange(4.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]], + ) + + # duplicates + df_dups = DataFrame( + np.arange(5.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], + ) + + for df in [df_unique, df_dups]: + result = df.loc[datetime(2001, 1, 1, 10) :] + tm.assert_frame_equal(result, df) + result = df.loc[: datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11) :] + expected = df.iloc[1:] + tm.assert_frame_equal(result, expected) + result = df.loc["20010101 11":] + tm.assert_frame_equal(result, expected) + def test_loc_getitem_label_slice_across_dst(self): # GH#21846 idx = date_range( @@ -1905,6 +2018,48 @@ def test_loc_getitem_series_label_list_missing_integer_values(self): with pytest.raises(KeyError, match="with any missing labels"): ser.loc[np.array([9730701000001104, 10047311000001102])] + @pytest.mark.parametrize("to_period", [True, False]) + def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): + # GH#11497 + + idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") + if to_period: + idx = idx.to_period("D") + ser = Series([0.1, 0.2], index=idx, name="s") + + keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + if to_period: + keys = [x.to_period("D") for x in keys] + result = ser.loc[keys] + exp = Series([0.1, 0.2], index=idx, name="s") + if not to_period: + exp.index = exp.index._with_freq(None) + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-01"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + exp = Series( + [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s" + ) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-03"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ] + if to_period: + keys = [x.to_period("D") for x in keys] + + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] + @pytest.mark.parametrize( "columns, column_key, expected_columns", diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 347aa8d66405c..8098b195c3838 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -61,89 +61,17 @@ def test_fancy_setitem(): assert (s[48:54] == -3).all() -def test_slicing_datetimes(): - # GH 7523 - - # unique - df = DataFrame( - np.arange(4.0, dtype="float64"), - index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]], - ) - result = df.loc[datetime(2001, 1, 1, 10) :] - tm.assert_frame_equal(result, df) - result = df.loc[: datetime(2001, 1, 4, 10)] - tm.assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] - tm.assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11) :] - expected = df.iloc[1:] - tm.assert_frame_equal(result, expected) - result = df.loc["20010101 11":] - tm.assert_frame_equal(result, expected) - - # duplicates - df = DataFrame( - np.arange(5.0, dtype="float64"), - index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], - ) - - result = df.loc[datetime(2001, 1, 1, 10) :] - tm.assert_frame_equal(result, df) - result = df.loc[: datetime(2001, 1, 4, 10)] - tm.assert_frame_equal(result, df) - result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] - tm.assert_frame_equal(result, df) - - result = df.loc[datetime(2001, 1, 1, 11) :] - expected = df.iloc[1:] - tm.assert_frame_equal(result, expected) - result = df.loc["20010101 11":] - tm.assert_frame_equal(result, expected) - - -def test_getitem_setitem_datetime_tz_pytz(): - N = 50 - # testing with timezone, GH #2785 - rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") - ts = Series(np.random.randn(N), index=rng) - - # also test Timestamp tz handling, GH #2789 - result = ts.copy() - result["1990-01-01 09:00:00+00:00"] = 0 - result["1990-01-01 09:00:00+00:00"] = ts[4] - tm.assert_series_equal(result, ts) - - result = ts.copy() - result["1990-01-01 03:00:00-06:00"] = 0 - result["1990-01-01 03:00:00-06:00"] = ts[4] - tm.assert_series_equal(result, ts) - - # repeat with datetimes - result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = ts[4] - tm.assert_series_equal(result, ts) - - result = ts.copy() - - # comparison dates with datetime MUST be localized! - date = pytz.timezone("US/Central").localize(datetime(1990, 1, 1, 3)) - result[date] = 0 - result[date] = ts[4] - tm.assert_series_equal(result, ts) - - -def test_getitem_setitem_datetime_tz_dateutil(): - - tz = ( - lambda x: tzutc() if x == "UTC" else gettz(x) - ) # handle special case for utc in dateutil +@pytest.mark.parametrize("tz_source", ["pytz", "dateutil"]) +def test_getitem_setitem_datetime_tz(tz_source): + if tz_source == "pytz": + tzget = pytz.timezone + else: + # handle special case for utc in dateutil + tzget = lambda x: tzutc() if x == "UTC" else gettz(x) N = 50 - # testing with timezone, GH #2785 - rng = date_range("1/1/1990", periods=N, freq="H", tz="America/New_York") + rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern")) ts = Series(np.random.randn(N), index=rng) # also test Timestamp tz handling, GH #2789 @@ -159,13 +87,15 @@ def test_getitem_setitem_datetime_tz_dateutil(): # repeat with datetimes result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0 - result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4] + result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts[4] tm.assert_series_equal(result, ts) result = ts.copy() - result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = 0 - result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = ts[4] + dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central")) + dt = dt.to_pydatetime() + result[dt] = 0 + result[dt] = ts[4] tm.assert_series_equal(result, ts) @@ -382,7 +312,7 @@ def test_indexing_with_duplicate_datetimeindex( assert ts[datetime(2000, 1, 6)] == 0 -def test_indexing_over_size_cutoff(monkeypatch): +def test_loc_getitem_over_size_cutoff(monkeypatch): # #1821 monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 64d763f410666..e6dfafabbfec2 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -36,6 +36,32 @@ class TestSeriesGetitemScalars: + def test_getitem_float_keys_tuple_values(self): + # see GH#13509 + + # unique Index + ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo") + result = ser[0.0] + assert result == (1, 1) + + # non-unique Index + expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo") + ser = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo") + + result = ser[0.0] + tm.assert_series_equal(result, expected) + + def test_getitem_unrecognized_scalar(self): + # GH#32684 a scalar key that is not recognized by lib.is_scalar + + # a series that might be produced via `frame.dtypes` + ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")]) + + key = ser.index[1] + + result = ser[key] + assert result == 2 + def test_getitem_negative_out_of_bounds(self): ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) @@ -595,3 +621,8 @@ def test_getitem_categorical_str(): with tm.assert_produces_warning(FutureWarning): result = ser.index.get_value(ser, "a") tm.assert_series_equal(result, expected) + + +def test_slice_can_reorder_not_uniquely_indexed(): + ser = Series(1, index=["a", "a", "b", "b", "c"]) + ser[::-1] # it works! diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 49264c5b669d7..34ba20c03b732 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -82,16 +82,6 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() -def test_setitem_with_expansion_type_promotion(): - # GH12599 - s = Series(dtype=object) - s["a"] = Timestamp("2016-01-01") - s["b"] = 3.0 - s["c"] = "foo" - expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) - tm.assert_series_equal(s, expected) - - @pytest.mark.parametrize( "result_1, duplicate_item, expected_1", [ @@ -193,40 +183,12 @@ def test_setitem_slicestep(): assert (series[::2] == 0).all() -def test_setitem_not_contained(string_series): - # set item that's not contained - ser = string_series.copy() - ser["foobar"] = 1 - - app = Series([1], index=["foobar"], name="series") - expected = string_series.append(app) - tm.assert_series_equal(ser, expected) - - def test_setslice(datetime_series): sl = datetime_series[5:20] assert len(sl) == len(sl.index) assert sl.index.is_unique is True -def test_loc_setitem_2d_to_1d_raises(): - x = np.random.randn(2, 2) - y = Series(range(2)) - - msg = "|".join( - [ - r"shape mismatch: value array of shape \(2,2\)", - r"cannot reshape array of size 4 into shape \(2,\)", - ] - ) - with pytest.raises(ValueError, match=msg): - y.loc[range(2)] = x - - msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" - with pytest.raises(ValueError, match=msg): - y.loc[:] = x - - # FutureWarning from NumPy about [slice(None, 5). @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(datetime_series): @@ -252,84 +214,7 @@ def test_basic_getitem_setitem_corner(datetime_series): datetime_series[[5, slice(None, None)]] = 2 -@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) -def test_setitem_with_tz(tz, indexer_sli): - orig = Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz)) - assert orig.dtype == f"datetime64[ns, {tz}]" - - exp = Series( - [ - Timestamp("2016-01-01 00:00", tz=tz), - Timestamp("2011-01-01 00:00", tz=tz), - Timestamp("2016-01-01 02:00", tz=tz), - ] - ) - - # scalar - ser = orig.copy() - indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(ser, exp) - - # vector - vals = Series( - [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], - index=[1, 2], - ) - assert vals.dtype == f"datetime64[ns, {tz}]" - - exp = Series( - [ - Timestamp("2016-01-01 00:00", tz=tz), - Timestamp("2011-01-01 00:00", tz=tz), - Timestamp("2012-01-01 00:00", tz=tz), - ] - ) - - ser = orig.copy() - indexer_sli(ser)[[1, 2]] = vals - tm.assert_series_equal(ser, exp) - - -def test_setitem_with_tz_dst(indexer_sli): - # GH XXX TODO: fill in GH ref - tz = "US/Eastern" - orig = Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz)) - assert orig.dtype == f"datetime64[ns, {tz}]" - - exp = Series( - [ - Timestamp("2016-11-06 00:00-04:00", tz=tz), - Timestamp("2011-01-01 00:00-05:00", tz=tz), - Timestamp("2016-11-06 01:00-05:00", tz=tz), - ] - ) - - # scalar - ser = orig.copy() - indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(ser, exp) - - # vector - vals = Series( - [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], - index=[1, 2], - ) - assert vals.dtype == f"datetime64[ns, {tz}]" - - exp = Series( - [ - Timestamp("2016-11-06 00:00", tz=tz), - Timestamp("2011-01-01 00:00", tz=tz), - Timestamp("2012-01-01 00:00", tz=tz), - ] - ) - - ser = orig.copy() - indexer_sli(ser)[[1, 2]] = vals - tm.assert_series_equal(ser, exp) - - -def test_categorical_assigning_ops(): +def test_setitem_categorical_assigning_ops(): orig = Series(Categorical(["b", "b"], categories=["a", "b"])) s = orig.copy() s[:] = "a" @@ -387,11 +272,6 @@ def test_slice(string_series, object_series): assert (string_series[10:20] == 0).all() -def test_slice_can_reorder_not_uniquely_indexed(): - s = Series(1, index=["a", "a", "b", "b", "c"]) - s[::-1] # it works! - - def test_loc_setitem(string_series): inds = string_series.index[[3, 4, 7]] @@ -433,15 +313,6 @@ def test_timedelta_assignment(): tm.assert_series_equal(s, expected) -def test_setitem_td64_non_nano(): - # GH 14155 - ser = Series(10 * [np.timedelta64(10, "m")]) - ser.loc[[1, 2, 3]] = np.timedelta64(20, "m") - expected = Series(10 * [np.timedelta64(10, "m")]) - expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m")) - tm.assert_series_equal(ser, expected) - - def test_underlying_data_conversion(): # GH 4080 df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]}) @@ -460,33 +331,6 @@ def test_underlying_data_conversion(): tm.assert_frame_equal(df, expected) -def test_chained_assignment(): - # GH 3970 - with pd.option_context("chained_assignment", None): - df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) - df["cc"] = 0.0 - - ck = [True] * len(df) - - df["bb"].iloc[0] = 0.13 - - # TODO: unused - df_tmp = df.iloc[ck] # noqa - - df["bb"].iloc[0] = 0.15 - assert df["bb"].iloc[0] == 0.15 - - -def test_setitem_with_expansion_dtype(): - # GH 3217 - df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) - df["c"] = np.nan - df["c"].update(Series(["foo"], index=[0])) - - expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) - tm.assert_frame_equal(df, expected) - - def test_preserve_refs(datetime_series): seq = datetime_series[[5, 10, 15]] seq[1] = np.NaN @@ -535,18 +379,16 @@ def test_setitem_mask_promote(): tm.assert_series_equal(ser, expected) -def test_multilevel_preserve_name(): +def test_multilevel_preserve_name(indexer_sl): index = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) - s = Series(np.random.randn(len(index)), index=index, name="sth") + ser = Series(np.random.randn(len(index)), index=index, name="sth") - result = s["foo"] - result2 = s.loc["foo"] - assert result.name == s.name - assert result2.name == s.name + result = indexer_sl(ser)["foo"] + assert result.name == ser.name """ @@ -554,27 +396,6 @@ def test_multilevel_preserve_name(): """ -def test_uint_drop(any_int_dtype): - # see GH18311 - # assigning series.loc[0] = 4 changed series.dtype to int - series = Series([1, 2, 3], dtype=any_int_dtype) - series.loc[0] = 4 - expected = Series([4, 2, 3], dtype=any_int_dtype) - tm.assert_series_equal(series, expected) - - -def test_getitem_unrecognized_scalar(): - # GH#32684 a scalar key that is not recognized by lib.is_scalar - - # a series that might be produced via `frame.dtypes` - ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")]) - - key = ser.index[1] - - result = ser[key] - assert result == 2 - - def test_slice_with_zero_step_raises(index, frame_or_series, indexer_sli): ts = frame_or_series(np.arange(len(index)), index=index) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index ba9593067a412..bbe328114fd20 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -63,6 +63,81 @@ def test_setitem_tuple_with_datetimetz_values(self): expected.iloc[0] = np.nan tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) + def test_setitem_with_tz(self, tz, indexer_sli): + orig = Series(date_range("2016-01-01", freq="H", periods=3, tz=tz)) + assert orig.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-01-01 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2016-01-01 02:00", tz=tz), + ] + ) + + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) + + # vector + vals = Series( + [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-01-01 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) + + def test_setitem_with_tz_dst(self, indexer_sli): + # GH XXX TODO: fill in GH ref + tz = "US/Eastern" + orig = Series(date_range("2016-11-06", freq="H", periods=3, tz=tz)) + assert orig.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-11-06 00:00-04:00", tz=tz), + Timestamp("2011-01-01 00:00-05:00", tz=tz), + Timestamp("2016-11-06 01:00-05:00", tz=tz), + ] + ) + + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) + + # vector + vals = Series( + [Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == f"datetime64[ns, {tz}]" + + exp = Series( + [ + Timestamp("2016-11-06 00:00", tz=tz), + Timestamp("2011-01-01 00:00", tz=tz), + Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) + class TestSetitemScalarIndexer: def test_setitem_negative_out_of_bounds(self): @@ -303,6 +378,25 @@ def test_append_timedelta_does_not_cast(self, td): tm.assert_series_equal(ser, expected) assert isinstance(ser["td"], Timedelta) + def test_setitem_with_expansion_type_promotion(self): + # GH#12599 + ser = Series(dtype=object) + ser["a"] = Timestamp("2016-01-01") + ser["b"] = 3.0 + ser["c"] = "foo" + expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) + tm.assert_series_equal(ser, expected) + + def test_setitem_not_contained(self, string_series): + # set item that's not contained + ser = string_series.copy() + assert "foobar" not in ser.index + ser["foobar"] = 1 + + app = Series([1], index=["foobar"], name="series") + expected = string_series.append(app) + tm.assert_series_equal(ser, expected) + def test_setitem_scalar_into_readonly_backing_data(): # GH#14359: test that you cannot mutate a read only buffer