diff --git a/doc/source/release.rst b/doc/source/release.rst index 2fb3a231660a4..fe1af472700ac 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -109,7 +109,8 @@ Improvements to existing features (:issue:`4039`) - Add ``rename`` and ``set_names`` methods to ``Index`` as well as ``set_names``, ``set_levels``, ``set_labels`` to ``MultiIndex``. - (:issue:`4039`) + (:issue:`4039`) with improved validation for all (:issue:`4039`, + :issue:`4794`) - A Series of dtype ``timedelta64[ns]`` can now be divided/multiplied by an integer series (:issue`4521`) - A Series of dtype ``timedelta64[ns]`` can now be divided by another diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index dee3ff20b9538..6bf32b2343084 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -52,7 +52,7 @@ API changes # but setting names is not deprecated. index = index.set_names(["bob", "cranberry"]) - # and all methods take an inplace kwarg + # and all methods take an inplace kwarg - but returns None index.set_names(["bob", "cranberry"], inplace=True) - Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) diff --git a/pandas/core/index.py b/pandas/core/index.py index 98f190360bc33..141eba8783926 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1672,8 +1672,8 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False): subarr = np.array(data, dtype=np.int64, copy=copy) if len(data) > 0: if (subarr != data).any(): - raise TypeError('Unsafe NumPy casting, you must ' - 'explicitly cast') + raise TypeError('Unsafe NumPy casting to integer, you must' + ' explicitly cast') subarr = subarr.view(cls) subarr.name = name @@ -1857,11 +1857,12 @@ class MultiIndex(Index): def __new__(cls, levels=None, labels=None, sortorder=None, names=None, copy=False): + if levels is None or labels is None: + raise TypeError("Must pass both levels and labels") if len(levels) != len(labels): - raise ValueError( - 'Length of levels and labels must be the same') + raise ValueError('Length of levels and labels must be the same.') if len(levels) == 0: - raise TypeError('Must pass non-zero number of levels/labels') + raise ValueError('Must pass non-zero number of levels/labels') if len(levels) == 1: if names: name = names[0] @@ -1872,10 +1873,12 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, # v3, 0.8.0 subarr = np.empty(0, dtype=object).view(cls) - subarr._set_levels(levels, copy=copy) - subarr._set_labels(labels, copy=copy) + # we've already validated levels and labels, so shortcut here + subarr._set_levels(levels, copy=copy, validate=False) + subarr._set_labels(labels, copy=copy, validate=False) if names is not None: + # handles name validation subarr._set_names(names) if sortorder is not None: @@ -1888,12 +1891,14 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, def _get_levels(self): return self._levels - def _set_levels(self, levels, copy=False): + def _set_levels(self, levels, copy=False, validate=True): # This is NOT part of the levels property because it should be # externally not allowed to set levels. User beware if you change # _levels directly - if len(levels) == 0: - raise ValueError("Must set non-zero number of levels.") + if validate and len(levels) == 0: + raise ValueError('Must set non-zero number of levels.') + if validate and len(levels) != len(self._labels): + raise ValueError('Length of levels must match length of labels.') levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy() for lev in levels) names = self.names @@ -1917,13 +1922,16 @@ def set_levels(self, levels, inplace=False): ------- new index (of same type and class...etc) """ + if not com.is_list_like(levels) or not com.is_list_like(levels[0]): + raise TypeError("Levels must be list of lists-like") if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() idx._set_levels(levels) - return idx + if not inplace: + return idx # remove me in 0.14 and change to read only property __set_levels = deprecate("setting `levels` directly", @@ -1934,9 +1942,9 @@ def set_levels(self, levels, inplace=False): def _get_labels(self): return self._labels - def _set_labels(self, labels, copy=False): - if len(labels) != self.nlevels: - raise ValueError("Length of levels and labels must be the same.") + def _set_labels(self, labels, copy=False, validate=True): + if validate and len(labels) != self.nlevels: + raise ValueError("Length of labels must match length of levels") self._labels = FrozenList(_ensure_frozen(labs, copy=copy)._shallow_copy() for labs in labels) @@ -1956,13 +1964,16 @@ def set_labels(self, labels, inplace=False): ------- new index (of same type and class...etc) """ + if not com.is_list_like(labels) or not com.is_list_like(labels[0]): + raise TypeError("Labels must be list of lists-like") if inplace: idx = self else: idx = self._shallow_copy() idx._reset_identity() idx._set_labels(labels) - return idx + if not inplace: + return idx # remove me in 0.14 and change to readonly property __set_labels = deprecate("setting labels directly", @@ -2021,7 +2032,8 @@ def __array_finalize__(self, obj): # instance. return - self._set_levels(getattr(obj, 'levels', [])) + # skip the validation on first, rest will catch the errors + self._set_levels(getattr(obj, 'levels', []), validate=False) self._set_labels(getattr(obj, 'labels', [])) self._set_names(getattr(obj, 'names', [])) self.sortorder = getattr(obj, 'sortorder', None) @@ -2083,16 +2095,15 @@ def _convert_slice_indexer(self, key, typ=None): def _get_names(self): return FrozenList(level.name for level in self.levels) - def _set_names(self, values): + def _set_names(self, values, validate=True): """ sets names on levels. WARNING: mutates! Note that you generally want to set this *after* changing levels, so that it only acts on copies""" values = list(values) - if len(values) != self.nlevels: - raise ValueError('Length of names (%d) must be same as level ' - '(%d)' % (len(values), self.nlevels)) + if validate and len(values) != self.nlevels: + raise ValueError('Length of names must match length of levels') # set the name for name, level in zip(values, self.levels): level.rename(name, inplace=True) @@ -2446,7 +2457,7 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, nd_state) levels, labels, sortorder, names = own_state - self._set_levels([Index(x) for x in levels]) + self._set_levels([Index(x) for x in levels], validate=False) self._set_labels(labels) self._set_names(names) self.sortorder = sortorder @@ -2473,7 +2484,7 @@ def __getitem__(self, key): new_labels = [lab[key] for lab in self.labels] # an optimization - result._set_levels(self.levels) + result._set_levels(self.levels, validate=False) result._set_labels(new_labels) result.sortorder = sortorder result._set_names(self.names) @@ -3351,17 +3362,12 @@ def _ensure_index(index_like, copy=False): return Index(index_like) -def _ensure_frozen(nd_array_like, copy=False): - if not isinstance(nd_array_like, FrozenNDArray): - arr = np.asarray(nd_array_like, dtype=np.int_) - # have to do this separately so that non-index input gets copied - if copy: - arr = arr.copy() - nd_array_like = arr.view(FrozenNDArray) - else: - if copy: - nd_array_like = nd_array_like.copy() - return nd_array_like +def _ensure_frozen(array_like, copy=False): + array_like = np.asanyarray(array_like, dtype=np.int_) + array_like = array_like.view(FrozenNDArray) + if copy: + array_like = array_like.copy() + return array_like def _validate_join_method(method): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index cd26016acba5c..c7c9fd1de0fcd 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -823,7 +823,12 @@ def test_constructor_corner(self): # preventing casting arr = np.array([1, '2', 3, '4'], dtype=object) - self.assertRaises(TypeError, Int64Index, arr) + with tm.assertRaisesRegexp(TypeError, 'casting'): + Int64Index(arr) + + arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] + with tm.assertRaisesRegexp(TypeError, 'casting'): + Int64Index(arr_with_floats) def test_hash_error(self): with tm.assertRaisesRegexp(TypeError, @@ -1240,6 +1245,62 @@ def test_set_levels_and_set_labels(self): minor_labels = [(x + 1) % 1 for x in minor_labels] new_labels = [major_labels, minor_labels] + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + self.assertEqual(len(actual), len(expected)) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + assert_almost_equal(act, exp) + + # level changing [w/o mutation] + ind2 = self.index.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + # level changing [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels, inplace=True) + self.assert_(inplace_return is None) + assert_matching(ind2.levels, new_levels) + + # label changing [w/o mutation] + ind2 = self.index.set_labels(new_labels) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + + # label changing [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels, inplace=True) + self.assert_(inplace_return is None) + assert_matching(ind2.labels, new_labels) + + def test_set_levels_labels_names_bad_input(self): + levels, labels = self.index.levels, self.index.labels + names = self.index.names + + with tm.assertRaisesRegexp(ValueError, 'Length of levels'): + self.index.set_levels([levels[0]]) + + with tm.assertRaisesRegexp(ValueError, 'Length of labels'): + self.index.set_labels([labels[0]]) + + with tm.assertRaisesRegexp(ValueError, 'Length of names'): + self.index.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_labels(labels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_names(names[0]) + def test_metadata_immutable(self): levels, labels = self.index.levels, self.index.labels # shouldn't be able to set at either the top level or base level @@ -1342,8 +1403,13 @@ def test_constructor_single_level(self): self.assert_(single_level.name is None) def test_constructor_no_levels(self): - assertRaisesRegexp(TypeError, "non-zero number of levels/labels", + assertRaisesRegexp(ValueError, "non-zero number of levels/labels", MultiIndex, levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') + with tm.assertRaisesRegexp(TypeError, both_re): + MultiIndex(levels=[]) + with tm.assertRaisesRegexp(TypeError, both_re): + MultiIndex(labels=[]) def test_constructor_mismatched_label_levels(self): levels = [np.array([1]), np.array([2]), np.array([3])] diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5ec97344373a2..bd431843a6b20 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -347,8 +347,8 @@ def test_frame_setitem_multi_column(self): # it broadcasts df['B', '1'] = [1, 2, 3] df['A'] = df['B', '1'] - assert_almost_equal(df['A', '1'], df['B', '1']) - assert_almost_equal(df['A', '2'], df['B', '1']) + assert_series_equal(df['A', '1'], df['B', '1']) + assert_series_equal(df['A', '2'], df['B', '1']) def test_getitem_tuple_plus_slice(self): # GH #671 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index dfe81237ee15d..dd59524e90f10 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -332,11 +332,11 @@ def equalContents(arr1, arr2): return frozenset(arr1) == frozenset(arr2) -def assert_isinstance(obj, class_type_or_tuple): +def assert_isinstance(obj, class_type_or_tuple, msg=''): """asserts that obj is an instance of class_type_or_tuple""" assert isinstance(obj, class_type_or_tuple), ( - "Expected object to be of type %r, found %r instead" % ( - type(obj), class_type_or_tuple)) + "%sExpected object to be of type %r, found %r instead" % ( + msg, class_type_or_tuple, type(obj))) def assert_equal(a, b, msg=""): @@ -355,6 +355,8 @@ def assert_equal(a, b, msg=""): def assert_index_equal(left, right): + assert_isinstance(left, Index, '[index] ') + assert_isinstance(right, Index, '[index] ') if not left.equals(right): raise AssertionError("[index] left [{0} {1}], right [{2} {3}]".format(left.dtype, left, @@ -373,6 +375,8 @@ def isiterable(obj): return hasattr(obj, '__iter__') +# NOTE: don't pass an NDFrame or index to this function - may not handle it +# well. def assert_almost_equal(a, b, check_less_precise=False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -385,9 +389,6 @@ def assert_almost_equal(a, b, check_less_precise=False): np.testing.assert_(isiterable(b)) na, nb = len(a), len(b) assert na == nb, "%s != %s" % (na, nb) - # TODO: Figure out why I thought this needed instance cheacks... - # if (isinstance(a, np.ndarray) and isinstance(b, np.ndarray) and - # np.array_equal(a, b)): if np.array_equal(a, b): return True else: