"""Tests for equality, identity, and comparison behaviour of ``MultiIndex``.

Tests taking an ``idx`` argument rely on a pytest fixture of that name that is
supplied outside this file (presumably a small ``MultiIndex`` -- confirm
against the package's ``conftest.py``).
"""

import numpy as np
import pytest

from pandas.core.dtypes.common import is_any_real_numeric_dtype

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    Series,
)
import pandas._testing as tm


def test_equals(idx):
    """``equals`` accepts equivalent Index objects and rejects list/ndarray."""
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.equals(idx.to_flat_index())
    assert idx.equals(idx.to_flat_index().astype("category"))

    # Non-Index containers holding the same values are not "equal".
    assert not idx.equals(list(idx))
    assert not idx.equals(np.array(idx))

    # The relation must hold in both directions for an object-dtype Index.
    same_values = Index(idx, dtype=object)
    assert idx.equals(same_values)
    assert same_values.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(Series(idx))


def test_equals_op(idx):
    """Elementwise ``==`` against Index, ndarray, Series and scalar operands."""
    # GH9947, GH10637
    index_a = idx

    n = len(index_a)
    # index_b: one element shorter; index_c: same length, last element differs;
    # index_d: length 1.
    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_d
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))


def test_compare_tuple():
    """Comparing a MultiIndex with one of its own tuples works elementwise."""
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)

    all_false = np.array([False, False, False, False])

    # mi[0] is the first entry, so only position 0 compares equal.
    result = mi == mi[0]
    expected = np.array([True, False, False, False])
    tm.assert_numpy_array_equal(result, expected)

    result = mi != mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi < mi[0]
    tm.assert_numpy_array_equal(result, all_false)

    result = mi <= mi[0]
    tm.assert_numpy_array_equal(result, expected)

    result = mi > mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi >= mi[0]
    tm.assert_numpy_array_equal(result, ~all_false)


def test_compare_tuple_strs():
    """``==`` against string tuples, including one shorter than ``nlevels``."""
    # GH#34180

    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    result = mi == ("c", "a")
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(result, expected)

    # A 1-tuple matches no entry of this two-level index.
    result = mi == ("c",)
    expected = np.array([False, False, False])
    tm.assert_numpy_array_equal(result, expected)


def test_equals_multi(idx):
    """``equals`` / ``equal_levels`` across differing levels, codes and sizes."""
    assert idx.equals(idx)
    assert not idx.equals(idx.values)
    assert idx.equals(Index(idx.values))

    assert idx.equal_levels(idx)
    assert not idx.equals(idx[:-1])
    assert not idx.equals(idx[-1])

    # different number of levels
    index = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )

    index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
    assert not index.equals(index2)
    assert not index.equal_levels(index2)

    # levels are different
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 2, 3])
    minor_codes = np.array([0, 1, 0, 0, 1, 0])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)
    assert not idx.equal_levels(index)

    # some of the labels are different
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)


def test_identical(idx):
    """``identical`` is stricter than ``equals``: names and type must match."""
    mi = idx.copy()
    mi2 = idx.copy()
    assert mi.identical(mi2)

    # Renaming one side keeps the values equal but breaks identity.
    mi = mi.set_names(["new1", "new2"])
    assert mi.equals(mi2)
    assert not mi.identical(mi2)

    mi2 = mi2.set_names(["new1", "new2"])
    assert mi.identical(mi2)

    # A flat Index of the same tuples is equal but not identical.
    mi4 = Index(mi.tolist(), tupleize_cols=False)
    assert not mi.identical(mi4)
    assert mi.equals(mi4)


def test_equals_operator(idx):
    """``idx == idx`` is True at every position."""
    # GH9785
    assert (idx == idx).all()


def test_equals_missing_values():
    """A length-1 slice does not equal the bare tuple at that position."""
    # make sure take is not using -1
    i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    result = i[0:1].equals(i[0])
    assert not result
    result = i[1:2].equals(i[1])
    assert not result


def test_equals_missing_values_differently_sorted():
    """Entries containing NaN compare by position, not as an unordered set."""
    # GH#38439
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(mi2)


def test_is_():
    """``is_`` holds across views and renames but not across ``set_levels``."""
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([list(range(10)), list(range(10))])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4 = mi4.set_levels([list(range(10)), list(range(10))])
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    mi5 = mi5.set_levels(mi5.levels)
    assert not mi5.is_(mi)


def test_is_all_dates(idx):
    """The ``idx`` fixture is not reported as all-dates."""
    assert not idx._is_all_dates


def test_is_numeric(idx):
    """``is_any_real_numeric_dtype`` is False for the ``idx`` fixture."""
    # MultiIndex is never numeric
    assert not is_any_real_numeric_dtype(idx)


def test_multiindex_compare():
    """Comparison operators on a single-level MultiIndex work elementwise."""
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1

    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    expected = Series([True, True])
    result = Series(midx == midx)
    tm.assert_series_equal(result, expected)

    # Greater than comparison: MultiIndex object vs self
    expected = Series([False, False])
    result = Series(midx > midx)
    tm.assert_series_equal(result, expected)


def test_equals_ea_int_regular_int():
    """A nullable ``Int64`` level is not equal to the same plain-int values."""
    # GH#46026
    mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not mi1.equals(mi2)
    assert not mi2.equals(mi1)