from datetime import timedelta import re import numpy as np import pytest from pandas._libs import index as libindex from pandas.errors import ( InvalidIndexError, PerformanceWarning, ) import pandas as pd from pandas import ( Categorical, DataFrame, Index, MultiIndex, date_range, ) import pandas._testing as tm class TestSliceLocs: def test_slice_locs_partial(self, idx): sorted_idx, _ = idx.sortlevel(0) result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) assert result == (1, 5) result = sorted_idx.slice_locs(None, ("qux", "one")) assert result == (0, 5) result = sorted_idx.slice_locs(("foo", "two"), None) assert result == (1, len(sorted_idx)) result = sorted_idx.slice_locs("bar", "baz") assert result == (2, 4) def test_slice_locs(self): df = DataFrame( np.random.default_rng(2).standard_normal((50, 4)), columns=Index(list("ABCD"), dtype=object), index=date_range("2000-01-01", periods=50, freq="B"), ) stacked = df.stack(future_stack=True) idx = stacked.index slob = slice(*idx.slice_locs(df.index[5], df.index[15])) sliced = stacked[slob] expected = df[5:16].stack(future_stack=True) tm.assert_almost_equal(sliced.values, expected.values) slob = slice( *idx.slice_locs( df.index[5] + timedelta(seconds=30), df.index[15] - timedelta(seconds=30), ) ) sliced = stacked[slob] expected = df[6:15].stack(future_stack=True) tm.assert_almost_equal(sliced.values, expected.values) def test_slice_locs_with_type_mismatch(self): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD"), dtype=object), index=date_range("2000-01-01", periods=10, freq="B"), ) stacked = df.stack(future_stack=True) idx = stacked.index with pytest.raises(TypeError, match="^Level type mismatch"): idx.slice_locs((1, 3)) with pytest.raises(TypeError, match="^Level type mismatch"): idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) df = DataFrame( np.ones((5, 5)), index=Index([f"i-{i}" for i in range(5)], name="a"), columns=Index([f"i-{i}" for i in range(5)], name="a"), ) stacked = df.stack(future_stack=True) idx = stacked.index with pytest.raises(TypeError, match="^Level type mismatch"): idx.slice_locs(timedelta(seconds=30)) # TODO: Try creating a UnicodeDecodeError in exception message with pytest.raises(TypeError, match="^Level type mismatch"): idx.slice_locs(df.index[1], (16, "a")) def test_slice_locs_not_sorted(self): index = MultiIndex( levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], codes=[ np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0]), ], ) msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" with pytest.raises(KeyError, match=msg): index.slice_locs((1, 0, 1), (2, 1, 0)) # works sorted_index, _ = index.sortlevel(0) # should there be a test case here??? sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) def test_slice_locs_not_contained(self): # some searchsorted action index = MultiIndex( levels=[[0, 2, 4, 6], [0, 2, 4]], codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], ) result = index.slice_locs((1, 0), (5, 2)) assert result == (3, 6) result = index.slice_locs(1, 5) assert result == (3, 6) result = index.slice_locs((2, 2), (5, 2)) assert result == (3, 6) result = index.slice_locs(2, 5) assert result == (3, 6) result = index.slice_locs((1, 0), (6, 3)) assert result == (3, 8) result = index.slice_locs(-1, 10) assert result == (0, len(index)) @pytest.mark.parametrize( "index_arr,expected,start_idx,end_idx", [ ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), ], ) def test_slice_locs_with_missing_value( self, index_arr, expected, start_idx, end_idx ): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.slice_locs(start=start_idx, end=end_idx) assert result == expected class TestPutmask: def test_putmask_with_wrong_mask(self, idx): # GH18368 msg = "putmask: mask and data must be the same size" with pytest.raises(ValueError, match=msg): idx.putmask(np.ones(len(idx) + 1, np.bool_), 1) with pytest.raises(ValueError, match=msg): idx.putmask(np.ones(len(idx) - 1, np.bool_), 1) with pytest.raises(ValueError, match=msg): idx.putmask("foo", 1) def test_putmask_multiindex_other(self): # GH#43212 `value` is also a MultiIndex left = MultiIndex.from_tuples([(np.nan, 6), (np.nan, 6), ("a", 4)]) right = MultiIndex.from_tuples([("a", 1), ("a", 1), ("d", 1)]) mask = np.array([True, True, False]) result = left.putmask(mask, right) expected = MultiIndex.from_tuples([right[0], right[1], left[2]]) tm.assert_index_equal(result, expected) def test_putmask_keep_dtype(self, any_numeric_ea_dtype): # GH#49830 midx = MultiIndex.from_arrays( [pd.Series([1, 2, 3], dtype=any_numeric_ea_dtype), [10, 11, 12]] ) midx2 = MultiIndex.from_arrays( [pd.Series([5, 6, 7], dtype=any_numeric_ea_dtype), [-1, -2, -3]] ) result = midx.putmask([True, False, False], midx2) expected = MultiIndex.from_arrays( [pd.Series([5, 2, 3], dtype=any_numeric_ea_dtype), [-1, 11, 12]] ) tm.assert_index_equal(result, expected) def test_putmask_keep_dtype_shorter_value(self, any_numeric_ea_dtype): # GH#49830 midx = MultiIndex.from_arrays( [pd.Series([1, 2, 3], dtype=any_numeric_ea_dtype), [10, 11, 12]] ) midx2 = MultiIndex.from_arrays( [pd.Series([5], dtype=any_numeric_ea_dtype), [-1]] ) result = midx.putmask([True, False, False], midx2) expected = MultiIndex.from_arrays( [pd.Series([5, 2, 3], dtype=any_numeric_ea_dtype), [-1, 11, 12]] ) tm.assert_index_equal(result, expected) class TestGetIndexer: def test_get_indexer(self): major_axis = Index(np.arange(4)) minor_axis = Index(np.arange(2)) major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) index = MultiIndex( levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] ) idx1 = index[:5] idx2 = index[[1, 3, 5]] r1 = idx1.get_indexer(idx2) tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) r1 = idx2.get_indexer(idx1, method="pad") e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp) tm.assert_almost_equal(r1, e1) r2 = idx2.get_indexer(idx1[::-1], method="pad") tm.assert_almost_equal(r2, e1[::-1]) rffill1 = idx2.get_indexer(idx1, method="ffill") tm.assert_almost_equal(r1, rffill1) r1 = idx2.get_indexer(idx1, method="backfill") e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) tm.assert_almost_equal(r1, e1) r2 = idx2.get_indexer(idx1[::-1], method="backfill") tm.assert_almost_equal(r2, e1[::-1]) rbfill1 = idx2.get_indexer(idx1, method="bfill") tm.assert_almost_equal(r1, rbfill1) # pass non-MultiIndex r1 = idx1.get_indexer(idx2.values) rexp1 = idx1.get_indexer(idx2) tm.assert_almost_equal(r1, rexp1) r1 = idx1.get_indexer([1, 2, 3]) assert (r1 == [-1, -1, -1]).all() # create index with duplicates idx1 = Index(list(range(10)) + list(range(10))) idx2 = Index(list(range(20))) msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): idx1.get_indexer(idx2) def test_get_indexer_nearest(self): midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) msg = ( "method='nearest' not implemented yet for MultiIndex; " "see GitHub issue 9365" ) with pytest.raises(NotImplementedError, match=msg): midx.get_indexer(["a"], method="nearest") msg = "tolerance not implemented yet for MultiIndex" with pytest.raises(NotImplementedError, match=msg): midx.get_indexer(["a"], method="pad", tolerance=2) def test_get_indexer_categorical_time(self): # https://github.com/pandas-dev/pandas/issues/21390 midx = MultiIndex.from_product( [ Categorical(["a", "b", "c"]), Categorical(date_range("2012-01-01", periods=3, freq="h")), ] ) result = midx.get_indexer(midx) tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) @pytest.mark.parametrize( "index_arr,labels,expected", [ ( [[1, np.nan, 2], [3, 4, 5]], [1, np.nan, 2], np.array([-1, -1, -1], dtype=np.intp), ), ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), ( [[1, 2, 3], [np.nan, 4, 5]], [np.nan, 4, 5], np.array([-1, -1, -1], dtype=np.intp), ), ], ) def test_get_indexer_with_missing_value(self, index_arr, labels, expected): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.get_indexer(labels) tm.assert_numpy_array_equal(result, expected) def test_get_indexer_methods(self): # https://github.com/pandas-dev/pandas/issues/29896 # test getting an indexer for another index with different methods # confirms that getting an indexer without a filling method, getting an # indexer and backfilling, and getting an indexer and padding all behave # correctly in the case where all of the target values fall in between # several levels in the MultiIndex into which they are getting an indexer # # visually, the MultiIndexes used in this test are: # mult_idx_1: # 0: -1 0 # 1: 2 # 2: 3 # 3: 4 # 4: 0 0 # 5: 2 # 6: 3 # 7: 4 # 8: 1 0 # 9: 2 # 10: 3 # 11: 4 # # mult_idx_2: # 0: 0 1 # 1: 3 # 2: 4 mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]]) mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]]) indexer = mult_idx_1.get_indexer(mult_idx_2) expected = np.array([-1, 6, 7], dtype=indexer.dtype) tm.assert_almost_equal(expected, indexer) backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="backfill") expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) tm.assert_almost_equal(expected, backfill_indexer) # ensure the legacy "bfill" option functions identically to "backfill" backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype) tm.assert_almost_equal(expected, backfill_indexer) pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="pad") expected = np.array([4, 6, 7], dtype=pad_indexer.dtype) tm.assert_almost_equal(expected, pad_indexer) # ensure the legacy "ffill" option functions identically to "pad" pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") expected = np.array([4, 6, 7], dtype=pad_indexer.dtype) tm.assert_almost_equal(expected, pad_indexer) @pytest.mark.parametrize("method", ["pad", "ffill", "backfill", "bfill", "nearest"]) def test_get_indexer_methods_raise_for_non_monotonic(self, method): # 53452 mi = MultiIndex.from_arrays([[0, 4, 2], [0, 4, 2]]) if method == "nearest": err = NotImplementedError msg = "not implemented yet for MultiIndex" else: err = ValueError msg = "index must be monotonic increasing or decreasing" with pytest.raises(err, match=msg): mi.get_indexer([(1, 1)], method=method) def test_get_indexer_three_or_more_levels(self): # https://github.com/pandas-dev/pandas/issues/29896 # tests get_indexer() on MultiIndexes with 3+ levels # visually, these are # mult_idx_1: # 0: 1 2 5 # 1: 7 # 2: 4 5 # 3: 7 # 4: 6 5 # 5: 7 # 6: 3 2 5 # 7: 7 # 8: 4 5 # 9: 7 # 10: 6 5 # 11: 7 # # mult_idx_2: # 0: 1 1 8 # 1: 1 5 9 # 2: 1 6 7 # 3: 2 1 6 # 4: 2 7 6 # 5: 2 7 8 # 6: 3 6 8 mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]]) mult_idx_2 = MultiIndex.from_tuples( [ (1, 1, 8), (1, 5, 9), (1, 6, 7), (2, 1, 6), (2, 7, 7), (2, 7, 8), (3, 6, 8), ] ) # sanity check assert mult_idx_1.is_monotonic_increasing assert mult_idx_1.is_unique assert mult_idx_2.is_monotonic_increasing assert mult_idx_2.is_unique # show the relationships between the two assert mult_idx_2[0] < mult_idx_1[0] assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4] assert mult_idx_1[5] == mult_idx_2[2] assert mult_idx_1[5] < mult_idx_2[3] < mult_idx_1[6] assert mult_idx_1[5] < mult_idx_2[4] < mult_idx_1[6] assert mult_idx_1[5] < mult_idx_2[5] < mult_idx_1[6] assert mult_idx_1[-1] < mult_idx_2[6] indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2) expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype) tm.assert_almost_equal(expected, indexer_no_fill) # test with backfilling indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill") expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype) tm.assert_almost_equal(expected, indexer_backfilled) # now, the same thing, but forward-filled (aka "padded") indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad") expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype) tm.assert_almost_equal(expected, indexer_padded) # now, do the indexing in the other direction assert mult_idx_2[0] < mult_idx_1[0] < mult_idx_2[1] assert mult_idx_2[0] < mult_idx_1[1] < mult_idx_2[1] assert mult_idx_2[0] < mult_idx_1[2] < mult_idx_2[1] assert mult_idx_2[0] < mult_idx_1[3] < mult_idx_2[1] assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2] assert mult_idx_2[2] == mult_idx_1[5] assert mult_idx_2[5] < mult_idx_1[6] < mult_idx_2[6] assert mult_idx_2[5] < mult_idx_1[7] < mult_idx_2[6] assert mult_idx_2[5] < mult_idx_1[8] < mult_idx_2[6] assert mult_idx_2[5] < mult_idx_1[9] < mult_idx_2[6] assert mult_idx_2[5] < mult_idx_1[10] < mult_idx_2[6] assert mult_idx_2[5] < mult_idx_1[11] < mult_idx_2[6] indexer = mult_idx_2.get_indexer(mult_idx_1) expected = np.array( [-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype ) tm.assert_almost_equal(expected, indexer) backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill") expected = np.array( [1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype ) tm.assert_almost_equal(expected, backfill_indexer) pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad") expected = np.array( [0, 0, 0, 0, 1, 2, 5, 5, 5, 5, 5, 5], dtype=pad_indexer.dtype ) tm.assert_almost_equal(expected, pad_indexer) def test_get_indexer_crossing_levels(self): # https://github.com/pandas-dev/pandas/issues/29896 # tests a corner case with get_indexer() with MultiIndexes where, when we # need to "carry" across levels, proper tuple ordering is respected # # the MultiIndexes used in this test, visually, are: # mult_idx_1: # 0: 1 1 1 1 # 1: 2 # 2: 2 1 # 3: 2 # 4: 1 2 1 1 # 5: 2 # 6: 2 1 # 7: 2 # 8: 2 1 1 1 # 9: 2 # 10: 2 1 # 11: 2 # 12: 2 2 1 1 # 13: 2 # 14: 2 1 # 15: 2 # # mult_idx_2: # 0: 1 3 2 2 # 1: 2 3 2 2 mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4) mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)]) # show the tuple orderings, which get_indexer() should respect assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8] assert mult_idx_1[-1] < mult_idx_2[1] indexer = mult_idx_1.get_indexer(mult_idx_2) expected = np.array([-1, -1], dtype=indexer.dtype) tm.assert_almost_equal(expected, indexer) backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill") expected = np.array([8, -1], dtype=backfill_indexer.dtype) tm.assert_almost_equal(expected, backfill_indexer) pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill") expected = np.array([7, 15], dtype=pad_indexer.dtype) tm.assert_almost_equal(expected, pad_indexer) def test_get_indexer_kwarg_validation(self): # GH#41918 mi = MultiIndex.from_product([range(3), ["A", "B"]]) msg = "limit argument only valid if doing pad, backfill or nearest" with pytest.raises(ValueError, match=msg): mi.get_indexer(mi[:-1], limit=4) msg = "tolerance argument only valid if doing pad, backfill or nearest" with pytest.raises(ValueError, match=msg): mi.get_indexer(mi[:-1], tolerance="piano") def test_get_indexer_nan(self): # GH#37222 idx1 = MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]) idx2 = MultiIndex.from_product([["A"], [np.nan, 2.0]], names=["id1", "id2"]) expected = np.array([-1, 1]) result = idx2.get_indexer(idx1) tm.assert_numpy_array_equal(result, expected, check_dtype=False) result = idx1.get_indexer(idx2) tm.assert_numpy_array_equal(result, expected, check_dtype=False) def test_getitem(idx): # scalar assert idx[2] == ("bar", "one") # slice result = idx[2:5] expected = idx[[2, 3, 4]] assert result.equals(expected) # boolean result = idx[[True, False, True, False, True, True]] result2 = idx[np.array([True, False, True, False, True, True])] expected = idx[[0, 2, 4, 5]] assert result.equals(expected) assert result2.equals(expected) def test_getitem_group_select(idx): sorted_idx, _ = idx.sortlevel(0) assert sorted_idx.get_loc("baz") == slice(3, 4) assert sorted_idx.get_loc("foo") == slice(0, 2) @pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)]) @pytest.mark.parametrize( "ind2", [[True, False, True, False, False], Index([True, False, True, False, False])], ) def test_getitem_bool_index_all(ind1, ind2): # GH#22533 idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]) tm.assert_index_equal(idx[ind1], idx) expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) tm.assert_index_equal(idx[ind2], expected) @pytest.mark.parametrize("ind1", [[True], Index([True])]) @pytest.mark.parametrize("ind2", [[False], Index([False])]) def test_getitem_bool_index_single(ind1, ind2): # GH#22533 idx = MultiIndex.from_tuples([(10, 1)]) tm.assert_index_equal(idx[ind1], idx) expected = MultiIndex( levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], codes=[[], []], ) tm.assert_index_equal(idx[ind2], expected) class TestGetLoc: def test_get_loc(self, idx): assert idx.get_loc(("foo", "two")) == 1 assert idx.get_loc(("baz", "two")) == 3 with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"): idx.get_loc(("bar", "two")) with pytest.raises(KeyError, match=r"^'quux'$"): idx.get_loc("quux") # 3 levels index = MultiIndex( levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], codes=[ np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0]), ], ) with pytest.raises(KeyError, match=r"^\(1, 1\)$"): index.get_loc((1, 1)) assert index.get_loc((2, 0)) == slice(3, 5) def test_get_loc_duplicates(self): index = Index([2, 2, 2, 2]) result = index.get_loc(2) expected = slice(0, 4) assert result == expected index = Index(["c", "a", "a", "b", "b"]) rs = index.get_loc("c") xp = 0 assert rs == xp with pytest.raises(KeyError, match="2"): index.get_loc(2) def test_get_loc_level(self): index = MultiIndex( levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], codes=[ np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0]), ], ) loc, new_index = index.get_loc_level((0, 1)) expected = slice(1, 2) exp_index = index[expected].droplevel(0).droplevel(0) assert loc == expected assert new_index.equals(exp_index) loc, new_index = index.get_loc_level((0, 1, 0)) expected = 1 assert loc == expected assert new_index is None with pytest.raises(KeyError, match=r"^\(2, 2\)$"): index.get_loc_level((2, 2)) # GH 22221: unused label with pytest.raises(KeyError, match=r"^2$"): index.drop(2).get_loc_level(2) # Unused label on unsorted level: with pytest.raises(KeyError, match=r"^2$"): index.drop(1, level=2).get_loc_level(2, level=2) index = MultiIndex( levels=[[2000], list(range(4))], codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], ) result, new_index = index.get_loc_level((2000, slice(None, None))) expected = slice(None, None) assert result == expected assert new_index.equals(index.droplevel(0)) @pytest.mark.parametrize("dtype1", [int, float, bool, str]) @pytest.mark.parametrize("dtype2", [int, float, bool, str]) def test_get_loc_multiple_dtypes(self, dtype1, dtype2): # GH 18520 levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] idx = MultiIndex.from_product(levels) assert idx.get_loc(idx[2]) == 2 @pytest.mark.parametrize("level", [0, 1]) @pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) def test_get_loc_implicit_cast(self, level, dtypes): # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa levels = [["a", "b"], ["c", "d"]] key = ["b", "d"] lev_dtype, key_dtype = dtypes levels[level] = np.array([0, 1], dtype=lev_dtype) key[level] = key_dtype(1) idx = MultiIndex.from_product(levels) assert idx.get_loc(tuple(key)) == 3 @pytest.mark.parametrize("dtype", [bool, object]) def test_get_loc_cast_bool(self, dtype): # GH 19086 : int is casted to bool, but not vice-versa (for object dtype) # With bool dtype, we don't cast in either direction. levels = [Index([False, True], dtype=dtype), np.arange(2, dtype="int64")] idx = MultiIndex.from_product(levels) if dtype is bool: with pytest.raises(KeyError, match=r"^\(0, 1\)$"): assert idx.get_loc((0, 1)) == 1 with pytest.raises(KeyError, match=r"^\(1, 0\)$"): assert idx.get_loc((1, 0)) == 2 else: # We use python object comparisons, which treat 0 == False and 1 == True assert idx.get_loc((0, 1)) == 1 assert idx.get_loc((1, 0)) == 2 with pytest.raises(KeyError, match=r"^\(False, True\)$"): idx.get_loc((False, True)) with pytest.raises(KeyError, match=r"^\(True, False\)$"): idx.get_loc((True, False)) @pytest.mark.parametrize("level", [0, 1]) def test_get_loc_nan(self, level, nulls_fixture): # GH 18485 : NaN in MultiIndex levels = [["a", "b"], ["c", "d"]] key = ["b", "d"] levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) key[level] = nulls_fixture idx = MultiIndex.from_product(levels) assert idx.get_loc(tuple(key)) == 3 def test_get_loc_missing_nan(self): # GH 8569 idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) assert isinstance(idx.get_loc(1), slice) with pytest.raises(KeyError, match=r"^3$"): idx.get_loc(3) with pytest.raises(KeyError, match=r"^nan$"): idx.get_loc(np.nan) with pytest.raises(InvalidIndexError, match=r"\[nan\]"): # listlike/non-hashable raises TypeError idx.get_loc([np.nan]) def test_get_loc_with_values_including_missing_values(self): # issue 19132 idx = MultiIndex.from_product([[np.nan, 1]] * 2) expected = slice(0, 2, None) assert idx.get_loc(np.nan) == expected idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) expected = np.array([True, False, False, True]) tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) idx = MultiIndex.from_product([[np.nan, 1]] * 3) expected = slice(2, 4, None) assert idx.get_loc((np.nan, 1)) == expected def test_get_loc_duplicates2(self): # TODO: de-duplicate with test_get_loc_duplicates above? index = MultiIndex( levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=["tag", "day"], ) assert index.get_loc("D") == slice(0, 3) def test_get_loc_past_lexsort_depth(self): # GH#30053 idx = MultiIndex( levels=[["a"], [0, 7], [1]], codes=[[0, 0], [1, 0], [0, 0]], names=["x", "y", "z"], sortorder=0, ) key = ("a", 7) with tm.assert_produces_warning(PerformanceWarning): # PerformanceWarning: indexing past lexsort depth may impact performance result = idx.get_loc(key) assert result == slice(0, 1, None) def test_multiindex_get_loc_list_raises(self): # GH#35878 idx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) msg = r"\[\]" with pytest.raises(InvalidIndexError, match=msg): idx.get_loc([]) def test_get_loc_nested_tuple_raises_keyerror(self): # raise KeyError, not TypeError mi = MultiIndex.from_product([range(3), range(4), range(5), range(6)]) key = ((2, 3, 4), "foo") with pytest.raises(KeyError, match=re.escape(str(key))): mi.get_loc(key) class TestWhere: def test_where(self): i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) msg = r"\.where is not supported for MultiIndex operations" with pytest.raises(NotImplementedError, match=msg): i.where(True) def test_where_array_like(self, listlike_box): mi = MultiIndex.from_tuples([("A", 1), ("A", 2)]) cond = [False, True] msg = r"\.where is not supported for MultiIndex operations" with pytest.raises(NotImplementedError, match=msg): mi.where(listlike_box(cond)) class TestContains: def test_contains_top_level(self): midx = MultiIndex.from_product([["A", "B"], [1, 2]]) assert "A" in midx assert "A" not in midx._engine def test_contains_with_nat(self): # MI with a NaT mi = MultiIndex( levels=[["C"], date_range("2012-01-01", periods=5)], codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], names=[None, "B"], ) assert ("C", pd.Timestamp("2012-01-01")) in mi for val in mi.values: assert val in mi def test_contains(self, idx): assert ("foo", "two") in idx assert ("bar", "two") not in idx assert None not in idx def test_contains_with_missing_value(self): # GH#19132 idx = MultiIndex.from_arrays([[1, np.nan, 2]]) assert np.nan in idx idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]]) assert np.nan not in idx assert (1, np.nan) in idx def test_multiindex_contains_dropped(self): # GH#19027 # test that dropped MultiIndex levels are not in the MultiIndex # despite continuing to be in the MultiIndex's levels idx = MultiIndex.from_product([[1, 2], [3, 4]]) assert 2 in idx idx = idx.drop(2) # drop implementation keeps 2 in the levels assert 2 in idx.levels[0] # but it should no longer be in the index itself assert 2 not in idx # also applies to strings idx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) assert "a" in idx idx = idx.drop("a") assert "a" in idx.levels[0] assert "a" not in idx def test_contains_td64_level(self): # GH#24570 tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min") idx = MultiIndex.from_arrays([tx, np.arange(len(tx))]) assert tx[0] in idx assert "element_not_exit" not in idx assert "0 day 09:30:00" in idx def test_large_mi_contains(self, monkeypatch): # GH#10645 with monkeypatch.context(): monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 10) result = MultiIndex.from_arrays([range(10), range(10)]) assert (10, 0) not in result def test_timestamp_multiindex_indexer(): # https://github.com/pandas-dev/pandas/issues/26944 idx = MultiIndex.from_product( [ date_range("2019-01-01T00:15:33", periods=100, freq="h", name="date"), ["x"], [3], ] ) df = DataFrame({"foo": np.arange(len(idx))}, idx) result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"] qidx = MultiIndex.from_product( [ date_range( start="2019-01-02T00:15:33", end="2019-01-05T03:15:33", freq="h", name="date", ), ["x"], [3], ] ) should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo") tm.assert_series_equal(result, should_be) @pytest.mark.parametrize( "index_arr,expected,target,algo", [ ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"), ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"), ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"), ], ) def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.get_slice_bound(target, side=algo) assert result == expected @pytest.mark.parametrize( "index_arr,expected,start_idx,end_idx", [ ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1), ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)), ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3), ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)), ], ) def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.slice_indexer(start=start_idx, end=end_idx) assert result == expected def test_pyint_engine(): # GH#18519 : when combinations of codes cannot be represented in 64 # bits, the index underlying the MultiIndex engine works with Python # integers, rather than uint64. N = 5 keys = [ tuple(arr) for arr in [ [0] * 10 * N, [1] * 10 * N, [2] * 10 * N, [np.nan] * N + [2] * 9 * N, [0] * N + [2] * 9 * N, [np.nan] * N + [2] * 8 * N + [0] * N, ] ] # Each level contains 4 elements (including NaN), so it is represented # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a # 64 bit engine and truncating the first levels, the fourth and fifth # keys would collide; if truncating the last levels, the fifth and # sixth; if rotating bits rather than shifting, the third and fifth. for idx, key_value in enumerate(keys): index = MultiIndex.from_tuples(keys) assert index.get_loc(key_value) == idx expected = np.arange(idx + 1, dtype=np.intp) result = index.get_indexer([keys[i] for i in expected]) tm.assert_numpy_array_equal(result, expected) # With missing key: idces = range(len(keys)) expected = np.array([-1] + list(idces), dtype=np.intp) missing = tuple([0, 1] * 5 * N) result = index.get_indexer([missing] + [keys[i] for i in idces]) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "keys,expected", [ ((slice(None), [5, 4]), [1, 0]), ((slice(None), [4, 5]), [0, 1]), (([True, False, True], [4, 6]), [0, 2]), (([True, False, True], [6, 4]), [0, 2]), ((2, [4, 5]), [0, 1]), ((2, [5, 4]), [1, 0]), (([2], [4, 5]), [0, 1]), (([2], [5, 4]), [1, 0]), ], ) def test_get_locs_reordering(keys, expected): # GH48384 idx = MultiIndex.from_arrays( [ [2, 2, 1], [4, 5, 6], ] ) result = idx.get_locs(keys) expected = np.array(expected, dtype=np.intp) tm.assert_numpy_array_equal(result, expected) def test_get_indexer_for_multiindex_with_nans(nulls_fixture): # GH37222 idx1 = MultiIndex.from_product([["A"], [1.0, 2.0]], names=["id1", "id2"]) idx2 = MultiIndex.from_product([["A"], [nulls_fixture, 2.0]], names=["id1", "id2"]) result = idx2.get_indexer(idx1) expected = np.array([-1, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = idx1.get_indexer(idx2) expected = np.array([-1, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected)