from decimal import Decimal

import numpy as np
import pytest

from pandas._libs.missing import (
    NA,
    is_matching_na,
)
from pandas.compat import pa_version_under16p0
import pandas.util._test_decorators as td

import pandas as pd
from pandas import Index
import pandas._testing as tm


class TestGetIndexer:
    """Checks of ``Index.get_indexer`` on string and object-dtype indexes."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
            ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
        ],
    )
    def test_get_indexer_strings(self, method, expected):
        """Look up "a".."d" in an index holding "b", "c" with a fill method."""
        target = ["a", "b", "c", "d"]
        result = Index(["b", "c"]).get_indexer(target, method=method)

        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_strings_raises(self, using_infer_string):
        """``nearest`` and ``tolerance`` lookups on string labels must raise."""
        index = Index(["b", "c"])

        # The exception type and message depend on the string storage in use.
        if using_infer_string:
            import pyarrow as pa

            exc_type = pa.lib.ArrowNotImplementedError
            msg = "has no kernel"
        else:
            exc_type = TypeError
            msg = r"unsupported operand type\(s\) for -: 'str' and 'str'"

        # Every keyword combination below is expected to fail the same way.
        failing_kwargs = [
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        ]
        for kwargs in failing_kwargs:
            with pytest.raises(exc_type, match=msg):
                index.get_indexer(list("abcd"), **kwargs)

    def test_get_indexer_with_NA_values(
        self, unique_nulls_fixture, unique_nulls_fixture2
    ):
        """Two distinct null objects must each map to their own position."""
        # GH#22332
        # check pairwise, that no pair of na values
        # is mangled
        if unique_nulls_fixture is unique_nulls_fixture2:
            return  # skip it, values are not unique

        na_pair = [unique_nulls_fixture, unique_nulls_fixture2]
        index = Index(np.array(na_pair, dtype=object), dtype=object)
        target = Index([*na_pair, "Unknown"], dtype=object)

        result = index.get_indexer(target)

        # "Unknown" is not in the index, hence the trailing -1
        expected = np.array([0, 1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)


class TestGetIndexerNonUnique:
    """Checks of ``Index.get_indexer_non_unique`` with null-like labels."""

    def test_get_indexer_non_unique_nas(
        self, nulls_fixture, request, using_infer_string
    ):
        """A null target is found at every position holding a matching null."""
        na_is_cast = nulls_fixture is None or nulls_fixture is NA
        if using_infer_string and na_is_cast:
            request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))

        no_missing = np.array([], dtype=np.intp)
        both_positions = np.array([1, 3], dtype=np.intp)

        # even though this isn't non-unique, this should still work
        index = Index(["a", "b", nulls_fixture])
        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
        tm.assert_numpy_array_equal(indexer, np.array([2], dtype=np.intp))
        tm.assert_numpy_array_equal(missing, no_missing)

        # actually non-unique
        index = Index(["a", nulls_fixture, "b", nulls_fixture])
        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
        tm.assert_numpy_array_equal(indexer, both_positions)
        tm.assert_numpy_array_equal(missing, no_missing)

        # matching-but-not-identical nans
        if is_matching_na(nulls_fixture, float("NaN")):
            index = Index(["a", float("NaN"), "b", float("NaN")])
        elif is_matching_na(nulls_fixture, Decimal("NaN")):
            index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
        else:
            # neither a float nor a Decimal NaN matches this null;
            # nothing further to check
            return

        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
        tm.assert_numpy_array_equal(indexer, both_positions)
        tm.assert_numpy_array_equal(missing, no_missing)

    @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
    def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
        """A numpy NaT target only hits the NaT entries that match it."""
        no_missing = np.array([], dtype=np.intp)

        if is_matching_na(np_nat_fixture, np_nat_fixture2):
            # matching-but-not-identical nats
            # ensure nats are different objects
            values = ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()]
            expected_indexer = np.array([1, 2], dtype=np.intp)
        else:
            # dt64nat vs td64nat
            try:
                np_nat_fixture == np_nat_fixture2
            except (TypeError, OverflowError):
                # Numpy will raise on uncomparable types, like
                # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
                # https://github.com/numpy/numpy/issues/22762
                return
            values = [
                "2021-10-02",
                np_nat_fixture,
                np_nat_fixture2,
                np_nat_fixture,
                np_nat_fixture2,
            ]
            expected_indexer = np.array([1, 3], dtype=np.intp)

        index = Index(np.array(values, dtype=object), dtype=object)
        # pass as index to prevent target from being casted to DatetimeIndex
        target = Index([np_nat_fixture], dtype=object)

        indexer, missing = index.get_indexer_non_unique(target)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, no_missing)


class TestSliceLocs:
    """Checks of ``Index.slice_locs`` and label-derived positional slicing."""

    @pytest.mark.parametrize(
        "dtype",
        [
            "object",
            pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
        ],
    )
    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # error: Slice index must be an integer or None
            (pd.IndexSlice[::-1], "yxdcb"),
            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
            # absent labels
            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected, dtype, request):
        """Positions from ``slice_locs`` select the expected labels in reverse."""
        start, stop, step = in_slice.start, in_slice.stop, in_slice.step

        hits_arrow_bug = (
            not pa_version_under16p0
            and dtype == "string[pyarrow_numpy]"
            and in_slice == slice("a", "a", -1)
        )
        if hits_arrow_bug:
            request.applymarker(
                pytest.mark.xfail(reason="https://github.com/apache/arrow/issues/40642")
            )

        index = Index(list("bcdxy"), dtype=dtype)

        # translate the label bounds to positions, then slice positionally
        lo, hi = index.slice_locs(start, stop, step)
        result = index[lo:hi:step]

        tm.assert_index_equal(result, Index(list(expected), dtype=dtype))

    @td.skip_if_no("pyarrow")
    def test_slice_locs_negative_step_oob(self):
        """Positional slices whose bounds lie outside the index are clipped."""
        arrow_dtype = "string[pyarrow_numpy]"
        index = Index(list("bcdxy"), dtype=arrow_dtype)

        # forward slice starting before the first element
        forward = index[-10:5:1]
        tm.assert_index_equal(forward, index)

        # backward slice stopping before the first element
        backward = index[4:-10:-1]
        tm.assert_index_equal(backward, Index(list("yxdcb"), dtype=arrow_dtype))

    def test_slice_locs_dup(self):
        """Duplicated boundary labels are covered in full by ``slice_locs``."""
        index = Index(["a", "a", "b", "c", "d", "d"])
        assert index.slice_locs("a", "d") == (0, 6)
        assert index.slice_locs(end="d") == (0, 6)
        assert index.slice_locs("a", "c") == (0, 4)
        assert index.slice_locs("b", "d") == (2, 6)

        # same labels in reversed order
        descending = index[::-1]
        assert descending.slice_locs("d", "a") == (0, 6)
        assert descending.slice_locs(end="a") == (0, 6)
        assert descending.slice_locs("d", "b") == (0, 4)
        assert descending.slice_locs("c", "a") == (2, 6)