from copy import (
    copy,
    deepcopy,
)

import numpy as np
import pytest

from pandas.core.dtypes.common import is_scalar

from pandas import (
    DataFrame,
    Index,
    Series,
    date_range,
)
import pandas._testing as tm

# ----------------------------------------------------------------------
# Generic types test cases


def construct(box, shape, value=None, dtype=None, **kwargs):
    """
    Build a ``box`` instance of the requested shape.

    Parameters
    ----------
    box : type
        Class to instantiate; called as ``box(arr, dtype=dtype, **kwargs)``.
        Its ``_AXIS_LEN`` and ``_info_axis_name`` attributes are read here.
    shape : int or sequence of int
        An int is expanded to the same length along each of the
        ``box._AXIS_LEN`` axes.
    value : scalar, array or None, default None
        * None: fill with standard-normal draws from a generator seeded
          with 2.
        * the string ``"empty"``: pass no data, force ``float64`` and drop
          the info-axis entry from ``kwargs``.
        * any other scalar: fill every cell with it.
        * otherwise: ``value.ravel()`` is taken and each element repeated
          as often as needed to fill ``shape``.
    dtype : optional
        Passed to ``np.empty`` for scalar fills and to ``box``.
    **kwargs
        Forwarded to ``box``.

    Raises
    ------
    ValueError
        If an array ``value`` is empty or its size does not evenly divide
        the number of cells in ``shape``.
    """
    if isinstance(shape, int):
        shape = (shape,) * box._AXIS_LEN

    if value is None:
        arr = np.random.default_rng(2).standard_normal(shape)
    elif is_scalar(value):
        if value == "empty":
            arr = None
            dtype = np.float64

            # remove the info axis
            kwargs.pop(box._info_axis_name, None)
        else:
            arr = np.empty(shape, dtype=dtype)
            arr.fill(value)
    else:
        n_cells = np.prod(shape)
        arr = value.ravel()
        n_values = arr.shape[0]
        if n_values == 0 or n_cells % n_values != 0:
            raise ValueError("invalid value passed in construct")

        # np.repeat expects an integer count, hence floor division
        arr = np.repeat(arr, n_cells // n_values).reshape(shape)

    return box(arr, dtype=dtype, **kwargs)


def _bday_frame(nrows):
    """
    Return an ``nrows`` x 4 frame of standard-normal draws (seed 2) with
    object-dtype columns A-D and a business-day index from 2000-01-01.
    """
    return DataFrame(
        np.random.default_rng(2).standard_normal((nrows, 4)),
        columns=Index(list("ABCD"), dtype=object),
        index=date_range("2000-01-01", periods=nrows, freq="B"),
    )


class TestGeneric:
    """Tests run against objects built with ``construct``."""

    @pytest.mark.parametrize(
        "func",
        [
            str.lower,
            {x: x.lower() for x in "ABCD"},
            Series({x: x.lower() for x in "ABCD"}),
        ],
    )
    def test_rename(self, frame_or_series, func):
        # single axis
        idx = list("ABCD")

        for axis in frame_or_series._AXIS_ORDERS:
            kwargs = {axis: idx}
            obj = construct(frame_or_series, 4, **kwargs)

            # rename a single axis
            result = obj.rename(**{axis: func})
            expected = obj.copy()
            setattr(expected, axis, list("abcd"))
            tm.assert_equal(result, expected)

    def test_get_numeric_data(self, frame_or_series):
        n = 4
        kwargs = {
            frame_or_series._get_axis_name(i): list(range(n))
            for i in range(frame_or_series._AXIS_LEN)
        }

        # get the numeric data
        o = construct(frame_or_series, n, **kwargs)
        result = o._get_numeric_data()
        tm.assert_equal(result, o)

        # non-inclusion
        result = o._get_bool_data()
        expected = construct(frame_or_series, n, value="empty", **kwargs)
        if isinstance(o, DataFrame):
            # preserve columns dtype
            expected.columns = o.columns[:0]
        # https://github.com/pandas-dev/pandas/issues/50862
        tm.assert_equal(result.reset_index(drop=True), expected)

        # get the bool data
        arr = np.array([True, True, False, True])
        o = construct(frame_or_series, n, value=arr, **kwargs)
        result = o._get_numeric_data()
        tm.assert_equal(result, o)

    def test_nonzero(self, frame_or_series):
        # GH 4633
        # look at the boolean/nonzero behavior for objects
        msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"

        # random data (value=None), all ones, all NaN
        for value in (None, 1, np.nan):
            obj = construct(frame_or_series, shape=4, value=value)
            with pytest.raises(ValueError, match=msg):
                bool(obj == 0)
            with pytest.raises(ValueError, match=msg):
                bool(obj == 1)
            with pytest.raises(ValueError, match=msg):
                bool(obj)

        # empty
        obj = construct(frame_or_series, shape=0)
        with pytest.raises(ValueError, match=msg):
            bool(obj)

        # invalid behaviors

        obj1 = construct(frame_or_series, shape=4, value=1)
        obj2 = construct(frame_or_series, shape=4, value=1)

        with pytest.raises(ValueError, match=msg):
            if obj1:
                pass

        with pytest.raises(ValueError, match=msg):
            obj1 and obj2
        with pytest.raises(ValueError, match=msg):
            obj1 or obj2
        with pytest.raises(ValueError, match=msg):
            not obj1

    def test_frame_or_series_compound_dtypes(self, frame_or_series):
        # see gh-5191
        # Compound dtypes should raise NotImplementedError.

        def f(dtype):
            return construct(frame_or_series, shape=3, value=1, dtype=dtype)

        msg = (
            "compound dtypes are not implemented "
            f"in the {frame_or_series.__name__} constructor"
        )

        with pytest.raises(NotImplementedError, match=msg):
            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

        # these work (though results may be unexpected)
        f("int64")
        f("float64")
        f("M8[ns]")

    def test_metadata_propagation(self, frame_or_series):
        # check that the metadata matches up on the resulting ops

        o = construct(frame_or_series, shape=3)
        o.name = "foo"
        o2 = construct(frame_or_series, shape=3)
        o2.name = "bar"

        arith_ops = ("__add__", "__sub__", "__truediv__", "__mul__")
        compare_ops = ("__eq__", "__le__", "__ge__")

        # ----------
        # preserving
        # ----------

        # simple ops with scalars
        for op in arith_ops:
            result = getattr(o, op)(1)
            tm.assert_metadata_equivalent(o, result)

        # ops with like
        for op in arith_ops:
            result = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, result)

        # simple boolean
        for op in compare_ops:
            v1 = getattr(o, op)(o)
            tm.assert_metadata_equivalent(o, v1)
            tm.assert_metadata_equivalent(o, v1 & v1)
            tm.assert_metadata_equivalent(o, v1 | v1)

        # combine_first
        result = o.combine_first(o2)
        tm.assert_metadata_equivalent(o, result)

        # ---------------------------
        # non-preserving (by default)
        # ---------------------------

        # add non-like
        result = o + o2
        tm.assert_metadata_equivalent(result)

        # simple boolean
        for op in compare_ops:
            # this is a name matching op
            v1 = getattr(o, op)(o)
            v2 = getattr(o, op)(o2)
            tm.assert_metadata_equivalent(v2)
            tm.assert_metadata_equivalent(v1 & v2)
            tm.assert_metadata_equivalent(v1 | v2)

    def test_size_compat(self, frame_or_series):
        # GH8846
        # size property should be defined

        o = construct(frame_or_series, shape=10)
        assert o.size == np.prod(o.shape)
        assert o.size == 10 ** len(o.axes)

    def test_split_compat(self, frame_or_series):
        # xref GH8846
        o = construct(frame_or_series, shape=10)
        with tm.assert_produces_warning(
            FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
        ):
            assert len(np.array_split(o, 5)) == 5
            assert len(np.array_split(o, 2)) == 2

    # See gh-12301
    def test_stat_unexpected_keyword(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        starwars = "Star Wars"
        errmsg = "unexpected keyword"

        with pytest.raises(TypeError, match=errmsg):
            obj.max(epic=starwars)  # stat_function
        with pytest.raises(TypeError, match=errmsg):
            obj.var(epic=starwars)  # stat_function_ddof
        with pytest.raises(TypeError, match=errmsg):
            obj.sum(epic=starwars)  # cum_function
        with pytest.raises(TypeError, match=errmsg):
            obj.any(epic=starwars)  # logical_function

    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
    def test_api_compat(self, func, frame_or_series):
        # GH 12021
        # compat for __name__, __qualname__

        obj = construct(frame_or_series, 5)
        f = getattr(obj, func)
        assert f.__name__ == func
        assert f.__qualname__.endswith(func)

    def test_stat_non_defaults_args(self, frame_or_series):
        obj = construct(frame_or_series, 5)
        out = np.array([0])
        errmsg = "the 'out' parameter is not supported"

        with pytest.raises(ValueError, match=errmsg):
            obj.max(out=out)  # stat_function
        with pytest.raises(ValueError, match=errmsg):
            obj.var(out=out)  # stat_function_ddof
        with pytest.raises(ValueError, match=errmsg):
            obj.sum(out=out)  # cum_function
        with pytest.raises(ValueError, match=errmsg):
            obj.any(out=out)  # logical_function

    def test_truncate_out_of_bounds(self, frame_or_series):
        # GH11382

        # small
        shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        small = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(small.truncate(), small)
        tm.assert_equal(small.truncate(before=0, after=3e3), small)
        tm.assert_equal(small.truncate(before=-1, after=2e3), small)

        # big
        shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
        big = construct(frame_or_series, shape, dtype="int8", value=1)
        tm.assert_equal(big.truncate(), big)
        tm.assert_equal(big.truncate(before=0, after=3e6), big)
        tm.assert_equal(big.truncate(before=-1, after=2e6), big)

    @pytest.mark.parametrize(
        "func",
        [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
    )
    @pytest.mark.parametrize("shape", [0, 1, 2])
    def test_copy_and_deepcopy(self, frame_or_series, shape, func):
        # GH 15444
        obj = construct(frame_or_series, shape)
        obj_copy = func(obj)
        assert obj_copy is not obj
        tm.assert_equal(obj_copy, obj)

    def test_data_deprecated(self, frame_or_series):
        obj = frame_or_series()
        msg = "(Series|DataFrame)._data is deprecated"
        with tm.assert_produces_warning(DeprecationWarning, match=msg):
            mgr = obj._data
        assert mgr is obj._mgr


class TestNDFrame:
    """Squeeze, transpose, take, axis and flags tests that don't fit elsewhere."""

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_squeeze_series_noop(self, ser):
        # noop
        tm.assert_series_equal(ser.squeeze(), ser)

    def test_squeeze_frame_noop(self):
        # noop
        df = DataFrame(np.eye(2))
        tm.assert_frame_equal(df.squeeze(), df)

    def test_squeeze_frame_reindex(self):
        # squeezing
        df = _bday_frame(10).reindex(columns=["A"])
        tm.assert_series_equal(df.squeeze(), df["A"])

    def test_squeeze_0_len_dim(self):
        # don't fail with 0 length dimensions GH11229 & GH8999
        empty_series = Series([], name="five", dtype=np.float64)
        empty_frame = DataFrame([empty_series])
        tm.assert_series_equal(empty_series, empty_series.squeeze())
        tm.assert_series_equal(empty_series, empty_frame.squeeze())

    def test_squeeze_axis(self):
        # axis argument
        df = _bday_frame(1).iloc[:, :1]
        assert df.shape == (1, 1)
        tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
        tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
        tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
        assert df.squeeze() == df.iloc[0, 0]
        msg = "No axis named 2 for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis=2)
        msg = "No axis named x for object type DataFrame"
        with pytest.raises(ValueError, match=msg):
            df.squeeze(axis="x")

    def test_squeeze_axis_len_3(self):
        df = _bday_frame(3)
        tm.assert_frame_equal(df.squeeze(axis=0), df)

    def test_numpy_squeeze(self):
        s = Series(range(2), dtype=np.float64)
        tm.assert_series_equal(np.squeeze(s), s)

        df = _bday_frame(10).reindex(columns=["A"])
        tm.assert_series_equal(np.squeeze(df), df["A"])

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_transpose_series(self, ser):
        # calls implementation in pandas/core/base.py
        tm.assert_series_equal(ser.transpose(), ser)

    def test_transpose_frame(self):
        df = _bday_frame(10)
        tm.assert_frame_equal(df.transpose().transpose(), df)

    def test_numpy_transpose(self, frame_or_series):
        obj = _bday_frame(10)
        obj = tm.get_obj(obj, frame_or_series)

        if frame_or_series is Series:
            # 1D -> np.transpose is no-op
            tm.assert_series_equal(np.transpose(obj), obj)

        # round-trip preserved
        tm.assert_equal(np.transpose(np.transpose(obj)), obj)

        msg = "the 'axes' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.transpose(obj, axes=1)

    @pytest.mark.parametrize(
        "ser",
        [
            Series(range(10), dtype=np.float64),
            Series([str(i) for i in range(10)], dtype=object),
        ],
    )
    def test_take_series(self, ser):
        indices = [1, 5, -2, 6, 3, -1]
        out = ser.take(indices)
        expected = Series(
            data=ser.values.take(indices),
            index=ser.index.take(indices),
            dtype=ser.dtype,
        )
        tm.assert_series_equal(out, expected)

    def test_take_frame(self):
        indices = [1, 5, -2, 6, 3, -1]
        df = _bday_frame(10)
        out = df.take(indices)
        expected = DataFrame(
            data=df.values.take(indices, axis=0),
            index=df.index.take(indices),
            columns=df.columns,
        )
        tm.assert_frame_equal(out, expected)

    def test_take_invalid_kwargs(self, frame_or_series):
        indices = [-3, 2, 0, 1]

        obj = DataFrame(range(5))
        obj = tm.get_obj(obj, frame_or_series)

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            obj.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            obj.take(indices, mode="clip")

    def test_axis_classmethods(self, frame_or_series):
        box = frame_or_series
        obj = box(dtype=object)
        # every accepted axis alias must resolve the same on class and instance
        for v in box._AXIS_TO_AXIS_NUMBER:
            assert obj._get_axis_number(v) == box._get_axis_number(v)
            assert obj._get_axis_name(v) == box._get_axis_name(v)
            assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

    def test_flags_identity(self, frame_or_series):
        obj = Series([1, 2])
        if frame_or_series is DataFrame:
            obj = obj.to_frame()

        assert obj.flags is obj.flags
        obj2 = obj.copy()
        assert obj2.flags is not obj.flags

    def test_bool_dep(self) -> None:
        # GH-51749
        msg_warn = (
            "DataFrame.bool is now deprecated and will be removed "
            "in future version of pandas"
        )
        with tm.assert_produces_warning(FutureWarning, match=msg_warn):
            DataFrame({"col": [False]}).bool()