import datetime as dt from datetime import datetime import dateutil import numpy as np import pytest import pandas as pd from pandas import ( DataFrame, DatetimeIndex, Index, MultiIndex, Series, Timestamp, concat, date_range, to_timedelta, ) import pandas._testing as tm class TestDatetimeConcat: def test_concat_datetime64_block(self): rng = date_range("1/1/2000", periods=10) df = DataFrame({"time": rng}) result = concat([df, df]) assert (result.iloc[:10]["time"] == rng).all() assert (result.iloc[10:]["time"] == rng).all() def test_concat_datetime_datetime64_frame(self): # GH#2624 rows = [] rows.append([datetime(2010, 1, 1), 1]) rows.append([datetime(2010, 1, 2), "hi"]) df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) ind = date_range(start="2000/1/1", freq="D", periods=10) df1 = DataFrame({"date": ind, "test": range(10)}) # it works! concat([df1, df2_obj]) def test_concat_datetime_timezone(self): # GH 18523 idx1 = date_range("2011-01-01", periods=3, freq="h", tz="Europe/Paris") idx2 = date_range(start=idx1[0], end=idx1[-1], freq="h") df1 = DataFrame({"a": [1, 2, 3]}, index=idx1) df2 = DataFrame({"b": [1, 2, 3]}, index=idx2) result = concat([df1, df2], axis=1) exp_idx = DatetimeIndex( [ "2011-01-01 00:00:00+01:00", "2011-01-01 01:00:00+01:00", "2011-01-01 02:00:00+01:00", ], dtype="M8[ns, Europe/Paris]", freq="h", ) expected = DataFrame( [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"] ) tm.assert_frame_equal(result, expected) idx3 = date_range("2011-01-01", periods=3, freq="h", tz="Asia/Tokyo") df3 = DataFrame({"b": [1, 2, 3]}, index=idx3) result = concat([df1, df3], axis=1) exp_idx = DatetimeIndex( [ "2010-12-31 15:00:00+00:00", "2010-12-31 16:00:00+00:00", "2010-12-31 17:00:00+00:00", "2010-12-31 23:00:00+00:00", "2011-01-01 00:00:00+00:00", "2011-01-01 01:00:00+00:00", ] ).as_unit("ns") expected = DataFrame( [ [np.nan, 1], [np.nan, 2], [np.nan, 3], [1, np.nan], [2, np.nan], [3, np.nan], ], index=exp_idx, columns=["a", "b"], ) tm.assert_frame_equal(result, expected) # GH 13783: Concat after resample result = concat([df1.resample("h").mean(), df2.resample("h").mean()], sort=True) expected = DataFrame( {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]}, index=idx1.append(idx1), ) tm.assert_frame_equal(result, expected) def test_concat_datetimeindex_freq(self): # GH 3232 # Monotonic index result dr = date_range("01-Jan-2013", periods=100, freq="50ms", tz="UTC") data = list(range(100)) expected = DataFrame(data, index=dr) result = concat([expected[:50], expected[50:]]) tm.assert_frame_equal(result, expected) # Non-monotonic index result result = concat([expected[50:], expected[:50]]) expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) expected.index._data.freq = None tm.assert_frame_equal(result, expected) def test_concat_multiindex_datetime_object_index(self): # https://github.com/pandas-dev/pandas/issues/11058 idx = Index( [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)], dtype="object", ) s = Series( ["a", "b"], index=MultiIndex.from_arrays( [ [1, 2], idx[:-1], ], names=["first", "second"], ), ) s2 = Series( ["a", "b"], index=MultiIndex.from_arrays( [[1, 2], idx[::2]], names=["first", "second"], ), ) mi = MultiIndex.from_arrays( [[1, 2, 2], idx], names=["first", "second"], ) assert mi.levels[1].dtype == object expected = DataFrame( [["a", "a"], ["b", np.nan], [np.nan, "b"]], index=mi, ) result = concat([s, s2], axis=1) tm.assert_frame_equal(result, expected) def test_concat_NaT_series(self): # GH 11693 # test for merging NaT series with datetime series. x = Series( date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern") ) y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") expected = Series([x[0], x[1], pd.NaT, pd.NaT]) result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) # all NaT with tz expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]") result = concat([y, y], ignore_index=True) tm.assert_series_equal(result, expected) def test_concat_NaT_series2(self): # without tz x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h")) y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h")) y[:] = pd.NaT expected = Series([x[0], x[1], pd.NaT, pd.NaT]) result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) # all NaT without tz x[:] = pd.NaT expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_concat_NaT_dataframes(self, tz): # GH 12396 dti = DatetimeIndex([pd.NaT, pd.NaT], tz=tz) first = DataFrame({0: dti}) second = DataFrame( [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]], index=[2, 3], ) expected = DataFrame( [ pd.NaT, pd.NaT, Timestamp("2015/01/01", tz=tz), Timestamp("2016/01/01", tz=tz), ] ) result = concat([first, second], axis=0) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) @pytest.mark.parametrize("item", [pd.NaT, Timestamp("20150101")]) def test_concat_NaT_dataframes_all_NaT_axis_0( self, tz1, tz2, item, using_array_manager ): # GH 12396 # tz-naive first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1)) second = DataFrame([item]).apply(lambda x: x.dt.tz_localize(tz2)) result = concat([first, second], axis=0) expected = DataFrame(Series([pd.NaT, pd.NaT, item], index=[0, 1, 0])) expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) if tz1 != tz2: expected = expected.astype(object) if item is pd.NaT and not using_array_manager: # GH#18463 # TODO: setting nan here is to keep the test passing as we # make assert_frame_equal stricter, but is nan really the # ideal behavior here? if tz1 is not None: expected.iloc[-1, 0] = np.nan else: expected.iloc[:-1, 0] = np.nan tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): # GH 12396 first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) expected = DataFrame( { 0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1), 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2), } ) result = concat([first, second], axis=1) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): # GH 12396 # tz-naive first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1) second = DataFrame( [ [Timestamp("2015/01/01", tz=tz2)], [Timestamp("2016/01/01", tz=tz2)], ], index=[2, 3], ) expected = DataFrame( [ pd.NaT, pd.NaT, Timestamp("2015/01/01", tz=tz2), Timestamp("2016/01/01", tz=tz2), ] ) if tz1 != tz2: expected = expected.astype(object) result = concat([first, second]) tm.assert_frame_equal(result, expected) class TestTimezoneConcat: def test_concat_tz_series(self): # gh-11755: tz and no tz x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) y = Series(date_range("2012-01-01", "2012-01-02")) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) def test_concat_tz_series2(self): # gh-11887: concat tz and object x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) y = Series(["a", "b"]) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) def test_concat_tz_series3(self, unit, unit2): # see gh-12217 and gh-12306 # Concatenating two UTC times first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") first[0] = first[0].dt.tz_localize("UTC") second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]") second[0] = second[0].dt.tz_localize("UTC") result = concat([first, second]) exp_unit = tm.get_finest_unit(unit, unit2) assert result[0].dtype == f"datetime64[{exp_unit}, UTC]" def test_concat_tz_series4(self, unit, unit2): # Concatenating two London times first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") first[0] = first[0].dt.tz_localize("Europe/London") second = DataFrame([[datetime(2016, 1, 2)]], dtype=f"M8[{unit2}]") second[0] = second[0].dt.tz_localize("Europe/London") result = concat([first, second]) exp_unit = tm.get_finest_unit(unit, unit2) assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" def test_concat_tz_series5(self, unit, unit2): # Concatenating 2+1 London times first = DataFrame( [[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]], dtype=f"M8[{unit}]" ) first[0] = first[0].dt.tz_localize("Europe/London") second = DataFrame([[datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]") second[0] = second[0].dt.tz_localize("Europe/London") result = concat([first, second]) exp_unit = tm.get_finest_unit(unit, unit2) assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" def test_concat_tz_series6(self, unit, unit2): # Concatenating 1+2 London times first = DataFrame([[datetime(2016, 1, 1)]], dtype=f"M8[{unit}]") first[0] = first[0].dt.tz_localize("Europe/London") second = DataFrame( [[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]], dtype=f"M8[{unit2}]" ) second[0] = second[0].dt.tz_localize("Europe/London") result = concat([first, second]) exp_unit = tm.get_finest_unit(unit, unit2) assert result[0].dtype == f"datetime64[{exp_unit}, Europe/London]" def test_concat_tz_series_tzlocal(self): # see gh-13583 x = [ Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()), Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()), ] y = [ Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()), Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()), ] result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y)) assert result.dtype == "datetime64[ns, tzlocal()]" def test_concat_tz_series_with_datetimelike(self): # see gh-12620: tz and timedelta x = [ Timestamp("2011-01-01", tz="US/Eastern"), Timestamp("2011-02-01", tz="US/Eastern"), ] y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y, dtype="object")) # tz and period y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")] result = concat([Series(x), Series(y)], ignore_index=True) tm.assert_series_equal(result, Series(x + y, dtype="object")) def test_concat_tz_frame(self): df2 = DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), }, index=range(5), ) # concat df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) tm.assert_frame_equal(df2, df3) def test_concat_multiple_tzs(self): # GH#12467 # combining datetime tz-aware and naive DataFrames ts1 = Timestamp("2015-01-01", tz=None) ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="EST") df1 = DataFrame({"time": [ts1]}) df2 = DataFrame({"time": [ts2]}) df3 = DataFrame({"time": [ts3]}) results = concat([df1, df2]).reset_index(drop=True) expected = DataFrame({"time": [ts1, ts2]}, dtype=object) tm.assert_frame_equal(results, expected) results = concat([df1, df3]).reset_index(drop=True) expected = DataFrame({"time": [ts1, ts3]}, dtype=object) tm.assert_frame_equal(results, expected) results = concat([df2, df3]).reset_index(drop=True) expected = DataFrame({"time": [ts2, ts3]}) tm.assert_frame_equal(results, expected) def test_concat_multiindex_with_tz(self): # GH 6606 df = DataFrame( { "dt": DatetimeIndex( [ datetime(2014, 1, 1), datetime(2014, 1, 2), datetime(2014, 1, 3), ], dtype="M8[ns, US/Pacific]", ), "b": ["A", "B", "C"], "c": [1, 2, 3], "d": [4, 5, 6], } ) df = df.set_index(["dt", "b"]) exp_idx1 = DatetimeIndex( ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, dtype="M8[ns, US/Pacific]", name="dt", ) exp_idx2 = Index(["A", "B", "C"] * 2, name="b") exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) expected = DataFrame( {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"] ) result = concat([df, df]) tm.assert_frame_equal(result, expected) def test_concat_tz_not_aligned(self): # GH#22796 ts = pd.to_datetime([1, 2]).tz_localize("UTC") a = DataFrame({"A": ts}) b = DataFrame({"A": ts, "B": ts}) result = concat([a, b], sort=True, ignore_index=True) expected = DataFrame( {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)} ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "t1", [ "2015-01-01", pytest.param( pd.NaT, marks=pytest.mark.xfail( reason="GH23037 incorrect dtype when concatenating" ), ), ], ) def test_concat_tz_NaT(self, t1): # GH#22796 # Concatenating tz-aware multicolumn DataFrames ts1 = Timestamp(t1, tz="UTC") ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="UTC") df1 = DataFrame([[ts1, ts2]]) df2 = DataFrame([[ts3]]) result = concat([df1, df2]) expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) tm.assert_frame_equal(result, expected) def test_concat_tz_with_empty(self): # GH 9188 result = concat( [DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()] ) expected = DataFrame(date_range("2000", periods=1, tz="UTC")) tm.assert_frame_equal(result, expected) class TestPeriodConcat: def test_concat_period_series(self): x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D")) expected = Series([x[0], x[1], y[0], y[1]], dtype="Period[D]") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) def test_concat_period_multiple_freq_series(self): x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M")) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) assert result.dtype == "object" def test_concat_period_other_series(self): x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M")) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) assert result.dtype == "object" def test_concat_period_other_series2(self): # non-period x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) y = Series(DatetimeIndex(["2015-11-01", "2015-12-01"])) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) assert result.dtype == "object" def test_concat_period_other_series3(self): x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) y = Series(["A", "B"]) expected = Series([x[0], x[1], y[0], y[1]], dtype="object") result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) assert result.dtype == "object" def test_concat_timedelta64_block(): rng = to_timedelta(np.arange(10), unit="s") df = DataFrame({"time": rng}) result = concat([df, df]) tm.assert_frame_equal(result.iloc[:10], df) tm.assert_frame_equal(result.iloc[10:], df) def test_concat_multiindex_datetime_nat(): # GH#44900 left = DataFrame({"a": 1}, index=MultiIndex.from_tuples([(1, pd.NaT)])) right = DataFrame( {"b": 2}, index=MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) ) result = concat([left, right], axis="columns") expected = DataFrame( {"a": [1.0, np.nan], "b": 2}, MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)]) ) tm.assert_frame_equal(result, expected) def test_concat_float_datetime64(using_array_manager): # GH#32934 df_time = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) df_float = DataFrame({"A": pd.array([1.0], dtype="float64")}) expected = DataFrame( { "A": [ pd.array(["2000"], dtype="datetime64[ns]")[0], pd.array([1.0], dtype="float64")[0], ] }, index=[0, 0], ) result = concat([df_time, df_float]) tm.assert_frame_equal(result, expected) expected = DataFrame({"A": pd.array([], dtype="object")}) result = concat([df_time.iloc[:0], df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) expected = DataFrame({"A": pd.array([1.0], dtype="object")}) result = concat([df_time.iloc[:0], df_float]) tm.assert_frame_equal(result, expected) if not using_array_manager: expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}) msg = "The behavior of DataFrame concatenation with empty or all-NA entries" with tm.assert_produces_warning(FutureWarning, match=msg): result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected) else: expected = DataFrame({"A": pd.array(["2000"], dtype="datetime64[ns]")}).astype( {"A": "object"} ) result = concat([df_time, df_float.iloc[:0]]) tm.assert_frame_equal(result, expected)