371 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			371 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from datetime import (
 | 
						|
    date,
 | 
						|
    timedelta,
 | 
						|
)
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas._libs.tslibs.timezones import maybe_get_tz
 | 
						|
import pandas.util._test_decorators as td
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    DatetimeIndex,
 | 
						|
    Series,
 | 
						|
    Timestamp,
 | 
						|
    date_range,
 | 
						|
)
 | 
						|
import pandas._testing as tm
 | 
						|
from pandas.tests.io.pytables.common import (
 | 
						|
    _maybe_remove,
 | 
						|
    ensure_clean_path,
 | 
						|
    ensure_clean_store,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
def _compare_with_tz(a, b):
 | 
						|
    tm.assert_frame_equal(a, b)
 | 
						|
 | 
						|
    # compare the zones on each element
 | 
						|
    for c in a.columns:
 | 
						|
        for i in a.index:
 | 
						|
            a_e = a.loc[i, c]
 | 
						|
            b_e = b.loc[i, c]
 | 
						|
            if not (a_e == b_e and a_e.tz == b_e.tz):
 | 
						|
                raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
 | 
						|
 | 
						|
 | 
						|
# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
def gettz_dateutil(x):
    """Return the timezone ``maybe_get_tz`` resolves for ``"dateutil/" + x``."""
    return maybe_get_tz("dateutil/" + x)


def gettz_pytz(x):
    """Return the zone name ``x`` unchanged, to be passed directly as ``tz=``."""
    return x
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
    """
    Append frames with tz-aware columns to a store and read them back.

    Also checks the ValueError raised when an appended frame's column tz
    conflicts with the tz already recorded in the table.
    """
    # as columns
    eastern = gettz("US/Eastern")

    # Single-tzinfo, no DST transition
    # NOTE: the name ``df_est`` is referenced from the ``where`` string
    # below, so it must not be renamed.
    first = Timestamp("20130102 2:00:00", tz=eastern)
    df_est = DataFrame({"A": [first + timedelta(hours=1) * i for i in range(5)]})

    # frame with all columns having same tzinfo, but different sides
    #  of DST transition
    crosses_dst = DataFrame(
        {
            "A": Timestamp("20130102", tz=eastern),
            "B": Timestamp("20130603", tz=eastern),
        },
        index=range(5),
    )

    mixed_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=eastern),
            "B": Timestamp("20130102", tz=gettz("EET")),
        },
        index=range(5),
    )

    different_tz = DataFrame(
        {
            "A": Timestamp("20130102", tz=eastern),
            "B": Timestamp("20130102", tz=gettz("CET")),
        },
        index=range(5),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_est, data_columns=["A"])
        stored = store["df_tz"]
        _compare_with_tz(stored, df_est)
        tm.assert_frame_equal(stored, df_est)

        # select with tz aware
        keep = df_est.A >= df_est.A[3]
        expected = df_est[keep]
        selected = store.select("df_tz", where="A>=df_est.A[3]")
        _compare_with_tz(selected, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        store.append("df_tz", crosses_dst)
        stored = store["df_tz"]
        _compare_with_tz(stored, crosses_dst)
        tm.assert_frame_equal(stored, crosses_dst)

        block_conflict = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?US/Eastern\] "
            r"conflicts with new value \[(dateutil/.*)?EET\]"
        )
        with pytest.raises(ValueError, match=block_conflict):
            store.append("df_tz", mixed_tz)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", mixed_tz, data_columns=["A", "B"])
        stored = store["df_tz"]
        _compare_with_tz(stored, mixed_tz)
        tm.assert_frame_equal(stored, mixed_tz)

        # can't append with diff timezone
        column_conflict = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?EET\] "
            r"conflicts with new value \[(dateutil/.*)?CET\]"
        )
        with pytest.raises(ValueError, match=column_conflict):
            store.append("df_tz", different_tz)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones_as_index(setup_path, gettz):
    """Round-trip a frame with a tz-aware index via both put and append."""
    # GH#4098 example
    hourly = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
    index = hourly._with_freq(None)  # freq doesn't round-trip

    df = DataFrame({"A": Series(range(3), index=index)})

    with ensure_clean_store(setup_path) as store:
        # same check for each write path, starting from a clean key
        for write in (store.put, store.append):
            _maybe_remove(store, "df")
            write("df", df)
            tm.assert_frame_equal(store.select("df"), df)
 | 
						|
 | 
						|
 | 
						|
def test_roundtrip_tz_aware_index(setup_path):
    """Fixed-format round-trip keeps a tz-aware index and its epoch value."""
    # GH 17618
    stamp = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    expected = DataFrame(data=[0], index=[stamp])

    with ensure_clean_store(setup_path) as store:
        store.put("frame", expected, format="fixed")
        roundtripped = store["frame"]
        tm.assert_frame_equal(roundtripped, expected)

        first_label = roundtripped.index[0]
        assert first_label.value == 946706400000000000
 | 
						|
 | 
						|
 | 
						|
def test_store_index_name_with_tz(setup_path):
    """Table-format round-trip of a frame with a named, UTC-localized index."""
    # GH 13884
    naive = DatetimeIndex([1234567890123456787, 1234567890123456788])
    index = naive.tz_localize("UTC")
    index.name = "foo"
    expected = DataFrame({"A": [1, 2]}, index=index)

    with ensure_clean_store(setup_path) as store:
        store.put("frame", expected, format="table")
        tm.assert_frame_equal(store["frame"], expected)
 | 
						|
 | 
						|
 | 
						|
def test_tseries_select_index_column(setup_path):
    """``select_column`` on the index column reports the tz of the stored index."""
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    naive = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(np.random.randn(len(naive), 4), index=naive)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        column = store.select_column("frame", "index")
        assert naive.tz == DatetimeIndex(column.values).tz

    # check utc, then double check non-utc
    for zone in ("UTC", "US/Eastern"):
        aware = date_range("1/1/2000", "1/30/2000", tz=zone)
        frame = DataFrame(np.random.randn(len(aware), 4), index=aware)

        with ensure_clean_store(setup_path) as store:
            store.append("frame", frame)
            column = store.select_column("frame", "index")
            assert aware.tz == column.dt.tz
 | 
						|
 | 
						|
 | 
						|
def test_timezones_fixed_format_frame_non_empty(setup_path):
    """Fixed-format round-trip with tz-aware data as the index and as columns."""
    with ensure_clean_store(setup_path) as store:

        # index
        eastern = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = eastern._with_freq(None)  # freq doesn't round-trip
        expected = DataFrame(np.random.randn(len(rng), 4), index=rng)
        store["df"] = expected
        tm.assert_frame_equal(store["df"], expected)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        columns = {
            "A": rng,
            "B": rng.tz_convert("UTC").tz_localize(None),
            "C": rng.tz_convert("CET"),
            "D": range(len(rng)),
        }
        expected = DataFrame(columns, index=rng)
        store["df"] = expected
        tm.assert_frame_equal(store["df"], expected)
 | 
						|
 | 
						|
 | 
						|
def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
    """Fixed-format round-trip of an empty tz-aware Series or one-column frame."""
    # GH 20594
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    empty = Series(dtype=tz_dtype, name="A")
    obj = empty.to_frame() if frame_or_series is DataFrame else empty

    with ensure_clean_store(setup_path) as store:
        store["obj"] = obj
        tm.assert_equal(store["obj"], obj)
 | 
						|
 | 
						|
 | 
						|
def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    """Fixed-format round-trip of a one-element tz-aware Series."""
    # GH 20594
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        expected = Series([0], dtype=tz_dtype)
        store["s"] = expected
        tm.assert_series_equal(store["s"], expected)
 | 
						|
 | 
						|
 | 
						|
def test_fixed_offset_tz(setup_path):
    """An index built from ``-07:00`` offset strings keeps its tz after storing."""
    index = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    values = np.random.randn(len(index), 4)
    frame = DataFrame(values, index=index)

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        stored_index = store["frame"].index
        tm.assert_index_equal(stored_index, index)
        assert index.tz == stored_index.tz
 | 
						|
 | 
						|
 | 
						|
@td.skip_if_windows
def test_store_timezone(setup_path):
    """
    Round-trip a frame indexed by ``datetime.date`` objects.

    Done once as-is, and once writing under "EST5EDT" and reading back
    under "CST6CDT" via ``tm.set_timezone``.
    """
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:
        day = date(2013, 9, 10)
        expected = DataFrame([1, 2, 3], index=[day] * 3)
        store["obj1"] = expected
        tm.assert_frame_equal(store["obj1"], expected)

    # with tz setting
    with ensure_clean_store(setup_path) as store:
        with tm.set_timezone("EST5EDT"):
            day = date(2013, 9, 10)
            expected = DataFrame([1, 2, 3], index=[day] * 3)
            store["obj1"] = expected

        with tm.set_timezone("CST6CDT"):
            read_back = store["obj1"]

        tm.assert_frame_equal(read_back, expected)
 | 
						|
 | 
						|
 | 
						|
def test_legacy_datetimetz_object(datapath, setup_path):
    """Read the legacy ``datetimetz_object.h5`` file and check its ``df`` key."""
    # legacy from < 0.17.0
    # 8260
    legacy_file = datapath("io", "data", "legacy_hdf", "datetimetz_object.h5")
    columns = {
        "A": Timestamp("20130102", tz="US/Eastern"),
        "B": Timestamp("20130603", tz="CET"),
    }
    expected = DataFrame(columns, index=range(5))

    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["df"], expected)
 | 
						|
 | 
						|
 | 
						|
def test_dst_transitions(setup_path):
    """Append and select frames whose index and column span a DST transition."""
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        hourly = date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="H",
            ambiguous="infer",
        )
        times = hourly._with_freq(None)  # freq doesn't round-trip
        shifted = times + pd.Timedelta("10min")

        for index in (times, shifted):
            _maybe_remove(store, "df")
            expected = DataFrame({"A": range(len(index)), "B": index}, index=index)
            store.append("df", expected)
            tm.assert_frame_equal(store.select("df"), expected)
 | 
						|
 | 
						|
 | 
						|
def test_read_with_where_tz_aware_index(setup_path):
    """``read_hdf`` with a ``where`` on a tz-aware MultiIndex level."""
    # GH 11926
    n_days = 10
    dates = date_range("20151201", periods=n_days, freq="D", tz="UTC")
    index = pd.MultiIndex.from_arrays([dates, range(n_days)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=index)

    key = "mykey"
    with ensure_clean_path(setup_path) as path:
        # write inside its own context so the store is closed before reading
        with pd.HDFStore(path) as store:
            store.append(key, expected, format="table", append=True)

        selected = pd.read_hdf(path, key, where="DATE > 20151130")
        tm.assert_frame_equal(selected, expected)
 | 
						|
 | 
						|
 | 
						|
def test_py2_created_with_datetimez(datapath, setup_path):
    """Read ``gh26443.h5`` and compare its ``key`` entry with the expected frame."""
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    legacy_file = datapath("io", "data", "legacy_hdf", "gh26443.h5")
    stamp = Timestamp("2019-01-01T18:00").tz_localize("America/New_York")
    expected = DataFrame({"data": 123}, index=[stamp])

    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["key"], expected)
 |