375 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			375 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import datetime
 | 
						|
import re
 | 
						|
from warnings import (
 | 
						|
    catch_warnings,
 | 
						|
    simplefilter,
 | 
						|
)
 | 
						|
 | 
						|
import numpy as np
 | 
						|
import pytest
 | 
						|
 | 
						|
from pandas._libs.tslibs import Timestamp
 | 
						|
 | 
						|
import pandas as pd
 | 
						|
from pandas import (
 | 
						|
    DataFrame,
 | 
						|
    HDFStore,
 | 
						|
    Index,
 | 
						|
    MultiIndex,
 | 
						|
    RangeIndex,
 | 
						|
    Series,
 | 
						|
    _testing as tm,
 | 
						|
    concat,
 | 
						|
)
 | 
						|
from pandas.core.api import Int64Index
 | 
						|
from pandas.tests.io.pytables.common import (
 | 
						|
    _maybe_remove,
 | 
						|
    ensure_clean_path,
 | 
						|
    ensure_clean_store,
 | 
						|
)
 | 
						|
from pandas.util import _test_decorators as td
 | 
						|
 | 
						|
pytestmark = pytest.mark.single_cpu
 | 
						|
 | 
						|
 | 
						|
def test_format_type(setup_path):
 | 
						|
    df = DataFrame({"A": [1, 2]})
 | 
						|
    with ensure_clean_path(setup_path) as path:
 | 
						|
        with HDFStore(path) as store:
 | 
						|
            store.put("a", df, format="fixed")
 | 
						|
            store.put("b", df, format="table")
 | 
						|
 | 
						|
            assert store.get_storer("a").format_type == "fixed"
 | 
						|
            assert store.get_storer("b").format_type == "table"
 | 
						|
 | 
						|
 | 
						|
def test_format_kwarg_in_constructor(setup_path):
 | 
						|
    # GH 13291
 | 
						|
 | 
						|
    msg = "format is not a defined argument for HDFStore"
 | 
						|
 | 
						|
    with tm.ensure_clean(setup_path) as path:
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            HDFStore(path, format="table")
 | 
						|
 | 
						|
 | 
						|
def test_api_default_format(setup_path):
 | 
						|
 | 
						|
    # default_format option
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
        df = tm.makeDataFrame()
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", "fixed")
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        store.put("df", df)
 | 
						|
        assert not store.get_storer("df").is_table
 | 
						|
 | 
						|
        msg = "Can only append to Tables"
 | 
						|
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.append("df2", df)
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", "table")
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        store.put("df", df)
 | 
						|
        assert store.get_storer("df").is_table
 | 
						|
        _maybe_remove(store, "df2")
 | 
						|
        store.append("df2", df)
 | 
						|
        assert store.get_storer("df").is_table
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", None)
 | 
						|
 | 
						|
    with ensure_clean_path(setup_path) as path:
 | 
						|
 | 
						|
        df = tm.makeDataFrame()
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", "fixed")
 | 
						|
        df.to_hdf(path, "df")
 | 
						|
        with HDFStore(path) as store:
 | 
						|
            assert not store.get_storer("df").is_table
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            df.to_hdf(path, "df2", append=True)
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", "table")
 | 
						|
        df.to_hdf(path, "df3")
 | 
						|
        with HDFStore(path) as store:
 | 
						|
            assert store.get_storer("df3").is_table
 | 
						|
        df.to_hdf(path, "df4", append=True)
 | 
						|
        with HDFStore(path) as store:
 | 
						|
            assert store.get_storer("df4").is_table
 | 
						|
 | 
						|
        pd.set_option("io.hdf.default_format", None)
 | 
						|
 | 
						|
 | 
						|
def test_put(setup_path):
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        ts = tm.makeTimeSeries()
 | 
						|
        df = tm.makeTimeDataFrame()
 | 
						|
        store["a"] = ts
 | 
						|
        store["b"] = df[:10]
 | 
						|
        store["foo/bar/bah"] = df[:10]
 | 
						|
        store["foo"] = df[:10]
 | 
						|
        store["/foo"] = df[:10]
 | 
						|
        store.put("c", df[:10], format="table")
 | 
						|
 | 
						|
        # not OK, not a table
 | 
						|
        msg = "Can only append to Tables"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("b", df[10:], append=True)
 | 
						|
 | 
						|
        # node does not currently exist, test _is_table_type returns False
 | 
						|
        # in this case
 | 
						|
        _maybe_remove(store, "f")
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("f", df[10:], append=True)
 | 
						|
 | 
						|
        # can't put to a table (use append instead)
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("c", df[10:], append=True)
 | 
						|
 | 
						|
        # overwrite table
 | 
						|
        store.put("c", df[:10], format="table", append=False)
 | 
						|
        tm.assert_frame_equal(df[:10], store["c"])
 | 
						|
 | 
						|
 | 
						|
def test_put_string_index(setup_path):
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        index = Index([f"I am a very long string index: {i}" for i in range(20)])
 | 
						|
        s = Series(np.arange(20), index=index)
 | 
						|
        df = DataFrame({"A": s, "B": s})
 | 
						|
 | 
						|
        store["a"] = s
 | 
						|
        tm.assert_series_equal(store["a"], s)
 | 
						|
 | 
						|
        store["b"] = df
 | 
						|
        tm.assert_frame_equal(store["b"], df)
 | 
						|
 | 
						|
        # mixed length
 | 
						|
        index = Index(
 | 
						|
            ["abcdefghijklmnopqrstuvwxyz1234567890"]
 | 
						|
            + [f"I am a very long string index: {i}" for i in range(20)]
 | 
						|
        )
 | 
						|
        s = Series(np.arange(21), index=index)
 | 
						|
        df = DataFrame({"A": s, "B": s})
 | 
						|
        store["a"] = s
 | 
						|
        tm.assert_series_equal(store["a"], s)
 | 
						|
 | 
						|
        store["b"] = df
 | 
						|
        tm.assert_frame_equal(store["b"], df)
 | 
						|
 | 
						|
 | 
						|
def test_put_compression(setup_path):
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
        df = tm.makeTimeDataFrame()
 | 
						|
 | 
						|
        store.put("c", df, format="table", complib="zlib")
 | 
						|
        tm.assert_frame_equal(store["c"], df)
 | 
						|
 | 
						|
        # can't compress if format='fixed'
 | 
						|
        msg = "Compression not supported on Fixed format stores"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("b", df, format="fixed", complib="zlib")
 | 
						|
 | 
						|
 | 
						|
@td.skip_if_windows
 | 
						|
def test_put_compression_blosc(setup_path):
 | 
						|
    df = tm.makeTimeDataFrame()
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        # can't compress if format='fixed'
 | 
						|
        msg = "Compression not supported on Fixed format stores"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("b", df, format="fixed", complib="blosc")
 | 
						|
 | 
						|
        store.put("c", df, format="table", complib="blosc")
 | 
						|
        tm.assert_frame_equal(store["c"], df)
 | 
						|
 | 
						|
 | 
						|
def test_put_mixed_type(setup_path):
 | 
						|
    df = tm.makeTimeDataFrame()
 | 
						|
    df["obj1"] = "foo"
 | 
						|
    df["obj2"] = "bar"
 | 
						|
    df["bool1"] = df["A"] > 0
 | 
						|
    df["bool2"] = df["B"] > 0
 | 
						|
    df["bool3"] = True
 | 
						|
    df["int1"] = 1
 | 
						|
    df["int2"] = 2
 | 
						|
    df["timestamp1"] = Timestamp("20010102")
 | 
						|
    df["timestamp2"] = Timestamp("20010103")
 | 
						|
    df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
 | 
						|
    df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
 | 
						|
    df.loc[df.index[3:6], ["obj1"]] = np.nan
 | 
						|
    df = df._consolidate()._convert(datetime=True)
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
 | 
						|
        # PerformanceWarning
 | 
						|
        with catch_warnings(record=True):
 | 
						|
            simplefilter("ignore", pd.errors.PerformanceWarning)
 | 
						|
            store.put("df", df)
 | 
						|
 | 
						|
        expected = store.get("df")
 | 
						|
        tm.assert_frame_equal(expected, df)
 | 
						|
 | 
						|
 | 
						|
@pytest.mark.parametrize(
 | 
						|
    "format, index",
 | 
						|
    [
 | 
						|
        ["table", tm.makeFloatIndex],
 | 
						|
        ["table", tm.makeStringIndex],
 | 
						|
        ["table", tm.makeIntIndex],
 | 
						|
        ["table", tm.makeDateIndex],
 | 
						|
        ["fixed", tm.makeFloatIndex],
 | 
						|
        ["fixed", tm.makeStringIndex],
 | 
						|
        ["fixed", tm.makeIntIndex],
 | 
						|
        ["fixed", tm.makeDateIndex],
 | 
						|
        ["table", tm.makePeriodIndex],  # GH#7796
 | 
						|
        ["fixed", tm.makePeriodIndex],
 | 
						|
        ["table", tm.makeUnicodeIndex],
 | 
						|
        ["fixed", tm.makeUnicodeIndex],
 | 
						|
    ],
 | 
						|
)
 | 
						|
def test_store_index_types(setup_path, format, index):
 | 
						|
    # GH5386
 | 
						|
    # test storing various index types
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        df = DataFrame(np.random.randn(10, 2), columns=list("AB"))
 | 
						|
        df.index = index(len(df))
 | 
						|
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        store.put("df", df, format=format)
 | 
						|
        tm.assert_frame_equal(df, store["df"])
 | 
						|
 | 
						|
 | 
						|
def test_column_multiindex(setup_path):
 | 
						|
    # GH 4710
 | 
						|
    # recreate multi-indexes properly
 | 
						|
 | 
						|
    index = MultiIndex.from_tuples(
 | 
						|
        [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]
 | 
						|
    )
 | 
						|
    df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
 | 
						|
    expected = df.copy()
 | 
						|
    if isinstance(expected.index, RangeIndex):
 | 
						|
        expected.index = Int64Index(expected.index)
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        store.put("df", df)
 | 
						|
        tm.assert_frame_equal(
 | 
						|
            store["df"], expected, check_index_type=True, check_column_type=True
 | 
						|
        )
 | 
						|
 | 
						|
        store.put("df1", df, format="table")
 | 
						|
        tm.assert_frame_equal(
 | 
						|
            store["df1"], expected, check_index_type=True, check_column_type=True
 | 
						|
        )
 | 
						|
 | 
						|
        msg = re.escape("cannot use a multi-index on axis [1] with data_columns ['A']")
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("df2", df, format="table", data_columns=["A"])
 | 
						|
        msg = re.escape("cannot use a multi-index on axis [1] with data_columns True")
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.put("df3", df, format="table", data_columns=True)
 | 
						|
 | 
						|
    # appending multi-column on existing table (see GH 6167)
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
        store.append("df2", df)
 | 
						|
        store.append("df2", df)
 | 
						|
 | 
						|
        tm.assert_frame_equal(store["df2"], concat((df, df)))
 | 
						|
 | 
						|
    # non_index_axes name
 | 
						|
    df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo"))
 | 
						|
    expected = df.copy()
 | 
						|
    if isinstance(expected.index, RangeIndex):
 | 
						|
        expected.index = Int64Index(expected.index)
 | 
						|
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        store.put("df1", df, format="table")
 | 
						|
        tm.assert_frame_equal(
 | 
						|
            store["df1"], expected, check_index_type=True, check_column_type=True
 | 
						|
        )
 | 
						|
 | 
						|
 | 
						|
def test_store_multiindex(setup_path):
 | 
						|
 | 
						|
    # validate multi-index names
 | 
						|
    # GH 5527
 | 
						|
    with ensure_clean_store(setup_path) as store:
 | 
						|
 | 
						|
        def make_index(names=None):
 | 
						|
            return MultiIndex.from_tuples(
 | 
						|
                [
 | 
						|
                    (datetime.datetime(2013, 12, d), s, t)
 | 
						|
                    for d in range(1, 3)
 | 
						|
                    for s in range(2)
 | 
						|
                    for t in range(3)
 | 
						|
                ],
 | 
						|
                names=names,
 | 
						|
            )
 | 
						|
 | 
						|
        # no names
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())
 | 
						|
        store.append("df", df)
 | 
						|
        tm.assert_frame_equal(store.select("df"), df)
 | 
						|
 | 
						|
        # partial names
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        df = DataFrame(
 | 
						|
            np.zeros((12, 2)),
 | 
						|
            columns=["a", "b"],
 | 
						|
            index=make_index(["date", None, None]),
 | 
						|
        )
 | 
						|
        store.append("df", df)
 | 
						|
        tm.assert_frame_equal(store.select("df"), df)
 | 
						|
 | 
						|
        # series
 | 
						|
        _maybe_remove(store, "s")
 | 
						|
        s = Series(np.zeros(12), index=make_index(["date", None, None]))
 | 
						|
        store.append("s", s)
 | 
						|
        xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))
 | 
						|
        tm.assert_series_equal(store.select("s"), xp)
 | 
						|
 | 
						|
        # dup with column
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        df = DataFrame(
 | 
						|
            np.zeros((12, 2)),
 | 
						|
            columns=["a", "b"],
 | 
						|
            index=make_index(["date", "a", "t"]),
 | 
						|
        )
 | 
						|
        msg = "duplicate names/columns in the multi-index when storing as a table"
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.append("df", df)
 | 
						|
 | 
						|
        # dup within level
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        df = DataFrame(
 | 
						|
            np.zeros((12, 2)),
 | 
						|
            columns=["a", "b"],
 | 
						|
            index=make_index(["date", "date", "date"]),
 | 
						|
        )
 | 
						|
        with pytest.raises(ValueError, match=msg):
 | 
						|
            store.append("df", df)
 | 
						|
 | 
						|
        # fully names
 | 
						|
        _maybe_remove(store, "df")
 | 
						|
        df = DataFrame(
 | 
						|
            np.zeros((12, 2)),
 | 
						|
            columns=["a", "b"],
 | 
						|
            index=make_index(["date", "s", "t"]),
 | 
						|
        )
 | 
						|
        store.append("df", df)
 | 
						|
        tm.assert_frame_equal(store.select("df"), df)
 |