Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 70 additions & 48 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd
from pandas import (
DataFrame,
HDFStore,
Index,
Series,
_testing as tm,
Expand All @@ -20,7 +21,6 @@
)
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_store,
)

pytestmark = [pytest.mark.single_cpu]
Expand All @@ -29,8 +29,9 @@


@pytest.mark.filterwarnings("ignore::tables.NaturalNameWarning")
def test_append(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append(tmp_path):
path = tmp_path / "test_append.h5"
with HDFStore(path) as store:
# this is allowed but almost always you don't want to do it
# tables.NaturalNameWarning):
df = DataFrame(
Expand Down Expand Up @@ -101,8 +102,9 @@ def test_append(setup_path):
tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)


def test_append_series(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_series(tmp_path):
path = tmp_path / "test_append_series.h5"
with HDFStore(path) as store:
# basic
ss = Series(range(20), dtype=np.float64, index=[f"i_{i}" for i in range(20)])
ts = Series(
Expand Down Expand Up @@ -150,8 +152,9 @@ def test_append_series(setup_path):
tm.assert_series_equal(store["mi"], s, check_index_type=True)


def test_append_some_nans(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_some_nans(tmp_path):
path = tmp_path / "test_append_some_nans.h5"
with HDFStore(path) as store:
df = DataFrame(
{
"A": Series(np.random.default_rng(2).standard_normal(20)).astype(
Expand Down Expand Up @@ -198,8 +201,9 @@ def test_append_some_nans(setup_path):
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)


def test_append_all_nans(setup_path, using_infer_string):
with ensure_clean_store(setup_path) as store:
def test_append_all_nans(tmp_path, using_infer_string):
path = tmp_path / "test_append_all_nans.h5"
with HDFStore(path) as store:
df = DataFrame(
{
"A1": np.random.default_rng(2).standard_normal(20),
Expand Down Expand Up @@ -290,8 +294,9 @@ def test_append_all_nans(setup_path, using_infer_string):
tm.assert_frame_equal(store["df2"], df, check_index_type=True)


def test_append_frame_column_oriented(setup_path, request):
with ensure_clean_store(setup_path) as store:
def test_append_frame_column_oriented(tmp_path, request):
path = tmp_path / "test_append_frame_column_oriented.h5"
with HDFStore(path) as store:
# column oriented
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
Expand Down Expand Up @@ -330,9 +335,10 @@ def test_append_frame_column_oriented(setup_path, request):
store.select("df1", "columns=A and index>df.index[4]")


def test_append_with_different_block_ordering(setup_path):
def test_append_with_different_block_ordering(tmp_path):
# GH 4096; using same frames, but different block orderings
with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_with_different_block_ordering.h5"
with HDFStore(path) as store:
for i in range(10):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB")
Expand All @@ -355,14 +361,15 @@ def test_append_with_different_block_ordering(setup_path):

# test a different ordering but with more fields (like invalid
# combinations)
with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 2)),
columns=list("AB"),
dtype="float64",
)
df["int64"] = Series([1] * len(df), dtype="int64")
df["int16"] = Series([1] * len(df), dtype="int16")
_maybe_remove(store, "df")
store.append("df", df)

# store additional fields in different blocks
Expand All @@ -382,8 +389,9 @@ def test_append_with_different_block_ordering(setup_path):
store.append("df", df)


def test_append_with_strings(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_with_strings(tmp_path):
path = tmp_path / "test_append_with_strings.h5"
with HDFStore(path) as store:

def check_col(key, name, size):
assert (
Expand Down Expand Up @@ -467,7 +475,7 @@ def check_col(key, name, size):
result = store.select("df")
tm.assert_frame_equal(result, df)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
df = DataFrame({"A": "foo", "B": "bar"}, index=range(10))

# a min_itemsize that creates a data_column
Expand Down Expand Up @@ -505,17 +513,19 @@ def check_col(key, name, size):
store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})


def test_append_with_empty_string(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_with_empty_string(tmp_path):
path = tmp_path / "test_append_with_empty_string.h5"
with HDFStore(path) as store:
# with all empty strings (GH 12242)
df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})
store.append("df", df[:-1], min_itemsize={"x": 1})
store.append("df", df[-1:], min_itemsize={"x": 1})
tm.assert_frame_equal(store.select("df"), df)


def test_append_with_data_columns(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_with_data_columns(tmp_path):
path = tmp_path / "test_append_with_data_columns.h5"
with HDFStore(path) as store:
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD")),
Expand Down Expand Up @@ -559,7 +569,7 @@ def check_col(key, name, size):
getattr(store.get_storer(key).table.description, name).itemsize == size
)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
_maybe_remove(store, "df")
store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30})
check_col("df", "string", 30)
Expand All @@ -570,7 +580,7 @@ def check_col(key, name, size):
store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30})
check_col("df", "string", 30)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
df_new["string2"] = "foobarbah"
df_new["string_block1"] = "foobarbah1"
df_new["string_block2"] = "foobarbah2"
Expand All @@ -585,7 +595,7 @@ def check_col(key, name, size):
check_col("df", "string2", 40)
check_col("df", "values_block_1", 50)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
# multiple data columns
df_new = df.copy()
df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0
Expand Down Expand Up @@ -617,7 +627,7 @@ def check_col(key, name, size):
expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]
tm.assert_frame_equal(result, expected)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
# doc example
df_dc = df.copy()
df_dc["string"] = "foo"
Expand All @@ -642,7 +652,7 @@ def check_col(key, name, size):
# FIXME: 2020-12-07 intermittent build failures here with freq of
# None instead of BDay(4)

with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
# doc example part 2

index = date_range("1/1/2000", periods=8)
Expand All @@ -658,6 +668,7 @@ def check_col(key, name, size):
df_dc["string2"] = "cool"

# on-disk operations
_maybe_remove(store, "df_dc")
store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"])

result = store.select("df_dc", "B>0")
Expand All @@ -669,11 +680,12 @@ def check_col(key, name, size):
tm.assert_frame_equal(result, expected)


def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_data):
def test_append_hierarchical(tmp_path, multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
df.columns.name = None

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_hierarchical.h5"
with HDFStore(path) as store:
store.append("mi", df)
result = store.select("mi")
tm.assert_frame_equal(result, df)
Expand All @@ -690,8 +702,9 @@ def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_d
tm.assert_frame_equal(result, expected)


def test_append_misc(setup_path):
with ensure_clean_store(setup_path) as store:
def test_append_misc(tmp_path):
path = tmp_path / "test_append_misc.h5"
with HDFStore(path) as store:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
columns=Index(list("ABCD")),
Expand All @@ -707,7 +720,7 @@ def test_append_misc(setup_path):


@pytest.mark.parametrize("chunksize", [10, 200, 1000])
def test_append_misc_chunksize(setup_path, chunksize):
def test_append_misc_chunksize(tmp_path, chunksize):
# more chunksize in append tests
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -720,15 +733,17 @@ def test_append_misc_chunksize(setup_path, chunksize):
df["bool"] = df["float322"] > 0
df["time1"] = Timestamp("20130101").as_unit("ns")
df["time2"] = Timestamp("20130102").as_unit("ns")
with ensure_clean_store(setup_path, mode="w") as store:
path = tmp_path / "test_append_misc_chunksize.h5"
with HDFStore(path) as store:
store.append("obj", df, chunksize=chunksize)
result = store.select("obj")
tm.assert_frame_equal(result, df)


def test_append_misc_empty_frame(setup_path):
def test_append_misc_empty_frame(tmp_path):
path = tmp_path / "test_append_misc_empty_frame.h5"
# empty frame, GH4273
with ensure_clean_store(setup_path) as store:
with HDFStore(path) as store:
# 0 len
df_empty = DataFrame(columns=list("ABC"))
store.append("df", df_empty)
Expand All @@ -748,8 +763,9 @@ def test_append_misc_empty_frame(setup_path):
tm.assert_frame_equal(store.select("df2"), df)


def test_append_raise(setup_path, using_infer_string):
with ensure_clean_store(setup_path) as store:
def test_append_raise(tmp_path, using_infer_string):
path = tmp_path / "test_append_raise.h5"
with HDFStore(path) as store:
# test append with invalid input to get good error messages

# list in column
Expand Down Expand Up @@ -832,7 +848,7 @@ def test_append_raise(setup_path, using_infer_string):
store.append("df", df)


def test_append_with_timedelta(setup_path):
def test_append_with_timedelta(tmp_path):
# GH 3577
# append timedelta

Expand All @@ -846,7 +862,8 @@ def test_append_with_timedelta(setup_path):
df["C"] = df["A"] - df["B"]
df.loc[3:5, "C"] = np.nan

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_with_timedelta.h5"
with HDFStore(path) as store:
# table
_maybe_remove(store, "df")
store.append("df", df, data_columns=True)
Expand Down Expand Up @@ -879,7 +896,7 @@ def test_append_with_timedelta(setup_path):
tm.assert_frame_equal(result, df)


def test_append_to_multiple(setup_path):
def test_append_to_multiple(tmp_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD")),
Expand All @@ -889,7 +906,8 @@ def test_append_to_multiple(setup_path):
df2["foo"] = "bar"
df = concat([df1, df2], axis=1)

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_to_multiple.h5"
with HDFStore(path) as store:
# exceptions
msg = "append_to_multiple requires a selector that is in passed dict"
with pytest.raises(ValueError, match=msg):
Expand All @@ -916,7 +934,7 @@ def test_append_to_multiple(setup_path):
tm.assert_frame_equal(result, expected)


def test_append_to_multiple_dropna(setup_path):
def test_append_to_multiple_dropna(tmp_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD")),
Expand All @@ -930,7 +948,8 @@ def test_append_to_multiple_dropna(setup_path):
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
df = concat([df1, df2], axis=1)

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_to_multiple_dropna.h5"
with HDFStore(path) as store:
# dropna=True should guarantee rows are synchronized
store.append_to_multiple(
{"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True
Expand All @@ -941,7 +960,7 @@ def test_append_to_multiple_dropna(setup_path):
tm.assert_index_equal(store.select("df1").index, store.select("df2").index)


def test_append_to_multiple_dropna_false(setup_path):
def test_append_to_multiple_dropna_false(tmp_path):
df1 = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD")),
Expand All @@ -951,8 +970,9 @@ def test_append_to_multiple_dropna_false(setup_path):
df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan
df = concat([df1, df2], axis=1)

path = tmp_path / "test_append_to_multiple_dropna_false.h5"
with (
ensure_clean_store(setup_path) as store,
HDFStore(path) as store,
pd.option_context("io.hdf.dropna_table", True),
):
# dropna=False shouldn't synchronize row indexes
Expand All @@ -967,7 +987,7 @@ def test_append_to_multiple_dropna_false(setup_path):
assert not store.select("df1a").index.equals(store.select("df2a").index)


def test_append_to_multiple_min_itemsize(setup_path):
def test_append_to_multiple_min_itemsize(tmp_path):
# GH 11238
df = DataFrame(
{
Expand All @@ -982,7 +1002,8 @@ def test_append_to_multiple_min_itemsize(setup_path):
# Reading/writing RangeIndex info is not supported yet
expected.index = Index(list(range(len(expected.index))))

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_to_multiple_min_itemsize.h5"
with HDFStore(path) as store:
store.append_to_multiple(
{
"index": ["IX"],
Expand All @@ -997,14 +1018,15 @@ def test_append_to_multiple_min_itemsize(setup_path):
tm.assert_frame_equal(result, expected, check_index_type=True)


def test_append_string_nan_rep(setup_path):
def test_append_string_nan_rep(tmp_path):
# GH 16300
df = DataFrame({"A": "a", "B": "foo"}, index=np.arange(10))
df_nan = df.copy()
df_nan.loc[0:4, :] = np.nan
msg = "NaN representation is too large for existing column size"

with ensure_clean_store(setup_path) as store:
path = tmp_path / "test_append_string_nan_rep.h5"
with HDFStore(path) as store:
# string column too small
store.append("sa", df["A"])
with pytest.raises(ValueError, match=msg):
Expand Down
Loading