From 0049d667e75c260f8a6f49326be9ef01bca854c1 Mon Sep 17 00:00:00 2001 From: Jeffrey Kenneth Go <186915942+jeffreykenneth@users.noreply.github.com> Date: Thu, 13 Nov 2025 23:34:33 +0800 Subject: [PATCH] TST: Replace ensure_clean_store with tmp_path in tests/io/pytables/test_append.py --- pandas/tests/io/pytables/test_append.py | 118 ++++++++++++++---------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 479f2468a86ab..a6dc97ed01661 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -11,6 +11,7 @@ import pandas as pd from pandas import ( DataFrame, + HDFStore, Index, Series, _testing as tm, @@ -20,7 +21,6 @@ ) from pandas.tests.io.pytables.common import ( _maybe_remove, - ensure_clean_store, ) pytestmark = [pytest.mark.single_cpu] @@ -29,8 +29,9 @@ @pytest.mark.filterwarnings("ignore::tables.NaturalNameWarning") -def test_append(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append(tmp_path): + path = tmp_path / "test_append.h5" + with HDFStore(path) as store: # this is allowed by almost always don't want to do it # tables.NaturalNameWarning): df = DataFrame( @@ -101,8 +102,9 @@ def test_append(setup_path): tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True) -def test_append_series(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_series(tmp_path): + path = tmp_path / "test_append_series.h5" + with HDFStore(path) as store: # basic ss = Series(range(20), dtype=np.float64, index=[f"i_{i}" for i in range(20)]) ts = Series( @@ -150,8 +152,9 @@ def test_append_series(setup_path): tm.assert_series_equal(store["mi"], s, check_index_type=True) -def test_append_some_nans(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_some_nans(tmp_path): + path = tmp_path / "test_append_some_nans.h5" + with HDFStore(path) as store: df = DataFrame( { "A": Series(np.random.default_rng(2).standard_normal(20)).astype( @@ -198,8 +201,9 @@ def test_append_some_nans(setup_path): tm.assert_frame_equal(store["df3"], df3, check_index_type=True) -def test_append_all_nans(setup_path, using_infer_string): - with ensure_clean_store(setup_path) as store: +def test_append_all_nans(tmp_path, using_infer_string): + path = tmp_path / "test_append_all_nans.h5" + with HDFStore(path) as store: df = DataFrame( { "A1": np.random.default_rng(2).standard_normal(20), @@ -290,8 +294,9 @@ def test_append_all_nans(setup_path, using_infer_string): tm.assert_frame_equal(store["df2"], df, check_index_type=True) -def test_append_frame_column_oriented(setup_path, request): - with ensure_clean_store(setup_path) as store: +def test_append_frame_column_oriented(tmp_path, request): + path = tmp_path / "test_append_frame_column_oriented.h5" + with HDFStore(path) as store: # column oriented df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), @@ -330,9 +335,10 @@ def test_append_frame_column_oriented(setup_path, request): store.select("df1", "columns=A and index>df.index[4]") -def test_append_with_different_block_ordering(setup_path): +def test_append_with_different_block_ordering(tmp_path): # GH 4096; using same frames, but different block orderings - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_with_different_block_ordering.h5" + with HDFStore(path) as store: for i in range(10): df = DataFrame( np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB") @@ -355,7 +361,7 @@ def test_append_with_different_block_ordering(setup_path): # test a different ordering but with more fields (like invalid # combinations) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: df = DataFrame( np.random.default_rng(2).standard_normal((10, 2)), columns=list("AB"), @@ -363,6 +369,7 @@ def test_append_with_different_block_ordering(setup_path): ) df["int64"] = Series([1] * len(df), dtype="int64") df["int16"] = Series([1] * len(df), dtype="int16") + _maybe_remove(store, "df") store.append("df", df) # store additional fields in different blocks @@ -382,8 +389,9 @@ def test_append_with_different_block_ordering(setup_path): store.append("df", df) -def test_append_with_strings(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_with_strings(tmp_path): + path = tmp_path / "test_append_with_strings.h5" + with HDFStore(path) as store: def check_col(key, name, size): assert ( @@ -467,7 +475,7 @@ def check_col(key, name, size): result = store.select("df") tm.assert_frame_equal(result, df) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: df = DataFrame({"A": "foo", "B": "bar"}, index=range(10)) # a min_itemsize that creates a data_column @@ -505,8 +513,9 @@ def check_col(key, name, size): store.append("df", df, min_itemsize={"foo": 20, "foobar": 20}) -def test_append_with_empty_string(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_with_empty_string(tmp_path): + path = tmp_path / "test_append_with_empty_string.h5" + with HDFStore(path) as store: # with all empty strings (GH 12242) df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]}) store.append("df", df[:-1], min_itemsize={"x": 1}) @@ -514,8 +523,9 @@ def test_append_with_empty_string(setup_path): tm.assert_frame_equal(store.select("df"), df) -def test_append_with_data_columns(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_with_data_columns(tmp_path): + path = tmp_path / "test_append_with_data_columns.h5" + with HDFStore(path) as store: df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), @@ -559,7 +569,7 @@ def check_col(key, name, size): getattr(store.get_storer(key).table.description, name).itemsize == size ) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: _maybe_remove(store, "df") store.append("df", df_new, data_columns=["string"], min_itemsize={"string": 30}) check_col("df", "string", 30) @@ -570,7 +580,7 @@ def check_col(key, name, size): store.append("df", df_new, data_columns=["string"], min_itemsize={"values": 30}) check_col("df", "string", 30) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: df_new["string2"] = "foobarbah" df_new["string_block1"] = "foobarbah1" df_new["string_block2"] = "foobarbah2" @@ -585,7 +595,7 @@ def check_col(key, name, size): check_col("df", "string2", 40) check_col("df", "values_block_1", 50) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: # multiple data columns df_new = df.copy() df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0 @@ -617,7 +627,7 @@ def check_col(key, name, size): expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")] tm.assert_frame_equal(result, expected) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: # doc example df_dc = df.copy() df_dc["string"] = "foo" @@ -642,7 +652,7 @@ def check_col(key, name, size): # FIXME: 2020-12-07 intermittent build failures here with freq of # None instead of BDay(4) - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: # doc example part 2 index = date_range("1/1/2000", periods=8) @@ -658,6 +668,7 @@ def check_col(key, name, size): df_dc["string2"] = "cool" # on-disk operations + _maybe_remove(store, "df_dc") store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"]) result = store.select("df_dc", "B>0") @@ -669,11 +680,12 @@ def check_col(key, name, size): tm.assert_frame_equal(result, expected) -def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_data): +def test_append_hierarchical(tmp_path, multiindex_dataframe_random_data): df = multiindex_dataframe_random_data df.columns.name = None - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_hierarchical.h5" + with HDFStore(path) as store: store.append("mi", df) result = store.select("mi") tm.assert_frame_equal(result, df) @@ -690,8 +702,9 @@ def test_append_hierarchical(tmp_path, setup_path, multiindex_dataframe_random_d tm.assert_frame_equal(result, expected) -def test_append_misc(setup_path): - with ensure_clean_store(setup_path) as store: +def test_append_misc(tmp_path): + path = tmp_path / "test_append_misc.h5" + with HDFStore(path) as store: df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), @@ -707,7 +720,7 @@ def test_append_misc(setup_path): @pytest.mark.parametrize("chunksize", [10, 200, 1000]) -def test_append_misc_chunksize(setup_path, chunksize): +def test_append_misc_chunksize(tmp_path, chunksize): # more chunksize in append tests df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), @@ -720,15 +733,17 @@ def test_append_misc_chunksize(setup_path, chunksize): df["bool"] = df["float322"] > 0 df["time1"] = Timestamp("20130101").as_unit("ns") df["time2"] = Timestamp("20130102").as_unit("ns") - with ensure_clean_store(setup_path, mode="w") as store: + path = tmp_path / "test_append_misc_chunksize.h5" + with HDFStore(path) as store: store.append("obj", df, chunksize=chunksize) result = store.select("obj") tm.assert_frame_equal(result, df) -def test_append_misc_empty_frame(setup_path): +def test_append_misc_empty_frame(tmp_path): + path = tmp_path / "test_append_misc_empty_frame.h5" # empty frame, GH4273 - with ensure_clean_store(setup_path) as store: + with HDFStore(path) as store: # 0 len df_empty = DataFrame(columns=list("ABC")) store.append("df", df_empty) @@ -748,8 +763,9 @@ def test_append_misc_empty_frame(setup_path): tm.assert_frame_equal(store.select("df2"), df) -def test_append_raise(setup_path, using_infer_string): - with ensure_clean_store(setup_path) as store: +def test_append_raise(tmp_path, using_infer_string): + path = tmp_path / "test_append_raise.h5" + with HDFStore(path) as store: # test append with invalid input to get good error messages # list in column @@ -832,7 +848,7 @@ def test_append_raise(setup_path, using_infer_string): store.append("df", df) -def test_append_with_timedelta(setup_path): +def test_append_with_timedelta(tmp_path): # GH 3577 # append timedelta @@ -846,7 +862,8 @@ def test_append_with_timedelta(setup_path): df["C"] = df["A"] - df["B"] df.loc[3:5, "C"] = np.nan - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_with_timedelta.h5" + with HDFStore(path) as store: # table _maybe_remove(store, "df") store.append("df", df, data_columns=True) @@ -879,7 +896,7 @@ def test_append_with_timedelta(setup_path): tm.assert_frame_equal(result, df) -def test_append_to_multiple(setup_path): +def test_append_to_multiple(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), @@ -889,7 +906,8 @@ def test_append_to_multiple(setup_path): df2["foo"] = "bar" df = concat([df1, df2], axis=1) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_to_multiple.h5" + with HDFStore(path) as store: # exceptions msg = "append_to_multiple requires a selector that is in passed dict" with pytest.raises(ValueError, match=msg): @@ -916,7 +934,7 @@ def test_append_to_multiple(setup_path): tm.assert_frame_equal(result, expected) -def test_append_to_multiple_dropna(setup_path): +def test_append_to_multiple_dropna(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), @@ -930,7 +948,8 @@ def test_append_to_multiple_dropna(setup_path): df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan df = concat([df1, df2], axis=1) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_to_multiple_dropna.h5" + with HDFStore(path) as store: # dropna=True should guarantee rows are synchronized store.append_to_multiple( {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True @@ -941,7 +960,7 @@ def test_append_to_multiple_dropna(setup_path): tm.assert_index_equal(store.select("df1").index, store.select("df2").index) -def test_append_to_multiple_dropna_false(setup_path): +def test_append_to_multiple_dropna_false(tmp_path): df1 = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), columns=Index(list("ABCD")), @@ -951,8 +970,9 @@ def test_append_to_multiple_dropna_false(setup_path): df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan df = concat([df1, df2], axis=1) + path = tmp_path / "test_append_to_multiple_dropna_false.h5" with ( - ensure_clean_store(setup_path) as store, + HDFStore(path) as store, pd.option_context("io.hdf.dropna_table", True), ): # dropna=False shouldn't synchronize row indexes @@ -967,7 +987,7 @@ def test_append_to_multiple_dropna_false(setup_path): assert not store.select("df1a").index.equals(store.select("df2a").index) -def test_append_to_multiple_min_itemsize(setup_path): +def test_append_to_multiple_min_itemsize(tmp_path): # GH 11238 df = DataFrame( { @@ -982,7 +1002,8 @@ def test_append_to_multiple_min_itemsize(setup_path): # Reading/writing RangeIndex info is not supported yet expected.index = Index(list(range(len(expected.index)))) - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_to_multiple_min_itemsize.h5" + with HDFStore(path) as store: store.append_to_multiple( { "index": ["IX"], @@ -997,14 +1018,15 @@ def test_append_to_multiple_min_itemsize(setup_path): tm.assert_frame_equal(result, expected, check_index_type=True) -def test_append_string_nan_rep(setup_path): +def test_append_string_nan_rep(tmp_path): # GH 16300 df = DataFrame({"A": "a", "B": "foo"}, index=np.arange(10)) df_nan = df.copy() df_nan.loc[0:4, :] = np.nan msg = "NaN representation is too large for existing column size" - with ensure_clean_store(setup_path) as store: + path = tmp_path / "test_append_string_nan_rep.h5" + with HDFStore(path) as store: # string column too small store.append("sa", df["A"]) with pytest.raises(ValueError, match=msg):