diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 5cfefeb469e8a..2681ab8a860f7 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -41,22 +41,22 @@
 tables = pytest.importorskip("tables")


-def test_context(setup_path):
-    with tm.ensure_clean(setup_path) as path:
-        try:
-            with HDFStore(path) as tbl:
-                raise ValueError("blah")
-        except ValueError:
-            pass
-    with tm.ensure_clean(setup_path) as path:
+def test_context(setup_path, tmp_path):
+    path = tmp_path / setup_path
+    try:
         with HDFStore(path) as tbl:
-            tbl["a"] = DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=Index(list("ABCD"), dtype=object),
-                index=Index([f"i-{i}" for i in range(30)], dtype=object),
-            )
-            assert len(tbl) == 1
-            assert type(tbl["a"]) == DataFrame
+            raise ValueError("blah")
+    except ValueError:
+        pass
+    path = tmp_path / setup_path
+    with HDFStore(path) as tbl:
+        tbl["a"] = DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=Index(list("ABCD"), dtype=object),
+            index=Index([f"i-{i}" for i in range(30)], dtype=object),
+        )
+        assert len(tbl) == 1
+        assert type(tbl["a"]) == DataFrame


 def test_no_track_times(tmp_path, setup_path):
@@ -971,37 +971,36 @@ def test_pickle_path_localpath():


 @pytest.mark.parametrize("propindexes", [True, False])
-def test_copy(propindexes):
+def test_copy(propindexes, temp_file):
     df = DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=Index(list("ABCD")),
         index=Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with HDFStore(path) as st:
-            st.append("df", df, data_columns=["A"])
-        with tempfile.NamedTemporaryFile() as new_f:
-            with HDFStore(path) as store:
-                with contextlib.closing(
-                    store.copy(new_f.name, keys=None, propindexes=propindexes)
-                ) as tstore:
-                    # check keys
-                    keys = store.keys()
-                    assert set(keys) == set(tstore.keys())
-                    # check indices & nrows
-                    for k in tstore.keys():
-                        if tstore.get_storer(k).is_table:
-                            new_t = tstore.get_storer(k)
-                            orig_t = store.get_storer(k)
-
-                            assert orig_t.nrows == new_t.nrows
-
-                            # check propindixes
-                            if propindexes:
-                                for a in orig_t.axes:
-                                    if a.is_indexed:
-                                        assert new_t[a.name].is_indexed
+    with HDFStore(temp_file) as st:
+        st.append("df", df, data_columns=["A"])
+    with tempfile.NamedTemporaryFile() as new_f:
+        with HDFStore(temp_file) as store:
+            with contextlib.closing(
+                store.copy(new_f.name, keys=None, propindexes=propindexes)
+            ) as tstore:
+                # check keys
+                keys = store.keys()
+                assert set(keys) == set(tstore.keys())
+                # check indices & nrows
+                for k in tstore.keys():
+                    if tstore.get_storer(k).is_table:
+                        new_t = tstore.get_storer(k)
+                        orig_t = store.get_storer(k)
+
+                        assert orig_t.nrows == new_t.nrows
+
+                        # check propindexes
+                        if propindexes:
+                            for a in orig_t.axes:
+                                if a.is_indexed:
+                                    assert new_t[a.name].is_indexed


 def test_duplicate_column_name(tmp_path, setup_path):
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index ab27fda8dcdf5..dccf13d15a6a0 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -86,12 +86,11 @@ def test_stringify_path_fspath(self):
         result = icom.stringify_path(p)
         assert result == "foo/bar.csv"

-    def test_stringify_file_and_path_like(self):
+    def test_stringify_file_and_path_like(self, temp_file):
         # GH 38125: do not stringify file objects that are also path-like
         fsspec = pytest.importorskip("fsspec")
-        with tm.ensure_clean() as path:
-            with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj:
-                assert fsspec_obj == icom.stringify_path(fsspec_obj)
+        with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj:
+            assert fsspec_obj == icom.stringify_path(fsspec_obj)

     @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path])
     def test_infer_compression_from_path(self, compression_format, path_type):
@@ -338,49 +337,47 @@ def test_read_fspath_all(self, reader, module, path, datapath):
             ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
         ],
     )
-    def test_write_fspath_all(self, writer_name, writer_kwargs, module):
+    def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path):
         if writer_name in ["to_latex"]:  # uses Styler implementation
             pytest.importorskip("jinja2")
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")
         df = pd.DataFrame({"A": [1, 2]})

-        with p1 as string, p2 as fspath:
-            pytest.importorskip(module)
-            mypath = CustomFSPath(fspath)
-            writer = getattr(df, writer_name)
-
-            writer(string, **writer_kwargs)
-            writer(mypath, **writer_kwargs)
-            with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
-                if writer_name == "to_excel":
-                    # binary representation of excel contains time creation
-                    # data that causes flaky CI failures
-                    result = pd.read_excel(f_str, **writer_kwargs)
-                    expected = pd.read_excel(f_path, **writer_kwargs)
-                    tm.assert_frame_equal(result, expected)
-                else:
-                    result = f_str.read()
-                    expected = f_path.read()
-                    assert result == expected
-
-    def test_write_fspath_hdf5(self):
+        pytest.importorskip(module)
+        mypath = CustomFSPath(fspath)
+        writer = getattr(df, writer_name)
+
+        writer(string, **writer_kwargs)
+        writer(mypath, **writer_kwargs)
+        with open(string, "rb") as f_str, open(fspath, "rb") as f_path:
+            if writer_name == "to_excel":
+                # binary representation of excel contains time creation
+                # data that causes flaky CI failures
+                result = pd.read_excel(f_str, **writer_kwargs)
+                expected = pd.read_excel(f_path, **writer_kwargs)
+                tm.assert_frame_equal(result, expected)
+            else:
+                result = f_str.read()
+                expected = f_path.read()
+                assert result == expected
+
+    def test_write_fspath_hdf5(self, tmp_path):
         # Same test as write_fspath_all, except HDF5 files aren't
         # necessarily byte-for-byte identical for a given dataframe, so we'll
         # have to read and compare equality
         pytest.importorskip("tables")

         df = pd.DataFrame({"A": [1, 2]})
-        p1 = tm.ensure_clean("string")
-        p2 = tm.ensure_clean("fspath")
+        string = str(tmp_path / "string")
+        fspath = str(tmp_path / "fspath")

-        with p1 as string, p2 as fspath:
-            mypath = CustomFSPath(fspath)
-            df.to_hdf(mypath, key="bar")
-            df.to_hdf(string, key="bar")
+        mypath = CustomFSPath(fspath)
+        df.to_hdf(mypath, key="bar")
+        df.to_hdf(string, key="bar")

-            result = pd.read_hdf(fspath, key="bar")
-            expected = pd.read_hdf(string, key="bar")
+        result = pd.read_hdf(fspath, key="bar")
+        expected = pd.read_hdf(string, key="bar")

         tm.assert_frame_equal(result, expected)

@@ -432,35 +429,33 @@ def test_next(self, mmap_file):
         with pytest.raises(StopIteration, match=r"^$"):
             next(wrapper)

-    def test_unknown_engine(self):
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path)
-            with pytest.raises(ValueError, match="Unknown engine"):
-                pd.read_csv(path, engine="pyt")
-
-    def test_binary_mode(self):
+    def test_unknown_engine(self, temp_file):
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file)
+        with pytest.raises(ValueError, match="Unknown engine"):
+            pd.read_csv(temp_file, engine="pyt")
+
+    def test_binary_mode(self, temp_file):
         """
         'encoding' shouldn't be passed to 'open' in binary mode.

         GH 35058
         """
-        with tm.ensure_clean() as path:
-            df = pd.DataFrame(
-                1.1 * np.arange(120).reshape((30, 4)),
-                columns=pd.Index(list("ABCD")),
-                index=pd.Index([f"i-{i}" for i in range(30)]),
-            )
-            df.to_csv(path, mode="w+b")
-            tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
+        df = pd.DataFrame(
+            1.1 * np.arange(120).reshape((30, 4)),
+            columns=pd.Index(list("ABCD")),
+            index=pd.Index([f"i-{i}" for i in range(30)]),
+        )
+        df.to_csv(temp_file, mode="w+b")
+        tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0))

     @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"])
     @pytest.mark.parametrize("compression_", ["bz2", "xz"])
-    def test_warning_missing_utf_bom(self, encoding, compression_):
+    def test_warning_missing_utf_bom(self, encoding, compression_, temp_file):
         """
         bz2 and xz do not write the byte order mark (BOM) for utf-16/32.

@@ -473,17 +468,16 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
             columns=pd.Index(list("ABCD")),
             index=pd.Index([f"i-{i}" for i in range(30)]),
         )
-        with tm.ensure_clean() as path:
-            with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
-                df.to_csv(path, compression=compression_, encoding=encoding)
+        with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
+            df.to_csv(temp_file, compression=compression_, encoding=encoding)

-            # reading should fail (otherwise we wouldn't need the warning)
-            msg = (
-                r"UTF-\d+ stream does not start with BOM|"
-                r"'utf-\d+' codec can't decode byte"
-            )
-            with pytest.raises(UnicodeError, match=msg):
-                pd.read_csv(path, compression=compression_, encoding=encoding)
+        # reading should fail (otherwise we wouldn't need the warning)
+        msg = (
+            r"UTF-\d+ stream does not start with BOM|"
+            r"'utf-\d+' codec can't decode byte"
+        )
+        with pytest.raises(UnicodeError, match=msg):
+            pd.read_csv(temp_file, compression=compression_, encoding=encoding)


 def test_is_fsspec_url():
@@ -514,38 +508,36 @@ def test_is_fsspec_url_chained():


 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_codecs_encoding(format):
+def test_codecs_encoding(format, temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, mode="w", encoding="utf-8") as handle:
-            getattr(expected, f"to_{format}")(handle)
-        with open(path, encoding="utf-8") as handle:
-            if format == "csv":
-                df = pd.read_csv(handle, index_col=0)
-            else:
-                df = pd.read_json(handle)
+    with open(temp_file, mode="w", encoding="utf-8") as handle:
+        getattr(expected, f"to_{format}")(handle)
+    with open(temp_file, encoding="utf-8") as handle:
+        if format == "csv":
+            df = pd.read_csv(handle, index_col=0)
+        else:
+            df = pd.read_json(handle)
     tm.assert_frame_equal(expected, df)


-def test_codecs_get_writer_reader():
+def test_codecs_get_writer_reader(temp_file):
     # GH39247
     expected = pd.DataFrame(
         1.1 * np.arange(120).reshape((30, 4)),
         columns=pd.Index(list("ABCD")),
         index=pd.Index([f"i-{i}" for i in range(30)]),
     )
-    with tm.ensure_clean() as path:
-        with open(path, "wb") as handle:
-            with codecs.getwriter("utf-8")(handle) as encoded:
-                expected.to_csv(encoded)
-        with open(path, "rb") as handle:
-            with codecs.getreader("utf-8")(handle) as encoded:
-                df = pd.read_csv(encoded, index_col=0)
+    with open(temp_file, "wb") as handle:
+        with codecs.getwriter("utf-8")(handle) as encoded:
+            expected.to_csv(encoded)
+    with open(temp_file, "rb") as handle:
+        with codecs.getreader("utf-8")(handle) as encoded:
+            df = pd.read_csv(encoded, index_col=0)
     tm.assert_frame_equal(expected, df)

@@ -572,7 +564,7 @@ def test_explicit_encoding(io_class, mode, msg):

 @pytest.mark.parametrize("encoding_errors", ["strict", "replace"])
 @pytest.mark.parametrize("format", ["csv", "json"])
-def test_encoding_errors(encoding_errors, format):
+def test_encoding_errors(encoding_errors, format, temp_file):
     # GH39450
     msg = "'utf-8' codec can't decode byte"
     bad_encoding = b"\xe4"
@@ -591,18 +583,17 @@
         + b'"}}'
     )
     reader = partial(pd.read_json, orient="index")
-    with tm.ensure_clean() as path:
-        file = Path(path)
-        file.write_bytes(content)
+    file = Path(temp_file)
+    file.write_bytes(content)

-        if encoding_errors != "replace":
-            with pytest.raises(UnicodeDecodeError, match=msg):
-                reader(path, encoding_errors=encoding_errors)
-        else:
-            df = reader(path, encoding_errors=encoding_errors)
-            decoded = bad_encoding.decode(errors=encoding_errors)
-            expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
-            tm.assert_frame_equal(df, expected)
+    if encoding_errors != "replace":
+        with pytest.raises(UnicodeDecodeError, match=msg):
+            reader(temp_file, encoding_errors=encoding_errors)
+    else:
+        df = reader(temp_file, encoding_errors=encoding_errors)
+        decoded = bad_encoding.decode(errors=encoding_errors)
+        expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2])
+        tm.assert_frame_equal(df, expected)


 @pytest.mark.parametrize("encoding_errors", [0, None])
@@ -616,11 +607,10 @@ def test_encoding_errors_badtype(encoding_errors):
         reader(content)


-def test_bad_encdoing_errors():
+def test_bad_encdoing_errors(temp_file):
     # GH 39777
-    with tm.ensure_clean() as path:
-        with pytest.raises(LookupError, match="unknown error handler name"):
-            icom.get_handle(path, "w", errors="bad")
+    with pytest.raises(LookupError, match="unknown error handler name"):
+        icom.get_handle(temp_file, "w", errors="bad")


 @pytest.mark.skipif(WASM, reason="limited file system access on WASM")
@@ -653,7 +643,7 @@ def close(self):
 @pytest.mark.parametrize("compression", [None, "infer"])
 def test_read_csv_chained_url_no_error(compression):
     # GH 60100
-    tar_file_path = "pandas/tests/io/data/tar/test-csv.tar"
+    tar_file_path = "data/tar/test-csv.tar"
     chained_file_url = f"tar://test.csv::file://{tar_file_path}"

     result = pd.read_csv(chained_file_url, compression=compression, sep=";")
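Note: the rewritten tests depend on a `temp_file` fixture that is not defined anywhere in this patch, so it has to come from a `conftest.py`. For reference, a minimal sketch of such a fixture, assuming it only needs to hand each test a unique, already-created file under pytest's built-in `tmp_path`; the name `temp_file` matches the diff, but the body below is an illustrative assumption, not necessarily the project's actual definition:

    import uuid

    import pytest


    @pytest.fixture
    def temp_file(tmp_path):
        # Create a uniquely named file inside pytest's per-test tmp_path
        # directory; pytest cleans the directory up afterwards, so tests
        # need no teardown of their own. (Illustrative sketch only.)
        file_path = tmp_path / str(uuid.uuid4())
        file_path.touch()
        return file_path

With something like this in place, any test that accepts `temp_file` receives a ready-made `pathlib.Path` to write to, which is what allows the `tm.ensure_clean` context managers above to be removed without leaking files between tests.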