diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index cef57318195ec..2777714dec8b2 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -97,25 +97,25 @@ def test_nonexistent_path(all_parsers): @pytest.mark.skipif(WASM, reason="limited file system access on WASM") @td.skip_if_windows # os.chmod does not work in windows -def test_no_permission(all_parsers): +def test_no_permission(all_parsers, temp_file): # GH 23784 parser = all_parsers msg = r"\[Errno 13\]" - with tm.ensure_clean() as path: - os.chmod(path, 0) # make file unreadable + path = temp_file + os.chmod(path, 0) # make file unreadable - # verify that this process cannot open the file (not running as sudo) - try: - with open(path, encoding="utf-8"): - pass - pytest.skip("Running as sudo.") - except PermissionError: + # verify that this process cannot open the file (not running as sudo) + try: + with open(path, encoding="utf-8"): pass + pytest.skip("Running as sudo.") + except PermissionError: + pass - with pytest.raises(PermissionError, match=msg) as e: - parser.read_csv(path) - assert path == e.value.filename + with pytest.raises(PermissionError, match=msg) as e: + parser.read_csv(path) + assert path == e.value.filename @pytest.mark.parametrize( @@ -269,19 +269,18 @@ def test_internal_eof_byte(all_parsers): tm.assert_frame_equal(result, expected) -def test_internal_eof_byte_to_file(all_parsers): +def test_internal_eof_byte_to_file(all_parsers, tmp_path): # see gh-16559 parser = all_parsers data = b'c1,c2\r\n"test \x1a test", test\r\n' expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"]) - path = f"__{uuid.uuid4()}__.csv" + path = tmp_path / f"__{uuid.uuid4()}__.csv" - with tm.ensure_clean(path) as path: - with open(path, "wb") as f: - f.write(data) + with open(path, "wb") as f: + f.write(data) - result = parser.read_csv(path) - 
tm.assert_frame_equal(result, expected) + result = parser.read_csv(path) + tm.assert_frame_equal(result, expected) def test_file_handle_string_io(all_parsers): @@ -372,7 +371,7 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding): assert not handle.closed -def test_memory_map_compression(all_parsers, compression): +def test_memory_map_compression(all_parsers, compression, temp_file): """ Support memory map for compressed files. @@ -381,16 +380,16 @@ def test_memory_map_compression(all_parsers, compression): parser = all_parsers expected = DataFrame({"a": [1], "b": [2]}) - with tm.ensure_clean() as path: - expected.to_csv(path, index=False, compression=compression) + path = temp_file + expected.to_csv(path, index=False, compression=compression) - if parser.engine == "pyarrow": - msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - parser.read_csv(path, memory_map=True, compression=compression) - return + if parser.engine == "pyarrow": + msg = "The 'memory_map' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, memory_map=True, compression=compression) + return - result = parser.read_csv(path, memory_map=True, compression=compression) + result = parser.read_csv(path, memory_map=True, compression=compression) tm.assert_frame_equal( result, @@ -442,12 +441,11 @@ def test_context_manageri_user_provided(all_parsers, datapath): @skip_pyarrow # ParserError: Empty CSV file -def test_file_descriptor_leak(all_parsers): +def test_file_descriptor_leak(all_parsers, temp_file): # GH 31488 parser = all_parsers - with tm.ensure_clean() as path: - with pytest.raises(EmptyDataError, match="No columns to parse from file"): - parser.read_csv(path) + with pytest.raises(EmptyDataError, match="No columns to parse from file"): + parser.read_csv(temp_file) def test_memory_map(all_parsers, csv_dir_path): diff --git 
a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 5cfefeb469e8a..8565c255ac067 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -41,22 +41,20 @@ tables = pytest.importorskip("tables") -def test_context(setup_path): - with tm.ensure_clean(setup_path) as path: - try: - with HDFStore(path) as tbl: - raise ValueError("blah") - except ValueError: - pass - with tm.ensure_clean(setup_path) as path: - with HDFStore(path) as tbl: - tbl["a"] = DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=Index(list("ABCD"), dtype=object), - index=Index([f"i-{i}" for i in range(30)], dtype=object), - ) - assert len(tbl) == 1 - assert type(tbl["a"]) == DataFrame +def test_context(temp_file): + try: + with HDFStore(temp_file) as tbl: + raise ValueError("blah") + except ValueError: + pass + with HDFStore(temp_file) as tbl: + tbl["a"] = DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=Index(list("ABCD"), dtype=object), + index=Index([f"i-{i}" for i in range(30)], dtype=object), + ) + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame def test_no_track_times(tmp_path, setup_path): @@ -971,37 +969,36 @@ def test_pickle_path_localpath(): @pytest.mark.parametrize("propindexes", [True, False]) -def test_copy(propindexes): +def test_copy(propindexes, temp_file): df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), index=Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with HDFStore(path) as st: - st.append("df", df, data_columns=["A"]) - with tempfile.NamedTemporaryFile() as new_f: - with HDFStore(path) as store: - with contextlib.closing( - store.copy(new_f.name, keys=None, propindexes=propindexes) - ) as tstore: - # check keys - keys = store.keys() - assert set(keys) == set(tstore.keys()) - # check indices & nrows - for k in tstore.keys(): - if tstore.get_storer(k).is_table: - new_t = tstore.get_storer(k) - orig_t = 
store.get_storer(k) - - assert orig_t.nrows == new_t.nrows - - # check propindixes - if propindexes: - for a in orig_t.axes: - if a.is_indexed: - assert new_t[a.name].is_indexed + with HDFStore(temp_file) as st: + st.append("df", df, data_columns=["A"]) + with tempfile.NamedTemporaryFile() as new_f: + with HDFStore(temp_file) as store: + with contextlib.closing( + store.copy(new_f.name, keys=None, propindexes=propindexes) + ) as tstore: + # check keys + keys = store.keys() + assert set(keys) == set(tstore.keys()) + # check indices & nrows + for k in tstore.keys(): + if tstore.get_storer(k).is_table: + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) + + assert orig_t.nrows == new_t.nrows + + # check propindexes + if propindexes: + for a in orig_t.axes: + if a.is_indexed: + assert new_t[a.name].is_indexed def test_duplicate_column_name(tmp_path, setup_path): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index ab27fda8dcdf5..20b3103b74b9b 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -86,12 +86,11 @@ def test_stringify_path_fspath(self): result = icom.stringify_path(p) assert result == "foo/bar.csv" - def test_stringify_file_and_path_like(self): + def test_stringify_file_and_path_like(self, temp_file): # GH 38125: do not stringify file objects that are also path-like fsspec = pytest.importorskip("fsspec") - with tm.ensure_clean() as path: - with fsspec.open(f"file://{path}", mode="wb") as fsspec_obj: - assert fsspec_obj == icom.stringify_path(fsspec_obj) + with fsspec.open(f"file://{temp_file}", mode="wb") as fsspec_obj: + assert fsspec_obj == icom.stringify_path(fsspec_obj) @pytest.mark.parametrize("path_type", [str, CustomFSPath, Path]) def test_infer_compression_from_path(self, compression_format, path_type): @@ -338,49 +337,53 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], ) - def 
test_write_fspath_all(self, writer_name, writer_kwargs, module): + def test_write_fspath_all(self, writer_name, writer_kwargs, module, tmp_path): if writer_name in ["to_latex"]: # uses Styler implementation pytest.importorskip("jinja2") - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + p1 = tmp_path / "string" + p2 = tmp_path / "fspath" df = pd.DataFrame({"A": [1, 2]}) - with p1 as string, p2 as fspath: - pytest.importorskip(module) - mypath = CustomFSPath(fspath) - writer = getattr(df, writer_name) - - writer(string, **writer_kwargs) - writer(mypath, **writer_kwargs) - with open(string, "rb") as f_str, open(fspath, "rb") as f_path: - if writer_name == "to_excel": - # binary representation of excel contains time creation - # data that causes flaky CI failures - result = pd.read_excel(f_str, **writer_kwargs) - expected = pd.read_excel(f_path, **writer_kwargs) - tm.assert_frame_equal(result, expected) - else: - result = f_str.read() - expected = f_path.read() - assert result == expected - - def test_write_fspath_hdf5(self): + string = str(p1) + fspath = str(p2) # plain str path; wrapped in CustomFSPath below + + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + writer(mypath, **writer_kwargs) + with open(string, "rb") as f_str, open(fspath, "rb") as f_path: + if writer_name == "to_excel": + # binary representation of excel contains time creation + # data that causes flaky CI failures + result = pd.read_excel(f_str, **writer_kwargs) + expected = pd.read_excel(f_path, **writer_kwargs) + tm.assert_frame_equal(result, expected) + else: + result = f_str.read() + expected = f_path.read() + assert result == expected + + def test_write_fspath_hdf5(self, tmp_path): # Same test as write_fspath_all, except HDF5 files aren't # necessarily byte-for-byte identical for a given dataframe, so we'll # have to read and compare equality pytest.importorskip("tables") df = pd.DataFrame({"A": [1, 
2]}) - p1 = tm.ensure_clean("string") - p2 = tm.ensure_clean("fspath") + p1 = tmp_path / "string" + p2 = tmp_path / "fspath" + + string = str(p1) + fspath = str(p2) - with p1 as string, p2 as fspath: - mypath = CustomFSPath(fspath) - df.to_hdf(mypath, key="bar") - df.to_hdf(string, key="bar") + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key="bar") + df.to_hdf(string, key="bar") - result = pd.read_hdf(fspath, key="bar") - expected = pd.read_hdf(string, key="bar") + result = pd.read_hdf(fspath, key="bar") + expected = pd.read_hdf(string, key="bar") tm.assert_frame_equal(result, expected) @@ -432,35 +435,35 @@ def test_next(self, mmap_file): with pytest.raises(StopIteration, match=r"^$"): next(wrapper) - def test_unknown_engine(self): - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path) - with pytest.raises(ValueError, match="Unknown engine"): - pd.read_csv(path, engine="pyt") - - def test_binary_mode(self): + def test_unknown_engine(self, temp_file): + path = temp_file + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(path) + with pytest.raises(ValueError, match="Unknown engine"): + pd.read_csv(path, engine="pyt") + + def test_binary_mode(self, temp_file): """ 'encoding' shouldn't be passed to 'open' in binary mode. 
GH 35058 """ - with tm.ensure_clean() as path: - df = pd.DataFrame( - 1.1 * np.arange(120).reshape((30, 4)), - columns=pd.Index(list("ABCD")), - index=pd.Index([f"i-{i}" for i in range(30)]), - ) - df.to_csv(path, mode="w+b") - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + path = temp_file + df = pd.DataFrame( + 1.1 * np.arange(120).reshape((30, 4)), + columns=pd.Index(list("ABCD")), + index=pd.Index([f"i-{i}" for i in range(30)]), + ) + df.to_csv(path, mode="w+b") + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) @pytest.mark.parametrize("encoding", ["utf-16", "utf-32"]) @pytest.mark.parametrize("compression_", ["bz2", "xz"]) - def test_warning_missing_utf_bom(self, encoding, compression_): + def test_warning_missing_utf_bom(self, encoding, compression_, temp_file): """ bz2 and xz do not write the byte order mark (BOM) for utf-16/32. @@ -473,17 +476,17 @@ def test_warning_missing_utf_bom(self, encoding, compression_): columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): - df.to_csv(path, compression=compression_, encoding=encoding) - - # reading should fail (otherwise we wouldn't need the warning) - msg = ( - r"UTF-\d+ stream does not start with BOM|" - r"'utf-\d+' codec can't decode byte" - ) - with pytest.raises(UnicodeError, match=msg): - pd.read_csv(path, compression=compression_, encoding=encoding) + path = temp_file + with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"): + df.to_csv(path, compression=compression_, encoding=encoding) + + # reading should fail (otherwise we wouldn't need the warning) + msg = ( + r"UTF-\d+ stream does not start with BOM|" + r"'utf-\d+' codec can't decode byte" + ) + with pytest.raises(UnicodeError, match=msg): + pd.read_csv(path, compression=compression_, encoding=encoding) def test_is_fsspec_url(): @@ -514,38 +517,39 @@ def 
test_is_fsspec_url_chained(): @pytest.mark.parametrize("format", ["csv", "json"]) -def test_codecs_encoding(format): +def test_codecs_encoding(format, temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, mode="w", encoding="utf-8") as handle: - getattr(expected, f"to_{format}")(handle) - with open(path, encoding="utf-8") as handle: - if format == "csv": - df = pd.read_csv(handle, index_col=0) - else: - df = pd.read_json(handle) + + path = temp_file + with open(path, mode="w", encoding="utf-8") as handle: + getattr(expected, f"to_{format}")(handle) + with open(path, encoding="utf-8") as handle: + if format == "csv": + df = pd.read_csv(handle, index_col=0) + else: + df = pd.read_json(handle) tm.assert_frame_equal(expected, df) -def test_codecs_get_writer_reader(): +def test_codecs_get_writer_reader(temp_file): # GH39247 expected = pd.DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=pd.Index(list("ABCD")), index=pd.Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, "wb") as handle: - with codecs.getwriter("utf-8")(handle) as encoded: - expected.to_csv(encoded) - with open(path, "rb") as handle: - with codecs.getreader("utf-8")(handle) as encoded: - df = pd.read_csv(encoded, index_col=0) + path = temp_file + with open(path, "wb") as handle: + with codecs.getwriter("utf-8")(handle) as encoded: + expected.to_csv(encoded) + with open(path, "rb") as handle: + with codecs.getreader("utf-8")(handle) as encoded: + df = pd.read_csv(encoded, index_col=0) tm.assert_frame_equal(expected, df) @@ -572,7 +576,7 @@ def test_explicit_encoding(io_class, mode, msg): @pytest.mark.parametrize("encoding_errors", ["strict", "replace"]) @pytest.mark.parametrize("format", ["csv", "json"]) -def test_encoding_errors(encoding_errors, format): +def 
test_encoding_errors(encoding_errors, format, temp_file): # GH39450 msg = "'utf-8' codec can't decode byte" bad_encoding = b"\xe4" @@ -591,18 +595,18 @@ def test_encoding_errors(encoding_errors, format): + b'"}}' ) reader = partial(pd.read_json, orient="index") - with tm.ensure_clean() as path: - file = Path(path) - file.write_bytes(content) + path = temp_file + file = Path(path) + file.write_bytes(content) - if encoding_errors != "replace": - with pytest.raises(UnicodeDecodeError, match=msg): - reader(path, encoding_errors=encoding_errors) - else: - df = reader(path, encoding_errors=encoding_errors) - decoded = bad_encoding.decode(errors=encoding_errors) - expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) - tm.assert_frame_equal(df, expected) + if encoding_errors != "replace": + with pytest.raises(UnicodeDecodeError, match=msg): + reader(path, encoding_errors=encoding_errors) + else: + df = reader(path, encoding_errors=encoding_errors) + decoded = bad_encoding.decode(errors=encoding_errors) + expected = pd.DataFrame({decoded: [decoded]}, index=[decoded * 2]) + tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("encoding_errors", [0, None]) @@ -616,11 +620,10 @@ def test_encoding_errors_badtype(encoding_errors): reader(content) -def test_bad_encdoing_errors(): +def test_bad_encdoing_errors(temp_file): # GH 39777 - with tm.ensure_clean() as path: - with pytest.raises(LookupError, match="unknown error handler name"): - icom.get_handle(path, "w", errors="bad") + with pytest.raises(LookupError, match="unknown error handler name"): + icom.get_handle(temp_file, "w", errors="bad") @pytest.mark.skipif(WASM, reason="limited file system access on WASM") diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 14643587465ea..4df716592e467 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -48,9 +48,10 @@ def check_round_trip( if expected is None: expected = df.copy() - to_feather(df, 
temp_file, **write_kwargs) + path = temp_file + to_feather(df, path, **write_kwargs) - result = read_feather(temp_file, **read_kwargs) + result = read_feather(path, **read_kwargs) tm.assert_frame_equal(result, expected) @@ -229,15 +230,17 @@ def test_int_columns_and_index(self, temp_file): df = pd.DataFrame({"a": [1, 2, 3]}, index=pd.Index([3, 4, 5], name="test")) self.check_round_trip(df, temp_file) - def test_invalid_dtype_backend(self, temp_file): + def test_invalid_dtype_backend(self, tmp_path): msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " "'pyarrow' are allowed." ) df = pd.DataFrame({"int": list(range(1, 4))}) - df.to_feather(temp_file) + + path = tmp_path / "tmp.feather" + df.to_feather(path) with pytest.raises(ValueError, match=msg): - read_feather(temp_file, dtype_backend="numpy") + read_feather(path, dtype_backend="numpy") def test_string_inference(self, tmp_path, using_infer_string): # GH#54431