From 46e36042f104d8bb830015e2703048162fbf2376 Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Wed, 24 Sep 2025 17:47:21 -0400 Subject: [PATCH 01/10] Update test_to_csv.py _return_result_expected updated to temp file fixture --- pandas/tests/frame/methods/test_to_csv.py | 39 ++++++++++++----------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 34d120145b381..0cf5330c01f67 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -248,6 +248,7 @@ def _return_result_expected( self, df, chunksize, + temp_file, r_dtype=None, c_dtype=None, rnlvl=None, @@ -260,15 +261,16 @@ def _return_result_expected( kwargs["index_col"] = list(range(rnlvl)) kwargs["header"] = list(range(cnlvl)) - with tm.ensure_clean("__tmp_to_csv_moar__") as path: - df.to_csv(path, encoding="utf8", chunksize=chunksize) - recons = self.read_csv(path, **kwargs) + + path = str(temp_file) + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) else: kwargs["header"] = 0 - with tm.ensure_clean("__tmp_to_csv_moar__") as path: - df.to_csv(path, encoding="utf8", chunksize=chunksize) - recons = self.read_csv(path, **kwargs) + path = str(temp_file) + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) def _to_uni(x): if not isinstance(x, str): @@ -353,13 +355,13 @@ def _to_uni(x): @pytest.mark.parametrize( "nrows", [2, 10, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] ) - def test_to_csv_nrows(self, nrows): + def test_to_csv_nrows(self, nrows, temp_file): df = DataFrame( np.ones((nrows, 4)), index=date_range("2020-01-01", periods=nrows), columns=Index(list("abcd"), dtype=object), ) - result, expected = self._return_result_expected(df, 1000, "dt", "s") + result, expected = self._return_result_expected(df, 1000, temp_file, "dt", "s") expected.index = expected.index.astype("M8[ns]") tm.assert_frame_equal(result, expected, check_names=False) @@ -372,7 +374,7 @@ def test_to_csv_nrows(self, nrows): ) @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") - def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): + def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols, temp_file): axes = { "i": lambda n: Index(np.arange(n), dtype=np.int64), "s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]), @@ -387,6 +389,7 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): result, expected = self._return_result_expected( df, 1000, + temp_file, r_idx_type, c_idx_type, ) @@ -401,13 +404,13 @@ def test_to_csv_idx_types(self, nrows, r_idx_type, c_idx_type, ncols): "nrows", [10, 98, 99, 100, 101, 102, 198, 199, 200, 201, 202, 249, 250, 251] ) @pytest.mark.parametrize("ncols", [1, 2, 3, 4]) - def test_to_csv_idx_ncols(self, nrows, ncols): + def test_to_csv_idx_ncols(self, nrows, ncols, temp_file): df = DataFrame( np.ones((nrows, ncols)), index=Index([f"i-{i}" for i in range(nrows)], name="a"), columns=Index([f"i-{i}" for i in range(ncols)], name="a"), ) - result, expected = self._return_result_expected(df, 1000) + result, expected = self._return_result_expected(df, 1000, temp_file) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @@ -427,17 +430,17 @@ def test_to_csv_dup_cols(self, nrows): ix[-2:] = ["rdupe", "rdupe"] df.index = ix df.columns 
= cols - result, expected = self._return_result_expected(df, 1000, dupe_col=True) + result, expected = self._return_result_expected(df, 1000, temp_file, dupe_col=True) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow - def test_to_csv_empty(self): + def test_to_csv_empty(self, temp_file): df = DataFrame(index=np.arange(10, dtype=np.int64)) - result, expected = self._return_result_expected(df, 1000) + result, expected = self._return_result_expected(df, 1000, temp_file) tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.slow - def test_to_csv_chunksize(self): + def test_to_csv_chunksize(self, temp_file): chunksize = 1000 rows = chunksize // 2 + 1 df = DataFrame( @@ -445,7 +448,7 @@ def test_to_csv_chunksize(self): columns=Index(list("ab")), index=MultiIndex.from_arrays([range(rows) for _ in range(2)]), ) - result, expected = self._return_result_expected(df, chunksize, rnlvl=2) + result, expected = self._return_result_expected(df, chunksize, temp_file, rnlvl=2) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @@ -461,7 +464,7 @@ def test_to_csv_chunksize(self): [{"r_idx_nlevels": 2, "c_idx_nlevels": 2}, {"rnlvl": 2, "cnlvl": 2}], ], ) - def test_to_csv_params(self, nrows, df_params, func_params, ncols): + def test_to_csv_params(self, nrows, df_params, func_params, ncols, temp_file): if df_params.get("r_idx_nlevels"): index = MultiIndex.from_arrays( [f"i-{i}" for i in range(nrows)] @@ -478,7 +481,7 @@ def test_to_csv_params(self, nrows, df_params, func_params, ncols): else: columns = Index([f"i-{i}" for i in range(ncols)]) df = DataFrame(np.ones((nrows, ncols)), index=index, columns=columns) - result, expected = self._return_result_expected(df, 1000, **func_params) + result, expected = self._return_result_expected(df, 1000, temp_file, **func_params) tm.assert_frame_equal(result, expected, check_names=False) def test_to_csv_from_csv_w_some_infs(self, temp_file, float_frame): From 37cc958537146f3a07688cae34f60aab78c36168 Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Wed, 24 Sep 2025 17:48:57 -0400 Subject: [PATCH 02/10] Update test_to_csv.py fix missed temp file --- pandas/tests/frame/methods/test_to_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 0cf5330c01f67..84455076b60ba 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -415,7 +415,7 @@ def test_to_csv_idx_ncols(self, nrows, ncols, temp_file): @pytest.mark.slow @pytest.mark.parametrize("nrows", [10, 98, 99, 100, 101, 102]) - def test_to_csv_dup_cols(self, nrows): + def test_to_csv_dup_cols(self, nrows, temp_file): df = DataFrame( np.ones((nrows, 3)), index=Index([f"i-{i}" for i in range(nrows)], name="a"), From 889f3d908c9be8164c66651b684c48bc2384e7bb Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Fri, 26 Sep 2025 12:16:18 +0000 Subject: [PATCH 03/10] temp file instead of ensure clean --- pandas/tests/frame/methods/test_to_csv.py | 249 ++++++++------- pandas/tests/io/formats/test_to_csv.py | 353 +++++++++++----------- pandas/tests/io/test_parquet.py | 284 ++++++++--------- 3 files changed, 449 insertions(+), 437 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 84455076b60ba..13f13c70ff748 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -261,14 +261,13 @@ 
def _return_result_expected( kwargs["index_col"] = list(range(rnlvl)) kwargs["header"] = list(range(cnlvl)) - path = str(temp_file) df.to_csv(path, encoding="utf8", chunksize=chunksize) recons = self.read_csv(path, **kwargs) else: kwargs["header"] = 0 - path = str(temp_file) + path = str(temp_file) df.to_csv(path, encoding="utf8", chunksize=chunksize) recons = self.read_csv(path, **kwargs) @@ -430,7 +429,9 @@ def test_to_csv_dup_cols(self, nrows, temp_file): ix[-2:] = ["rdupe", "rdupe"] df.index = ix df.columns = cols - result, expected = self._return_result_expected(df, 1000, temp_file, dupe_col=True) + result, expected = self._return_result_expected( + df, 1000, temp_file, dupe_col=True + ) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @@ -448,7 +449,9 @@ def test_to_csv_chunksize(self, temp_file): columns=Index(list("ab")), index=MultiIndex.from_arrays([range(rows) for _ in range(2)]), ) - result, expected = self._return_result_expected(df, chunksize, temp_file, rnlvl=2) + result, expected = self._return_result_expected( + df, chunksize, temp_file, rnlvl=2 + ) tm.assert_frame_equal(result, expected, check_names=False) @pytest.mark.slow @@ -481,7 +484,9 @@ def test_to_csv_params(self, nrows, df_params, func_params, ncols, temp_file): else: columns = Index([f"i-{i}" for i in range(ncols)]) df = DataFrame(np.ones((nrows, ncols)), index=index, columns=columns) - result, expected = self._return_result_expected(df, 1000, temp_file, **func_params) + result, expected = self._return_result_expected( + df, 1000, temp_file, **func_params + ) tm.assert_frame_equal(result, expected, check_names=False) def test_to_csv_from_csv_w_some_infs(self, temp_file, float_frame): @@ -598,108 +603,104 @@ def test_to_csv_multiindex(self, temp_file, float_frame, datetime_frame): # needed if setUp becomes class method datetime_frame.index = old_index - with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: - # GH3571, GH1651, GH3141 - - def _make_frame(names=None): - if names is True: - names = ["first", "second"] - return DataFrame( - np.random.default_rng(2).integers(0, 10, size=(3, 3)), - columns=MultiIndex.from_tuples( - [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names - ), - dtype="int64", - ) - - # column & index are multi-index - df = DataFrame( - np.ones((5, 3)), - columns=MultiIndex.from_arrays( - [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") - ), - index=MultiIndex.from_arrays( - [[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab") - ), - ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) - tm.assert_frame_equal(df, result) - - # column is mi - df = DataFrame( - np.ones((5, 3)), - columns=MultiIndex.from_arrays( - [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + def _make_frame(names=None): + if names is True: + names = ["first", "second"] + return DataFrame( + np.random.default_rng(2).integers(0, 10, size=(3, 3)), + columns=MultiIndex.from_tuples( + [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names ), + dtype="int64", ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=0) - tm.assert_frame_equal(df, result) - - # dup column names? 
- df = DataFrame( - np.ones((5, 3)), - columns=MultiIndex.from_arrays( - [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") - ), - index=MultiIndex.from_arrays( - [[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc") - ), - ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) - tm.assert_frame_equal(df, result) - - # writing with no index - df = _make_frame() - df.to_csv(path, index=False) - result = read_csv(path, header=[0, 1]) - tm.assert_frame_equal(df, result) - - # we lose the names here - df = _make_frame(True) - df.to_csv(path, index=False) - result = read_csv(path, header=[0, 1]) - assert com.all_none(*result.columns.names) - result.columns.names = df.columns.names - tm.assert_frame_equal(df, result) - - # whatsnew example - df = _make_frame() - df.to_csv(path) - result = read_csv(path, header=[0, 1], index_col=[0]) - tm.assert_frame_equal(df, result) - - df = _make_frame(True) - df.to_csv(path) - result = read_csv(path, header=[0, 1], index_col=[0]) - tm.assert_frame_equal(df, result) - - # invalid options - df = _make_frame(True) - df.to_csv(path) - - for i in [6, 7]: - msg = f"len of {i}, but only 5 lines in file" - with pytest.raises(ParserError, match=msg): - read_csv(path, header=list(range(i)), index_col=0) - - # write with cols - msg = "cannot specify cols with a MultiIndex" - with pytest.raises(TypeError, match=msg): - df.to_csv(path, columns=["foo", "bar"]) - - with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: - # empty - tsframe[:0].to_csv(path) - recons = self.read_csv(path) - - exp = tsframe[:0] - exp.index = [] - - tm.assert_index_equal(recons.columns, exp.columns) - assert len(recons) == 0 + + # column & index are multi-index + df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + index=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab") + ), + ) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(df, result) + + # column is mi + df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + ) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=0) + tm.assert_frame_equal(df, result) + + # dup column names? 
+ df = DataFrame( + np.ones((5, 3)), + columns=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") + ), + index=MultiIndex.from_arrays( + [[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc") + ), + ) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) + tm.assert_frame_equal(df, result) + + # writing with no index + df = _make_frame() + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + tm.assert_frame_equal(df, result) + + # we lose the names here + df = _make_frame(True) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + assert com.all_none(*result.columns.names) + result.columns.names = df.columns.names + tm.assert_frame_equal(df, result) + + # whatsnew example + df = _make_frame() + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + df = _make_frame(True) + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + # invalid options + df = _make_frame(True) + df.to_csv(path) + + for i in [6, 7]: + msg = f"len of {i}, but only 5 lines in file" + with pytest.raises(ParserError, match=msg): + read_csv(path, header=list(range(i)), index_col=0) + + # write with cols + msg = "cannot specify cols with a MultiIndex" + with pytest.raises(TypeError, match=msg): + df.to_csv(path, columns=["foo", "bar"]) + + # empty + tsframe[:0].to_csv(path) + recons = self.read_csv(path) + + exp = tsframe[:0] + exp.index = [] + + tm.assert_index_equal(recons.columns, exp.columns) + assert len(recons) == 0 def test_to_csv_interval_index(self, temp_file, using_infer_string): # GH 28210 @@ -811,16 +812,15 @@ def test_to_csv_dups_cols(self, temp_file): df.columns = [0, 1, 2] * 5 - with tm.ensure_clean() as filename: - df.to_csv(filename) - result = read_csv(filename, index_col=0) + df.to_csv(path) + result = read_csv(path, index_col=0) - # date cols - for i in ["0.4", "1.4", "2.4"]: - result[i] = to_datetime(result[i]) + # date cols + for i in ["0.4", "1.4", "2.4"]: + result[i] = to_datetime(result[i]) - result.columns = df.columns - tm.assert_frame_equal(result, df) + result.columns = df.columns + tm.assert_frame_equal(result, df) def test_to_csv_dups_cols2(self, temp_file): # GH3457 @@ -1200,18 +1200,17 @@ def test_to_csv_with_dst_transitions_with_pickle(self, start, end, temp_file): idx = idx._with_freq(None) # freq does not round-trip idx._data._freq = None # otherwise there is trouble on unpickle df = DataFrame({"values": 1, "idx": idx}, index=idx) - with tm.ensure_clean("csv_date_format_with_dst") as path: - df.to_csv(path, index=True) - result = read_csv(path, index_col=0) - result.index = ( - to_datetime(result.index, utc=True) - .tz_convert("Europe/Paris") - .as_unit("ns") - ) - result["idx"] = to_datetime(result["idx"], utc=True).astype( - "datetime64[ns, Europe/Paris]" - ) - tm.assert_frame_equal(result, df) + + path = str(temp_file) + df.to_csv(path, index=True) + result = read_csv(path, index_col=0) + result.index = ( + to_datetime(result.index, utc=True).tz_convert("Europe/Paris").as_unit("ns") + ) + result["idx"] = to_datetime(result["idx"], utc=True).astype( + "datetime64[ns, Europe/Paris]" + ) + tm.assert_frame_equal(result, df) # assert working df.astype(str) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 52f521d0d36eb..385615fe4e3a2 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ 
b/pandas/tests/io/formats/test_to_csv.py @@ -17,7 +17,7 @@ class TestToCSV: - def test_to_csv_with_single_column(self): + def test_to_csv_with_single_column(self, temp_file): # see gh-18676, https://bugs.python.org/issue32255 # # Python's CSV library adds an extraneous '""' @@ -30,31 +30,30 @@ def test_to_csv_with_single_column(self): "" 1.0 """ - with tm.ensure_clean("test.csv") as path: - df1.to_csv(path, header=None, index=None) - with open(path, encoding="utf-8") as f: - assert f.read() == expected1 + path = str(temp_file) + df1.to_csv(path, header=None, index=None) + with open(path, encoding="utf-8") as f: + assert f.read() == expected1 df2 = DataFrame([1, None]) expected2 = """\ 1.0 "" """ - with tm.ensure_clean("test.csv") as path: - df2.to_csv(path, header=None, index=None) - with open(path, encoding="utf-8") as f: - assert f.read() == expected2 + df2.to_csv(path, header=None, index=None) + with open(path, encoding="utf-8") as f: + assert f.read() == expected2 - def test_to_csv_default_encoding(self): + def test_to_csv_default_encoding(self, temp_file): # GH17097 df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]}) - with tm.ensure_clean("test.csv") as path: - # the default to_csv encoding is uft-8. - df.to_csv(path) - tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + path = str(temp_file) + # the default to_csv encoding is uft-8. + df.to_csv(path) + tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) - def test_to_csv_quotechar(self): + def test_to_csv_quotechar(self, temp_file): df = DataFrame({"col": [1, 2]}) expected = """\ "","col" @@ -62,10 +61,10 @@ def test_to_csv_quotechar(self): "1","2" """ - with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path, encoding="utf-8") as f: - assert f.read() == expected + path = str(temp_file) + df.to_csv(path, quoting=1) # 1=QUOTE_ALL + with open(path, encoding="utf-8") as f: + assert f.read() == expected expected = """\ $$,$col$ @@ -73,16 +72,15 @@ def test_to_csv_quotechar(self): $1$,$2$ """ - with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1, quotechar="$") - with open(path, encoding="utf-8") as f: - assert f.read() == expected + path = str(temp_file) + df.to_csv(path, quoting=1, quotechar="$") + with open(path, encoding="utf-8") as f: + assert f.read() == expected - with tm.ensure_clean("test.csv") as path: - with pytest.raises(TypeError, match="quotechar"): - df.to_csv(path, quoting=1, quotechar=None) + with pytest.raises(TypeError, match="quotechar"): + df.to_csv(path, quoting=1, quotechar=None) - def test_to_csv_doublequote(self): + def test_to_csv_doublequote(self, temp_file): df = DataFrame({"col": ['a"a', '"bb"']}) expected = '''\ "","col" @@ -90,16 +88,15 @@ def test_to_csv_doublequote(self): "1","""bb""" ''' - with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path, encoding="utf-8") as f: - assert f.read() == expected + path = str(temp_file) + df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + with open(path, encoding="utf-8") as f: + assert f.read() == expected - with tm.ensure_clean("test.csv") as path: - with pytest.raises(Error, match="escapechar"): - df.to_csv(path, doublequote=False) # no escapechar set + with pytest.raises(Error, match="escapechar"): + df.to_csv(path, doublequote=False) # no escapechar set - def test_to_csv_escapechar(self): + def test_to_csv_escapechar(self, temp_file): df = DataFrame({"col": ['a"a', '"bb"']}) expected = """\ "","col" @@ 
-107,10 +104,10 @@ def test_to_csv_escapechar(self): "1","\\"bb\\"" """ - with tm.ensure_clean("test.csv") as path: # QUOTE_ALL - df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") - with open(path, encoding="utf-8") as f: - assert f.read() == expected + path = str(temp_file) + df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") + with open(path, encoding="utf-8") as f: + assert f.read() == expected df = DataFrame({"col": ["a,a", ",bb,"]}) expected = """\ @@ -119,10 +116,9 @@ def test_to_csv_escapechar(self): 1,\\,bb\\, """ - with tm.ensure_clean("test.csv") as path: - df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE - with open(path, encoding="utf-8") as f: - assert f.read() == expected + df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE + with open(path, encoding="utf-8") as f: + assert f.read() == expected def test_csv_to_string(self): df = DataFrame({"col": [1, 2]}) @@ -390,7 +386,7 @@ def test_to_csv_single_level_multi_index(self, ind, expected, frame_or_series): result = obj.to_csv(lineterminator="\n", header=True) assert result == expected - def test_to_csv_string_array_ascii(self): + def test_to_csv_string_array_ascii(self, temp_file): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] df = DataFrame(str_array) @@ -399,12 +395,12 @@ def test_to_csv_string_array_ascii(self): 0,"['foo', 'bar']" 1,"['baz', 'qux']" """ - with tm.ensure_clean("str_test.csv") as path: - df.to_csv(path, encoding="ascii") - with open(path, encoding="utf-8") as f: - assert f.read() == expected_ascii + path = str(temp_file) + df.to_csv(path, encoding="ascii") + with open(path, encoding="utf-8") as f: + assert f.read() == expected_ascii - def test_to_csv_string_array_utf8(self): + def test_to_csv_string_array_utf8(self, temp_file): # GH 10813 str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] df = DataFrame(str_array) @@ -413,80 +409,81 @@ def test_to_csv_string_array_utf8(self): 0,"['foo', 'bar']" 1,"['baz', 'qux']" """ - with tm.ensure_clean("unicode_test.csv") as path: - df.to_csv(path, encoding="utf-8") - with open(path, encoding="utf-8") as f: - assert f.read() == expected_utf8 + path = str(temp_file) + df.to_csv(path, encoding="utf-8") + with open(path, encoding="utf-8") as f: + assert f.read() == expected_utf8 - def test_to_csv_string_with_lf(self): + def test_to_csv_string_with_lf(self, temp_file): # GH 20353 data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} df = DataFrame(data) - with tm.ensure_clean("lf_test.csv") as path: - # case 1: The default line terminator(=os.linesep)(PR 21406) - os_linesep = os.linesep.encode("utf-8") - expected_noarg = ( - b"int,str_lf" - + os_linesep - + b"1,abc" - + os_linesep - + b'2,"d\nef"' - + os_linesep - + b'3,"g\nh\n\ni"' - + os_linesep - ) - df.to_csv(path, index=False) - with open(path, "rb") as f: - assert f.read() == expected_noarg - with tm.ensure_clean("lf_test.csv") as path: - # case 2: LF as line terminator - expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) - with open(path, "rb") as f: - assert f.read() == expected_lf - with tm.ensure_clean("lf_test.csv") as path: - # case 3: CRLF as line terminator - # 'lineterminator' should not change inner element - expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' - df.to_csv(path, lineterminator="\r\n", index=False) - with open(path, "rb") as f: - assert f.read() == expected_crlf - - def test_to_csv_string_with_crlf(self): + path = 
str(temp_file) + + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_lf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\nef"' + + os_linesep + + b'3,"g\nh\n\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + with open(path, "rb") as f: + assert f.read() == expected_noarg + + # case 2: LF as line terminator + expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' + df.to_csv(path, lineterminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + + # case 3: CRLF as line terminator + # 'lineterminator' should not change inner element + expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' + df.to_csv(path, lineterminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf + + def test_to_csv_string_with_crlf(self, temp_file): # GH 20353 data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} df = DataFrame(data) - with tm.ensure_clean("crlf_test.csv") as path: - # case 1: The default line terminator(=os.linesep)(PR 21406) - os_linesep = os.linesep.encode("utf-8") - expected_noarg = ( - b"int,str_crlf" - + os_linesep - + b"1,abc" - + os_linesep - + b'2,"d\r\nef"' - + os_linesep - + b'3,"g\r\nh\r\n\r\ni"' - + os_linesep - ) - df.to_csv(path, index=False) - with open(path, "rb") as f: - assert f.read() == expected_noarg - with tm.ensure_clean("crlf_test.csv") as path: - # case 2: LF as line terminator - expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) - with open(path, "rb") as f: - assert f.read() == expected_lf - with tm.ensure_clean("crlf_test.csv") as path: - # case 3: CRLF as line terminator - # 'lineterminator' should not change inner element - expected_crlf = ( - b'int,str_crlf\r\n1,abc\r\n2,"d\r\nef"\r\n3,"g\r\nh\r\n\r\ni"\r\n' - ) - df.to_csv(path, lineterminator="\r\n", index=False) - with open(path, "rb") as f: - assert f.read() == expected_crlf + path = str(temp_file) + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_crlf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\r\nef"' + + os_linesep + + b'3,"g\r\nh\r\n\r\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + with open(path, "rb") as f: + assert f.read() == expected_noarg + + # case 2: LF as line terminator + expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' + df.to_csv(path, lineterminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + + # case 3: CRLF as line terminator + # 'lineterminator' should not change inner element + expected_crlf = ( + b'int,str_crlf\r\n1,abc\r\n2,"d\r\nef"\r\n3,"g\r\nh\r\n\r\ni"\r\n' + ) + df.to_csv(path, lineterminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf def test_to_csv_stdout_file(self, capsys): # GH 21561 @@ -508,7 +505,7 @@ def test_to_csv_stdout_file(self, capsys): "(https://docs.python.org/3/library/csv.html#csv.writer)" ), ) - def test_to_csv_write_to_open_file(self): + def test_to_csv_write_to_open_file(self, temp_file): # GH 21696 df = DataFrame({"a": ["x", "y", "z"]}) expected = """\ @@ -517,31 +514,37 @@ def test_to_csv_write_to_open_file(self): y z """ - with tm.ensure_clean("test.txt") as path: - with open(path, "w", encoding="utf-8") as f: - f.write("manual header\n") - df.to_csv(f, header=None, 
index=None) - with open(path, encoding="utf-8") as f: - assert f.read() == expected - - def test_to_csv_write_to_open_file_with_newline_py3(self): + path = str(temp_file) + with open(path, "w", encoding="utf-8") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + with open(path, encoding="utf-8") as f: + assert f.read() == expected + + def test_to_csv_write_to_open_file_with_newline_py3(self, temp_file): # see gh-21696 # see gh-20353 df = DataFrame({"a": ["x", "y", "z"]}) expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) - with tm.ensure_clean("test.txt") as path: - with open(path, "w", newline="", encoding="utf-8") as f: - f.write("manual header\n") - df.to_csv(f, header=None, index=None) - with open(path, "rb") as f: - assert f.read() == bytes(expected, "utf-8") + path = str(temp_file) + with open(path, "w", newline="", encoding="utf-8") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + + with open(path, "rb") as f: + assert f.read() == bytes(expected, "utf-8") @pytest.mark.parametrize("to_infer", [True, False]) @pytest.mark.parametrize("read_infer", [True, False]) def test_to_csv_compression( - self, compression_only, read_infer, to_infer, compression_to_extension + self, + compression_only, + read_infer, + to_infer, + compression_to_extension, + temp_file, ): # see gh-15008 compression = compression_only @@ -555,12 +558,12 @@ def test_to_csv_compression( to_compression = "infer" if to_infer else compression read_compression = "infer" if read_infer else compression - with tm.ensure_clean(filename) as path: - df.to_csv(path, compression=to_compression) - result = pd.read_csv(path, index_col=0, compression=read_compression) - tm.assert_frame_equal(result, df) + path = str(temp_file) + df.to_csv(path, compression=to_compression) + result = pd.read_csv(path, index_col=0, compression=read_compression) + tm.assert_frame_equal(result, df) - def test_to_csv_compression_dict(self, compression_only): + def test_to_csv_compression_dict(self, compression_only, temp_file): # GH 26023 method = compression_only df = DataFrame({"ABC": [1]}) @@ -570,34 +573,36 @@ def test_to_csv_compression_dict(self, compression_only): "zstd": "zst", }.get(method, method) filename += extension - with tm.ensure_clean(filename) as path: - df.to_csv(path, compression={"method": method}) - read_df = pd.read_csv(path, index_col=0) - tm.assert_frame_equal(read_df, df) - def test_to_csv_compression_dict_no_method_raises(self): + path = str(temp_file) + df.to_csv(path, compression={"method": method}) + read_df = pd.read_csv(path, index_col=0) + tm.assert_frame_equal(read_df, df) + + def test_to_csv_compression_dict_no_method_raises(self, temp_file): # GH 26023 df = DataFrame({"ABC": [1]}) compression = {"some_option": True} msg = "must have key 'method'" - with tm.ensure_clean("out.zip") as path: - with pytest.raises(ValueError, match=msg): - df.to_csv(path, compression=compression) + path = str(temp_file) + with pytest.raises(ValueError, match=msg): + df.to_csv(path, compression=compression) @pytest.mark.parametrize("compression", ["zip", "infer"]) @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"]) - def test_to_csv_zip_arguments(self, compression, archive_name): + def test_to_csv_zip_arguments(self, compression, archive_name, temp_file): # GH 26023 df = DataFrame({"ABC": [1]}) - with tm.ensure_clean("to_csv_archive_name.zip") as path: - df.to_csv( - path, compression={"method": 
compression, "archive_name": archive_name} - ) - with ZipFile(path) as zp: - assert len(zp.filelist) == 1 - archived_file = zp.filelist[0].filename - assert archived_file == archive_name + + path = str(temp_file) + df.to_csv( + path, compression={"method": compression, "archive_name": archive_name} + ) + with ZipFile(path) as zp: + assert len(zp.filelist) == 1 + archived_file = zp.filelist[0].filename + assert archived_file == archive_name @pytest.mark.parametrize( "filename,expected_arcname", @@ -660,17 +665,18 @@ def test_na_rep_truncated(self): assert result == expected @pytest.mark.parametrize("errors", ["surrogatepass", "ignore", "replace"]) - def test_to_csv_errors(self, errors): + def test_to_csv_errors(self, errors, temp_file): # GH 22610 data = ["\ud800foo"] ser = pd.Series(data, index=Index(data, dtype=object), dtype=object) - with tm.ensure_clean("test.csv") as path: - ser.to_csv(path, errors=errors) + + path = str(temp_file) + ser.to_csv(path, errors=errors) # No use in reading back the data as it is not the same anymore # due to the error handling @pytest.mark.parametrize("mode", ["wb", "w"]) - def test_to_csv_binary_handle(self, mode): + def test_to_csv_binary_handle(self, mode, temp_file): """ Binary file objects should work (if 'mode' contains a 'b') or even without it in most cases. @@ -682,13 +688,14 @@ def test_to_csv_binary_handle(self, mode): columns=Index(list("ABCD")), index=Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - with open(path, mode="w+b") as handle: - df.to_csv(handle, mode=mode) - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + + path = str(temp_file) + with open(path, mode="w+b") as handle: + df.to_csv(handle, mode=mode) + tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) @pytest.mark.parametrize("mode", ["wb", "w"]) - def test_to_csv_encoding_binary_handle(self, mode): + def test_to_csv_encoding_binary_handle(self, mode, temp_file): """ Binary file objects should honor a specified encoding. 
@@ -705,26 +712,24 @@ def test_to_csv_encoding_binary_handle(self, mode): assert buffer.getvalue().startswith(content) # example from GH 13068 - with tm.ensure_clean() as path: - with open(path, "w+b") as handle: - DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig") + path = str(temp_file) + with open(path, "w+b") as handle: + DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig") - handle.seek(0) - assert handle.read().startswith(b'\xef\xbb\xbf""') + handle.seek(0) + assert handle.read().startswith(b'\xef\xbb\xbf""') -def test_to_csv_iterative_compression_name(compression): +def test_to_csv_iterative_compression_name(compression, temp_file): # GH 38714 df = DataFrame( 1.1 * np.arange(120).reshape((30, 4)), columns=Index(list("ABCD")), index=Index([f"i-{i}" for i in range(30)]), ) - with tm.ensure_clean() as path: - df.to_csv(path, compression=compression, chunksize=1) - tm.assert_frame_equal( - pd.read_csv(path, compression=compression, index_col=0), df - ) + path = str(temp_file) + df.to_csv(path, compression=compression, chunksize=1) + tm.assert_frame_equal(pd.read_csv(path, compression=compression, index_col=0), df) def test_to_csv_iterative_compression_buffer(compression): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 908a26874f150..799669ee2c5d1 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -163,8 +163,10 @@ def timezone_aware_date_list(request): return request.param +@pytest.fixture def check_round_trip( df, + temp_file, engine=None, path=None, write_kwargs=None, @@ -223,8 +225,8 @@ def compare(repeat): ) if path is None: - with tm.ensure_clean() as path: - compare(repeat) + path = str(temp_file) + compare(repeat) else: compare(repeat) @@ -337,49 +339,48 @@ def test_get_engine_auto_error_message(): get_engine("auto") -def test_cross_engine_pa_fp(df_cross_compat, pa, fp): +def test_cross_engine_pa_fp(df_cross_compat, pa, fp, temp_file): # cross-compat with differing reading/writing engines df = df_cross_compat - with tm.ensure_clean() as path: - df.to_parquet(path, engine=pa, compression=None) + path = str(temp_file) + df.to_parquet(path, engine=pa, compression=None) - result = read_parquet(path, engine=fp) - tm.assert_frame_equal(result, df) + result = read_parquet(path, engine=fp) + tm.assert_frame_equal(result, df) - result = read_parquet(path, engine=fp, columns=["a", "d"]) - tm.assert_frame_equal(result, df[["a", "d"]]) + result = read_parquet(path, engine=fp, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) -def test_cross_engine_fp_pa(df_cross_compat, pa, fp): +def test_cross_engine_fp_pa(df_cross_compat, pa, fp, temp_file): # cross-compat with differing reading/writing engines df = df_cross_compat - with tm.ensure_clean() as path: - df.to_parquet(path, engine=fp, compression=None) + path = str(temp_file) - result = read_parquet(path, engine=pa) - tm.assert_frame_equal(result, df) + df.to_parquet(path, engine=fp, compression=None) + + result = read_parquet(path, engine=pa) + tm.assert_frame_equal(result, df) - result = read_parquet(path, engine=pa, columns=["a", "d"]) - tm.assert_frame_equal(result, df[["a", "d"]]) + result = read_parquet(path, engine=pa, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) class Base: - def check_error_on_write(self, df, engine, exc, err_msg): + def check_error_on_write(self, df, engine, exc, err_msg, temp_file_path): # check that we are raising the exception on writing - with tm.ensure_clean() as path: - with 
pytest.raises(exc, match=err_msg): - to_parquet(df, path, engine, compression=None) + with pytest.raises(exc, match=err_msg): + to_parquet(df, temp_file_path, engine, compression=None) - def check_external_error_on_write(self, df, engine, exc): + def check_external_error_on_write(self, df, engine, exc, temp_file_path): # check that an external library is raising the exception on writing - with tm.ensure_clean() as path: - with tm.external_error_raised(exc): - to_parquet(df, path, engine, compression=None) + with tm.external_error_raised(exc): + to_parquet(df, temp_file_path, engine, compression=None) class TestBasic(Base): - def test_error(self, engine): + def test_error(self, engine, temp_file): for obj in [ pd.Series([1, 2, 3]), 1, @@ -388,7 +389,8 @@ def test_error(self, engine): np.array([1, 2, 3]), ]: msg = "to_parquet only supports IO with DataFrames" - self.check_error_on_write(obj, engine, ValueError, msg) + path = str(temp_file) + self.check_error_on_write(obj, engine, ValueError, msg, path) def test_columns_dtypes(self, engine): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) @@ -513,21 +515,22 @@ def test_write_ignoring_index(self, engine): expected = df.reset_index(drop=True) check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) - def test_write_column_multiindex(self, engine): + def test_write_column_multiindex(self, engine, temp_file): # Not able to write column multi-indexes with non-string column names. mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) df = pd.DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=mi_columns ) + path = str(temp_file) if engine == "fastparquet": self.check_error_on_write( - df, engine, TypeError, "Column name must be a string" + df, engine, TypeError, "Column name must be a string", path ) elif engine == "pyarrow": check_round_trip(df, engine) - def test_write_column_multiindex_nonstring(self, engine): + def test_write_column_multiindex_nonstring(self, engine, temp_file): # GH #34777 # Not able to write column multi-indexes with non-string column names @@ -539,8 +542,9 @@ def test_write_column_multiindex_nonstring(self, engine): np.random.default_rng(2).standard_normal((8, 8)), columns=arrays ) df.columns.names = ["Level1", "Level2"] + path = str(temp_file) if engine == "fastparquet": - self.check_error_on_write(df, engine, ValueError, "Column name") + self.check_error_on_write(df, engine, ValueError, "Column name", path) elif engine == "pyarrow": check_round_trip(df, engine) @@ -575,7 +579,7 @@ def test_write_column_index_string(self, pa): check_round_trip(df, engine) - def test_write_column_index_nonstring(self, engine): + def test_write_column_index_nonstring(self, engine, temp_file): # GH #34777 # Write column indexes with string column names @@ -584,14 +588,15 @@ def test_write_column_index_nonstring(self, engine): np.random.default_rng(2).standard_normal((8, 4)), columns=arrays ) df.columns.name = "NonStringCol" + path = str(temp_file) if engine == "fastparquet": self.check_error_on_write( - df, engine, TypeError, "Column name must be a string" + df, engine, TypeError, "Column name must be a string", path ) else: check_round_trip(df, engine) - def test_dtype_backend(self, engine, request): + def test_dtype_backend(self, engine, request, temp_file): pq = pytest.importorskip("pyarrow.parquet") if engine == "fastparquet": @@ -615,11 +620,11 @@ def test_dtype_backend(self, engine, request): "g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"), } ) - with 
tm.ensure_clean() as path: - # write manually with pyarrow to write integers - pq.write_table(table, path) - result1 = read_parquet(path, engine=engine) - result2 = read_parquet(path, engine=engine, dtype_backend="numpy_nullable") + path = str(temp_file) + # write manually with pyarrow to write integers + pq.write_table(table, path) + result1 = read_parquet(path, engine=engine) + result2 = read_parquet(path, engine=engine, dtype_backend="numpy_nullable") assert result1["a"].dtype == np.dtype("float64") expected = pd.DataFrame( @@ -730,29 +735,34 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[ms]") tm.assert_frame_equal(res, expected) - def test_duplicate_columns(self, pa): + def test_duplicate_columns(self, pa, temp_file): # not currently able to handle duplicate columns df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() - self.check_error_on_write(df, pa, ValueError, "Duplicate column names found") + path = str(temp_file) + self.check_error_on_write( + df, pa, ValueError, "Duplicate column names found", path + ) def test_timedelta(self, pa): df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)}) check_round_trip(df, pa) - def test_unsupported(self, pa): + def test_unsupported(self, pa, temp_file): # mixed python objects df = pd.DataFrame({"a": ["a", 1, 2.0]}) # pyarrow 0.11 raises ArrowTypeError # older pyarrows raise ArrowInvalid - self.check_external_error_on_write(df, pa, pyarrow.ArrowException) + path = str(temp_file) + self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path) - def test_unsupported_float16(self, pa): + def test_unsupported_float16(self, pa, temp_file): # #44847, #44914 # Not able to write float 16 column using pyarrow. data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) + path = str(temp_file) if pa_version_under15p0: - self.check_external_error_on_write(df, pa, pyarrow.ArrowException) + self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path) else: check_round_trip(df, pa) @@ -765,18 +775,18 @@ def test_unsupported_float16(self, pa): ) @pytest.mark.skipif(not pa_version_under15p0, reason="float16 works on 15") @pytest.mark.parametrize("path_type", [str, pathlib.Path]) - def test_unsupported_float16_cleanup(self, pa, path_type): + def test_unsupported_float16_cleanup(self, pa, path_type, temp_file): # #44847, #44914 # Not able to write float 16 column using pyarrow. 
# Tests cleanup by pyarrow in case of an error data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) - with tm.ensure_clean() as path_str: - path = path_type(path_str) - with tm.external_error_raised(pyarrow.ArrowException): - df.to_parquet(path=path, engine=pa) - assert not os.path.isfile(path) + path_str = str(temp_file) + path = path_type(path_str) + with tm.external_error_raised(pyarrow.ArrowException): + df.to_parquet(path=path, engine=pa) + assert not os.path.isfile(path) def test_categorical(self, pa): # supported in >= 0.7.0 @@ -1005,13 +1015,13 @@ def test_timezone_aware_index(self, pa, timezone_aware_date_list): expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz) check_round_trip(df, pa, check_dtype=False, expected=expected) - def test_filter_row_groups(self, pa): + def test_filter_row_groups(self, pa, temp_file): # https://github.com/pandas-dev/pandas/issues/26551 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": list(range(3))}) - with tm.ensure_clean() as path: - df.to_parquet(path, engine=pa) - result = read_parquet(path, pa, filters=[("a", "==", 0)]) + path = str(temp_file) + df.to_parquet(path, engine=pa) + result = read_parquet(path, pa, filters=[("a", "==", 0)]) assert len(result) == 1 @pytest.mark.filterwarnings("ignore:make_block is deprecated:DeprecationWarning") @@ -1206,32 +1216,34 @@ def test_basic(self, fp, df_full, request): df["timedelta"] = pd.timedelta_range("1 day", periods=3) check_round_trip(df, fp) - def test_columns_dtypes_invalid(self, fp): + def test_columns_dtypes_invalid(self, fp, temp_file): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) err = TypeError msg = "Column name must be a string" + path = str(temp_file) # numeric df.columns = [0, 1] - self.check_error_on_write(df, fp, err, msg) + self.check_error_on_write(df, fp, err, msg, path) # bytes df.columns = [b"foo", b"bar"] - self.check_error_on_write(df, fp, err, msg) + self.check_error_on_write(df, fp, err, msg, path) # python object df.columns = [ datetime.datetime(2011, 1, 1, 0, 0), datetime.datetime(2011, 1, 1, 1, 1), ] - self.check_error_on_write(df, fp, err, msg) + self.check_error_on_write(df, fp, err, msg, path) - def test_duplicate_columns(self, fp): + def test_duplicate_columns(self, fp, temp_file): # not currently able to handle duplicate columns df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() msg = "Cannot create parquet dataset with duplicate column names" - self.check_error_on_write(df, fp, ValueError, msg) + path = str(temp_file) + self.check_error_on_write(df, fp, ValueError, msg, path) def test_bool_with_none(self, fp, request): df = pd.DataFrame({"a": [True, None, False]}) @@ -1240,27 +1252,28 @@ def test_bool_with_none(self, fp, request): # float64 check_round_trip(df, fp, expected=expected, check_dtype=False) - def test_unsupported(self, fp): + def test_unsupported(self, fp, temp_file): # period df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) # error from fastparquet -> don't check exact error message - self.check_error_on_write(df, fp, ValueError, None) + path = str(temp_file) + self.check_error_on_write(df, fp, ValueError, None, path) # mixed df = pd.DataFrame({"a": ["a", 1, 2.0]}) msg = "Can't infer object conversion type" - self.check_error_on_write(df, fp, ValueError, msg) + self.check_error_on_write(df, fp, ValueError, msg, path) def test_categorical(self, fp): df = pd.DataFrame({"a": pd.Categorical(list("abc"))}) check_round_trip(df, fp) - def 
test_filter_row_groups(self, fp): + def test_filter_row_groups(self, fp, temp_file): d = {"a": list(range(3))} df = pd.DataFrame(d) - with tm.ensure_clean() as path: - df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1) - result = read_parquet(path, fp, filters=[("a", "==", 0)]) + path = str(temp_file) + df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1) + result = read_parquet(path, fp, filters=[("a", "==", 0)]) assert len(result) == 1 @pytest.mark.single_cpu @@ -1356,91 +1369,86 @@ def test_timezone_aware_index(self, fp, timezone_aware_date_list, request): expected.index.name = "index" check_round_trip(df, fp, expected=expected) - def test_close_file_handle_on_read_error(self): - with tm.ensure_clean("test.parquet") as path: - pathlib.Path(path).write_bytes(b"breakit") - with tm.external_error_raised(Exception): # Not important which exception - read_parquet(path, engine="fastparquet") - # The next line raises an error on Windows if the file is still open - pathlib.Path(path).unlink(missing_ok=False) + def test_close_file_handle_on_read_error(self, temp_file): + path = str(temp_file) + pathlib.Path(path).write_bytes(b"breakit") + with tm.external_error_raised(Exception): # Not important which exception + read_parquet(path, engine="fastparquet") + # The next line raises an error on Windows if the file is still open + pathlib.Path(path).unlink(missing_ok=False) - def test_bytes_file_name(self, engine): + def test_bytes_file_name(self, engine, temp_file): # GH#48944 df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - with tm.ensure_clean("test.parquet") as path: - with open(path.encode(), "wb") as f: - df.to_parquet(f) + path = str(temp_file) + with open(path.encode(), "wb") as f: + df.to_parquet(f) - result = read_parquet(path, engine=engine) + result = read_parquet(path, engine=engine) tm.assert_frame_equal(result, df) - def test_filesystem_notimplemented(self): + def test_filesystem_notimplemented(self, temp_file): pytest.importorskip("fastparquet") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - with tm.ensure_clean() as path: - with pytest.raises( - NotImplementedError, match="filesystem is not implemented" - ): - df.to_parquet(path, engine="fastparquet", filesystem="foo") - - with tm.ensure_clean() as path: - pathlib.Path(path).write_bytes(b"foo") - with pytest.raises( - NotImplementedError, match="filesystem is not implemented" - ): - read_parquet(path, engine="fastparquet", filesystem="foo") - - def test_invalid_filesystem(self): + path = str(temp_file) + with pytest.raises(NotImplementedError, match="filesystem is not implemented"): + df.to_parquet(path, engine="fastparquet", filesystem="foo") + + pathlib.Path(path).write_bytes(b"foo") + with pytest.raises(NotImplementedError, match="filesystem is not implemented"): + read_parquet(path, engine="fastparquet", filesystem="foo") + + def test_invalid_filesystem(self, temp_file): pytest.importorskip("pyarrow") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - with tm.ensure_clean() as path: - with pytest.raises( - ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" - ): - df.to_parquet(path, engine="pyarrow", filesystem="foo") - - with tm.ensure_clean() as path: - pathlib.Path(path).write_bytes(b"foo") - with pytest.raises( - ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" - ): - read_parquet(path, engine="pyarrow", filesystem="foo") - - def test_unsupported_pa_filesystem_storage_options(self): + path = str(temp_file) + + with pytest.raises( + 
ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" + ): + df.to_parquet(path, engine="pyarrow", filesystem="foo") + + pathlib.Path(path).write_bytes(b"foo") + with pytest.raises( + ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" + ): + read_parquet(path, engine="pyarrow", filesystem="foo") + + def test_unsupported_pa_filesystem_storage_options(self, temp_file): pa_fs = pytest.importorskip("pyarrow.fs") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - with tm.ensure_clean() as path: - with pytest.raises( - NotImplementedError, - match="storage_options not supported with a pyarrow FileSystem.", - ): - df.to_parquet( - path, - engine="pyarrow", - filesystem=pa_fs.LocalFileSystem(), - storage_options={"foo": "bar"}, - ) - - with tm.ensure_clean() as path: - pathlib.Path(path).write_bytes(b"foo") - with pytest.raises( - NotImplementedError, - match="storage_options not supported with a pyarrow FileSystem.", - ): - read_parquet( - path, - engine="pyarrow", - filesystem=pa_fs.LocalFileSystem(), - storage_options={"foo": "bar"}, - ) - - def test_invalid_dtype_backend(self, engine): + path = str(temp_file) + + with pytest.raises( + NotImplementedError, + match="storage_options not supported with a pyarrow FileSystem.", + ): + df.to_parquet( + path, + engine="pyarrow", + filesystem=pa_fs.LocalFileSystem(), + storage_options={"foo": "bar"}, + ) + + pathlib.Path(path).write_bytes(b"foo") + with pytest.raises( + NotImplementedError, + match="storage_options not supported with a pyarrow FileSystem.", + ): + read_parquet( + path, + engine="pyarrow", + filesystem=pa_fs.LocalFileSystem(), + storage_options={"foo": "bar"}, + ) + + def test_invalid_dtype_backend(self, engine, temp_file): msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " "'pyarrow' are allowed." ) df = pd.DataFrame({"int": list(range(1, 4))}) - with tm.ensure_clean("tmp.parquet") as path: - df.to_parquet(path) - with pytest.raises(ValueError, match=msg): - read_parquet(path, dtype_backend="numpy") + path = str(temp_file) + df.to_parquet(path) + with pytest.raises(ValueError, match=msg): + read_parquet(path, dtype_backend="numpy") From c184ea559d9e275a536fb8ecdd6d5a46448fa7cb Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Fri, 26 Sep 2025 21:09:27 +0000 Subject: [PATCH 04/10] extension fix --- pandas/tests/io/formats/test_to_csv.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 385615fe4e3a2..f716cde8c3ad8 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -549,16 +549,12 @@ def test_to_csv_compression( # see gh-15008 compression = compression_only - # We'll complete file extension subsequently. - filename = "test." - filename += compression_to_extension[compression] - df = DataFrame({"A": [1]}) to_compression = "infer" if to_infer else compression read_compression = "infer" if read_infer else compression - path = str(temp_file) + path = str(temp_file) + "." + compression_to_extension[compression] df.to_csv(path, compression=to_compression) result = pd.read_csv(path, index_col=0, compression=read_compression) tm.assert_frame_equal(result, df) @@ -567,14 +563,12 @@ def test_to_csv_compression_dict(self, compression_only, temp_file): # GH 26023 method = compression_only df = DataFrame({"ABC": [1]}) - filename = "to_csv_compress_as_dict." 
extension = { "gzip": "gz", "zstd": "zst", }.get(method, method) - filename += extension - path = str(temp_file) + path = str(temp_file) + "." + extension df.to_csv(path, compression={"method": method}) read_df = pd.read_csv(path, index_col=0) tm.assert_frame_equal(read_df, df) @@ -595,7 +589,7 @@ def test_to_csv_zip_arguments(self, compression, archive_name, temp_file): # GH 26023 df = DataFrame({"ABC": [1]}) - path = str(temp_file) + path = str(temp_file) + ".zip" df.to_csv( path, compression={"method": compression, "archive_name": archive_name} ) From afdd1810381d90799b7c82a6c30e1848106e170e Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Mon, 29 Sep 2025 00:27:48 +0000 Subject: [PATCH 05/10] refactor test parquet to fix fixture error --- pandas/tests/io/test_parquet.py | 193 +++++++++++++++++++------------- 1 file changed, 113 insertions(+), 80 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 799669ee2c5d1..79660394e1197 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -163,7 +163,6 @@ def timezone_aware_date_list(request): return request.param -@pytest.fixture def check_round_trip( df, temp_file, @@ -247,33 +246,33 @@ def check_partition_names(path, expected): assert dataset.partitioning.schema.names == expected -def test_invalid_engine(df_compat): +def test_invalid_engine(df_compat, temp_file): msg = "engine must be one of 'pyarrow', 'fastparquet'" with pytest.raises(ValueError, match=msg): - check_round_trip(df_compat, "foo", "bar") + check_round_trip(df_compat, temp_file, "foo", "bar") -def test_options_py(df_compat, pa, using_infer_string): +def test_options_py(df_compat, pa, using_infer_string, temp_file): # use the set option if using_infer_string and not pa_version_under19p0: df_compat.columns = df_compat.columns.astype("str") with pd.option_context("io.parquet.engine", "pyarrow"): - check_round_trip(df_compat) + check_round_trip(df_compat, temp_file) -def test_options_fp(df_compat, fp): +def test_options_fp(df_compat, fp, temp_file): # use the set option with pd.option_context("io.parquet.engine", "fastparquet"): - check_round_trip(df_compat) + check_round_trip(df_compat, temp_file) -def test_options_auto(df_compat, fp, pa): +def test_options_auto(df_compat, fp, pa, temp_file): # use the set option with pd.option_context("io.parquet.engine", "auto"): - check_round_trip(df_compat) + check_round_trip(df_compat, temp_file) def test_options_get_engine(fp, pa): @@ -392,25 +391,29 @@ def test_error(self, engine, temp_file): path = str(temp_file) self.check_error_on_write(obj, engine, ValueError, msg, path) - def test_columns_dtypes(self, engine): + def test_columns_dtypes(self, engine, temp_file): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) # unicode df.columns = ["foo", "bar"] - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"]) def test_compression(self, engine, compression): df = pd.DataFrame({"A": [1, 2, 3]}) check_round_trip(df, engine, write_kwargs={"compression": compression}) - def test_read_columns(self, engine): + def test_read_columns(self, engine, temp_file): # GH18154 df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) expected = pd.DataFrame({"string": list("abc")}) check_round_trip( - df, engine, expected=expected, read_kwargs={"columns": ["string"]} + df, + temp_file, + engine, + expected=expected, + read_kwargs={"columns": ["string"]}, ) def 
test_read_filters(self, engine, tmp_path): @@ -432,10 +435,10 @@ def test_read_filters(self, engine, tmp_path): repeat=1, ) - def test_write_index(self): + def test_write_index(self, temp_file): pytest.importorskip("pyarrow") df = pd.DataFrame({"A": [1, 2, 3]}) - check_round_trip(df, "pyarrow") + check_round_trip(df, temp_file, "pyarrow") indexes = [ [2, 3, 4], @@ -448,23 +451,23 @@ def test_write_index(self): df.index = index if isinstance(index, pd.DatetimeIndex): df.index = df.index._with_freq(None) # freq doesn't round-trip - check_round_trip(df, "pyarrow") + check_round_trip(df, temp_file, "pyarrow") # index with meta-data df.index = [0, 1, 2] df.index.name = "foo" - check_round_trip(df, "pyarrow") + check_round_trip(df, temp_file, "pyarrow") - def test_write_multiindex(self, pa): + def test_write_multiindex(self, pa, temp_file): # Not supported in fastparquet as of 0.1.3 or older pyarrow version engine = pa df = pd.DataFrame({"A": [1, 2, 3]}) index = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) df.index = index - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) - def test_multiindex_with_columns(self, pa): + def test_multiindex_with_columns(self, pa, temp_file): engine = pa dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") df = pd.DataFrame( @@ -478,12 +481,16 @@ def test_multiindex_with_columns(self, pa): for index in [index1, index2]: df.index = index - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) check_round_trip( - df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]] + df, + temp_file, + engine, + read_kwargs={"columns": ["A", "B"]}, + expected=df[["A", "B"]], ) - def test_write_ignoring_index(self, engine): + def test_write_ignoring_index(self, engine, temp_file): # ENH 20768 # Ensure index=False omits the index from the written Parquet file. df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]}) @@ -494,14 +501,18 @@ def test_write_ignoring_index(self, engine): # have the default integer index. expected = df.reset_index(drop=True) - check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + check_round_trip( + df, temp_file, engine, write_kwargs=write_kwargs, expected=expected + ) # Ignore custom index df = pd.DataFrame( {"a": [1, 2, 3], "b": ["q", "r", "s"]}, index=["zyx", "wvu", "tsr"] ) - check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + check_round_trip( + df, temp_file, engine, write_kwargs=write_kwargs, expected=expected + ) # Ignore multi-indexes as well. arrays = [ @@ -513,7 +524,9 @@ def test_write_ignoring_index(self, engine): ) expected = df.reset_index(drop=True) - check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + check_round_trip( + df, temp_file, engine, write_kwargs=write_kwargs, expected=expected + ) def test_write_column_multiindex(self, engine, temp_file): # Not able to write column multi-indexes with non-string column names. 
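The `-@pytest.fixture` removal at the top of this patch is the actual "fixture error" fix from the commit subject: pytest refuses direct calls to fixture-decorated functions (it aborts with `Fixture "..." called directly`), so a helper that every test invokes by name must stay a plain function, with `temp_file` threaded through as an ordinary argument. A simplified sketch of the resulting shape (the real helper also accepts `write_kwargs`, `read_kwargs`, `expected`, and repeat logic):

import pandas as pd
import pandas._testing as tm


def check_round_trip(df, temp_file, engine="pyarrow", path=None):
    # Plain helper, not a fixture: tests call it directly and pass the
    # temp_file fixture value through explicitly.
    if path is None:
        path = str(temp_file)
    df.to_parquet(path, engine=engine)
    result = pd.read_parquet(path, engine=engine)
    tm.assert_frame_equal(result, df)
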
@@ -528,7 +541,7 @@ def test_write_column_multiindex(self, engine, temp_file): df, engine, TypeError, "Column name must be a string", path ) elif engine == "pyarrow": - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) def test_write_column_multiindex_nonstring(self, engine, temp_file): # GH #34777 @@ -546,9 +559,9 @@ def test_write_column_multiindex_nonstring(self, engine, temp_file): if engine == "fastparquet": self.check_error_on_write(df, engine, ValueError, "Column name", path) elif engine == "pyarrow": - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) - def test_write_column_multiindex_string(self, pa): + def test_write_column_multiindex_string(self, pa, temp_file): # GH #34777 # Not supported in fastparquet as of 0.1.3 engine = pa @@ -563,9 +576,9 @@ def test_write_column_multiindex_string(self, pa): ) df.columns.names = ["ColLevel1", "ColLevel2"] - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) - def test_write_column_index_string(self, pa): + def test_write_column_index_string(self, pa, temp_file): # GH #34777 # Not supported in fastparquet as of 0.1.3 engine = pa @@ -577,7 +590,7 @@ def test_write_column_index_string(self, pa): ) df.columns.name = "StringCol" - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) def test_write_column_index_nonstring(self, engine, temp_file): # GH #34777 @@ -594,7 +607,7 @@ def test_write_column_index_nonstring(self, engine, temp_file): df, engine, TypeError, "Column name must be a string", path ) else: - check_round_trip(df, engine) + check_round_trip(df, temp_file, engine) def test_dtype_backend(self, engine, request, temp_file): pq = pytest.importorskip("pyarrow.parquet") @@ -659,7 +672,7 @@ def test_dtype_backend(self, engine, request, temp_file): "string", ], ) - def test_read_empty_array(self, pa, dtype): + def test_read_empty_array(self, pa, dtype, temp_file): # GH #41241 df = pd.DataFrame( { @@ -676,7 +689,11 @@ def test_read_empty_array(self, pa, dtype): } ) check_round_trip( - df, pa, read_kwargs={"dtype_backend": "numpy_nullable"}, expected=expected + df, + temp_file, + pa, + read_kwargs={"dtype_backend": "numpy_nullable"}, + expected=expected, ) @pytest.mark.network @@ -696,7 +713,7 @@ def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine): class TestParquetPyArrow(Base): @pytest.mark.xfail(reason="datetime_with_nat unit doesn't round-trip") - def test_basic(self, pa, df_full): + def test_basic(self, pa, df_full, temp_file): df = df_full pytest.importorskip("pyarrow", "11.0.0") @@ -706,9 +723,9 @@ def test_basic(self, pa, df_full): df["datetime_tz"] = dti df["bool_with_none"] = [True, None, True] - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) - def test_basic_subset_columns(self, pa, df_full): + def test_basic_subset_columns(self, pa, df_full, temp_file): # GH18628 df = df_full @@ -717,6 +734,7 @@ def test_basic_subset_columns(self, pa, df_full): check_round_trip( df, + temp_file, pa, expected=df[["string", "int"]], read_kwargs={"columns": ["string", "int"]}, @@ -743,9 +761,9 @@ def test_duplicate_columns(self, pa, temp_file): df, pa, ValueError, "Duplicate column names found", path ) - def test_timedelta(self, pa): + def test_timedelta(self, pa, temp_file): df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)}) - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) def test_unsupported(self, pa, temp_file): # mixed python objects @@ -764,7 +782,7 @@ def test_unsupported_float16(self, pa, 
temp_file): if pa_version_under15p0: self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path) else: - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) @pytest.mark.xfail( is_platform_windows(), @@ -788,7 +806,7 @@ def test_unsupported_float16_cleanup(self, pa, path_type, temp_file): df.to_parquet(path=path, engine=pa) assert not os.path.isfile(path) - def test_categorical(self, pa): + def test_categorical(self, pa, temp_file): # supported in >= 0.7.0 df = pd.DataFrame( { @@ -807,15 +825,18 @@ def test_categorical(self, pa): } ) - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) @pytest.mark.single_cpu - def test_s3_roundtrip_explicit_fs(self, df_compat, s3_bucket_public, s3so, pa): + def test_s3_roundtrip_explicit_fs( + self, df_compat, s3_bucket_public, s3so, pa, temp_file + ): s3fs = pytest.importorskip("s3fs") s3 = s3fs.S3FileSystem(**s3so) kw = {"filesystem": s3} check_round_trip( df_compat, + temp_file, pa, path=f"{s3_bucket_public.name}/pyarrow.parquet", read_kwargs=kw, @@ -823,11 +844,12 @@ def test_s3_roundtrip_explicit_fs(self, df_compat, s3_bucket_public, s3so, pa): ) @pytest.mark.single_cpu - def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, pa): + def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, pa, temp_file): # GH #19134 s3so = {"storage_options": s3so} check_round_trip( df_compat, + temp_file, pa, path=f"s3://{s3_bucket_public.name}/pyarrow.parquet", read_kwargs=s3so, @@ -837,7 +859,7 @@ def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, pa): @pytest.mark.single_cpu @pytest.mark.parametrize("partition_col", [["A"], []]) def test_s3_roundtrip_for_dir( - self, df_compat, s3_bucket_public, pa, partition_col, s3so + self, df_compat, s3_bucket_public, pa, partition_col, s3so, temp_file ): pytest.importorskip("s3fs") # GH #26388 @@ -854,6 +876,7 @@ def test_s3_roundtrip_for_dir( check_round_trip( df_compat, + temp_file, pa, expected=expected_df, path=f"s3://{s3_bucket_public.name}/parquet_dir", @@ -916,20 +939,22 @@ def test_partition_cols_pathlib(self, tmp_path, pa, df_compat, path_type): df.to_parquet(path, partition_cols=partition_cols_list) assert read_parquet(path).shape == df.shape - def test_empty_dataframe(self, pa): + def test_empty_dataframe(self, pa, temp_file): # GH #27339 df = pd.DataFrame(index=[], columns=[]) - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) - def test_write_with_schema(self, pa): + def test_write_with_schema(self, pa, temp_file): import pyarrow df = pd.DataFrame({"x": [0, 1]}) schema = pyarrow.schema([pyarrow.field("x", type=pyarrow.bool_())]) out_df = df.astype(bool) - check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df) + check_round_trip( + df, temp_file, pa, write_kwargs={"schema": schema}, expected=out_df + ) - def test_additional_extension_arrays(self, pa, using_infer_string): + def test_additional_extension_arrays(self, pa, using_infer_string, temp_file): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol pytest.importorskip("pyarrow") @@ -941,14 +966,16 @@ def test_additional_extension_arrays(self, pa, using_infer_string): } ) if using_infer_string and pa_version_under19p0: - check_round_trip(df, pa, expected=df.astype({"c": "str"})) + check_round_trip(df, temp_file, pa, expected=df.astype({"c": "str"})) else: - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")}) - check_round_trip(df, pa) + 
check_round_trip(df, temp_file, pa) - def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_string): + def test_pyarrow_backed_string_array( + self, pa, string_storage, using_infer_string, temp_file + ): # test ArrowStringArray supported through the __arrow_array__ protocol pytest.importorskip("pyarrow") df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")}) @@ -961,9 +988,9 @@ def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_strin expected.columns = expected.columns.astype("str") else: expected = df.astype(f"string[{string_storage}]") - check_round_trip(df, pa, expected=expected) + check_round_trip(df, temp_file, pa, expected=expected) - def test_additional_extension_types(self, pa): + def test_additional_extension_types(self, pa, temp_file): # test additional ExtensionArrays that are supported through the # __arrow_array__ protocol + by defining a custom ExtensionType pytest.importorskip("pyarrow") @@ -977,16 +1004,16 @@ def test_additional_extension_types(self, pa): ), } ) - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) - def test_timestamp_nanoseconds(self, pa): + def test_timestamp_nanoseconds(self, pa, temp_file): # with version 2.6, pyarrow defaults to writing the nanoseconds, so # this should work without error, even for pyarrow < 13 ver = "2.6" df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1ns", periods=10)}) - check_round_trip(df, pa, write_kwargs={"version": ver}) + check_round_trip(df, temp_file, pa, write_kwargs={"version": ver}) - def test_timezone_aware_index(self, pa, timezone_aware_date_list): + def test_timezone_aware_index(self, pa, timezone_aware_date_list, temp_file): idx = 5 * [timezone_aware_date_list] df = pd.DataFrame(index=idx, data={"index_as_col": idx}) @@ -1013,7 +1040,7 @@ def test_timezone_aware_index(self, pa, timezone_aware_date_list): tz = pytz.FixedOffset(offset.total_seconds() / 60) expected.index = expected.index.tz_convert(tz) expected["index_as_col"] = expected["index_as_col"].dt.tz_convert(tz) - check_round_trip(df, pa, check_dtype=False, expected=expected) + check_round_trip(df, temp_file, pa, check_dtype=False, expected=expected) def test_filter_row_groups(self, pa, temp_file): # https://github.com/pandas-dev/pandas/issues/26551 @@ -1025,7 +1052,7 @@ def test_filter_row_groups(self, pa, temp_file): assert len(result) == 1 @pytest.mark.filterwarnings("ignore:make_block is deprecated:DeprecationWarning") - def test_read_dtype_backend_pyarrow_config(self, pa, df_full): + def test_read_dtype_backend_pyarrow_config(self, pa, df_full, temp_file): import pyarrow df = df_full @@ -1044,12 +1071,13 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full): check_round_trip( df, + temp_file, engine=pa, read_kwargs={"dtype_backend": "pyarrow"}, expected=expected, ) - def test_read_dtype_backend_pyarrow_config_index(self, pa): + def test_read_dtype_backend_pyarrow_config_index(self, pa, temp_file): df = pd.DataFrame( {"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]" ) @@ -1058,6 +1086,7 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): expected.index = expected.index.astype("int64[pyarrow]") check_round_trip( df, + temp_file, engine=pa, read_kwargs={"dtype_backend": "pyarrow"}, expected=expected, @@ -1087,16 +1116,16 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): ), ], ) - def test_columns_dtypes_not_invalid(self, pa, columns): + def test_columns_dtypes_not_invalid(self, pa, columns, temp_file): df = 
pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) df.columns = columns - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) - def test_empty_columns(self, pa): + def test_empty_columns(self, pa, temp_file): # GH 52034 df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")) - check_round_trip(df, pa) + check_round_trip(df, temp_file, pa) def test_df_attrs_persistence(self, tmp_path, pa): path = tmp_path / "test_df_metadata.p" @@ -1188,7 +1217,7 @@ def test_non_nanosecond_timestamps(self, temp_file): ) tm.assert_frame_equal(result, expected) - def test_maps_as_pydicts(self, pa): + def test_maps_as_pydicts(self, pa, temp_file): pyarrow = pytest.importorskip("pyarrow", "13.0.0") schema = pyarrow.schema( @@ -1197,6 +1226,7 @@ def test_maps_as_pydicts(self, pa): df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}]) check_round_trip( df, + temp_file, pa, write_kwargs={"schema": schema}, read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}}, @@ -1204,7 +1234,7 @@ def test_maps_as_pydicts(self, pa): class TestParquetFastParquet(Base): - def test_basic(self, fp, df_full, request): + def test_basic(self, fp, df_full, request, temp_file): pytz = pytest.importorskip("pytz") tz = pytz.timezone("US/Eastern") @@ -1214,7 +1244,7 @@ def test_basic(self, fp, df_full, request): dti = dti._with_freq(None) # freq doesn't round-trip df["datetime_tz"] = dti df["timedelta"] = pd.timedelta_range("1 day", periods=3) - check_round_trip(df, fp) + check_round_trip(df, temp_file, fp) def test_columns_dtypes_invalid(self, fp, temp_file): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) @@ -1245,12 +1275,12 @@ def test_duplicate_columns(self, fp, temp_file): path = str(temp_file) self.check_error_on_write(df, fp, ValueError, msg, path) - def test_bool_with_none(self, fp, request): + def test_bool_with_none(self, fp, request, temp_file): df = pd.DataFrame({"a": [True, None, False]}) expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16") # Fastparquet bug in 0.7.1 makes it so that this dtype becomes # float64 - check_round_trip(df, fp, expected=expected, check_dtype=False) + check_round_trip(df, temp_file, fp, expected=expected, check_dtype=False) def test_unsupported(self, fp, temp_file): # period @@ -1264,9 +1294,9 @@ def test_unsupported(self, fp, temp_file): msg = "Can't infer object conversion type" self.check_error_on_write(df, fp, ValueError, msg, path) - def test_categorical(self, fp): + def test_categorical(self, fp, temp_file): df = pd.DataFrame({"a": pd.Categorical(list("abc"))}) - check_round_trip(df, fp) + check_round_trip(df, temp_file, fp) def test_filter_row_groups(self, fp, temp_file): d = {"a": list(range(3))} @@ -1277,10 +1307,11 @@ def test_filter_row_groups(self, fp, temp_file): assert len(result) == 1 @pytest.mark.single_cpu - def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, fp): + def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, fp, temp_file): # GH #19134 check_round_trip( df_compat, + temp_file, fp, path=f"s3://{s3_bucket_public.name}/fastparquet.parquet", read_kwargs={"storage_options": s3so}, @@ -1354,20 +1385,22 @@ def test_error_on_using_partition_cols_and_partition_on( partition_cols=partition_cols, ) - def test_empty_dataframe(self, fp): + def test_empty_dataframe(self, fp, temp_file): # GH #27339 df = pd.DataFrame() expected = df.copy() - check_round_trip(df, fp, expected=expected) + check_round_trip(df, temp_file, fp, expected=expected) - def test_timezone_aware_index(self, fp, 
timezone_aware_date_list, request): + def test_timezone_aware_index( + self, fp, timezone_aware_date_list, request, temp_file + ): idx = 5 * [timezone_aware_date_list] df = pd.DataFrame(index=idx, data={"index_as_col": idx}) expected = df.copy() expected.index.name = "index" - check_round_trip(df, fp, expected=expected) + check_round_trip(df, temp_file, fp, expected=expected) def test_close_file_handle_on_read_error(self, temp_file): path = str(temp_file) From 52ee1a8400a038e9cb39ba66c7e2d27b47140846 Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Wed, 1 Oct 2025 10:02:03 +0000 Subject: [PATCH 06/10] fix str type cast in temp file in frame methods to csv --- pandas/tests/frame/methods/test_to_csv.py | 57 +++++++++++------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 13f13c70ff748..f9b7d9c741c79 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -261,15 +261,13 @@ def _return_result_expected( kwargs["index_col"] = list(range(rnlvl)) kwargs["header"] = list(range(cnlvl)) - path = str(temp_file) - df.to_csv(path, encoding="utf8", chunksize=chunksize) - recons = self.read_csv(path, **kwargs) + df.to_csv(temp_file, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(temp_file, **kwargs) else: kwargs["header"] = 0 - path = str(temp_file) - df.to_csv(path, encoding="utf8", chunksize=chunksize) - recons = self.read_csv(path, **kwargs) + df.to_csv(temp_file, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(temp_file, **kwargs) def _to_uni(x): if not isinstance(x, str): @@ -624,8 +622,8 @@ def _make_frame(names=None): [[f"i-{i}" for i in range(5)] for _ in range(2)], names=list("ab") ), ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) + df.to_csv(temp_file) + result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=[0, 1]) tm.assert_frame_equal(df, result) # column is mi @@ -635,8 +633,8 @@ def _make_frame(names=None): [[f"i-{i}" for i in range(3)] for _ in range(4)], names=list("abcd") ), ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=0) + df.to_csv(temp_file) + result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=0) tm.assert_frame_equal(df, result) # dup column names? 
@@ -649,52 +647,52 @@ def _make_frame(names=None): [[f"i-{i}" for i in range(5)] for _ in range(3)], names=list("abc") ), ) - df.to_csv(path) - result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) + df.to_csv(temp_file) + result = read_csv(temp_file, header=[0, 1, 2, 3], index_col=[0, 1, 2]) tm.assert_frame_equal(df, result) # writing with no index df = _make_frame() - df.to_csv(path, index=False) - result = read_csv(path, header=[0, 1]) + df.to_csv(temp_file, index=False) + result = read_csv(temp_file, header=[0, 1]) tm.assert_frame_equal(df, result) # we lose the names here df = _make_frame(True) - df.to_csv(path, index=False) - result = read_csv(path, header=[0, 1]) + df.to_csv(temp_file, index=False) + result = read_csv(temp_file, header=[0, 1]) assert com.all_none(*result.columns.names) result.columns.names = df.columns.names tm.assert_frame_equal(df, result) # whatsnew example df = _make_frame() - df.to_csv(path) - result = read_csv(path, header=[0, 1], index_col=[0]) + df.to_csv(temp_file) + result = read_csv(temp_file, header=[0, 1], index_col=[0]) tm.assert_frame_equal(df, result) df = _make_frame(True) - df.to_csv(path) - result = read_csv(path, header=[0, 1], index_col=[0]) + df.to_csv(temp_file) + result = read_csv(temp_file, header=[0, 1], index_col=[0]) tm.assert_frame_equal(df, result) # invalid options df = _make_frame(True) - df.to_csv(path) + df.to_csv(temp_file) for i in [6, 7]: msg = f"len of {i}, but only 5 lines in file" with pytest.raises(ParserError, match=msg): - read_csv(path, header=list(range(i)), index_col=0) + read_csv(temp_file, header=list(range(i)), index_col=0) # write with cols msg = "cannot specify cols with a MultiIndex" with pytest.raises(TypeError, match=msg): - df.to_csv(path, columns=["foo", "bar"]) + df.to_csv(temp_file, columns=["foo", "bar"]) # empty - tsframe[:0].to_csv(path) - recons = self.read_csv(path) + tsframe[:0].to_csv(temp_file) + recons = self.read_csv(temp_file) exp = tsframe[:0] exp.index = [] @@ -812,8 +810,8 @@ def test_to_csv_dups_cols(self, temp_file): df.columns = [0, 1, 2] * 5 - df.to_csv(path) - result = read_csv(path, index_col=0) + df.to_csv(temp_file) + result = read_csv(temp_file, index_col=0) # date cols for i in ["0.4", "1.4", "2.4"]: @@ -1201,9 +1199,8 @@ def test_to_csv_with_dst_transitions_with_pickle(self, start, end, temp_file): idx._data._freq = None # otherwise there is trouble on unpickle df = DataFrame({"values": 1, "idx": idx}, index=idx) - path = str(temp_file) - df.to_csv(path, index=True) - result = read_csv(path, index_col=0) + df.to_csv(temp_file, index=True) + result = read_csv(temp_file, index_col=0) result.index = ( to_datetime(result.index, utc=True).tz_convert("Europe/Paris").as_unit("ns") ) From c9ee1c1cbd9bcb9b2b45cc0f85d2026f805b7a60 Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Wed, 1 Oct 2025 10:58:00 +0000 Subject: [PATCH 07/10] fix str type cast for temp file in io format to csv --- pandas/tests/io/formats/test_to_csv.py | 103 +++++++++++-------------- 1 file changed, 45 insertions(+), 58 deletions(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index f716cde8c3ad8..e6bfe40c5433f 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -30,9 +30,8 @@ def test_to_csv_with_single_column(self, temp_file): "" 1.0 """ - path = str(temp_file) - df1.to_csv(path, header=None, index=None) - with open(path, encoding="utf-8") as f: + df1.to_csv(temp_file, header=None, index=None) + with 
open(temp_file, encoding="utf-8") as f: assert f.read() == expected1 df2 = DataFrame([1, None]) @@ -40,18 +39,17 @@ def test_to_csv_with_single_column(self, temp_file): 1.0 "" """ - df2.to_csv(path, header=None, index=None) - with open(path, encoding="utf-8") as f: + df2.to_csv(temp_file, header=None, index=None) + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected2 def test_to_csv_default_encoding(self, temp_file): # GH17097 df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]}) - path = str(temp_file) # the default to_csv encoding is uft-8. - df.to_csv(path) - tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + df.to_csv(temp_file) + tm.assert_frame_equal(pd.read_csv(temp_file, index_col=0), df) def test_to_csv_quotechar(self, temp_file): df = DataFrame({"col": [1, 2]}) @@ -61,9 +59,8 @@ def test_to_csv_quotechar(self, temp_file): "1","2" """ - path = str(temp_file) - df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, quoting=1) # 1=QUOTE_ALL + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected expected = """\ @@ -72,13 +69,12 @@ def test_to_csv_quotechar(self, temp_file): $1$,$2$ """ - path = str(temp_file) - df.to_csv(path, quoting=1, quotechar="$") - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, quoting=1, quotechar="$") + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected with pytest.raises(TypeError, match="quotechar"): - df.to_csv(path, quoting=1, quotechar=None) + df.to_csv(temp_file, quoting=1, quotechar=None) def test_to_csv_doublequote(self, temp_file): df = DataFrame({"col": ['a"a', '"bb"']}) @@ -88,13 +84,12 @@ def test_to_csv_doublequote(self, temp_file): "1","""bb""" ''' - path = str(temp_file) - df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, quoting=1, doublequote=True) # QUOTE_ALL + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected with pytest.raises(Error, match="escapechar"): - df.to_csv(path, doublequote=False) # no escapechar set + df.to_csv(temp_file, doublequote=False) # no escapechar set def test_to_csv_escapechar(self, temp_file): df = DataFrame({"col": ['a"a', '"bb"']}) @@ -104,9 +99,8 @@ def test_to_csv_escapechar(self, temp_file): "1","\\"bb\\"" """ - path = str(temp_file) - df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, quoting=1, doublequote=False, escapechar="\\") + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected df = DataFrame({"col": ["a,a", ",bb,"]}) @@ -116,8 +110,8 @@ def test_to_csv_escapechar(self, temp_file): 1,\\,bb\\, """ - df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, quoting=3, escapechar="\\") # QUOTE_NONE + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected def test_csv_to_string(self): @@ -395,9 +389,8 @@ def test_to_csv_string_array_ascii(self, temp_file): 0,"['foo', 'bar']" 1,"['baz', 'qux']" """ - path = str(temp_file) - df.to_csv(path, encoding="ascii") - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, encoding="ascii") + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected_ascii def test_to_csv_string_array_utf8(self, temp_file): @@ -409,16 +402,14 @@ def test_to_csv_string_array_utf8(self, temp_file): 0,"['foo', 'bar']" 1,"['baz', 'qux']" """ - path = 
str(temp_file) - df.to_csv(path, encoding="utf-8") - with open(path, encoding="utf-8") as f: + df.to_csv(temp_file, encoding="utf-8") + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected_utf8 def test_to_csv_string_with_lf(self, temp_file): # GH 20353 data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} df = DataFrame(data) - path = str(temp_file) # case 1: The default line terminator(=os.linesep)(PR 21406) os_linesep = os.linesep.encode("utf-8") @@ -432,28 +423,27 @@ def test_to_csv_string_with_lf(self, temp_file): + b'3,"g\nh\n\ni"' + os_linesep ) - df.to_csv(path, index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_noarg # case 2: LF as line terminator expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, lineterminator="\n", index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_lf # case 3: CRLF as line terminator # 'lineterminator' should not change inner element expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' - df.to_csv(path, lineterminator="\r\n", index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, lineterminator="\r\n", index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_crlf def test_to_csv_string_with_crlf(self, temp_file): # GH 20353 data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} df = DataFrame(data) - path = str(temp_file) # case 1: The default line terminator(=os.linesep)(PR 21406) os_linesep = os.linesep.encode("utf-8") expected_noarg = ( @@ -466,14 +456,14 @@ def test_to_csv_string_with_crlf(self, temp_file): + b'3,"g\r\nh\r\n\r\ni"' + os_linesep ) - df.to_csv(path, index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_noarg # case 2: LF as line terminator expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' - df.to_csv(path, lineterminator="\n", index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, lineterminator="\n", index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_lf # case 3: CRLF as line terminator @@ -481,8 +471,8 @@ def test_to_csv_string_with_crlf(self, temp_file): expected_crlf = ( b'int,str_crlf\r\n1,abc\r\n2,"d\r\nef"\r\n3,"g\r\nh\r\n\r\ni"\r\n' ) - df.to_csv(path, lineterminator="\r\n", index=False) - with open(path, "rb") as f: + df.to_csv(temp_file, lineterminator="\r\n", index=False) + with open(temp_file, "rb") as f: assert f.read() == expected_crlf def test_to_csv_stdout_file(self, capsys): @@ -514,11 +504,10 @@ def test_to_csv_write_to_open_file(self, temp_file): y z """ - path = str(temp_file) - with open(path, "w", encoding="utf-8") as f: + with open(temp_file, "w", encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) - with open(path, encoding="utf-8") as f: + with open(temp_file, encoding="utf-8") as f: assert f.read() == expected def test_to_csv_write_to_open_file_with_newline_py3(self, temp_file): @@ -528,12 +517,11 @@ def test_to_csv_write_to_open_file_with_newline_py3(self, temp_file): expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) - path = str(temp_file) - with open(path, "w", newline="", encoding="utf-8") as f: + with open(temp_file, "w", newline="", 
encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) - with open(path, "rb") as f: + with open(temp_file, "rb") as f: assert f.read() == bytes(expected, "utf-8") @pytest.mark.parametrize("to_infer", [True, False]) @@ -664,8 +652,7 @@ def test_to_csv_errors(self, errors, temp_file): data = ["\ud800foo"] ser = pd.Series(data, index=Index(data, dtype=object), dtype=object) - path = str(temp_file) - ser.to_csv(path, errors=errors) + ser.to_csv(temp_file, errors=errors) # No use in reading back the data as it is not the same anymore # due to the error handling @@ -683,10 +670,9 @@ def test_to_csv_binary_handle(self, mode, temp_file): index=Index([f"i-{i}" for i in range(30)]), ) - path = str(temp_file) - with open(path, mode="w+b") as handle: + with open(temp_file, mode="w+b") as handle: df.to_csv(handle, mode=mode) - tm.assert_frame_equal(df, pd.read_csv(path, index_col=0)) + tm.assert_frame_equal(df, pd.read_csv(temp_file, index_col=0)) @pytest.mark.parametrize("mode", ["wb", "w"]) def test_to_csv_encoding_binary_handle(self, mode, temp_file): @@ -721,9 +707,10 @@ def test_to_csv_iterative_compression_name(compression, temp_file): columns=Index(list("ABCD")), index=Index([f"i-{i}" for i in range(30)]), ) - path = str(temp_file) - df.to_csv(path, compression=compression, chunksize=1) - tm.assert_frame_equal(pd.read_csv(path, compression=compression, index_col=0), df) + df.to_csv(temp_file, compression=compression, chunksize=1) + tm.assert_frame_equal( + pd.read_csv(temp_file, compression=compression, index_col=0), df + ) def test_to_csv_iterative_compression_buffer(compression): From 4dfad6754456084bb9983649b57598823a3fb719 Mon Sep 17 00:00:00 2001 From: Harshit Pande Date: Wed, 1 Oct 2025 11:45:59 +0000 Subject: [PATCH 08/10] fix str type cast in io test parquet --- pandas/tests/io/test_parquet.py | 112 +++++++++++++------------------- 1 file changed, 46 insertions(+), 66 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 79660394e1197..53f5a79625ee3 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -224,7 +224,7 @@ def compare(repeat): ) if path is None: - path = str(temp_file) + path = temp_file compare(repeat) else: compare(repeat) @@ -342,27 +342,25 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp, temp_file): # cross-compat with differing reading/writing engines df = df_cross_compat - path = str(temp_file) - df.to_parquet(path, engine=pa, compression=None) + df.to_parquet(temp_file, engine=pa, compression=None) - result = read_parquet(path, engine=fp) + result = read_parquet(temp_file, engine=fp) tm.assert_frame_equal(result, df) - result = read_parquet(path, engine=fp, columns=["a", "d"]) + result = read_parquet(temp_file, engine=fp, columns=["a", "d"]) tm.assert_frame_equal(result, df[["a", "d"]]) def test_cross_engine_fp_pa(df_cross_compat, pa, fp, temp_file): # cross-compat with differing reading/writing engines df = df_cross_compat - path = str(temp_file) - df.to_parquet(path, engine=fp, compression=None) + df.to_parquet(temp_file, engine=fp, compression=None) - result = read_parquet(path, engine=pa) + result = read_parquet(temp_file, engine=pa) tm.assert_frame_equal(result, df) - result = read_parquet(path, engine=pa, columns=["a", "d"]) + result = read_parquet(temp_file, engine=pa, columns=["a", "d"]) tm.assert_frame_equal(result, df[["a", "d"]]) @@ -388,8 +386,7 @@ def test_error(self, engine, temp_file): np.array([1, 2, 3]), ]: msg = "to_parquet only 
supports IO with DataFrames" - path = str(temp_file) - self.check_error_on_write(obj, engine, ValueError, msg, path) + self.check_error_on_write(obj, engine, ValueError, msg, temp_file) def test_columns_dtypes(self, engine, temp_file): df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) @@ -535,10 +532,9 @@ def test_write_column_multiindex(self, engine, temp_file): np.random.default_rng(2).standard_normal((4, 3)), columns=mi_columns ) - path = str(temp_file) if engine == "fastparquet": self.check_error_on_write( - df, engine, TypeError, "Column name must be a string", path + df, engine, TypeError, "Column name must be a string", temp_file ) elif engine == "pyarrow": check_round_trip(df, temp_file, engine) @@ -555,9 +551,8 @@ def test_write_column_multiindex_nonstring(self, engine, temp_file): np.random.default_rng(2).standard_normal((8, 8)), columns=arrays ) df.columns.names = ["Level1", "Level2"] - path = str(temp_file) if engine == "fastparquet": - self.check_error_on_write(df, engine, ValueError, "Column name", path) + self.check_error_on_write(df, engine, ValueError, "Column name", temp_file) elif engine == "pyarrow": check_round_trip(df, temp_file, engine) @@ -601,10 +596,9 @@ def test_write_column_index_nonstring(self, engine, temp_file): np.random.default_rng(2).standard_normal((8, 4)), columns=arrays ) df.columns.name = "NonStringCol" - path = str(temp_file) if engine == "fastparquet": self.check_error_on_write( - df, engine, TypeError, "Column name must be a string", path + df, engine, TypeError, "Column name must be a string", temp_file ) else: check_round_trip(df, temp_file, engine) @@ -633,11 +627,10 @@ def test_dtype_backend(self, engine, request, temp_file): "g": pyarrow.array([1.0, 2.0, 3.0, None], "float64"), } ) - path = str(temp_file) # write manually with pyarrow to write integers - pq.write_table(table, path) - result1 = read_parquet(path, engine=engine) - result2 = read_parquet(path, engine=engine, dtype_backend="numpy_nullable") + pq.write_table(table, temp_file) + result1 = read_parquet(temp_file, engine=engine) + result2 = read_parquet(temp_file, engine=engine, dtype_backend="numpy_nullable") assert result1["a"].dtype == np.dtype("float64") expected = pd.DataFrame( @@ -756,9 +749,8 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): def test_duplicate_columns(self, pa, temp_file): # not currently able to handle duplicate columns df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() - path = str(temp_file) self.check_error_on_write( - df, pa, ValueError, "Duplicate column names found", path + df, pa, ValueError, "Duplicate column names found", temp_file ) def test_timedelta(self, pa, temp_file): @@ -770,17 +762,17 @@ def test_unsupported(self, pa, temp_file): df = pd.DataFrame({"a": ["a", 1, 2.0]}) # pyarrow 0.11 raises ArrowTypeError # older pyarrows raise ArrowInvalid - path = str(temp_file) - self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path) + self.check_external_error_on_write(df, pa, pyarrow.ArrowException, temp_file) def test_unsupported_float16(self, pa, temp_file): # #44847, #44914 # Not able to write float 16 column using pyarrow. 
data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) - path = str(temp_file) if pa_version_under15p0: - self.check_external_error_on_write(df, pa, pyarrow.ArrowException, path) + self.check_external_error_on_write( + df, pa, pyarrow.ArrowException, temp_file + ) else: check_round_trip(df, temp_file, pa) @@ -800,8 +792,7 @@ def test_unsupported_float16_cleanup(self, pa, path_type, temp_file): data = np.arange(2, 10, dtype=np.float16) df = pd.DataFrame(data=data, columns=["fp16"]) - path_str = str(temp_file) - path = path_type(path_str) + path = path_type(temp_file) with tm.external_error_raised(pyarrow.ArrowException): df.to_parquet(path=path, engine=pa) assert not os.path.isfile(path) @@ -1046,9 +1037,8 @@ def test_filter_row_groups(self, pa, temp_file): # https://github.com/pandas-dev/pandas/issues/26551 pytest.importorskip("pyarrow") df = pd.DataFrame({"a": list(range(3))}) - path = str(temp_file) - df.to_parquet(path, engine=pa) - result = read_parquet(path, pa, filters=[("a", "==", 0)]) + df.to_parquet(temp_file, engine=pa) + result = read_parquet(temp_file, pa, filters=[("a", "==", 0)]) assert len(result) == 1 @pytest.mark.filterwarnings("ignore:make_block is deprecated:DeprecationWarning") @@ -1251,29 +1241,27 @@ def test_columns_dtypes_invalid(self, fp, temp_file): err = TypeError msg = "Column name must be a string" - path = str(temp_file) # numeric df.columns = [0, 1] - self.check_error_on_write(df, fp, err, msg, path) + self.check_error_on_write(df, fp, err, msg, temp_file) # bytes df.columns = [b"foo", b"bar"] - self.check_error_on_write(df, fp, err, msg, path) + self.check_error_on_write(df, fp, err, msg, temp_file) # python object df.columns = [ datetime.datetime(2011, 1, 1, 0, 0), datetime.datetime(2011, 1, 1, 1, 1), ] - self.check_error_on_write(df, fp, err, msg, path) + self.check_error_on_write(df, fp, err, msg, temp_file) def test_duplicate_columns(self, fp, temp_file): # not currently able to handle duplicate columns df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() msg = "Cannot create parquet dataset with duplicate column names" - path = str(temp_file) - self.check_error_on_write(df, fp, ValueError, msg, path) + self.check_error_on_write(df, fp, ValueError, msg, temp_file) def test_bool_with_none(self, fp, request, temp_file): df = pd.DataFrame({"a": [True, None, False]}) @@ -1286,13 +1274,12 @@ def test_unsupported(self, fp, temp_file): # period df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) # error from fastparquet -> don't check exact error message - path = str(temp_file) - self.check_error_on_write(df, fp, ValueError, None, path) + self.check_error_on_write(df, fp, ValueError, None, temp_file) # mixed df = pd.DataFrame({"a": ["a", 1, 2.0]}) msg = "Can't infer object conversion type" - self.check_error_on_write(df, fp, ValueError, msg, path) + self.check_error_on_write(df, fp, ValueError, msg, temp_file) def test_categorical(self, fp, temp_file): df = pd.DataFrame({"a": pd.Categorical(list("abc"))}) @@ -1301,9 +1288,8 @@ def test_categorical(self, fp, temp_file): def test_filter_row_groups(self, fp, temp_file): d = {"a": list(range(3))} df = pd.DataFrame(d) - path = str(temp_file) - df.to_parquet(path, engine=fp, compression=None, row_group_offsets=1) - result = read_parquet(path, fp, filters=[("a", "==", 0)]) + df.to_parquet(temp_file, engine=fp, compression=None, row_group_offsets=1) + result = read_parquet(temp_file, fp, filters=[("a", "==", 0)]) assert len(result) == 1 
@pytest.mark.single_cpu @@ -1403,73 +1389,68 @@ def test_timezone_aware_index( check_round_trip(df, temp_file, fp, expected=expected) def test_close_file_handle_on_read_error(self, temp_file): - path = str(temp_file) - pathlib.Path(path).write_bytes(b"breakit") + pathlib.Path(temp_file).write_bytes(b"breakit") with tm.external_error_raised(Exception): # Not important which exception - read_parquet(path, engine="fastparquet") + read_parquet(temp_file, engine="fastparquet") # The next line raises an error on Windows if the file is still open - pathlib.Path(path).unlink(missing_ok=False) + pathlib.Path(temp_file).unlink(missing_ok=False) def test_bytes_file_name(self, engine, temp_file): # GH#48944 df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - path = str(temp_file) - with open(path.encode(), "wb") as f: + with open(temp_file, "wb") as f: df.to_parquet(f) - result = read_parquet(path, engine=engine) + result = read_parquet(temp_file, engine=engine) tm.assert_frame_equal(result, df) def test_filesystem_notimplemented(self, temp_file): pytest.importorskip("fastparquet") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - path = str(temp_file) with pytest.raises(NotImplementedError, match="filesystem is not implemented"): - df.to_parquet(path, engine="fastparquet", filesystem="foo") + df.to_parquet(temp_file, engine="fastparquet", filesystem="foo") - pathlib.Path(path).write_bytes(b"foo") + pathlib.Path(temp_file).write_bytes(b"foo") with pytest.raises(NotImplementedError, match="filesystem is not implemented"): - read_parquet(path, engine="fastparquet", filesystem="foo") + read_parquet(temp_file, engine="fastparquet", filesystem="foo") def test_invalid_filesystem(self, temp_file): pytest.importorskip("pyarrow") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - path = str(temp_file) with pytest.raises( ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" ): - df.to_parquet(path, engine="pyarrow", filesystem="foo") + df.to_parquet(temp_file, engine="pyarrow", filesystem="foo") - pathlib.Path(path).write_bytes(b"foo") + pathlib.Path(temp_file).write_bytes(b"foo") with pytest.raises( ValueError, match="filesystem must be a pyarrow or fsspec FileSystem" ): - read_parquet(path, engine="pyarrow", filesystem="foo") + read_parquet(temp_file, engine="pyarrow", filesystem="foo") def test_unsupported_pa_filesystem_storage_options(self, temp_file): pa_fs = pytest.importorskip("pyarrow.fs") df = pd.DataFrame(data={"A": [0, 1], "B": [1, 0]}) - path = str(temp_file) with pytest.raises( NotImplementedError, match="storage_options not supported with a pyarrow FileSystem.", ): df.to_parquet( - path, + temp_file, engine="pyarrow", filesystem=pa_fs.LocalFileSystem(), storage_options={"foo": "bar"}, ) - pathlib.Path(path).write_bytes(b"foo") + pathlib.Path(temp_file).write_bytes(b"foo") with pytest.raises( NotImplementedError, match="storage_options not supported with a pyarrow FileSystem.", ): read_parquet( - path, + temp_file, engine="pyarrow", filesystem=pa_fs.LocalFileSystem(), storage_options={"foo": "bar"}, @@ -1481,7 +1462,6 @@ def test_invalid_dtype_backend(self, engine, temp_file): "'pyarrow' are allowed." 
         )
         df = pd.DataFrame({"int": list(range(1, 4))})
-        path = str(temp_file)
-        df.to_parquet(path)
+        df.to_parquet(temp_file)
         with pytest.raises(ValueError, match=msg):
-            read_parquet(path, dtype_backend="numpy")
+            read_parquet(temp_file, dtype_backend="numpy")

From da89e1b1640f84f64721b45b45e1550c8441fdf3 Mon Sep 17 00:00:00 2001
From: Harshit Pande
Date: Wed, 1 Oct 2025 12:02:38 +0000
Subject: [PATCH 09/10] clean up remaining str(temp_file) casts in the to_csv
 format tests

---
 pandas/tests/io/formats/test_to_csv.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index e6bfe40c5433f..e184c33b0d979 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -567,9 +567,8 @@ def test_to_csv_compression_dict_no_method_raises(self, temp_file):
         compression = {"some_option": True}
         msg = "must have key 'method'"
 
-        path = str(temp_file)
         with pytest.raises(ValueError, match=msg):
-            df.to_csv(path, compression=compression)
+            df.to_csv(temp_file, compression=compression)
 
     @pytest.mark.parametrize("compression", ["zip", "infer"])
     @pytest.mark.parametrize("archive_name", ["test_to_csv.csv", "test_to_csv.zip"])
@@ -692,8 +691,7 @@ def test_to_csv_encoding_binary_handle(self, mode, temp_file):
         assert buffer.getvalue().startswith(content)
 
         # example from GH 13068
-        path = str(temp_file)
-        with open(path, "w+b") as handle:
+        with open(temp_file, "w+b") as handle:
             DataFrame().to_csv(handle, mode=mode, encoding="utf-8-sig")
             handle.seek(0)

From c7af43a5b7fe35fdcb70bead9f874144ff45ae99 Mon Sep 17 00:00:00 2001
From: Harshit Pande
Date: Wed, 1 Oct 2025 20:17:43 +0530
Subject: [PATCH 10/10] rename path to path_ext where a compression extension
 is appended

---
 pandas/tests/io/formats/test_to_csv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index e184c33b0d979..f70875172ccc8 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -542,9 +542,9 @@ def test_to_csv_compression(
         to_compression = "infer" if to_infer else compression
         read_compression = "infer" if read_infer else compression
 
-        path = str(temp_file) + "." + compression_to_extension[compression]
-        df.to_csv(path, compression=to_compression)
-        result = pd.read_csv(path, index_col=0, compression=read_compression)
+        path_ext = str(temp_file) + "." + compression_to_extension[compression]
+        df.to_csv(path_ext, compression=to_compression)
+        result = pd.read_csv(path_ext, index_col=0, compression=read_compression)
         tm.assert_frame_equal(result, df)
 
     def test_to_csv_compression_dict(self, compression_only, temp_file):
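
Note on the fixture contract the series above assumes: temp_file is provided by
the shared pandas test conftest and, unlike the tm.ensure_clean context manager
it replaces, hands each test an isolated pathlib.Path that pytest cleans up
automatically. The sketch below is a minimal stand-in for that fixture plus a
hypothetical test, written only to illustrate the contract; the real conftest
definition may differ. Because the fixture yields a Path, and to_csv, read_csv,
to_parquet, and read_parquet all accept path-like objects, the str(temp_file)
casts removed in patches 06 through 09 were never required.

    import uuid

    import pandas as pd
    import pytest


    @pytest.fixture
    def temp_file(tmp_path):
        # tmp_path is a per-test directory that pytest creates and removes,
        # so no explicit teardown (formerly handled by tm.ensure_clean) is
        # needed; a uuid-based name avoids collisions within a single test.
        return tmp_path / str(uuid.uuid4())


    def test_round_trip_sketch(temp_file):
        # Path objects are accepted directly by the pandas IO APIs, so no
        # str() cast is needed on either the write or the read side.
        df = pd.DataFrame({"a": [1, 2, 3]})
        df.to_csv(temp_file)
        result = pd.read_csv(temp_file, index_col=0)
        pd.testing.assert_frame_equal(result, df)

The one place a string is still built is test_to_csv_compression (patch 10),
where an extension must be appended so that compression="infer" can deduce the
codec from the file name; concatenating onto str(temp_file) is the simplest way
to derive that sibling path, and the path_ext rename makes the intent explicit.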