11import gzip
22import io
33import os
4- from pathlib import Path
54import subprocess
65import sys
76import tarfile
3130 ],
3231)
3332@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
34- def test_compression_size (obj , method , compression_only ):
33+ def test_compression_size (obj , method , compression_only , temp_file ):
3534 if compression_only == "tar" :
3635 compression_only = {"method" : "tar" , "mode" : "w:gz" }
3736
38- with tm . ensure_clean () as path :
39- getattr (obj , method )(path , compression = compression_only )
40- compressed_size = os .path .getsize (path )
41- getattr (obj , method )(path , compression = None )
42- uncompressed_size = os .path .getsize (path )
43- assert uncompressed_size > compressed_size
37+ path = temp_file
38+ getattr (obj , method )(path , compression = compression_only )
39+ compressed_size = os .path .getsize (path )
40+ getattr (obj , method )(path , compression = None )
41+ uncompressed_size = os .path .getsize (path )
42+ assert uncompressed_size > compressed_size
4443
4544
4645@pytest .mark .parametrize (
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
5453 ],
5554)
5655@pytest .mark .parametrize ("method" , ["to_csv" , "to_json" ])
57- def test_compression_size_fh (obj , method , compression_only ):
58- with tm .ensure_clean () as path :
59- with icom .get_handle (
60- path ,
61- "w:gz" if compression_only == "tar" else "w" ,
62- compression = compression_only ,
63- ) as handles :
64- getattr (obj , method )(handles .handle )
65- assert not handles .handle .closed
66- compressed_size = os .path .getsize (path )
67- with tm .ensure_clean () as path :
68- with icom .get_handle (path , "w" , compression = None ) as handles :
69- getattr (obj , method )(handles .handle )
70- assert not handles .handle .closed
71- uncompressed_size = os .path .getsize (path )
72- assert uncompressed_size > compressed_size
56+ def test_compression_size_fh (obj , method , compression_only , temp_file ):
57+ path = temp_file
58+ with icom .get_handle (
59+ path ,
60+ "w:gz" if compression_only == "tar" else "w" ,
61+ compression = compression_only ,
62+ ) as handles :
63+ getattr (obj , method )(handles .handle )
64+ assert not handles .handle .closed
65+ compressed_size = os .path .getsize (path )
66+
67+ # Create a new temporary file for uncompressed comparison
68+ path2 = temp_file .parent / f"{ temp_file .stem } _uncompressed{ temp_file .suffix } "
69+ path2 .touch ()
70+ with icom .get_handle (path2 , "w" , compression = None ) as handles :
71+ getattr (obj , method )(handles .handle )
72+ assert not handles .handle .closed
73+ uncompressed_size = os .path .getsize (path2 )
74+ assert uncompressed_size > compressed_size
7375
7476
7577@pytest .mark .parametrize (
@@ -81,14 +83,14 @@ def test_compression_size_fh(obj, method, compression_only):
8183 ],
8284)
8385def test_dataframe_compression_defaults_to_infer (
84- write_method , write_kwargs , read_method , compression_only , compression_to_extension
86+ write_method , write_kwargs , read_method , compression_only , compression_to_extension , temp_file
8587):
8688 # GH22004
8789 input = pd .DataFrame ([[1.0 , 0 , - 4 ], [3.4 , 5 , 2 ]], columns = ["X" , "Y" , "Z" ])
8890 extension = compression_to_extension [compression_only ]
89- with tm . ensure_clean ( "compressed" + extension ) as path :
90- getattr (input , write_method )(path , ** write_kwargs )
91- output = read_method (path , compression = compression_only )
91+ path = temp_file . parent / f"compressed { extension } "
92+ getattr (input , write_method )(path , ** write_kwargs )
93+ output = read_method (path , compression = compression_only )
9294 tm .assert_frame_equal (output , input )
9395
9496
@@ -107,37 +109,38 @@ def test_series_compression_defaults_to_infer(
107109 read_kwargs ,
108110 compression_only ,
109111 compression_to_extension ,
112+ temp_file ,
110113):
111114 # GH22004
112115 input = pd .Series ([0 , 5 , - 2 , 10 ], name = "X" )
113116 extension = compression_to_extension [compression_only ]
114- with tm . ensure_clean ( "compressed" + extension ) as path :
115- getattr (input , write_method )(path , ** write_kwargs )
116- if "squeeze" in read_kwargs :
117- kwargs = read_kwargs .copy ()
118- del kwargs ["squeeze" ]
119- output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
120- "columns"
121- )
122- else :
123- output = read_method (path , compression = compression_only , ** read_kwargs )
117+ path = temp_file . parent / f"compressed { extension } "
118+ getattr (input , write_method )(path , ** write_kwargs )
119+ if "squeeze" in read_kwargs :
120+ kwargs = read_kwargs .copy ()
121+ del kwargs ["squeeze" ]
122+ output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
123+ "columns"
124+ )
125+ else :
126+ output = read_method (path , compression = compression_only , ** read_kwargs )
124127 tm .assert_series_equal (output , input , check_names = False )
125128
126129
127- def test_compression_warning (compression_only ):
130+ def test_compression_warning (compression_only , temp_file ):
128131 # Assert that passing a file object to to_csv while explicitly specifying a
129132 # compression protocol triggers a RuntimeWarning, as per GH21227.
130133 df = pd .DataFrame (
131134 100 * [[0.123456 , 0.234567 , 0.567567 ], [12.32112 , 123123.2 , 321321.2 ]],
132135 columns = ["X" , "Y" , "Z" ],
133136 )
134- with tm . ensure_clean () as path :
135- with icom .get_handle (path , "w" , compression = compression_only ) as handles :
136- with tm .assert_produces_warning (RuntimeWarning , match = "has no effect" ):
137- df .to_csv (handles .handle , compression = compression_only )
137+ path = temp_file
138+ with icom .get_handle (path , "w" , compression = compression_only ) as handles :
139+ with tm .assert_produces_warning (RuntimeWarning , match = "has no effect" ):
140+ df .to_csv (handles .handle , compression = compression_only )
138141
139142
140- def test_compression_binary (compression_only ):
143+ def test_compression_binary (compression_only , temp_file ):
141144 """
142145 Binary file handles support compression.
143146
@@ -150,13 +153,13 @@ def test_compression_binary(compression_only):
150153 )
151154
152155 # with a file
153- with tm . ensure_clean () as path :
154- with open (path , mode = "wb" ) as file :
155- df .to_csv (file , mode = "wb" , compression = compression_only )
156- file .seek (0 ) # file shouldn't be closed
157- tm .assert_frame_equal (
158- df , pd .read_csv (path , index_col = 0 , compression = compression_only )
159- )
156+ path = temp_file
157+ with open (path , mode = "wb" ) as file :
158+ df .to_csv (file , mode = "wb" , compression = compression_only )
159+ file .seek (0 ) # file shouldn't be closed
160+ tm .assert_frame_equal (
161+ df , pd .read_csv (path , index_col = 0 , compression = compression_only )
162+ )
160163
161164 # with BytesIO
162165 file = io .BytesIO ()
@@ -167,7 +170,7 @@ def test_compression_binary(compression_only):
167170 )
168171
169172
170- def test_gzip_reproducibility_file_name ():
173+ def test_gzip_reproducibility_file_name (temp_file ):
171174 """
172175 Gzip should create reproducible archives with mtime.
173176
@@ -183,13 +186,12 @@ def test_gzip_reproducibility_file_name():
183186 compression_options = {"method" : "gzip" , "mtime" : 1 }
184187
185188 # test for filename
186- with tm .ensure_clean () as path :
187- path = Path (path )
188- df .to_csv (path , compression = compression_options )
189- time .sleep (0.1 )
190- output = path .read_bytes ()
191- df .to_csv (path , compression = compression_options )
192- assert output == path .read_bytes ()
189+ path = temp_file
190+ df .to_csv (path , compression = compression_options )
191+ time .sleep (0.1 )
192+ output = path .read_bytes ()
193+ df .to_csv (path , compression = compression_options )
194+ assert output == path .read_bytes ()
193195
194196
195197def test_gzip_reproducibility_file_object ():
@@ -259,14 +261,14 @@ def test_with_missing_lzma_runtime():
259261 ],
260262)
261263@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
262- def test_gzip_compression_level (obj , method ):
264+ def test_gzip_compression_level (obj , method , temp_file ):
263265 # GH33196
264- with tm . ensure_clean () as path :
265- getattr (obj , method )(path , compression = "gzip" )
266- compressed_size_default = os .path .getsize (path )
267- getattr (obj , method )(path , compression = {"method" : "gzip" , "compresslevel" : 1 })
268- compressed_size_fast = os .path .getsize (path )
269- assert compressed_size_default < compressed_size_fast
266+ path = temp_file
267+ getattr (obj , method )(path , compression = "gzip" )
268+ compressed_size_default = os .path .getsize (path )
269+ getattr (obj , method )(path , compression = {"method" : "gzip" , "compresslevel" : 1 })
270+ compressed_size_fast = os .path .getsize (path )
271+ assert compressed_size_default < compressed_size_fast
270272
271273
272274@pytest .mark .parametrize (
@@ -280,15 +282,15 @@ def test_gzip_compression_level(obj, method):
280282 ],
281283)
282284@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
283- def test_xz_compression_level_read (obj , method ):
284- with tm . ensure_clean () as path :
285- getattr (obj , method )(path , compression = "xz" )
286- compressed_size_default = os .path .getsize (path )
287- getattr (obj , method )(path , compression = {"method" : "xz" , "preset" : 1 })
288- compressed_size_fast = os .path .getsize (path )
289- assert compressed_size_default < compressed_size_fast
290- if method == "to_csv" :
291- pd .read_csv (path , compression = "xz" )
285+ def test_xz_compression_level_read (obj , method , temp_file ):
286+ path = temp_file
287+ getattr (obj , method )(path , compression = "xz" )
288+ compressed_size_default = os .path .getsize (path )
289+ getattr (obj , method )(path , compression = {"method" : "xz" , "preset" : 1 })
290+ compressed_size_fast = os .path .getsize (path )
291+ assert compressed_size_default < compressed_size_fast
292+ if method == "to_csv" :
293+ pd .read_csv (path , compression = "xz" )
292294
293295
294296@pytest .mark .parametrize (
@@ -302,13 +304,13 @@ def test_xz_compression_level_read(obj, method):
302304 ],
303305)
304306@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
305- def test_bzip_compression_level (obj , method ):
307+ def test_bzip_compression_level (obj , method , temp_file ):
306308 """GH33196 bzip needs file size > 100k to show a size difference between
307309 compression levels, so here we just check if the call works when
308310 compression is passed as a dict.
309311 """
310- with tm . ensure_clean () as path :
311- getattr (obj , method )(path , compression = {"method" : "bz2" , "compresslevel" : 1 })
312+ path = temp_file
313+ getattr (obj , method )(path , compression = {"method" : "bz2" , "compresslevel" : 1 })
312314
313315
314316@pytest .mark .parametrize (
@@ -318,21 +320,21 @@ def test_bzip_compression_level(obj, method):
318320 (".tar" , tarfile .TarFile ),
319321 ],
320322)
321- def test_empty_archive_zip (suffix , archive ):
322- with tm . ensure_clean ( filename = suffix ) as path :
323- with archive (path , "w" ):
324- pass
325- with pytest .raises (ValueError , match = "Zero files found" ):
326- pd .read_csv (path )
323+ def test_empty_archive_zip (suffix , archive , temp_file ):
324+ path = temp_file . parent / f"archive { suffix } "
325+ with archive (path , "w" ):
326+ pass
327+ with pytest .raises (ValueError , match = "Zero files found" ):
328+ pd .read_csv (path )
327329
328330
329- def test_ambiguous_archive_zip ():
330- with tm . ensure_clean ( filename = " .zip") as path :
331- with zipfile .ZipFile (path , "w" ) as file :
332- file .writestr ("a.csv" , "foo,bar" )
333- file .writestr ("b.csv" , "foo,bar" )
334- with pytest .raises (ValueError , match = "Multiple files found in ZIP file" ):
335- pd .read_csv (path )
331+ def test_ambiguous_archive_zip (temp_file ):
332+ path = temp_file . parent / "archive .zip"
333+ with zipfile .ZipFile (path , "w" ) as file :
334+ file .writestr ("a.csv" , "foo,bar" )
335+ file .writestr ("b.csv" , "foo,bar" )
336+ with pytest .raises (ValueError , match = "Multiple files found in ZIP file" ):
337+ pd .read_csv (path )
336338
337339
338340def test_ambiguous_archive_tar (tmp_path ):
@@ -352,24 +354,24 @@ def test_ambiguous_archive_tar(tmp_path):
352354 pd .read_csv (tarpath )
353355
354356
355- def test_tar_gz_to_different_filename ():
356- with tm . ensure_clean ( filename = " .foo") as file :
357- pd .DataFrame (
358- [["1" , "2" ]],
359- columns = ["foo" , "bar" ],
360- ).to_csv (file , compression = {"method" : "tar" , "mode" : "w:gz" }, index = False )
361- with gzip .open (file ) as uncompressed :
362- with tarfile .TarFile (fileobj = uncompressed ) as archive :
363- members = archive .getmembers ()
364- assert len (members ) == 1
365- content = archive .extractfile (members [0 ]).read ().decode ("utf8" )
366-
367- if is_platform_windows ():
368- expected = "foo,bar\r \n 1,2\r \n "
369- else :
370- expected = "foo,bar\n 1,2\n "
371-
372- assert content == expected
357+ def test_tar_gz_to_different_filename (temp_file ):
358+ file = temp_file . parent / "archive .foo"
359+ pd .DataFrame (
360+ [["1" , "2" ]],
361+ columns = ["foo" , "bar" ],
362+ ).to_csv (file , compression = {"method" : "tar" , "mode" : "w:gz" }, index = False )
363+ with gzip .open (file ) as uncompressed :
364+ with tarfile .TarFile (fileobj = uncompressed ) as archive :
365+ members = archive .getmembers ()
366+ assert len (members ) == 1
367+ content = archive .extractfile (members [0 ]).read ().decode ("utf8" )
368+
369+ if is_platform_windows ():
370+ expected = "foo,bar\r \n 1,2\r \n "
371+ else :
372+ expected = "foo,bar\n 1,2\n "
373+
374+ assert content == expected
373375
374376
375377def test_tar_no_error_on_close ():
0 commit comments