
Commit 506a8bf

Merge branch 'pandas-dev:main' into fix-assert-frame-equal-na-61473
2 parents 4e5f160 + 72ba35b

File tree

12 files changed: 96 additions & 76 deletions


pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion

@@ -838,7 +838,7 @@ cdef class BaseMultiIndexCodesEngine:
             raise KeyError(key)
         try:
             indices = [1 if checknull(v) else lev.get_loc(v) + multiindex_nulls_shift
-                       for lev, v in zip(self.levels, key)]
+                       for lev, v in zip(self.levels, key, strict=True)]
         except KeyError:
             raise KeyError(key)
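
Note: most changes in this commit add an explicit strict= argument to zip calls (here and in missing.pyx, fields.pyx, offsets.pyx, timezones.pyx, and rolling.py below). A minimal plain-Python sketch of the difference: by default zip silently truncates to the shortest iterable, while strict=True (Python 3.10+) raises on a length mismatch.

    levels = ["a", "b", "c"]
    key = ("x", "y")  # one element short

    list(zip(levels, key))  # [('a', 'x'), ('b', 'y')] -- silently truncated

    # With strict=True the mismatch surfaces as an error instead:
    list(zip(levels, key, strict=True))
    # ValueError: zip() argument 2 is shorter than argument 1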

pandas/_libs/missing.pyx

Lines changed: 1 addition & 1 deletion

@@ -72,7 +72,7 @@ cpdef bint check_na_tuples_nonequal(object left, object right):
     if len(left) != len(right):
         return False
 
-    for left_element, right_element in zip(left, right):
+    for left_element, right_element in zip(left, right, strict=True):
         if left_element is C_NA and right_element is not C_NA:
             return True
         elif right_element is C_NA and left_element is not C_NA:

pandas/_libs/tslibs/fields.pyx

Lines changed: 1 addition & 1 deletion

@@ -109,7 +109,7 @@ def month_position_check(fields, weekdays) -> str | None:
         int32_t[:] months = fields["M"]
         int32_t[:] days = fields["D"]
 
-    for y, m, d, wd in zip(years, months, days, weekdays):
+    for y, m, d, wd in zip(years, months, days, weekdays, strict=True):
         if calendar_start:
             calendar_start &= d == 1
         if business_start:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 3 additions & 3 deletions

@@ -2217,7 +2217,7 @@ cdef class BusinessHour(BusinessMixin):
             # Use python string formatting to be faster than strftime
             hours = ",".join(
                 f"{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}"
-                for st, en in zip(self.start, self.end)
+                for st, en in zip(self.start, self.end, strict=True)
             )
             attrs = [f"{self._prefix}={hours}"]
         out += ": " + ", ".join(attrs)

@@ -2414,7 +2414,7 @@ cdef class BusinessHour(BusinessMixin):
         # get total business hours by sec in one business day
         businesshours = sum(
             self._get_business_hours_by_sec(st, en)
-            for st, en in zip(self.start, self.end)
+            for st, en in zip(self.start, self.end, strict=True)
         )
 
         bd, r = divmod(abs(n * 60), businesshours // 60)

@@ -5357,7 +5357,7 @@ cpdef to_offset(freq, bint is_period=False):
                 # the last element must be blank
                 raise ValueError("last element must be blank")
 
-            tups = zip(split[0::4], split[1::4], split[2::4])
+            tups = zip(split[0::4], split[1::4], split[2::4], strict=False)
             for n, (sep, stride, name) in enumerate(tups):
                 name = _warn_about_deprecated_aliases(name, is_period)
                 _validate_to_offset_alias(name, is_period)
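
Note: unlike the other call sites, to_offset passes strict=False, since the three stride-4 slices of split are not guaranteed to have equal lengths; the flag makes zip's truncating behavior explicit rather than accidental. A small illustration with hypothetical slice contents (not the actual regex output of to_offset):

    sep = ["", ""]   # hypothetical: one more element than the other slices
    stride = ["2"]
    name = ["h"]

    # strict=False keeps zip's default truncation, but now on purpose:
    list(zip(sep, stride, name, strict=False))  # [('', '2', 'h')] -- extra sep dropped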

pandas/_libs/tslibs/timezones.pyx

Lines changed: 1 addition & 1 deletion

@@ -252,7 +252,7 @@ cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
     """
     new_trans = list(tz._trans_list)
     last_std_offset = 0
-    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
+    for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx, strict=True)):
         if not tti.isdst:
             last_std_offset = tti.offset
         new_trans[i] = trans - last_std_offset

pandas/core/frame.py

Lines changed: 5 additions & 0 deletions

@@ -107,6 +107,7 @@
     is_list_like,
     is_scalar,
     is_sequence,
+    is_string_dtype,
     needs_i8_conversion,
     pandas_dtype,
 )

@@ -4454,8 +4455,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None:
             cols_droplevel = maybe_droplevels(cols, key)
             if (
                 not isinstance(cols_droplevel, MultiIndex)
+                and is_string_dtype(cols_droplevel.dtype)
                 and not cols_droplevel.any()
             ):
+                # if cols_droplevel contains only empty strings,
+                # value.reindex(cols_droplevel, axis=1) would be full of NaNs
+                # see GH#62518 and GH#61841
                 return
             if len(cols_droplevel) and not cols_droplevel.equals(value.columns):
                 value = value.reindex(cols_droplevel, axis=1)
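
Note: the early return previously keyed only on cols_droplevel.any() being falsy, which is also true for falsy non-string labels such as integer zeros, and for some dtypes (e.g. Categorical) calling .any() can raise outright; checking is_string_dtype first restricts the guard to the all-empty-strings case it was written for. A minimal sketch of the distinction:

    import pandas as pd
    from pandas.api.types import is_string_dtype

    pd.Index(["", ""]).any()                   # False: all-empty string labels
    is_string_dtype(pd.Index(["", ""]).dtype)  # True  -> guard may apply

    pd.Index([0, 0]).any()                     # False: falsy, but not strings
    is_string_dtype(pd.Index([0, 0]).dtype)    # False -> guard skipped, reindex proceeds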

pandas/core/window/rolling.py

Lines changed: 2 additions & 2 deletions

@@ -351,7 +351,7 @@ def __iter__(self) -> Iterator:
         )
         self._check_window_bounds(start, end, len(obj))
 
-        for s, e in zip(start, end):
+        for s, e in zip(start, end, strict=True):
            result = obj.iloc[slice(s, e)]
            yield result

@@ -802,7 +802,7 @@ def _apply_pairwise(
             groupby_codes = []
             groupby_levels = []
             # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
-            for gb_level_pair in map(list, zip(*gb_pairs)):
+            for gb_level_pair in map(list, zip(*gb_pairs, strict=True)):
                 labels = np.repeat(np.array(gb_level_pair), old_result_len)
                 codes, levels = factorize(labels)
                 groupby_codes.append(codes)
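
Note: zip(*gb_pairs, strict=True) transposes the list of pairs, which is exactly what the inline comment describes:

    gb_pairs = [[1, 2], [4, 5]]
    [list(col) for col in zip(*gb_pairs, strict=True)]  # [[1, 4], [2, 5]]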

pandas/tests/indexing/multiindex/test_multiindex.py

Lines changed: 21 additions & 0 deletions

@@ -271,3 +271,24 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self):
         df1["C"] = s1
         tm.assert_frame_equal(df1, df2)
         tm.assert_frame_equal(df1, df3)
+
+    def test_multiindex_assign_alignment_with_non_string_dtype(self):
+        # GH 62518
+        columns = MultiIndex.from_arrays(
+            [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])]
+        )
+
+        meta = DataFrame(columns=columns, dtype=object)
+        meta["z"] = meta["z"].astype("int64")
+
+        result = DataFrame(
+            data={
+                ("a", 1): Series([], dtype=object),
+                ("a", 2): Series([], dtype=object),
+                ("z", 1): Series([], dtype="int64"),
+                ("z", 2): Series([], dtype="int64"),
+            },
+            columns=columns,
+        )
+
+        tm.assert_frame_equal(meta, result)

pandas/tests/io/json/test_compression.py

Lines changed: 43 additions & 46 deletions

@@ -12,22 +12,21 @@
 import pandas._testing as tm
 
 
-def test_compression_roundtrip(compression):
+def test_compression_roundtrip(compression, temp_file):
     df = pd.DataFrame(
         [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
         index=["A", "B"],
         columns=["X", "Y", "Z"],
     )
 
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        tm.assert_frame_equal(df, pd.read_json(path, compression=compression))
+    df.to_json(temp_file, compression=compression)
+    tm.assert_frame_equal(df, pd.read_json(temp_file, compression=compression))
 
-        # explicitly ensure file was compressed.
-        with tm.decompress_file(path, compression) as fh:
-            result = fh.read().decode("utf8")
-        data = StringIO(result)
-        tm.assert_frame_equal(df, pd.read_json(data))
+    # explicitly ensure file was compressed.
+    with tm.decompress_file(temp_file, compression) as fh:
+        result = fh.read().decode("utf8")
+    data = StringIO(result)
+    tm.assert_frame_equal(df, pd.read_json(data))
 
 
 def test_read_zipped_json(datapath):

@@ -43,15 +42,14 @@ def test_read_zipped_json(datapath):
 @td.skip_if_not_us_locale
 @pytest.mark.single_cpu
 @pytest.mark.network
-def test_with_s3_url(compression, s3_bucket_public, s3so):
+def test_with_s3_url(compression, s3_bucket_public, s3so, temp_file):
     # Bucket created in tests/io/conftest.py
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
 
     key = f"{uuid.uuid4()}.json"
-    with tm.ensure_clean() as path:
-        df.to_json(path, compression=compression)
-        with open(path, "rb") as f:
-            s3_bucket_public.put_object(Key=key, Body=f)
+    df.to_json(temp_file, compression=compression)
+    with open(temp_file, "rb") as f:
+        s3_bucket_public.put_object(Key=key, Body=f)
 
     roundtripped_df = pd.read_json(
         f"s3://{s3_bucket_public.name}/{key}",

@@ -61,39 +59,35 @@
     tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_lines_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
-        roundtripped_df = pd.read_json(path, lines=True, compression=compression)
-        tm.assert_frame_equal(df, roundtripped_df)
+def test_lines_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)
+    roundtripped_df = pd.read_json(temp_file, lines=True, compression=compression)
+    tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_chunksize_with_compression(compression):
-    with tm.ensure_clean() as path:
-        df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
-        df.to_json(path, orient="records", lines=True, compression=compression)
+def test_chunksize_with_compression(compression, temp_file):
+    df = pd.read_json(StringIO('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}'))
+    df.to_json(temp_file, orient="records", lines=True, compression=compression)
 
-        with pd.read_json(
-            path, lines=True, chunksize=1, compression=compression
-        ) as res:
-            roundtripped_df = pd.concat(res)
-        tm.assert_frame_equal(df, roundtripped_df)
+    with pd.read_json(
+        temp_file, lines=True, chunksize=1, compression=compression
+    ) as res:
+        roundtripped_df = pd.concat(res)
+    tm.assert_frame_equal(df, roundtripped_df)
 
 
-def test_write_unsupported_compression_type():
+def test_write_unsupported_compression_type(temp_file):
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            df.to_json(path, compression="unsupported")
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        df.to_json(temp_file, compression="unsupported")
 
 
-def test_read_unsupported_compression_type():
-    with tm.ensure_clean() as path:
-        msg = "Unrecognized compression type: unsupported"
-        with pytest.raises(ValueError, match=msg):
-            pd.read_json(path, compression="unsupported")
+def test_read_unsupported_compression_type(temp_file):
+    msg = "Unrecognized compression type: unsupported"
+    with pytest.raises(ValueError, match=msg):
+        pd.read_json(temp_file, compression="unsupported")
 
 
 @pytest.mark.parametrize(

@@ -102,25 +96,28 @@ def test_read_unsupported_compression_type():
 @pytest.mark.parametrize("to_infer", [True, False])
 @pytest.mark.parametrize("read_infer", [True, False])
 def test_to_json_compression(
-    compression_only, read_infer, to_infer, compression_to_extension, infer_string
+    compression_only,
+    read_infer,
+    to_infer,
+    compression_to_extension,
+    infer_string,
+    tmp_path,
 ):
     with pd.option_context("future.infer_string", infer_string):
         # see gh-15008
         compression = compression_only
 
         # We'll complete file extension subsequently.
-        filename = "test."
-        filename += compression_to_extension[compression]
+        filename = tmp_path / f"test.{compression_to_extension[compression]}"
 
         df = pd.DataFrame({"A": [1]})
 
         to_compression = "infer" if to_infer else compression
         read_compression = "infer" if read_infer else compression
 
-        with tm.ensure_clean(filename) as path:
-            df.to_json(path, compression=to_compression)
-            result = pd.read_json(path, compression=read_compression)
-            tm.assert_frame_equal(result, df)
+        df.to_json(filename, compression=to_compression)
+        result = pd.read_json(filename, compression=read_compression)
+        tm.assert_frame_equal(result, df)
 
 
 def test_to_json_compression_mode(compression):
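
Note: these tests now take a temp_file fixture (or pytest's built-in tmp_path) instead of tm.ensure_clean(), so cleanup is handled by pytest's temporary-directory machinery. The fixture's definition is not part of this diff; a minimal sketch of what such a conftest.py fixture might look like, assuming it builds on tmp_path:

    import uuid

    import pytest

    @pytest.fixture
    def temp_file(tmp_path):
        # A unique file path inside pytest's per-test temporary directory;
        # removed automatically when tmp_path is cleaned up.
        return tmp_path / f"{uuid.uuid4()}.tmp"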

pandas/tests/io/json/test_pandas.py

Lines changed: 14 additions & 17 deletions

@@ -806,11 +806,10 @@ def test_reconstruction_index(self):
         result = read_json(StringIO(df.to_json()))
         tm.assert_frame_equal(result, df)
 
-    def test_path(self, float_frame, int_frame, datetime_frame):
-        with tm.ensure_clean("test.json") as path:
-            for df in [float_frame, int_frame, datetime_frame]:
-                df.to_json(path)
-                read_json(path)
+    def test_path(self, float_frame, int_frame, datetime_frame, temp_file):
+        for df in [float_frame, int_frame, datetime_frame]:
+            df.to_json(temp_file)
+            read_json(temp_file)
 
     def test_axis_dates(self, datetime_series, datetime_frame):
         # frame

@@ -1423,14 +1422,13 @@ def test_read_s3_jsonl(self, s3_bucket_public_with_data, s3so):
         expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
-    def test_read_local_jsonl(self):
+    def test_read_local_jsonl(self, temp_file):
         # GH17200
-        with tm.ensure_clean("tmp_items.json") as path:
-            with open(path, "w", encoding="utf-8") as infile:
-                infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
-            result = read_json(path, lines=True)
-            expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
-            tm.assert_frame_equal(result, expected)
+        with open(temp_file, "w", encoding="utf-8") as infile:
+            infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
+        result = read_json(temp_file, lines=True)
+        expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
+        tm.assert_frame_equal(result, expected)
 
     def test_read_jsonl_unicode_chars(self):
         # GH15132: non-ascii unicode characters

@@ -1526,17 +1524,16 @@ def test_to_jsonl(self):
         ],
     )
     @pytest.mark.parametrize("dtype", ["category", object])
-    def test_latin_encoding(self, dtype, val):
+    def test_latin_encoding(self, dtype, val, temp_file):
         # GH 13774
         ser = Series(
             [x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
             dtype=dtype,
         )
         encoding = "latin-1"
-        with tm.ensure_clean("test.json") as path:
-            ser.to_json(path, encoding=encoding)
-            retr = read_json(StringIO(path), encoding=encoding)
-            tm.assert_series_equal(ser, retr, check_categorical=False)
+        ser.to_json(temp_file, encoding=encoding)
+        retr = read_json(StringIO(temp_file), encoding=encoding)
+        tm.assert_series_equal(ser, retr, check_categorical=False)
 
     def test_data_frame_size_after_to_json(self):
         # GH15344
