Skip to content

Commit 37d4043

Browse files
authored
Merge branch 'main' into enforce-ruff-rule-b905-conftest
2 parents 1b638bd + 641ebf4 commit 37d4043

File tree

5 files changed

+77
-36
lines changed

5 files changed

+77
-36
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,7 @@ Strings
10131013
^^^^^^^
10141014
- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
10151015
- Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
1016+
- Bug in multiplication with a :class:`StringDtype` incorrectly allowing multiplication by bools; this now raises ``TypeError``. Explicitly cast to integers instead (:issue:`62595`)
10161017

10171018
Interval
10181019
^^^^^^^^

pandas/core/arrays/string_.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,6 +1113,16 @@ def _cmp_method(self, other, op):
11131113
other = np.asarray(other)
11141114
other = other[valid]
11151115

1116+
other_dtype = getattr(other, "dtype", None)
1117+
if op.__name__.strip("_") in ["mul", "rmul"] and (
1118+
lib.is_bool(other) or lib.is_np_dtype(other_dtype, "b")
1119+
):
1120+
# GH#62595
1121+
raise TypeError(
1122+
"Cannot multiply StringArray by bools. "
1123+
"Explicitly cast to integers instead."
1124+
)
1125+
11161126
if op.__name__ in ops.ARITHMETIC_BINOPS:
11171127
result = np.empty_like(self._ndarray, dtype="object")
11181128
result[mask] = self.dtype.na_value

pandas/tests/arithmetic/test_string.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,25 @@ def test_pyarrow_numpy_string_invalid():
112112

113113
with pytest.raises(TypeError, match="Invalid comparison"):
114114
ser > ser4
115+
116+
117+
def test_mul_bool_invalid(any_string_dtype):
118+
# GH#62595
119+
dtype = any_string_dtype
120+
ser = Series(["a", "b", "c"], dtype=dtype)
121+
122+
if dtype == object:
123+
pytest.skip("This is not expected to raise")
124+
elif dtype.storage == "python":
125+
msg = "Cannot multiply StringArray by bools. Explicitly cast to integers"
126+
else:
127+
msg = "Can only string multiply by an integer"
128+
129+
with pytest.raises(TypeError, match=msg):
130+
False * ser
131+
with pytest.raises(TypeError, match=msg):
132+
ser * True
133+
with pytest.raises(TypeError, match=msg):
134+
ser * np.array([True, False, True], dtype=bool)
135+
with pytest.raises(TypeError, match=msg):
136+
np.array([True, False, True], dtype=bool) * ser

pandas/tests/io/pytables/test_round_trip.py

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas import (
1212
DataFrame,
1313
DatetimeIndex,
14+
HDFStore,
1415
Index,
1516
Series,
1617
_testing as tm,
@@ -20,7 +21,6 @@
2021
)
2122
from pandas.tests.io.pytables.common import (
2223
_maybe_remove,
23-
ensure_clean_store,
2424
)
2525
from pandas.util import _test_decorators as td
2626

@@ -54,12 +54,12 @@ def roundtrip(key, obj, **kwargs):
5454
tm.assert_frame_equal(df[df.index > 2], result)
5555

5656

57-
def test_long_strings(setup_path):
57+
def test_long_strings(temp_file):
5858
# GH6166
5959
data = ["a" * 50] * 10
6060
df = DataFrame({"a": data}, index=data)
6161

62-
with ensure_clean_store(setup_path) as store:
62+
with HDFStore(temp_file) as store:
6363
store.append("df", df, data_columns=["a"])
6464

6565
result = store.select("df")
@@ -96,8 +96,8 @@ def test_api_append(tmp_path, setup_path):
9696
tm.assert_frame_equal(read_hdf(path, "df"), df)
9797

9898

99-
def test_api_2(tmp_path, setup_path):
100-
path = tmp_path / setup_path
99+
def test_api_2(tmp_path, temp_file):
100+
path = tmp_path / temp_file
101101

102102
df = DataFrame(range(20))
103103
df.to_hdf(path, key="df", append=False, format="fixed")
@@ -112,7 +112,7 @@ def test_api_2(tmp_path, setup_path):
112112
df.to_hdf(path, key="df")
113113
tm.assert_frame_equal(read_hdf(path, "df"), df)
114114

115-
with ensure_clean_store(setup_path) as store:
115+
with HDFStore(temp_file) as store:
116116
df = DataFrame(range(20))
117117

118118
_maybe_remove(store, "df")
@@ -171,8 +171,8 @@ def test_api_invalid(tmp_path, setup_path):
171171
read_hdf(path, "df")
172172

173173

174-
def test_get(setup_path):
175-
with ensure_clean_store(setup_path) as store:
174+
def test_get(temp_file):
175+
with HDFStore(temp_file) as store:
176176
store["a"] = Series(
177177
np.arange(10, dtype=np.float64), index=date_range("2020-01-01", periods=10)
178178
)
@@ -194,8 +194,8 @@ def test_put_integer(setup_path):
194194
_check_roundtrip(df, tm.assert_frame_equal, setup_path)
195195

196196

197-
def test_table_values_dtypes_roundtrip(setup_path, using_infer_string):
198-
with ensure_clean_store(setup_path) as store:
197+
def test_table_values_dtypes_roundtrip(temp_file, using_infer_string):
198+
with HDFStore(temp_file) as store:
199199
df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
200200
store.append("df_f8", df1)
201201
tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)
@@ -361,7 +361,7 @@ def test_timeseries_preepoch(setup_path, request):
361361
@pytest.mark.parametrize(
362362
"compression", [False, pytest.param(True, marks=td.skip_if_windows)]
363363
)
364-
def test_frame(compression, setup_path):
364+
def test_frame(compression, temp_file):
365365
df = DataFrame(
366366
1.1 * np.arange(120).reshape((30, 4)),
367367
columns=Index(list("ABCD")),
@@ -373,22 +373,20 @@ def test_frame(compression, setup_path):
373373
df.iloc[5, 3] = np.nan
374374

375375
_check_roundtrip_table(
376-
df, tm.assert_frame_equal, path=setup_path, compression=compression
377-
)
378-
_check_roundtrip(
379-
df, tm.assert_frame_equal, path=setup_path, compression=compression
376+
df, tm.assert_frame_equal, path=temp_file, compression=compression
380377
)
378+
_check_roundtrip(df, tm.assert_frame_equal, path=temp_file, compression=compression)
381379

382380
tdf = DataFrame(
383381
np.random.default_rng(2).standard_normal((10, 4)),
384382
columns=Index(list("ABCD")),
385383
index=date_range("2000-01-01", periods=10, freq="B"),
386384
)
387385
_check_roundtrip(
388-
tdf, tm.assert_frame_equal, path=setup_path, compression=compression
386+
tdf, tm.assert_frame_equal, path=temp_file, compression=compression
389387
)
390388

391-
with ensure_clean_store(setup_path) as store:
389+
with HDFStore(temp_file) as store:
392390
# not consolidated
393391
df["foo"] = np.random.default_rng(2).standard_normal(len(df))
394392
store["df"] = df
@@ -399,7 +397,7 @@ def test_frame(compression, setup_path):
399397
df2 = df[:0]
400398
# Prevent df2 from having index with inferred_type as string
401399
df2.index = Index([])
402-
_check_roundtrip(df2[:0], tm.assert_frame_equal, path=setup_path)
400+
_check_roundtrip(df2[:0], tm.assert_frame_equal, path=temp_file)
403401

404402

405403
def test_empty_series_frame(setup_path):
@@ -432,22 +430,22 @@ def test_can_serialize_dates(setup_path):
432430

433431

434432
def test_store_hierarchical(
435-
setup_path, using_infer_string, multiindex_dataframe_random_data
433+
temp_file, using_infer_string, multiindex_dataframe_random_data
436434
):
437435
frame = multiindex_dataframe_random_data
438436

439437
if using_infer_string:
440438
# TODO(infer_string) make this work for string dtype
441439
msg = "Saving a MultiIndex with an extension dtype is not supported."
442440
with pytest.raises(NotImplementedError, match=msg):
443-
_check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
441+
_check_roundtrip(frame, tm.assert_frame_equal, path=temp_file)
444442
return
445-
_check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
446-
_check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
447-
_check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)
443+
_check_roundtrip(frame, tm.assert_frame_equal, path=temp_file)
444+
_check_roundtrip(frame.T, tm.assert_frame_equal, path=temp_file)
445+
_check_roundtrip(frame["A"], tm.assert_series_equal, path=temp_file)
448446

449447
# check that the names are stored
450-
with ensure_clean_store(setup_path) as store:
448+
with HDFStore(temp_file) as store:
451449
store["frame"] = frame
452450
recons = store["frame"]
453451
tm.assert_frame_equal(recons, frame)
@@ -456,7 +454,7 @@ def test_store_hierarchical(
456454
@pytest.mark.parametrize(
457455
"compression", [False, pytest.param(True, marks=td.skip_if_windows)]
458456
)
459-
def test_store_mixed(compression, setup_path):
457+
def test_store_mixed(compression, temp_file):
460458
def _make_one():
461459
df = DataFrame(
462460
1.1 * np.arange(120).reshape((30, 4)),
@@ -474,10 +472,10 @@ def _make_one():
474472
df1 = _make_one()
475473
df2 = _make_one()
476474

477-
_check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
478-
_check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)
475+
_check_roundtrip(df1, tm.assert_frame_equal, path=temp_file)
476+
_check_roundtrip(df2, tm.assert_frame_equal, path=temp_file)
479477

480-
with ensure_clean_store(setup_path) as store:
478+
with HDFStore(temp_file) as store:
481479
store["obj"] = df1
482480
tm.assert_frame_equal(store["obj"], df1)
483481
store["obj"] = df2
@@ -487,19 +485,19 @@ def _make_one():
487485
_check_roundtrip(
488486
df1["obj1"],
489487
tm.assert_series_equal,
490-
path=setup_path,
488+
path=temp_file,
491489
compression=compression,
492490
)
493491
_check_roundtrip(
494492
df1["bool1"],
495493
tm.assert_series_equal,
496-
path=setup_path,
494+
path=temp_file,
497495
compression=compression,
498496
)
499497
_check_roundtrip(
500498
df1["int1"],
501499
tm.assert_series_equal,
502-
path=setup_path,
500+
path=temp_file,
503501
compression=compression,
504502
)
505503

@@ -509,7 +507,7 @@ def _check_roundtrip(obj, comparator, path, compression=False, **kwargs):
509507
if compression:
510508
options["complib"] = "blosc"
511509

512-
with ensure_clean_store(path, "w", **options) as store:
510+
with HDFStore(path, "w", **options) as store:
513511
store["obj"] = obj
514512
retrieved = store["obj"]
515513
comparator(retrieved, obj, **kwargs)
@@ -520,7 +518,7 @@ def _check_roundtrip_table(obj, comparator, path, compression=False):
520518
if compression:
521519
options["complib"] = "blosc"
522520

523-
with ensure_clean_store(path, "w", **options) as store:
521+
with HDFStore(path, "w", **options) as store:
524522
store.put("obj", obj, format="table")
525523
retrieved = store["obj"]
526524

@@ -537,17 +535,17 @@ def test_unicode_index(setup_path):
537535
_check_roundtrip(s, tm.assert_series_equal, path=setup_path)
538536

539537

540-
def test_unicode_longer_encoded(setup_path):
538+
def test_unicode_longer_encoded(temp_file):
541539
# GH 11234
542540
char = "\u0394"
543541
df = DataFrame({"A": [char]})
544-
with ensure_clean_store(setup_path) as store:
542+
with HDFStore(temp_file) as store:
545543
store.put("df", df, format="table", encoding="utf-8")
546544
result = store.get("df")
547545
tm.assert_frame_equal(result, df)
548546

549547
df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
550-
with ensure_clean_store(setup_path) as store:
548+
with HDFStore(temp_file) as store:
551549
store.put("df", df, format="table", encoding="utf-8")
552550
result = store.get("df")
553551
tm.assert_frame_equal(result, df)

pandas/tests/util/test_assert_frame_equal.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23

34
from pandas.errors import Pandas4Warning
@@ -413,3 +414,12 @@ def test_datetimelike_compat_deprecated():
413414
tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=True)
414415
with tm.assert_produces_warning(Pandas4Warning, match=msg):
415416
tm.assert_series_equal(df["a"], df["a"], check_datetimelike_compat=False)
417+
418+
419+
@pytest.mark.parametrize("na_value", [pd.NA, np.nan, None])
420+
def test_assert_frame_equal_nested_df_na(na_value):
421+
# GH#43022
422+
inner = DataFrame({"a": [1, na_value]})
423+
df1 = DataFrame({"df": [inner]})
424+
df2 = DataFrame({"df": [inner]})
425+
tm.assert_frame_equal(df1, df2)

0 commit comments

Comments
 (0)