Skip to content

Commit 56a85b2

Browse files
Zip Strict specification for pandas/core/indexes (#62533)
1 parent e61f1f1 commit 56a85b2

File tree

3 files changed

+27
-26
lines changed

3 files changed

+27
-26
lines changed

pandas/core/indexes/interval.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,8 +1056,8 @@ def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
10561056
first_nan_loc = np.arange(len(self))[self.isna()][0]
10571057
mask[first_nan_loc] = True
10581058

1059-
other_tups = set(zip(other.left, other.right))
1060-
for i, tup in enumerate(zip(self.left, self.right)):
1059+
other_tups = set(zip(other.left, other.right, strict=True))
1060+
for i, tup in enumerate(zip(self.left, self.right, strict=True)):
10611061
if tup in other_tups:
10621062
mask[i] = True
10631063

pandas/core/indexes/multi.py

Lines changed: 25 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -720,7 +720,7 @@ def from_frame(
720720
if not isinstance(df, ABCDataFrame):
721721
raise TypeError("Input must be a DataFrame")
722722

723-
column_names, columns = zip(*df.items())
723+
column_names, columns = zip(*df.items(), strict=True)
724724
names = column_names if names is None else names
725725
return cls.from_arrays(columns, sortorder=sortorder, names=names)
726726

@@ -878,7 +878,10 @@ def levels(self) -> FrozenList:
878878
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly
879879
# create new IndexEngine
880880
# https://github.com/pandas-dev/pandas/issues/31648
881-
result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
881+
result = [
882+
x._rename(name=name)
883+
for x, name in zip(self._levels, self._names, strict=True)
884+
]
882885
for level in result:
883886
# disallow midx.levels[0].name = "foo"
884887
level._no_setting_name = True
@@ -912,7 +915,7 @@ def _set_levels(
912915
else:
913916
level_numbers = [self._get_level_number(lev) for lev in level]
914917
new_levels_list = list(self._levels)
915-
for lev_num, lev in zip(level_numbers, levels):
918+
for lev_num, lev in zip(level_numbers, levels, strict=True):
916919
new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
917920
new_levels = FrozenList(new_levels_list)
918921

@@ -1148,13 +1151,13 @@ def _set_codes(
11481151
if level is None:
11491152
new_codes = FrozenList(
11501153
_coerce_indexer_frozen(level_codes, lev, copy=copy).view()
1151-
for lev, level_codes in zip(self._levels, codes)
1154+
for lev, level_codes in zip(self._levels, codes, strict=True)
11521155
)
11531156
level_numbers = range(len(new_codes))
11541157
else:
11551158
level_numbers = [self._get_level_number(lev) for lev in level]
11561159
new_codes_list = list(self._codes)
1157-
for lev_num, level_codes in zip(level_numbers, codes):
1160+
for lev_num, level_codes in zip(level_numbers, codes, strict=True):
11581161
lev = self.levels[lev_num]
11591162
new_codes_list[lev_num] = _coerce_indexer_frozen(
11601163
level_codes, lev, copy=copy
@@ -1478,7 +1481,7 @@ def _formatter_func(self, tup):
14781481
Formats each item in tup according to its level's formatter function.
14791482
"""
14801483
formatter_funcs = (level._formatter_func for level in self.levels)
1481-
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
1484+
return tuple(func(val) for func, val in zip(formatter_funcs, tup, strict=True))
14821485

14831486
def _get_values_for_csv(
14841487
self, *, na_rep: str = "nan", **kwargs
@@ -1487,7 +1490,7 @@ def _get_values_for_csv(
14871490
new_codes = []
14881491

14891492
# go through the levels and format them
1490-
for level, level_codes in zip(self.levels, self.codes):
1493+
for level, level_codes in zip(self.levels, self.codes, strict=True):
14911494
level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
14921495
# add nan values, if there are any
14931496
mask = level_codes == -1
@@ -1527,7 +1530,7 @@ def _format_multi(
15271530
return []
15281531

15291532
stringified_levels = []
1530-
for lev, level_codes in zip(self.levels, self.codes):
1533+
for lev, level_codes in zip(self.levels, self.codes, strict=True):
15311534
na = _get_na_rep(lev.dtype)
15321535

15331536
if len(lev) > 0:
@@ -1550,7 +1553,7 @@ def _format_multi(
15501553
stringified_levels.append(formatted)
15511554

15521555
result_levels = []
1553-
for lev, lev_name in zip(stringified_levels, self.names):
1556+
for lev, lev_name in zip(stringified_levels, self.names, strict=True):
15541557
level = []
15551558

15561559
if include_names:
@@ -1627,7 +1630,7 @@ def _set_names(self, names, *, level=None) -> None:
16271630
level = (self._get_level_number(lev) for lev in level)
16281631

16291632
# set the name
1630-
for lev, name in zip(level, names):
1633+
for lev, name in zip(level, names, strict=True):
16311634
if name is not None:
16321635
# GH 20527
16331636
# All items in 'names' need to be hashable:
@@ -2094,7 +2097,7 @@ def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIn
20942097
new_levels = []
20952098
new_codes = []
20962099

2097-
for lev, level_codes in zip(self.levels, self.codes):
2100+
for lev, level_codes in zip(self.levels, self.codes, strict=True):
20982101
if not lev.is_monotonic_increasing:
20992102
try:
21002103
# indexer to reorder the levels
@@ -2173,7 +2176,7 @@ def remove_unused_levels(self) -> MultiIndex:
21732176
new_codes = []
21742177

21752178
changed = False
2176-
for lev, level_codes in zip(self.levels, self.codes):
2179+
for lev, level_codes in zip(self.levels, self.codes, strict=True):
21772180
# Since few levels are typically unused, bincount() is more
21782181
# efficient than unique() - however it only accepts positive values
21792182
# (and drops order):
@@ -2240,7 +2243,7 @@ def __getitem__(self, key):
22402243
key = com.cast_scalar_indexer(key)
22412244

22422245
retval = []
2243-
for lev, level_codes in zip(self.levels, self.codes):
2246+
for lev, level_codes in zip(self.levels, self.codes, strict=True):
22442247
if level_codes[key] == -1:
22452248
retval.append(np.nan)
22462249
else:
@@ -3078,7 +3081,7 @@ def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"
30783081

30793082
n = len(tup)
30803083
start, end = 0, len(self)
3081-
zipped = zip(tup, self.levels, self.codes)
3084+
zipped = zip(tup, self.levels, self.codes, strict=True)
30823085
for k, (lab, lev, level_codes) in enumerate(zipped):
30833086
section = level_codes[start:end]
30843087

@@ -3362,7 +3365,7 @@ def maybe_mi_droplevels(indexer, levels):
33623365
"Key for location must have same length as number of levels"
33633366
)
33643367
result = None
3365-
for lev, k in zip(level, key):
3368+
for lev, k in zip(level, key, strict=True):
33663369
loc, new_index = self._get_loc_level(k, level=lev)
33673370
if isinstance(loc, slice):
33683371
mask = np.zeros(len(self), dtype=bool)
@@ -3948,7 +3951,7 @@ def _union(self, other, sort) -> MultiIndex:
39483951
if isinstance(result, MultiIndex):
39493952
return result
39503953
return MultiIndex.from_arrays(
3951-
zip(*result), sortorder=None, names=result_names
3954+
zip(*result, strict=True), sortorder=None, names=result_names
39523955
)
39533956

39543957
else:
@@ -3995,7 +3998,7 @@ def _maybe_match_names(self, other):
39953998
if len(self.names) != len(other.names):
39963999
return [None] * len(self.names)
39974000
names = []
3998-
for a_name, b_name in zip(self.names, other.names):
4001+
for a_name, b_name in zip(self.names, other.names, strict=True):
39994002
if a_name == b_name:
40004003
names.append(a_name)
40014004
else:
@@ -4092,7 +4095,7 @@ def putmask(self, mask, value: MultiIndex) -> MultiIndex:
40924095
new_codes = []
40934096

40944097
for i, (value_level, level, level_codes) in enumerate(
4095-
zip(subset.levels, self.levels, self.codes)
4098+
zip(subset.levels, self.levels, self.codes, strict=True)
40964099
):
40974100
new_level = level.union(value_level, sort=False)
40984101
value_codes = new_level.get_indexer_for(subset.get_level_values(i))
@@ -4123,7 +4126,7 @@ def insert(self, loc: int, item) -> MultiIndex:
41234126

41244127
new_levels = []
41254128
new_codes = []
4126-
for k, level, level_codes in zip(item, self.levels, self.codes):
4129+
for k, level, level_codes in zip(item, self.levels, self.codes, strict=True):
41274130
if k not in level:
41284131
# have to insert into level
41294132
# must insert at end otherwise you have to recompute all the
@@ -4219,7 +4222,7 @@ def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
42194222

42204223

42214224
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
4222-
pivoted = list(zip(*label_list))
4225+
pivoted = list(zip(*label_list, strict=True))
42234226
k = len(label_list)
42244227

42254228
result = pivoted[: start + 1]
@@ -4228,7 +4231,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
42284231
for cur in pivoted[start + 1 :]:
42294232
sparse_cur = []
42304233

4231-
for i, (p, t) in enumerate(zip(prev, cur)):
4234+
for i, (p, t) in enumerate(zip(prev, cur, strict=True)):
42324235
if i == k - 1:
42334236
sparse_cur.append(t)
42344237
result.append(sparse_cur) # type: ignore[arg-type]
@@ -4243,7 +4246,7 @@ def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
42434246

42444247
prev = cur
42454248

4246-
return list(zip(*result))
4249+
return list(zip(*result, strict=True))
42474250

42484251

42494252
def _get_na_rep(dtype: DtypeObj) -> str:

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -452,8 +452,6 @@ exclude = [
452452
"pandas/core/groupby/groupby.py" = ["B905"]
453453
"pandas/core/groupby/grouper.py" = ["B905"]
454454
"pandas/core/groupby/ops.py" = ["B905"]
455-
"pandas/core/indexes/interval.py" = ["B905"]
456-
"pandas/core/indexes/multi.py" = ["B905"]
457455
"pandas/core/methods/to_dict.py" = ["B905"]
458456
"pandas/core/reshape/concat.py" = ["B905"]
459457
"pandas/core/reshape/encoding.py" = ["B905"]

0 commit comments

Comments
 (0)