Skip to content

Commit f8071c3

Browse files
committed
Implement bulk remove via __isub__ operator
Reference in OrderedSet.remove docs as a way to avoid quadratic behavior.
1 parent bbca53d commit f8071c3

File tree

1 file changed

+54
-1
lines changed

1 file changed

+54
-1
lines changed

src/techcable/orderedset/_orderedset.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import operator
4+
from collections import defaultdict
45
from collections.abc import (
56
AsyncGenerator,
67
AsyncIterable,
@@ -17,7 +18,7 @@
1718
if TYPE_CHECKING:
1819
from pydantic import GetCoreSchemaHandler
1920
from pydantic_core import core_schema
20-
from typing_extensions import Protocol, get_args, override
21+
from typing_extensions import Protocol, Self, get_args, override
2122

2223
class Comparable(Protocol): # noqa: PLW1641 - do not require __hash__ method
2324
def __lt__(self, other: Comparable) -> bool:
@@ -176,6 +177,7 @@ def remove(self, value: T, /) -> None:
176177
However, it takes linear time like [`list.remove`],
177178
instead of the constant time that [`set.remove`] takes.
178179
Invoking it repeatedly may cause quadratic blowup, just like `list.remove` would.
180+
Using [`OrderedSet.__isub__`] for bulk removes is much faster and avoids this.
179181
180182
See [`OrderedSet.discard`] for a variant that does nothing if the item is not present.
181183
"""
@@ -200,6 +202,57 @@ def discard(self, value: T, /) -> None:
200202
self._elements.remove(value)
201203
self._unique.remove(value)
202204

205+
def _assign(self, other: OrderedSet[T], /) -> Self:
    """
    Replace the contents of this set with the contents of `other`.

    The `_unique` and `_elements` attributes of `other` are adopted by reference
    rather than copied, so both objects share the same storage afterwards.
    Only pass a set that will not be used again, such as a freshly built temporary.

    Returns `self`, which makes this usable as the result of an in-place operator.
    """
    self._unique, self._elements = other._unique, other._elements
    return self
209+
210+
def __sub__(self, other: Set[T]) -> OrderedSet[T]:
    """
    Return a new set holding the elements of this set that are not in `other`.

    Elements are passed to `OrderedSet` in this set's iteration order.

    Returns `NotImplemented` when `other` is not a `Set`, so that Python can try
    the reflected operation on `other` and otherwise raise the usual `TypeError`.
    """
    if not isinstance(other, Set):
        # Binary operators signal an unsupported operand by *returning* NotImplemented;
        # raising NotImplementedError would bypass the operator fallback protocol.
        return NotImplemented
    return OrderedSet(item for item in self if item not in other)
215+
216+
def __and__(self, other: Set[T]) -> OrderedSet[T]:
    """
    Return a new set holding the elements of this set that are also in `other`.

    Elements are passed to `OrderedSet` in this set's iteration order.

    Returns `NotImplemented` when `other` is not a `Set`, so that Python can try
    the reflected operation on `other` and otherwise raise the usual `TypeError`.
    """
    if not isinstance(other, Set):
        # Binary operators signal an unsupported operand by *returning* NotImplemented;
        # raising NotImplementedError would bypass the operator fallback protocol.
        return NotImplemented
    return OrderedSet(item for item in self if item in other)
221+
222+
if not TYPE_CHECKING:
223+
# hidden from type checkers: the signature is too difficult to express with old-style typevars
224+
225+
def __xor__(self, other: Set[T]) -> OrderedSet[T]:
    """
    Return a new set holding the elements found in exactly one of the two operands.

    Occurrences are counted across both operands and only elements seen once are kept.
    Elements of this set are visited first, followed by the elements of `other`.

    Returns `NotImplemented` when `other` is not a `Set`, so that Python can try
    the reflected operation on `other` and otherwise raise the usual `TypeError`.
    """
    if not isinstance(other, Set):
        # Binary operators signal an unsupported operand by *returning* NotImplemented;
        # raising NotImplementedError would bypass the operator fallback protocol.
        return NotImplemented
    counts: dict[T, int] = defaultdict(int)
    for operand in (self, other):
        for item in operand:
            counts[item] += 1
    # the dict remembers first-insertion order, so the result follows the visiting order
    return OrderedSet(item for item, cnt in counts.items() if cnt == 1)
235+
236+
def __ixor__(self, other: Set[T]) -> Self:
    """Replace this set's contents with the result of `self ^ other` and return `self`."""
    symmetric_difference = self ^ other
    return self._assign(symmetric_difference)
238+
239+
def __iand__(self, other: Set[T]) -> Self:
    """Replace this set's contents with the result of `self & other` and return `self`."""
    # Overridden explicitly: building the intersection in one pass and swapping it in
    # avoids quadratic blowup on remove.
    intersection = self & other
    return self._assign(intersection)
242+
243+
def __isub__(self, other: Set[T]) -> Self:
    """
    Remove every element of `other` from this set, in place.

    This is the bulk alternative to calling [`OrderedSet.remove`] in a loop,
    and avoids the quadratic blowup that such a loop would cause.

    Returns `self`.
    """
    if other is not self:
        # Overridden explicitly: compute the difference once and swap it in,
        # instead of removing the elements one at a time.
        return self._assign(self - other)
    # subtracting a set from itself always leaves it empty
    self.clear()
    return self
255+
203256
def update(self, values: Iterable[T], /) -> None:
204257
"""
205258
Add all the specified values to this set.

0 commit comments

Comments
 (0)