Skip to content

Commit 9e59758

Browse files
committed
Bump version to 0.10.0; remove precompute option from RegexEnumerator and RegexParser; add tests for empty character class and group with alternative; optimized generation
1 parent 031c6f5 commit 9e59758

File tree

9 files changed

+112
-171
lines changed

9 files changed

+112
-171
lines changed

README.md

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,16 +75,6 @@ assert '¢' in result
7575
assert '£' in result
7676
```
7777

78-
## Precomputation
79-
80-
For optimization purposes, the library precomputes the strings of the elements in the regex pattern when they do not repeat indefinitely. To disable this feature and reduce the time of the first call to `next()`, set `precompute=False` when creating the `RegexEnumerator`.
81-
82-
```python
83-
from regex_enumerator import RegexEnumerator
84-
85-
re = RegexEnumerator(r'a[0-9]b', precompute=False)
86-
```
87-
8878
## How it works
8979

9080
This library works by parsing the regex pattern into a tree structure. Once parsed, it performs a breadth-first search (BFS) on the tree to generate all matching strings. This ensures it does not get stuck on unbounded quantifiers for character classes or groups.

regex_enumerator/regex_enumerator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55
class RegexEnumerator:
6-
def __init__(self, regex: str, additional_charset: str | list[str] = None, precompute: bool = True) -> None:
6+
def __init__(self, regex: str, additional_charset: str | list[str] = None) -> None:
77
default_charset = [chr(c) for c in range(32, 127)]
88

99
if additional_charset is None:
@@ -14,7 +14,7 @@ def __init__(self, regex: str, additional_charset: str | list[str] = None, preco
1414
additional = list(additional_charset)
1515

1616
charset = ''.join(sorted(set(default_charset + additional)))
17-
parser = RegexParser(regex, charset, precompute)
17+
parser = RegexParser(regex, charset)
1818
self.regexTree: RegexTree = parser.parse()
1919
self.current: list[str] = list(self.regexTree.current)
2020
self.done: bool = self.regexTree.done and len(self.current) == 0

regex_enumerator/regex_parser.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@ class RegexParser:
1818
DIGITS = '0123456789'
1919
SPACES = ' \t\n\r\f\v'
2020

21-
def __init__(self, regex: str, charset: str, precompute: bool):
21+
def __init__(self, regex: str, charset: str) -> None:
2222
self.regex = regex
2323
self.charset = charset
24-
self.precompute = precompute
2524

2625
def parse(self) -> RegexTree:
2726
self.index = 0
@@ -81,19 +80,19 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
8180
chars = self._parseCharClass()
8281
min_len, max_len = self._parseQuantifier()
8382
elements.append(
84-
CharClass(chars, min_len, max_len, self.precompute))
83+
CharClass(chars, min_len, max_len))
8584
case '.':
8685
chars = list(self.charset)
8786
min_len, max_len = self._parseQuantifier()
8887
elements.append(
89-
CharClass(chars, min_len, max_len, self.precompute))
88+
CharClass(chars, min_len, max_len))
9089
case '\\':
9190
reference = self._parseBackReferenceLookahead()
9291
if reference is None:
9392
chars = self._parseEscapeChar()
9493
min_len, max_len = self._parseQuantifier()
9594
elements.append(
96-
CharClass([chars], min_len, max_len, self.precompute))
95+
CharClass([chars], min_len, max_len))
9796
continue
9897
if isinstance(reference, str):
9998
if reference not in named_groups:
@@ -105,20 +104,19 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
105104
"Positional back reference not found")
106105
group = ordered_groups[reference - 1]
107106
min_len, max_len = self._parseQuantifier()
108-
reference = BackReference(
109-
group, min_len, max_len, self.precompute)
107+
reference = BackReference(group, min_len, max_len)
110108
group.add_reference(reference)
111109
elements.append(reference)
112110
case _:
113111
min_len, max_len = self._parseQuantifier()
114112
elements.append(
115-
CharClass([char], min_len, max_len, self.precompute))
113+
CharClass([char], min_len, max_len))
116114

117115
if to_close:
118116
self._raise_error("Unmatched opening parenthesis")
119117

120118
alternatives.append(Alternative(elements))
121-
return RegexTree(alternatives, min_len_group, max_len_group, self.precompute)
119+
return RegexTree(alternatives, min_len_group, max_len_group)
122120

123121
def _parseBackReferenceLookahead(self) -> str | int | None:
124122
if len(self.regex) <= self.index:

0 commit comments

Comments
 (0)