Skip to content

Commit 4428724

Browse files
committed
Check for valid I-Regexp before mapping
1 parent ebf38f1 commit 4428724

File tree

5 files changed

+151
-4
lines changed

5 files changed

+151
-4
lines changed

jsonpath_rfc9535/function_extensions/_pattern.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def map_re(pattern: str) -> str:
1313

1414
if ch == ".":
1515
if not char_class:
16-
parts.append(r"(?:(?![\r\n])\P{Cs}|\\p{Cs}\p{Cs})")
16+
parts.append(r"(?:(?![\r\n])\P{Cs}|\p{Cs}\p{Cs})")
1717
else:
1818
parts.append(ch)
1919
elif ch == "\\":

jsonpath_rfc9535/function_extensions/match.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""The standard `match` function extension."""
22

33
import regex as re
4+
from iregexp_check import check
45

56
from jsonpath_rfc9535.function_extensions import ExpressionType
67
from jsonpath_rfc9535.function_extensions import FilterFunction
@@ -14,8 +15,11 @@ class Match(FilterFunction):
1415
arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
1516
return_type = ExpressionType.LOGICAL
1617

17-
def __call__(self, string: str, pattern: str) -> bool:
18+
def __call__(self, string: str, pattern: object) -> bool:
1819
"""Return `True` if _string_ matches _pattern_, or `False` otherwise."""
20+
if not isinstance(pattern, str) or not check(pattern):
21+
return False
22+
1923
try:
2024
# re.fullmatch caches compiled patterns internally
2125
return bool(re.fullmatch(map_re(pattern), string))

jsonpath_rfc9535/function_extensions/search.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""The standard `search` function extension."""
22

33
import regex as re
4+
from iregexp_check import check
45

56
from jsonpath_rfc9535.function_extensions import ExpressionType
67
from jsonpath_rfc9535.function_extensions import FilterFunction
@@ -14,8 +15,11 @@ class Search(FilterFunction):
1415
arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
1516
return_type = ExpressionType.LOGICAL
1617

17-
def __call__(self, string: str, pattern: str) -> bool:
18+
def __call__(self, string: str, pattern: object) -> bool:
1819
"""Return `True` if _string_ contains _pattern_, or `False` otherwise."""
20+
if not isinstance(pattern, str) or not check(pattern):
21+
return False
22+
1923
try:
2024
# re.search caches compiled patterns internally
2125
return bool(re.search(map_re(pattern), string, re.VERSION1))

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
"Programming Language :: Python :: Implementation :: CPython",
2525
"Programming Language :: Python :: Implementation :: PyPy",
2626
]
27-
dependencies = ["regex"]
27+
dependencies = ["regex", "iregexp-check>=0.1.3"]
2828

2929
[project.urls]
3030
Documentation = "https://jg-rp.github.io/python-jsonpath-rfc9535/"

tests/test_iregexp.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""I-Regexp checking tests.
2+
3+
Some of these test cases are derived from https://github.com/f3ath/iregexp.
4+
Thanks go to @f3ath and the project's license is included here.
5+
6+
MIT License
7+
8+
Copyright (c) 2023 Alexey
9+
10+
Permission is hereby granted, free of charge, to any person obtaining a copy
11+
of this software and associated documentation files (the "Software"), to deal
12+
in the Software without restriction, including without limitation the rights
13+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
copies of the Software, and to permit persons to whom the Software is
15+
furnished to do so, subject to the following conditions:
16+
17+
The above copyright notice and this permission notice shall be included in all
18+
copies or substantial portions of the Software.
19+
20+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26+
SOFTWARE.
27+
"""
28+
29+
import dataclasses
30+
import operator
31+
32+
import pytest
33+
from iregexp_check import check
34+
35+
36+
@dataclasses.dataclass(frozen=True)
class Case:
    """A single I-Regexp test case: a pattern plus a readable test id.

    Instances are frozen because they live in module-level lists shared by
    every parametrized test; a test must not be able to mutate a case and
    thereby influence the tests that run after it.
    """

    # Human-readable identifier, used as the pytest parametrization id.
    description: str
    # The regular expression pattern handed to the I-Regexp checker.
    pattern: str
40+
41+
42+
# Patterns that the valid-pattern test below asserts `check` accepts.
VALID_TEST_CASES = [
    # Basic constructs: dot, character classes, alternation, quantifiers.
    Case("dot", r"a.b"),
    Case("char_class_expr", r"[0-9]"),
    Case("branch", r"foo|bar"),
    Case("range_quantifier_exact", r"[ab]{3}"),
    Case("range_quantifier", r"[ab]{3,5}"),
    Case("range_quantifier_open_ended", r"[ab]{3,}"),
    Case("char_class_expr_negation", r"[^ab]"),
    # Unicode general category escapes, \p{..}.
    Case("unicode_character_category_letter", r"\p{L}"),
    Case("unicode_character_category_letter_uppercase", r"\p{Lu}"),
    Case("unicode_character_category_letter_lowercase", r"\p{Ll}"),
    Case("unicode_character_category_letter_titlecase", r"\p{Lt}"),
    Case("unicode_character_category_letter_modifier", r"\p{Lm}"),
    Case("unicode_character_category_letter_other", r"\p{Lo}"),
    Case("unicode_character_category_mark_nonspacing", r"\p{Mn}"),
    Case("unicode_character_category_mark_spacing_combining", r"\p{Mc}"),
    Case("unicode_character_category_mark_enclosing", r"\p{Me}"),
    Case("unicode_character_category_number_decimal_digit", r"\p{Nd}"),
    Case("unicode_character_category_number_letter", r"\p{Nl}"),
    Case("unicode_character_category_number_other", r"\p{No}"),
    Case("unicode_character_category_punctuation_connector", r"\p{Pc}"),
    Case("unicode_character_category_punctuation_dash", r"\p{Pd}"),
    Case("unicode_character_category_punctuation_open", r"\p{Ps}"),
    Case("unicode_character_category_punctuation_close", r"\p{Pe}"),
    Case("unicode_character_category_punctuation_initial_quote", r"\p{Pi}"),
    Case("unicode_character_category_punctuation_final_quote", r"\p{Pf}"),
    Case("unicode_character_category_punctuation_other", r"\p{Po}"),
    Case("unicode_character_category_symbol_math", r"\p{Sm}"),
    Case("unicode_character_category_symbol_currency", r"\p{Sc}"),
    Case("unicode_character_category_symbol_modifier", r"\p{Sk}"),
    Case("unicode_character_category_symbol_other", r"\p{So}"),
    Case("unicode_character_category_separator_space", r"\p{Zs}"),
    Case("unicode_character_category_separator_line", r"\p{Zl}"),
    Case("unicode_character_category_separator_paragraph", r"\p{Zp}"),
    Case("unicode_character_category_other_control", r"\p{Cc}"),
    Case("unicode_character_category_other_format", r"\p{Cf}"),
    Case("unicode_character_category_other_private_use", r"\p{Co}"),
    Case("unicode_character_category_other_not_assigned", r"\p{Cn}"),
    # Inverted Unicode general category escapes, \P{..}.
    Case("unicode_character_category_inverted_letter", r"\P{L}"),
    Case("unicode_character_category_inverted_letter_uppercase", r"\P{Lu}"),
    Case("unicode_character_category_inverted_letter_lowercase", r"\P{Ll}"),
    Case("unicode_character_category_inverted_letter_titlecase", r"\P{Lt}"),
    Case("unicode_character_category_inverted_letter_modifier", r"\P{Lm}"),
    Case("unicode_character_category_inverted_letter_other", r"\P{Lo}"),
    Case("unicode_character_category_inverted_mark_nonspacing", r"\P{Mn}"),
    Case("unicode_character_category_inverted_mark_spacing_combining", r"\P{Mc}"),
    Case("unicode_character_category_inverted_mark_enclosing", r"\P{Me}"),
    Case("unicode_character_category_inverted_number_decimal_digit", r"\P{Nd}"),
    Case("unicode_character_category_inverted_number_letter", r"\P{Nl}"),
    Case("unicode_character_category_inverted_number_other", r"\P{No}"),
    Case("unicode_character_category_inverted_punctuation_connector", r"\P{Pc}"),
    Case("unicode_character_category_inverted_punctuation_dash", r"\P{Pd}"),
    Case("unicode_character_category_inverted_punctuation_open", r"\P{Ps}"),
    Case("unicode_character_category_inverted_punctuation_close", r"\P{Pe}"),
    Case("unicode_character_category_inverted_punctuation_initial_quote", r"\P{Pi}"),
    Case("unicode_character_category_inverted_punctuation_final_quote", r"\P{Pf}"),
    Case("unicode_character_category_inverted_punctuation_other", r"\P{Po}"),
    Case("unicode_character_category_inverted_symbol_math", r"\P{Sm}"),
    Case("unicode_character_category_inverted_symbol_currency", r"\P{Sc}"),
    Case("unicode_character_category_inverted_symbol_modifier", r"\P{Sk}"),
    Case("unicode_character_category_inverted_symbol_other", r"\P{So}"),
    Case("unicode_character_category_inverted_separator_space", r"\P{Zs}"),
    Case("unicode_character_category_inverted_separator_line", r"\P{Zl}"),
    Case("unicode_character_category_inverted_separator_paragraph", r"\P{Zp}"),
    Case("unicode_character_category_inverted_other_control", r"\P{Cc}"),
    Case("unicode_character_category_inverted_other_format", r"\P{Cf}"),
    Case("unicode_character_category_inverted_other_private_use", r"\P{Co}"),
    Case("unicode_character_category_inverted_other_not_assigned", r"\P{Cn}"),
]
111+
112+
# Patterns that the invalid-pattern test below asserts `check` rejects.
INVALID_TEST_CASES = [
    Case(description="named_group", pattern=r"(?<group>[a-z]*)"),
    Case(description="multi_char_escape", pattern=r"\d"),
    Case(description="multi_char_escape_class_expr", pattern=r"[\S ]"),
    Case(description="non_greedy_repetition", pattern=r"[0-9]*?"),
    Case(description="back_reference", pattern=r"(\w)\1"),
    Case(
        description="lookahead",
        pattern=r"(?=.*[a-z])(?=.*[A-Z])(?=.*)[a-zA-Z]{8,}",
    ),
    Case(description="lookbehind", pattern=r"(?<=[a-z]{4})\[a-z]{2}"),
    Case(description="non_capturing_group", pattern=r"(?:[a-z]+)"),
    Case(description="atomic_group", pattern=r"(?>[a-z]+)"),
    Case(description="conditional_group", pattern=r"(?(1)a|b)"),
    Case(description="comment", pattern=r"(?#comment)"),
    Case(description="flag", pattern=r"(?i)[a-z]+"),
]
126+
127+
128+
@pytest.mark.parametrize(
    "case",
    VALID_TEST_CASES,
    ids=operator.attrgetter("description"),
)
def test_valid_iregexp(case: Case) -> None:
    """Assert that `check` accepts the pattern of a valid test case."""
    accepted = check(case.pattern)
    assert accepted
133+
134+
135+
@pytest.mark.parametrize(
    "case",
    INVALID_TEST_CASES,
    ids=operator.attrgetter("description"),
)
def test_invalid_iregexp(case: Case) -> None:
    """Assert that `check` rejects the pattern of an invalid test case."""
    accepted = check(case.pattern)
    assert not accepted

0 commit comments

Comments
 (0)