|
| 1 | +"""I-Regexp checking tests. |
| 2 | +
|
| 3 | +Some of these test cases are derived from https://github.com/f3ath/iregexp. |
| 4 | +Thanks go to @f3ath and the project's license is included here. |
| 5 | +
|
| 6 | +MIT License |
| 7 | +
|
| 8 | +Copyright (c) 2023 Alexey |
| 9 | +
|
| 10 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 11 | +of this software and associated documentation files (the "Software"), to deal |
| 12 | +in the Software without restriction, including without limitation the rights |
| 13 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 14 | +copies of the Software, and to permit persons to whom the Software is |
| 15 | +furnished to do so, subject to the following conditions: |
| 16 | +
|
| 17 | +The above copyright notice and this permission notice shall be included in all |
| 18 | +copies or substantial portions of the Software. |
| 19 | +
|
| 20 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 21 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 22 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 23 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 24 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 25 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 26 | +SOFTWARE. |
| 27 | +""" |
| 28 | + |
| 29 | +import dataclasses |
| 30 | +import operator |
| 31 | + |
| 32 | +import pytest |
| 33 | +from iregexp_check import check |
| 34 | + |
| 35 | + |
@dataclasses.dataclass(frozen=True)
class Case:
    """A single pattern-checking scenario used to parametrize the tests.

    Instances are frozen because they are stored in module-level lists that
    every parametrized test run shares; immutability keeps one test from
    altering the data that another test sees.
    """

    # Short label for the scenario; supplied to pytest as the test id.
    description: str
    # The regular expression text handed to ``check``.
    pattern: str
| 40 | + |
| 41 | + |
# Patterns that ``check`` is expected to accept.  The bulk of the list walks
# the two-letter Unicode general categories, first with ``\p{..}`` and then
# with the negated ``\P{..}`` form, in the same order.
VALID_TEST_CASES = [
    Case("dot", r"a.b"),
    Case("char_class_expr", r"[0-9]"),
    Case("branch", r"foo|bar"),
    Case("range_quantifier_exact", r"[ab]{3}"),
    Case("range_quantifier", r"[ab]{3,5}"),
    Case("range_quantifier_open_ended", r"[ab]{3,}"),
    Case("char_class_expr_negation", r"[^ab]"),
    Case("unicode_character_category_letter", r"\p{L}"),
    Case("unicode_character_category_letter_uppercase", r"\p{Lu}"),
    Case("unicode_character_category_letter_lowercase", r"\p{Ll}"),
    Case("unicode_character_category_letter_titlecase", r"\p{Lt}"),
    Case("unicode_character_category_letter_modifier", r"\p{Lm}"),
    Case("unicode_character_category_letter_other", r"\p{Lo}"),
    Case("unicode_character_category_mark_nonspacing", r"\p{Mn}"),
    Case("unicode_character_category_mark_spacing_combining", r"\p{Mc}"),
    Case("unicode_character_category_mark_enclosing", r"\p{Me}"),
    Case("unicode_character_category_number_decimal_digit", r"\p{Nd}"),
    Case("unicode_character_category_number_letter", r"\p{Nl}"),
    Case("unicode_character_category_number_other", r"\p{No}"),
    Case("unicode_character_category_punctuation_connector", r"\p{Pc}"),
    Case("unicode_character_category_punctuation_dash", r"\p{Pd}"),
    Case("unicode_character_category_punctuation_open", r"\p{Ps}"),
    Case("unicode_character_category_punctuation_close", r"\p{Pe}"),
    Case("unicode_character_category_punctuation_initial_quote", r"\p{Pi}"),
    Case("unicode_character_category_punctuation_final_quote", r"\p{Pf}"),
    Case("unicode_character_category_punctuation_other", r"\p{Po}"),
    Case("unicode_character_category_symbol_math", r"\p{Sm}"),
    Case("unicode_character_category_symbol_currency", r"\p{Sc}"),
    Case("unicode_character_category_symbol_modifier", r"\p{Sk}"),
    Case("unicode_character_category_symbol_other", r"\p{So}"),
    Case("unicode_character_category_separator_space", r"\p{Zs}"),
    Case("unicode_character_category_separator_line", r"\p{Zl}"),
    Case("unicode_character_category_separator_paragraph", r"\p{Zp}"),
    Case("unicode_character_category_other_control", r"\p{Cc}"),
    Case("unicode_character_category_other_format", r"\p{Cf}"),
    Case("unicode_character_category_other_private_use", r"\p{Co}"),
    Case("unicode_character_category_other_not_assigned", r"\p{Cn}"),
    Case("unicode_character_category_inverted_letter", r"\P{L}"),
    Case("unicode_character_category_inverted_letter_uppercase", r"\P{Lu}"),
    Case("unicode_character_category_inverted_letter_lowercase", r"\P{Ll}"),
    Case("unicode_character_category_inverted_letter_titlecase", r"\P{Lt}"),
    Case("unicode_character_category_inverted_letter_modifier", r"\P{Lm}"),
    Case("unicode_character_category_inverted_letter_other", r"\P{Lo}"),
    Case("unicode_character_category_inverted_mark_nonspacing", r"\P{Mn}"),
    Case("unicode_character_category_inverted_mark_spacing_combining", r"\P{Mc}"),
    Case("unicode_character_category_inverted_mark_enclosing", r"\P{Me}"),
    Case("unicode_character_category_inverted_number_decimal_digit", r"\P{Nd}"),
    Case("unicode_character_category_inverted_number_letter", r"\P{Nl}"),
    Case("unicode_character_category_inverted_number_other", r"\P{No}"),
    Case("unicode_character_category_inverted_punctuation_connector", r"\P{Pc}"),
    Case("unicode_character_category_inverted_punctuation_dash", r"\P{Pd}"),
    Case("unicode_character_category_inverted_punctuation_open", r"\P{Ps}"),
    Case("unicode_character_category_inverted_punctuation_close", r"\P{Pe}"),
    Case("unicode_character_category_inverted_punctuation_initial_quote", r"\P{Pi}"),
    Case("unicode_character_category_inverted_punctuation_final_quote", r"\P{Pf}"),
    Case("unicode_character_category_inverted_punctuation_other", r"\P{Po}"),
    Case("unicode_character_category_inverted_symbol_math", r"\P{Sm}"),
    Case("unicode_character_category_inverted_symbol_currency", r"\P{Sc}"),
    Case("unicode_character_category_inverted_symbol_modifier", r"\P{Sk}"),
    Case("unicode_character_category_inverted_symbol_other", r"\P{So}"),
    Case("unicode_character_category_inverted_separator_space", r"\P{Zs}"),
    Case("unicode_character_category_inverted_separator_line", r"\P{Zl}"),
    Case("unicode_character_category_inverted_separator_paragraph", r"\P{Zp}"),
    Case("unicode_character_category_inverted_other_control", r"\P{Cc}"),
    Case("unicode_character_category_inverted_other_format", r"\P{Cf}"),
    Case("unicode_character_category_inverted_other_private_use", r"\P{Co}"),
    Case("unicode_character_category_inverted_other_not_assigned", r"\P{Cn}"),
]
| 111 | + |
# Patterns that ``check`` is expected to reject.  Each description names the
# construct the entry is meant to exercise.
INVALID_TEST_CASES = [
    Case("named_group", r"(?<group>[a-z]*)"),
    Case("multi_char_escape", r"\d"),
    Case("multi_char_escape_class_expr", r"[\S ]"),
    Case("non_greedy_repetition", r"[0-9]*?"),
    # NOTE(review): this pattern also contains the multi-character escape
    # ``\w``, so it may not isolate the back-reference on its own -- confirm.
    Case("back_reference", r"(\w)\1"),
    Case("lookahead", r"(?=.*[a-z])(?=.*[A-Z])(?=.*)[a-zA-Z]{8,}"),
    # NOTE(review): the ``\[`` after the group looks like a stray escape
    # (possibly inherited from the upstream source) -- confirm it is intended.
    Case("lookbehind", r"(?<=[a-z]{4})\[a-z]{2}"),
    Case("non_capturing_group", r"(?:[a-z]+)"),
    Case("atomic_group", r"(?>[a-z]+)"),
    Case("conditional_group", r"(?(1)a|b)"),
    Case("comment", r"(?#comment)"),
    Case("flag", r"(?i)[a-z]+"),
]
| 126 | + |
| 127 | + |
@pytest.mark.parametrize(
    "case",
    VALID_TEST_CASES,
    ids=operator.attrgetter("description"),
)
def test_valid_iregexp(case: Case) -> None:
    """Assert that ``check`` returns a truthy result for a valid pattern.

    Runs once per entry in ``VALID_TEST_CASES``; each run is identified by
    the entry's ``description``.
    """
    accepted = check(case.pattern)
    assert accepted
| 133 | + |
| 134 | + |
@pytest.mark.parametrize(
    "case",
    INVALID_TEST_CASES,
    ids=operator.attrgetter("description"),
)
def test_invalid_iregexp(case: Case) -> None:
    """Assert that ``check`` returns a falsy result for an invalid pattern.

    Runs once per entry in ``INVALID_TEST_CASES``; each run is identified by
    the entry's ``description``.
    """
    accepted = check(case.pattern)
    assert not accepted
0 commit comments