@@ -10,56 +10,52 @@
 
 from .exceptions import JSONPathSyntaxError
 from .token import TOKEN_AND
-from .token import TOKEN_BARE_PROPERTY
+from .token import TOKEN_COLON
 from .token import TOKEN_COMMA
 from .token import TOKEN_CONTAINS
 from .token import TOKEN_DDOT
-from .token import TOKEN_DOT_PROPERTY
+from .token import TOKEN_DOT
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
-from .token import TOKEN_FAKE_ROOT
+from .token import TOKEN_ERROR
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
 from .token import TOKEN_FLOAT
-from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
-from .token import TOKEN_ILLEGAL
 from .token import TOKEN_IN
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
 from .token import TOKEN_KEYS
+from .token import TOKEN_KEYS_FILTER
+from .token import TOKEN_LBRACKET
 from .token import TOKEN_LE
 from .token import TOKEN_LG
-from .token import TOKEN_LIST_SLICE
-from .token import TOKEN_LIST_START
 from .token import TOKEN_LPAREN
 from .token import TOKEN_LT
 from .token import TOKEN_MISSING
+from .token import TOKEN_NAME
 from .token import TOKEN_NE
 from .token import TOKEN_NIL
 from .token import TOKEN_NONE
 from .token import TOKEN_NOT
 from .token import TOKEN_NULL
 from .token import TOKEN_OR
-from .token import TOKEN_PROPERTY
+from .token import TOKEN_PSEUDO_ROOT
 from .token import TOKEN_RBRACKET
 from .token import TOKEN_RE
 from .token import TOKEN_RE_FLAGS
 from .token import TOKEN_RE_PATTERN
 from .token import TOKEN_ROOT
 from .token import TOKEN_RPAREN
 from .token import TOKEN_SELF
 from .token import TOKEN_SINGLE_QUOTE_STRING
-from .token import TOKEN_SKIP
-from .token import TOKEN_SLICE_START
-from .token import TOKEN_SLICE_STEP
-from .token import TOKEN_SLICE_STOP
 from .token import TOKEN_TRUE
 from .token import TOKEN_UNDEFINED
 from .token import TOKEN_UNION
+from .token import TOKEN_WHITESPACE
 from .token import TOKEN_WILD
 from .token import Token
 
@@ -86,8 +82,9 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`.
8682 """
8783
8884 key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"
85+ name_pattern = key_pattern # XXX:
8986
90- # `not` or !
87+ # ! or `not`
9188 logical_not_pattern = r"(?:not\b)|!"
9289
9390 # && or `and`
@@ -102,28 +99,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.double_quote_pattern = r'"(?P<G_DQUOTE>(?:(?!(?<!\\)").)*)"'
         self.single_quote_pattern = r"'(?P<G_SQUOTE>(?:(?!(?<!\\)').)*)'"
 
-        # .thing
-        self.dot_property_pattern = rf"\.(?P<G_PROP>{self.key_pattern})"
-
-        self.slice_list_pattern = (
-            r"(?P<G_LSLICE_START>\-?\d*)\s*"
-            r":\s*(?P<G_LSLICE_STOP>\-?\d*)\s*"
-            r"(?::\s*(?P<G_LSLICE_STEP>\-?\d*))?"
-        )
-
+        # TODO: separate re literal tokens
         # /pattern/ or /pattern/flags
         self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
 
-        # func(
-        self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)\(\s*"
-
         self.rules = self.compile_rules()
 
     def compile_rules(self) -> Pattern[str]:
         """Prepare regular expression rules."""
         env_tokens = [
             (TOKEN_ROOT, self.env.root_token),
-            (TOKEN_FAKE_ROOT, self.env.fake_root_token),
+            (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token),
             (TOKEN_SELF, self.env.self_token),
             (TOKEN_KEY, self.env.key_token),
             (TOKEN_UNION, self.env.union_token),
@@ -136,12 +122,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
-            (TOKEN_LIST_SLICE, self.slice_list_pattern),
-            (TOKEN_FUNCTION, self.function_pattern),
-            (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
             (TOKEN_DDOT, r"\.\."),
+            (TOKEN_DOT, r"\."),
             (TOKEN_AND, self.logical_and_pattern),
             (TOKEN_OR, self.logical_or_pattern),
             *[
@@ -153,6 +137,7 @@ def compile_rules(self) -> Pattern[str]:
             ],
             (TOKEN_WILD, r"\*"),
             (TOKEN_FILTER, r"\?"),
+            (TOKEN_KEYS_FILTER, r"~\?"),  # TODO: get from env
             (TOKEN_IN, r"in\b"),
             (TOKEN_TRUE, r"[Tt]rue\b"),
             (TOKEN_FALSE, r"[Ff]alse\b"),
@@ -162,9 +147,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_CONTAINS, r"contains\b"),
             (TOKEN_UNDEFINED, r"undefined\b"),
             (TOKEN_MISSING, r"missing\b"),
-            (TOKEN_LIST_START, r"\["),
+            (TOKEN_LBRACKET, r"\["),
             (TOKEN_RBRACKET, r"]"),
             (TOKEN_COMMA, r","),
+            (TOKEN_COLON, r":"),
             (TOKEN_EQ, r"=="),
             (TOKEN_NE, r"!="),
             (TOKEN_LG, r"<>"),
@@ -173,12 +159,12 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_RE, r"=~"),
             (TOKEN_LT, r"<"),
             (TOKEN_GT, r">"),
-            (TOKEN_NOT, self.logical_not_pattern),
-            (TOKEN_BARE_PROPERTY, self.key_pattern),
+            (TOKEN_NOT, self.logical_not_pattern),  # Must go after "!="
+            (TOKEN_NAME, self.key_pattern),  # Must go after reserved words
             (TOKEN_LPAREN, r"\("),
             (TOKEN_RPAREN, r"\)"),
-            (TOKEN_SKIP, r"[ \n\t\r\.]+"),
-            (TOKEN_ILLEGAL, r"."),
+            (TOKEN_WHITESPACE, r"[ \n\t\r]+"),
+            (TOKEN_ERROR, r"."),
         ]
 
         return re.compile(
@@ -194,35 +180,7 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
             kind = match.lastgroup
             assert kind is not None
 
-            if kind == TOKEN_DOT_PROPERTY:
-                yield _token(
-                    kind=TOKEN_PROPERTY,
-                    value=match.group("G_PROP"),
-                    index=match.start("G_PROP"),
-                )
-            elif kind == TOKEN_BARE_PROPERTY:
-                yield _token(
-                    kind=TOKEN_BARE_PROPERTY,
-                    value=match.group(),
-                    index=match.start(),
-                )
-            elif kind == TOKEN_LIST_SLICE:
-                yield _token(
-                    kind=TOKEN_SLICE_START,
-                    value=match.group("G_LSLICE_START"),
-                    index=match.start("G_LSLICE_START"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STOP,
-                    value=match.group("G_LSLICE_STOP"),
-                    index=match.start("G_LSLICE_STOP"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STEP,
-                    value=match.group("G_LSLICE_STEP") or "",
-                    index=match.start("G_LSLICE_STEP"),
-                )
-            elif kind == TOKEN_DOUBLE_QUOTE_STRING:
+            if kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
                     value=match.group("G_DQUOTE"),
@@ -264,19 +222,11 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
                     value=match.group(),
                     index=match.start(),
                 )
-            elif kind == TOKEN_FUNCTION:
-                yield _token(
-                    kind=TOKEN_FUNCTION,
-                    value=match.group("G_FUNC"),
-                    index=match.start("G_FUNC"),
-                )
-            elif kind == TOKEN_SKIP:
-                continue
-            elif kind == TOKEN_ILLEGAL:
+            elif kind == TOKEN_ERROR:
                 raise JSONPathSyntaxError(
                     f"unexpected token {match.group()!r}",
                     token=_token(
-                        TOKEN_ILLEGAL,
+                        TOKEN_ERROR,
                         value=match.group(),
                         index=match.start(),
                     ),
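
For context, here is a minimal sketch of the scanning technique this diff refines: every token kind becomes a named group in one compiled alternation, and `match.lastgroup` reports which rule fired. The rule names, rule subset, and driver loop below are illustrative assumptions rather than this library's actual API; only the quote, name, and whitespace patterns are lifted from the diff above.

    import re

    # Assumed token names -- a trimmed-down stand-in for the table
    # built by compile_rules() above.
    RULES = [
        ("ROOT", r"\$"),
        ("DOUBLE_QUOTE_STRING", r'"(?P<G_DQUOTE>(?:(?!(?<!\\)").)*)"'),
        ("DDOT", r"\.\."),  # must be listed before "." ...
        ("DOT", r"\."),     # ... the leftmost matching alternative wins
        ("LBRACKET", r"\["),
        ("RBRACKET", r"]"),
        ("WILD", r"\*"),
        ("NAME", r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"),
        ("WHITESPACE", r"[ \n\t\r]+"),
        ("ERROR", r"."),    # catch-all: anything else is a syntax error
    ]

    SCANNER = re.compile("|".join(f"(?P<{name}>{rule})" for name, rule in RULES))

    for match in SCANNER.finditer('$["store"].book[*]..price'):
        kind = match.lastgroup  # name of the rule that matched
        if kind == "DOUBLE_QUOTE_STRING":
            # Nested groups close before the enclosing group, so lastgroup
            # is still the outer rule name; the unquoted contents come
            # from the inner G_DQUOTE group.
            print(kind, repr(match.group("G_DQUOTE")))
        elif kind != "WHITESPACE":
            print(kind, repr(match.group()))

In this scheme rule order doubles as precedence, which is why the diff notes that TOKEN_NOT must come after "!=" and TOKEN_NAME after the reserved words.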