
Commit 33fe76d

Rewrite parser WIP [skip ci]
1 parent 4bfcb7c commit 33fe76d

File tree

10 files changed: +396 −346 lines


jsonpath/env.py

Lines changed: 17 additions & 16 deletions
@@ -92,8 +92,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     ## Class attributes
 
     Attributes:
-        pseudo_root_token (str): The pattern used to select a "fake" root node, one level
-            above the real root node.
+        pseudo_root_token (str): The pattern used to select a "fake" root node, one
+            level above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
         intersection_token (str): The pattern used as the intersection operator.
@@ -180,34 +180,35 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
         """
         tokens = self.lexer.tokenize(path)
         stream = TokenStream(tokens)
-        pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
+        pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
         _path: Union[JSONPath, CompoundJSONPath] = JSONPath(
             env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root
         )
 
-        if stream.current.kind != TOKEN_EOF:
+        # TODO: better!
+        if stream.current().kind != TOKEN_EOF:
             _path = CompoundJSONPath(env=self, path=_path)
-            while stream.current.kind != TOKEN_EOF:
-                if stream.peek.kind == TOKEN_EOF:
+            while stream.current().kind != TOKEN_EOF:
+                if stream.peek().kind == TOKEN_EOF:
                     # trailing union or intersection
                     raise JSONPathSyntaxError(
-                        f"expected a path after {stream.current.value!r}",
-                        token=stream.current,
+                        f"expected a path after {stream.current().value!r}",
+                        token=stream.current(),
                     )
 
-                if stream.current.kind == TOKEN_UNION:
-                    stream.next_token()
-                    pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
+                if stream.current().kind == TOKEN_UNION:
+                    stream.next()
+                    pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
                     _path = _path.union(
                         JSONPath(
                             env=self,
                             segments=self.parser.parse(stream),
                             pseudo_root=pseudo_root,
                         )
                     )
-                elif stream.current.kind == TOKEN_INTERSECTION:
-                    stream.next_token()
-                    pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
+                elif stream.current().kind == TOKEN_INTERSECTION:
+                    stream.next()
+                    pseudo_root = stream.current().kind == TOKEN_PSEUDO_ROOT
                     _path = _path.intersection(
                         JSONPath(
                             env=self,
@@ -218,8 +219,8 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
                 else:  # pragma: no cover
                     # Parser.parse catches this too
                     raise JSONPathSyntaxError(  # noqa: TRY003
-                        f"unexpected token {stream.current.value!r}",
-                        token=stream.current,
+                        f"unexpected token {stream.current().value!r}",
+                        token=stream.current(),
                    )
 
         return _path
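
Note: the compile() changes above swap property-style stream access (stream.current, stream.peek) and next_token() for method calls (stream.current(), stream.peek(), stream.next()). The rewritten TokenStream itself is not part of this excerpt, so the following is only a rough sketch of a cursor-style stream that would support this call pattern; the Token fields and the TOKEN_EOF sentinel are assumptions, not this repository's actual definitions.

# Illustrative sketch only: a list-backed token stream with current()/peek()/next()
# methods, matching the call style used in compile() above. Not the real TokenStream.
from dataclasses import dataclass
from typing import Iterable, List

TOKEN_EOF = "EOF"  # assumed sentinel kind


@dataclass
class Token:
    kind: str
    value: str
    index: int


class TokenStream:
    def __init__(self, tokens: Iterable[Token]) -> None:
        # Buffer the lexer's token iterator so we can look ahead without consuming.
        self.tokens: List[Token] = list(tokens)
        self.pos = 0
        self._eof = Token(kind=TOKEN_EOF, value="", index=-1)

    def current(self) -> Token:
        # Token at the cursor, without advancing.
        return self.tokens[self.pos] if self.pos < len(self.tokens) else self._eof

    def peek(self) -> Token:
        # Token one past the cursor, without advancing.
        return self.tokens[self.pos + 1] if self.pos + 1 < len(self.tokens) else self._eof

    def next(self) -> Token:
        # Return the current token and advance the cursor.
        token = self.current()
        self.pos += 1
        return token

Because peek() never consumes input, the compound-path loop above can check for a trailing union or intersection before committing to parse another path.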

jsonpath/lex.py

Lines changed: 34 additions & 3 deletions
@@ -15,13 +15,15 @@
 from .token import TOKEN_CONTAINS
 from .token import TOKEN_DDOT
 from .token import TOKEN_DOT
+from .token import TOKEN_DOT_PROPERTY
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
 from .token import TOKEN_ERROR
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
 from .token import TOKEN_FLOAT
+from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
 from .token import TOKEN_IN
@@ -82,7 +84,6 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`.
     """
 
     key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"
-    name_pattern = key_pattern  # XXX:
 
     # ! or `not`
     logical_not_pattern = r"(?:not\b)|!"
@@ -99,10 +100,15 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.double_quote_pattern = r'"(?P<G_DQUOTE>(?:(?!(?<!\\)").)*)"'
         self.single_quote_pattern = r"'(?P<G_SQUOTE>(?:(?!(?<!\\)').)*)'"
 
-        # TODO: separate re literal tokens
+        # .thing
+        self.dot_property_pattern = rf"(?P<G_DOT>\.)(?P<G_PROP>{self.key_pattern})"
+
         # /pattern/ or /pattern/flags
         self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
 
+        # func(
+        self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)(?P<G_FUNC_PAREN>\()"
+
         self.rules = self.compile_rules()
 
     def compile_rules(self) -> Pattern[str]:
@@ -122,6 +128,7 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
+            (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
             (TOKEN_DDOT, r"\.\."),
@@ -160,6 +167,7 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_LT, r"<"),
             (TOKEN_GT, r">"),
             (TOKEN_NOT, self.logical_not_pattern),  # Must go after "!="
+            (TOKEN_FUNCTION, self.function_pattern),
             (TOKEN_NAME, self.key_pattern),  # Must go after reserved words
             (TOKEN_LPAREN, r"\("),
             (TOKEN_RPAREN, r"\)"),
@@ -180,7 +188,18 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
             kind = match.lastgroup
             assert kind is not None
 
-            if kind == TOKEN_DOUBLE_QUOTE_STRING:
+            if kind == TOKEN_DOT_PROPERTY:
+                yield _token(
+                    kind=TOKEN_DOT,
+                    value=match.group("G_DOT"),
+                    index=match.start("G_DOT"),
+                )
+                yield _token(
+                    kind=TOKEN_NAME,
+                    value=match.group("G_PROP"),
+                    index=match.start("G_PROP"),
+                )
+            elif kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
                     value=match.group("G_DQUOTE"),
@@ -222,6 +241,18 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
                     value=match.group(),
                    index=match.start(),
                 )
+            elif kind == TOKEN_FUNCTION:
+                yield _token(
+                    kind=TOKEN_FUNCTION,
+                    value=match.group("G_FUNC"),
+                    index=match.start("G_FUNC"),
+                )
+
+                yield _token(
+                    kind=TOKEN_LPAREN,
+                    value=match.group("G_FUNC_PAREN"),
+                    index=match.start("G_FUNC_PAREN"),
+                )
             elif kind == TOKEN_ERROR:
                 raise JSONPathSyntaxError(
                     f"unexpected token {match.group()!r}",
