Commit 4bfcb7c

Version 2 WIP [skip ci]
1 parent 6b571e1 commit 4bfcb7c

15 files changed (+1348 -1208 lines)

jsonpath/env.py

Lines changed: 15 additions & 15 deletions
@@ -25,9 +25,9 @@
 from .filter import UNDEFINED
 from .filter import VALUE_TYPE_EXPRESSIONS
 from .filter import FilterExpression
+from .filter import FilterQuery
 from .filter import FunctionExtension
 from .filter import InfixExpression
-from .filter import Path
 from .fluent_api import Query
 from .function_extensions import ExpressionType
 from .function_extensions import FilterFunction
@@ -40,8 +40,8 @@
 from .path import JSONPath
 from .stream import TokenStream
 from .token import TOKEN_EOF
-from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_INTERSECTION
+from .token import TOKEN_PSEUDO_ROOT
 from .token import TOKEN_UNION
 from .token import Token
 
@@ -92,7 +92,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     ## Class attributes
 
     Attributes:
-        fake_root_token (str): The pattern used to select a "fake" root node, one level
+        pseudo_root_token (str): The pattern used to select a "fake" root node, one level
             above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
@@ -117,7 +117,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
 
     # These should be unescaped strings. `re.escape` will be called
     # on them automatically when compiling lexer rules.
-    fake_root_token = "^"
+    pseudo_root_token = "^"
     filter_context_token = "_"
     intersection_token = "&"
     key_token = "#"
@@ -180,9 +180,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
         """
         tokens = self.lexer.tokenize(path)
         stream = TokenStream(tokens)
-        fake_root = stream.current.kind == TOKEN_FAKE_ROOT
+        pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
         _path: Union[JSONPath, CompoundJSONPath] = JSONPath(
-            env=self, selectors=self.parser.parse(stream), fake_root=fake_root
+            env=self, segments=self.parser.parse(stream), pseudo_root=pseudo_root
         )
 
         if stream.current.kind != TOKEN_EOF:
@@ -197,22 +197,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003
 
         if stream.current.kind == TOKEN_UNION:
             stream.next_token()
-            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
+            pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
             _path = _path.union(
                 JSONPath(
                     env=self,
-                    selectors=self.parser.parse(stream),
-                    fake_root=fake_root,
+                    segments=self.parser.parse(stream),
+                    pseudo_root=pseudo_root,
                 )
             )
         elif stream.current.kind == TOKEN_INTERSECTION:
             stream.next_token()
-            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
+            pseudo_root = stream.current.kind == TOKEN_PSEUDO_ROOT
            _path = _path.intersection(
                JSONPath(
                    env=self,
-                   selectors=self.parser.parse(stream),
-                   fake_root=fake_root,
+                   segments=self.parser.parse(stream),
+                   pseudo_root=pseudo_root,
                )
            )
        else:  # pragma: no cover
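
The hunk above shows `compile()` checking for the renamed pseudo-root token before parsing each sub-path. Below is a minimal usage sketch, assuming the package is importable as `jsonpath` and that the default `^` token behaves as described in the class docstring (a node one level above the real root, so the document itself can be filtered); the query and data are made up for illustration.

# Hypothetical usage of the pseudo root token; not part of this commit.
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()

# "^" is env.pseudo_root_token (previously fake_root_token). It addresses a
# fictitious node one level above the real root, so a filter can select or
# reject the whole document.
path = env.compile("^[?@.version > 1]")

data = {"version": 2, "name": "example"}
print(path.findall(data))  # expected: [data] when the filter matches
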
@@ -456,21 +456,21 @@ def check_well_typedness(
             if typ == ExpressionType.VALUE:
                 if not (
                     isinstance(arg, VALUE_TYPE_EXPRESSIONS)
-                    or (isinstance(arg, Path) and arg.path.singular_query())
+                    or (isinstance(arg, FilterQuery) and arg.path.singular_query())
                     or (self._function_return_type(arg) == ExpressionType.VALUE)
                 ):
                     raise JSONPathTypeError(
                         f"{token.value}() argument {idx} must be of ValueType",
                         token=token,
                     )
             elif typ == ExpressionType.LOGICAL:
-                if not isinstance(arg, (Path, InfixExpression)):
+                if not isinstance(arg, (FilterQuery, InfixExpression)):
                     raise JSONPathTypeError(
                         f"{token.value}() argument {idx} must be of LogicalType",
                         token=token,
                     )
             elif typ == ExpressionType.NODES and not (
-                isinstance(arg, Path)
+                isinstance(arg, FilterQuery)
                 or self._function_return_type(arg) == ExpressionType.NODES
             ):
                 raise JSONPathTypeError(
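
`check_well_typedness()` now validates function-extension arguments against `FilterQuery` rather than `Path`. What follows is a hedged sketch of a custom filter function whose declared `arg_types`/`return_type` would be checked this way. The `TypeOf` class, its registration key and the sample query are invented; `FilterFunction` and `ExpressionType` appear in this diff's imports, while the `function_extensions` mapping and `env.findall()` are assumed from the library's public API.

# Illustrative only; assumes FilterFunction subclasses declare arg_types and
# return_type, as suggested by the imports and checks in env.py above.
from jsonpath import JSONPathEnvironment
from jsonpath.function_extensions import ExpressionType, FilterFunction


class TypeOf(FilterFunction):
    """Return the Python type name of a value (hypothetical example)."""

    arg_types = [ExpressionType.VALUE]  # checked by check_well_typedness()
    return_type = ExpressionType.VALUE

    def __call__(self, obj: object) -> str:
        return type(obj).__name__


env = JSONPathEnvironment()
# Registration via the function_extensions mapping is assumed here.
env.function_extensions["typeof"] = TypeOf()

# `@.price` is a singular query, so it is well-typed as a ValueType argument.
print(env.findall("$.products[?typeof(@.price) == 'int']", {"products": [{"price": 3}]}))
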

jsonpath/filter.py

Lines changed: 9 additions & 13 deletions
@@ -23,7 +23,6 @@
 from .function_extensions import FilterFunction
 from .match import NodeList
 from .selectors import Filter as FilterSelector
-from .selectors import ListSelector
 from .serialize import canonical_string
 
 if TYPE_CHECKING:
@@ -494,7 +493,7 @@ def set_children(self, children: List[FilterExpression]) -> None:
         self._expr.set_children(children)
 
 
-class Path(FilterExpression, ABC):
+class FilterQuery(FilterExpression, ABC):
     """Base expression for all _sub paths_ found in filter expressions."""
 
     __slots__ = ("path",)
@@ -504,25 +503,22 @@ def __init__(self, path: JSONPath) -> None:
         super().__init__()
 
     def __eq__(self, other: object) -> bool:
-        return isinstance(other, Path) and str(self) == str(other)
+        return isinstance(other, FilterQuery) and str(self) == str(other)
 
     def children(self) -> List[FilterExpression]:
         _children: List[FilterExpression] = []
-        for segment in self.path.selectors:
-            if isinstance(segment, ListSelector):
-                _children.extend(
-                    selector.expression
-                    for selector in segment.items
-                    if isinstance(selector, FilterSelector)
-                )
+        for segment in self.path.segments:
+            for selector in segment.selectors:
+                if isinstance(selector, FilterSelector):
+                    _children.append(selector.expression)
         return _children
 
     def set_children(self, children: List[FilterExpression]) -> None:  # noqa: ARG002
         # self.path has its own cache
         return
 
 
-class SelfPath(Path):
+class RelativeFilterQuery(FilterQuery):
     """A JSONPath starting at the current node."""
 
     __slots__ = ()
@@ -572,7 +568,7 @@ async def evaluate_async(self, context: FilterContext) -> object:
         )
 
 
-class RootPath(Path):
+class RootFilterQuery(FilterQuery):
     """A JSONPath starting at the root node."""
 
     __slots__ = ()
@@ -606,7 +602,7 @@ async def evaluate_async(self, context: FilterContext) -> object:
         )
 
 
-class FilterContextPath(Path):
+class FilterContextPath(FilterQuery):
     """A JSONPath starting at the root of any extra context data."""
 
     __slots__ = ()
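
The old `Path`, `SelfPath` and `RootPath` classes become `FilterQuery`, `RelativeFilterQuery` and `RootFilterQuery`, in line with RFC 9535's terminology for queries embedded in filter expressions. Below is a hedged example of where each kind appears; the data is invented and the `filter_context` argument is assumed from the library's extra filter context feature (the `_` token handled by `FilterContextPath`).

# Illustrative filter showing all three embedded-query kinds; not from this commit.
import jsonpath

data = {
    "limit": 10,
    "products": [
        {"name": "pen", "price": 2},
        {"name": "book", "price": 15},
    ],
}

# @.price    -> RelativeFilterQuery (rooted at the current node)
# $.limit    -> RootFilterQuery     (rooted at the document root)
# _.currency -> FilterContextPath   (rooted at extra filter context data)
matches = jsonpath.findall(
    "$.products[?@.price < $.limit and _.currency == 'GBP'].name",
    data,
    filter_context={"currency": "GBP"},
)
print(matches)  # expected: ['pen']
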

jsonpath/lex.py

Lines changed: 23 additions & 73 deletions
@@ -10,41 +10,40 @@
 
 from .exceptions import JSONPathSyntaxError
 from .token import TOKEN_AND
-from .token import TOKEN_BARE_PROPERTY
+from .token import TOKEN_COLON
 from .token import TOKEN_COMMA
 from .token import TOKEN_CONTAINS
 from .token import TOKEN_DDOT
-from .token import TOKEN_DOT_PROPERTY
+from .token import TOKEN_DOT
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
-from .token import TOKEN_FAKE_ROOT
+from .token import TOKEN_ERROR
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
 from .token import TOKEN_FLOAT
-from .token import TOKEN_FUNCTION
 from .token import TOKEN_GE
 from .token import TOKEN_GT
-from .token import TOKEN_ILLEGAL
 from .token import TOKEN_IN
 from .token import TOKEN_INT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_KEY
 from .token import TOKEN_KEYS
+from .token import TOKEN_KEYS_FILTER
+from .token import TOKEN_LBRACKET
 from .token import TOKEN_LE
 from .token import TOKEN_LG
-from .token import TOKEN_LIST_SLICE
-from .token import TOKEN_LIST_START
 from .token import TOKEN_LPAREN
 from .token import TOKEN_LT
 from .token import TOKEN_MISSING
+from .token import TOKEN_NAME
 from .token import TOKEN_NE
 from .token import TOKEN_NIL
 from .token import TOKEN_NONE
 from .token import TOKEN_NOT
 from .token import TOKEN_NULL
 from .token import TOKEN_OR
-from .token import TOKEN_PROPERTY
+from .token import TOKEN_PSEUDO_ROOT
 from .token import TOKEN_RBRACKET
 from .token import TOKEN_RE
 from .token import TOKEN_RE_FLAGS
@@ -53,13 +52,10 @@
 from .token import TOKEN_RPAREN
 from .token import TOKEN_SELF
 from .token import TOKEN_SINGLE_QUOTE_STRING
-from .token import TOKEN_SKIP
-from .token import TOKEN_SLICE_START
-from .token import TOKEN_SLICE_STEP
-from .token import TOKEN_SLICE_STOP
 from .token import TOKEN_TRUE
 from .token import TOKEN_UNDEFINED
 from .token import TOKEN_UNION
+from .token import TOKEN_WHITESPACE
 from .token import TOKEN_WILD
 from .token import Token
 
@@ -86,8 +82,9 @@ class attributes. Then setting `lexer_class` on a `JSONPathEnvironment`.
     """
 
     key_pattern = r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*"
+    name_pattern = key_pattern  # XXX:
 
-    # `not` or !
+    # ! or `not`
     logical_not_pattern = r"(?:not\b)|!"
 
     # && or `and`
@@ -102,28 +99,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.double_quote_pattern = r'"(?P<G_DQUOTE>(?:(?!(?<!\\)").)*)"'
         self.single_quote_pattern = r"'(?P<G_SQUOTE>(?:(?!(?<!\\)').)*)'"
 
-        # .thing
-        self.dot_property_pattern = rf"\.(?P<G_PROP>{self.key_pattern})"
-
-        self.slice_list_pattern = (
-            r"(?P<G_LSLICE_START>\-?\d*)\s*"
-            r":\s*(?P<G_LSLICE_STOP>\-?\d*)\s*"
-            r"(?::\s*(?P<G_LSLICE_STEP>\-?\d*))?"
-        )
-
+        # TODO: separate re literal tokens
         # /pattern/ or /pattern/flags
         self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
 
-        # func(
-        self.function_pattern = r"(?P<G_FUNC>[a-z][a-z_0-9]+)\(\s*"
-
         self.rules = self.compile_rules()
 
     def compile_rules(self) -> Pattern[str]:
         """Prepare regular expression rules."""
         env_tokens = [
             (TOKEN_ROOT, self.env.root_token),
-            (TOKEN_FAKE_ROOT, self.env.fake_root_token),
+            (TOKEN_PSEUDO_ROOT, self.env.pseudo_root_token),
             (TOKEN_SELF, self.env.self_token),
             (TOKEN_KEY, self.env.key_token),
             (TOKEN_UNION, self.env.union_token),
@@ -136,12 +122,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
             (TOKEN_RE_PATTERN, self.re_pattern),
-            (TOKEN_LIST_SLICE, self.slice_list_pattern),
-            (TOKEN_FUNCTION, self.function_pattern),
-            (TOKEN_DOT_PROPERTY, self.dot_property_pattern),
             (TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
             (TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
             (TOKEN_DDOT, r"\.\."),
+            (TOKEN_DOT, r"\."),
             (TOKEN_AND, self.logical_and_pattern),
             (TOKEN_OR, self.logical_or_pattern),
             *[
@@ -153,6 +137,7 @@ def compile_rules(self) -> Pattern[str]:
             ],
             (TOKEN_WILD, r"\*"),
             (TOKEN_FILTER, r"\?"),
+            (TOKEN_KEYS_FILTER, r"~\?"),  # TODO: get from env
             (TOKEN_IN, r"in\b"),
             (TOKEN_TRUE, r"[Tt]rue\b"),
             (TOKEN_FALSE, r"[Ff]alse\b"),
@@ -162,9 +147,10 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_CONTAINS, r"contains\b"),
             (TOKEN_UNDEFINED, r"undefined\b"),
             (TOKEN_MISSING, r"missing\b"),
-            (TOKEN_LIST_START, r"\["),
+            (TOKEN_LBRACKET, r"\["),
             (TOKEN_RBRACKET, r"]"),
             (TOKEN_COMMA, r","),
+            (TOKEN_COLON, r":"),
             (TOKEN_EQ, r"=="),
             (TOKEN_NE, r"!="),
             (TOKEN_LG, r"<>"),
@@ -173,12 +159,12 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_RE, r"=~"),
             (TOKEN_LT, r"<"),
             (TOKEN_GT, r">"),
-            (TOKEN_NOT, self.logical_not_pattern),
-            (TOKEN_BARE_PROPERTY, self.key_pattern),
+            (TOKEN_NOT, self.logical_not_pattern),  # Must go after "!="
+            (TOKEN_NAME, self.key_pattern),  # Must go after reserved words
             (TOKEN_LPAREN, r"\("),
             (TOKEN_RPAREN, r"\)"),
-            (TOKEN_SKIP, r"[ \n\t\r\.]+"),
-            (TOKEN_ILLEGAL, r"."),
+            (TOKEN_WHITESPACE, r"[ \n\t\r]+"),
+            (TOKEN_ERROR, r"."),
         ]
 
         return re.compile(
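
`compile_rules()` joins every rule into a single alternation of named groups, so `tokenize()` (next hunk) can dispatch on `match.lastgroup`. The stand-alone sketch below demonstrates that technique with a toy rule table; none of these names or patterns are the library's real ones.

# Stand-alone sketch of named-group alternation lexing; toy rules only.
import re
from typing import Iterator, NamedTuple


class Tok(NamedTuple):
    kind: str
    value: str
    index: int


RULES = re.compile(
    "|".join(
        f"(?P<{kind}>{pattern})"
        for kind, pattern in [
            ("ROOT", r"\$"),
            ("DDOT", r"\.\."),  # must precede DOT
            ("DOT", r"\."),
            ("NAME", r"[a-zA-Z_][a-zA-Z0-9_-]*"),
            ("WHITESPACE", r"[ \t\r\n]+"),
            ("ERROR", r"."),  # catch-all, must be last
        ]
    )
)


def tokenize(path: str) -> Iterator[Tok]:
    # `lastgroup` names the branch of the alternation that matched.
    for match in RULES.finditer(path):
        kind = match.lastgroup
        assert kind is not None
        if kind == "ERROR":
            raise SyntaxError(f"unexpected {match.group()!r} at {match.start()}")
        yield Tok(kind, match.group(), match.start())


print(list(tokenize("$.store.book")))
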
@@ -194,35 +180,7 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
             kind = match.lastgroup
             assert kind is not None
 
-            if kind == TOKEN_DOT_PROPERTY:
-                yield _token(
-                    kind=TOKEN_PROPERTY,
-                    value=match.group("G_PROP"),
-                    index=match.start("G_PROP"),
-                )
-            elif kind == TOKEN_BARE_PROPERTY:
-                yield _token(
-                    kind=TOKEN_BARE_PROPERTY,
-                    value=match.group(),
-                    index=match.start(),
-                )
-            elif kind == TOKEN_LIST_SLICE:
-                yield _token(
-                    kind=TOKEN_SLICE_START,
-                    value=match.group("G_LSLICE_START"),
-                    index=match.start("G_LSLICE_START"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STOP,
-                    value=match.group("G_LSLICE_STOP"),
-                    index=match.start("G_LSLICE_STOP"),
-                )
-                yield _token(
-                    kind=TOKEN_SLICE_STEP,
-                    value=match.group("G_LSLICE_STEP") or "",
-                    index=match.start("G_LSLICE_STEP"),
-                )
-            elif kind == TOKEN_DOUBLE_QUOTE_STRING:
+            if kind == TOKEN_DOUBLE_QUOTE_STRING:
                 yield _token(
                     kind=TOKEN_DOUBLE_QUOTE_STRING,
                     value=match.group("G_DQUOTE"),
@@ -264,19 +222,11 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
                     value=match.group(),
                     index=match.start(),
                 )
-            elif kind == TOKEN_FUNCTION:
-                yield _token(
-                    kind=TOKEN_FUNCTION,
-                    value=match.group("G_FUNC"),
-                    index=match.start("G_FUNC"),
-                )
-            elif kind == TOKEN_SKIP:
-                continue
-            elif kind == TOKEN_ILLEGAL:
+            elif kind == TOKEN_ERROR:
                 raise JSONPathSyntaxError(
                     f"unexpected token {match.group()!r}",
                     token=_token(
-                        TOKEN_ILLEGAL,
+                        TOKEN_ERROR,
                         value=match.group(),
                         index=match.start(),
                     ),
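
With the slice, dot-property and function rules removed, the lexer now emits finer-grained tokens (TOKEN_DOT, TOKEN_COLON, TOKEN_NAME, TOKEN_WHITESPACE) and leaves assembling them to the parser. A quick, hedged way to inspect the stream uses the same `lexer.tokenize()` call that `compile()` makes in env.py; the token attributes follow the `_token(...)` calls above, and the printed kinds are indicative rather than verified output of this work-in-progress commit.

# Peek at the revised token stream; output shape is assumed, not verified.
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()

for token in env.lexer.tokenize("$.store.book[0]"):
    # Each token carries at least a kind and a value (see _token(...) above).
    print(token.kind, repr(token.value))
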
