Skip to content

Commit b4cb9c2

Browse files
committed
Add "key" and "keys filter" JSONPath selectors
1 parent e338a0c commit b4cb9c2

File tree

6 files changed

+206
-12
lines changed

6 files changed

+206
-12
lines changed

jsonpath/env.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
102102
filtering a mapping or sequence. Defaults to `"#"`.
103103
keys_selector_token (str): The pattern used as the "keys" selector. Defaults to
104104
`"~"`.
105+
keys_filter_token (str): The pattern used as the "keys filter" selector.
106+
Defaults to `"~?"`.
105107
lexer_class: The lexer to use when tokenizing path strings.
106108
max_int_index (int): The maximum integer allowed when selecting array items by
107109
index. Defaults to `(2**53) - 1`.
@@ -122,6 +124,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
122124
intersection_token = "&"
123125
key_token = "#"
124126
keys_selector_token = "~"
127+
keys_filter_token = "~?"
125128
root_token = "$"
126129
self_token = "@"
127130
union_token = "|"

jsonpath/lex.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .token import TOKEN_CONTAINS
1616
from .token import TOKEN_DDOT
1717
from .token import TOKEN_DOT
18+
from .token import TOKEN_DOT_KEY_PROPERTY
1819
from .token import TOKEN_DOT_PROPERTY
1920
from .token import TOKEN_DOUBLE_QUOTE_STRING
2021
from .token import TOKEN_EQ
@@ -30,6 +31,7 @@
3031
from .token import TOKEN_INT
3132
from .token import TOKEN_INTERSECTION
3233
from .token import TOKEN_KEY
34+
from .token import TOKEN_KEY_NAME
3335
from .token import TOKEN_KEYS
3436
from .token import TOKEN_KEYS_FILTER
3537
from .token import TOKEN_LBRACKET
@@ -103,6 +105,13 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
103105
# .thing
104106
self.dot_property_pattern = rf"(?P<G_DOT>\.)(?P<G_PROP>{self.key_pattern})"
105107

108+
# .~thing
109+
self.dot_key_pattern = (
110+
r"(?P<G_DOT_KEY>\.)"
111+
rf"(?P<G_KEY>{re.escape(env.keys_selector_token)})"
112+
rf"(?P<G_PROP_KEY>{self.key_pattern})"
113+
)
114+
106115
# /pattern/ or /pattern/flags
107116
self.re_pattern = r"/(?P<G_RE>.+?)/(?P<G_RE_FLAGS>[aims]*)"
108117

@@ -122,12 +131,14 @@ def compile_rules(self) -> Pattern[str]:
122131
(TOKEN_INTERSECTION, self.env.intersection_token),
123132
(TOKEN_FILTER_CONTEXT, self.env.filter_context_token),
124133
(TOKEN_KEYS, self.env.keys_selector_token),
134+
(TOKEN_KEYS_FILTER, self.env.keys_filter_token),
125135
]
126136

127137
rules = [
128138
(TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
129139
(TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
130140
(TOKEN_RE_PATTERN, self.re_pattern),
141+
(TOKEN_DOT_KEY_PROPERTY, self.dot_key_pattern),
131142
(TOKEN_DOT_PROPERTY, self.dot_property_pattern),
132143
(TOKEN_FLOAT, r"-?\d+\.\d*(?:[eE][+-]?\d+)?"),
133144
(TOKEN_INT, r"-?\d+(?P<G_EXP>[eE][+\-]?\d+)?\b"),
@@ -144,7 +155,6 @@ def compile_rules(self) -> Pattern[str]:
144155
],
145156
(TOKEN_WILD, r"\*"),
146157
(TOKEN_FILTER, r"\?"),
147-
(TOKEN_KEYS_FILTER, r"~\?"), # TODO: get from env
148158
(TOKEN_IN, r"in\b"),
149159
(TOKEN_TRUE, r"[Tt]rue\b"),
150160
(TOKEN_FALSE, r"[Ff]alse\b"),
@@ -199,6 +209,17 @@ def tokenize(self, path: str) -> Iterator[Token]: # noqa PLR0912
199209
value=match.group("G_PROP"),
200210
index=match.start("G_PROP"),
201211
)
212+
elif kind == TOKEN_DOT_KEY_PROPERTY:
213+
yield _token(
214+
kind=TOKEN_DOT,
215+
value=match.group("G_DOT_KEY"),
216+
index=match.start("G_DOT_KEY"),
217+
)
218+
yield _token(
219+
kind=TOKEN_KEY_NAME,
220+
value=match.group("G_PROP_KEY"),
221+
index=match.start("G_PROP_KEY"),
222+
)
202223
elif kind == TOKEN_DOUBLE_QUOTE_STRING:
203224
yield _token(
204225
kind=TOKEN_DOUBLE_QUOTE_STRING,

jsonpath/parse.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from typing import Iterator
1212
from typing import List
1313
from typing import Optional
14+
from typing import Union
1415

1516
from jsonpath.function_extensions.filter_function import ExpressionType
1617
from jsonpath.function_extensions.filter_function import FilterFunction
@@ -45,6 +46,8 @@
4546
from .selectors import Filter
4647
from .selectors import IndexSelector
4748
from .selectors import JSONPathSelector
49+
from .selectors import KeySelector
50+
from .selectors import KeysFilter
4851
from .selectors import KeysSelector
4952
from .selectors import PropertySelector
5053
from .selectors import SliceSelector
@@ -69,7 +72,9 @@
6972
from .token import TOKEN_INT
7073
from .token import TOKEN_INTERSECTION
7174
from .token import TOKEN_KEY
75+
from .token import TOKEN_KEY_NAME
7276
from .token import TOKEN_KEYS
77+
from .token import TOKEN_KEYS_FILTER
7378
from .token import TOKEN_LBRACKET
7479
from .token import TOKEN_LE
7580
from .token import TOKEN_LG
@@ -314,15 +319,15 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
314319
if _token.kind == TOKEN_DOT:
315320
stream.eat(TOKEN_DOT)
316321
# Assert that dot is followed by shorthand selector without whitespace.
317-
stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS)
322+
stream.expect(TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME)
318323
token = stream.current()
319-
selectors = self.parse_selectors(stream)
324+
selectors = self.parse_selector(stream)
320325
yield JSONPathChildSegment(
321326
env=self.env, token=token, selectors=selectors
322327
)
323328
elif _token.kind == TOKEN_DDOT:
324329
token = stream.eat(TOKEN_DDOT)
325-
selectors = self.parse_selectors(stream)
330+
selectors = self.parse_selector(stream)
326331
if not selectors:
327332
raise JSONPathSyntaxError(
328333
"missing selector for recursive descent segment",
@@ -332,22 +337,22 @@ def parse_path(self, stream: TokenStream) -> Iterable[JSONPathSegment]:
332337
env=self.env, token=token, selectors=selectors
333338
)
334339
elif _token.kind == TOKEN_LBRACKET:
335-
selectors = self.parse_selectors(stream)
340+
selectors = self.parse_selector(stream)
336341
yield JSONPathChildSegment(
337342
env=self.env, token=_token, selectors=selectors
338343
)
339-
elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS}:
344+
elif _token.kind in {TOKEN_NAME, TOKEN_WILD, TOKEN_KEYS, TOKEN_KEY_NAME}:
340345
# A non-standard "bare" path. One without a leading identifier (`$`,
341346
# `@`, `^` or `_`).
342347
token = stream.current()
343-
selectors = self.parse_selectors(stream)
348+
selectors = self.parse_selector(stream)
344349
yield JSONPathChildSegment(
345350
env=self.env, token=token, selectors=selectors
346351
)
347352
else:
348353
break
349354

350-
def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
355+
def parse_selector(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
351356
token = stream.next()
352357

353358
if token.kind == TOKEN_NAME:
@@ -359,6 +364,15 @@ def parse_selectors(self, stream: TokenStream) -> tuple[JSONPathSelector, ...]:
359364
),
360365
)
361366

367+
if token.kind == TOKEN_KEY_NAME:
368+
return (
369+
KeySelector(
370+
env=self.env,
371+
token=token,
372+
key=token.value,
373+
),
374+
)
375+
362376
if token.kind == TOKEN_WILD:
363377
return (
364378
WildSelector(
@@ -432,6 +446,8 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
432446
stream.next()
433447
elif token.kind == TOKEN_FILTER:
434448
selectors.append(self.parse_filter_selector(stream))
449+
elif token.kind == TOKEN_KEYS_FILTER:
450+
selectors.append(self.parse_filter_selector(stream, keys=True))
435451
elif token.kind == TOKEN_EOF:
436452
raise JSONPathSyntaxError("unexpected end of query", token=token)
437453
else:
@@ -514,8 +530,10 @@ def _maybe_index(token: Token) -> bool:
514530
step=step,
515531
)
516532

517-
def parse_filter_selector(self, stream: TokenStream) -> Filter:
518-
token = stream.eat(TOKEN_FILTER)
533+
def parse_filter_selector(
534+
self, stream: TokenStream, *, keys: bool = False
535+
) -> Union[Filter, KeysFilter]:
536+
token = stream.next()
519537
expr = self.parse_filter_expression(stream)
520538

521539
if self.env.well_typed and isinstance(expr, FunctionExtension):
@@ -536,6 +554,11 @@ def parse_filter_selector(self, stream: TokenStream) -> Filter:
536554
token=token,
537555
)
538556

557+
if keys:
558+
return KeysFilter(
559+
env=self.env, token=token, expression=BooleanExpression(expr)
560+
)
561+
539562
return Filter(env=self.env, token=token, expression=BooleanExpression(expr))
540563

541564
def parse_boolean(self, stream: TokenStream) -> FilterExpression:

jsonpath/selectors.py

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,57 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc
176176
yield match
177177

178178

179+
class KeySelector(JSONPathSelector):
180+
"""Select a single mapping/object name/key.
181+
182+
NOTE: This is a non-standard selector.
183+
184+
See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#key-selector.
185+
"""
186+
187+
__slots__ = ("key",)
188+
189+
def __init__(self, *, env: JSONPathEnvironment, token: Token, key: str) -> None:
190+
super().__init__(env=env, token=token)
191+
self.key = key
192+
193+
def __str__(self) -> str:
194+
return f"{self.env.keys_selector_token}{canonical_string(self.key)}"
195+
196+
def __eq__(self, __value: object) -> bool:
197+
return (
198+
isinstance(__value, KeySelector)
199+
and self.token == __value.token
200+
and self.key == __value.key
201+
)
202+
203+
def __hash__(self) -> int:
204+
return hash((self.token, self.key))
205+
206+
def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
207+
if isinstance(node.obj, Mapping) and self.key in node.obj:
208+
match = node.__class__(
209+
filter_context=node.filter_context(),
210+
obj=self.key,
211+
parent=node,
212+
parts=node.parts + (f"{self.env.keys_selector_token}{self.key}",),
213+
path=f"{node.path}[{self}]",
214+
root=node.root,
215+
)
216+
node.add_child(match)
217+
yield match
218+
219+
async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
220+
for _node in self.resolve(node):
221+
yield _node
222+
223+
179224
class KeysSelector(JSONPathSelector):
180225
"""Select mapping/object keys/properties.
181226
182227
NOTE: This is a non-standard selector.
228+
229+
See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-selector
183230
"""
184231

185232
__slots__ = ()
@@ -198,13 +245,13 @@ def __hash__(self) -> int:
198245

199246
def _keys(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
200247
if isinstance(node.obj, Mapping):
201-
for i, key in enumerate(node.obj.keys()):
248+
for key in node.obj:
202249
match = node.__class__(
203250
filter_context=node.filter_context(),
204251
obj=key,
205252
parent=node,
206253
parts=node.parts + (f"{self.env.keys_selector_token}{key}",),
207-
path=f"{node.path}[{self.env.keys_selector_token}][{i}]",
254+
path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
208255
root=node.root,
209256
)
210257
node.add_child(match)
@@ -449,6 +496,98 @@ async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatc
449496
yield match
450497

451498

499+
class KeysFilter(JSONPathSelector):
500+
"""Selects names from an object’s name/value members.
501+
502+
NOTE: This is a non-standard selector.
503+
504+
See https://jg-rp.github.io/json-p3/guides/jsonpath-extra#keys-filter-selector
505+
"""
506+
507+
__slots__ = ("expression",)
508+
509+
def __init__(
510+
self,
511+
*,
512+
env: JSONPathEnvironment,
513+
token: Token,
514+
expression: BooleanExpression,
515+
) -> None:
516+
super().__init__(env=env, token=token)
517+
self.expression = expression
518+
519+
def __str__(self) -> str:
520+
return f"~?{self.expression}"
521+
522+
def __eq__(self, __value: object) -> bool:
523+
return (
524+
isinstance(__value, Filter)
525+
and self.expression == __value.expression
526+
and self.token == __value.token
527+
)
528+
529+
def __hash__(self) -> int:
530+
return hash(("~", str(self.expression), self.token))
531+
532+
def resolve(self, node: JSONPathMatch) -> Iterable[JSONPathMatch]:
533+
if isinstance(node.value, Mapping):
534+
for key, val in node.value.items():
535+
context = FilterContext(
536+
env=self.env,
537+
current=val,
538+
root=node.root,
539+
extra_context=node.filter_context(),
540+
current_key=key,
541+
)
542+
543+
try:
544+
if self.expression.evaluate(context):
545+
match = node.__class__(
546+
filter_context=node.filter_context(),
547+
obj=key,
548+
parent=node,
549+
parts=node.parts
550+
+ (f"{self.env.keys_selector_token}{key}",),
551+
path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
552+
root=node.root,
553+
)
554+
node.add_child(match)
555+
yield match
556+
except JSONPathTypeError as err:
557+
if not err.token:
558+
err.token = self.token
559+
raise
560+
561+
async def resolve_async(self, node: JSONPathMatch) -> AsyncIterable[JSONPathMatch]:
562+
if isinstance(node.value, Mapping):
563+
for key, val in node.value.items():
564+
context = FilterContext(
565+
env=self.env,
566+
current=val,
567+
root=node.root,
568+
extra_context=node.filter_context(),
569+
current_key=key,
570+
)
571+
572+
try:
573+
if await self.expression.evaluate_async(context):
574+
match = node.__class__(
575+
filter_context=node.filter_context(),
576+
obj=key,
577+
parent=node,
578+
parts=node.parts
579+
+ (f"{self.env.keys_selector_token}{key}",),
580+
path=f"{node.path}[{self.env.keys_selector_token}{canonical_string(key)}]",
581+
root=node.root,
582+
)
583+
node.add_child(match)
584+
yield match
585+
except JSONPathTypeError as err:
586+
if not err.token:
587+
err.token = self.token
588+
raise
589+
590+
452591
class FilterContext:
453592
"""Contextual information and data for evaluating a filter expression."""
454593

jsonpath/token.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
TOKEN_WILD = sys.intern("TOKEN_WILD")
2525
TOKEN_NAME = sys.intern("TOKEN_NAME")
2626
TOKEN_DOT_PROPERTY = sys.intern("TOKEN_DOT_PROPERTY")
27+
TOKEN_DOT_KEY_PROPERTY = sys.intern("TOKEN_DOT_KEY_PROPERTY")
28+
TOKEN_KEY_NAME = sys.intern("TOKEN_KEY_NAME")
2729

2830
# Filter expression tokens
2931
TOKEN_AND = sys.intern("TOKEN_AND")

0 commit comments

Comments (0)