Skip to content

Commit a8da304

Browse files
authored
Fix recursive descent nondeterminism (#2)
* Fix recursive descent nondeterminism * Add test cases from glyn/jsonpath-nondeterminism * Tidy
1 parent 100dc1c commit a8da304

File tree

4 files changed

+570
-102
lines changed

4 files changed

+570
-102
lines changed

jsonpath_rfc9535/segments.py

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,8 @@ class JSONPathRecursiveDescentSegment(JSONPathSegment):
6969

7070
def resolve(self, nodes: Iterable[JSONPathNode]) -> Iterable[JSONPathNode]:
7171
"""Select descendants of each node in _nodes_."""
72-
# The nondeterministic visitor never generates a pre order traversal, so we
73-
# still use the deterministic visitor 20% of the time, to cover all
74-
# permutations.
75-
#
76-
# XXX: This feels like a bit of a hack.
7772
visitor = (
78-
self._nondeterministic_visit
79-
if self.env.nondeterministic and random.random() < 0.8 # noqa: S311, PLR2004
80-
else self._visit
73+
self._nondeterministic_visit if self.env.nondeterministic else self._visit
8174
)
8275

8376
for node in nodes:
@@ -114,51 +107,51 @@ def _visit(self, node: JSONPathNode, depth: int = 1) -> Iterable[JSONPathNode]:
114107
def _nondeterministic_visit(
115108
self,
116109
root: JSONPathNode,
117-
_: int = 1,
110+
depth: int = 1,
118111
) -> Iterable[JSONPathNode]:
119-
def _children(node: JSONPathNode) -> Iterable[JSONPathNode]:
120-
if isinstance(node.value, dict):
121-
items = list(node.value.items())
122-
random.shuffle(items)
123-
for name, val in items:
124-
if isinstance(val, (dict, list)):
125-
yield JSONPathNode(
126-
value=val,
127-
location=node.location + (name,),
128-
root=node.root,
129-
)
130-
elif isinstance(node.value, list):
131-
for i, element in enumerate(node.value):
132-
if isinstance(element, (dict, list)):
133-
yield JSONPathNode(
134-
value=element,
135-
location=node.location + (i,),
136-
root=node.root,
137-
)
138-
112+
"""Nondeterministic node traversal."""
139113
# (node, depth) tuples
140114
queue: Deque[Tuple[JSONPathNode, int]] = deque()
141115

142-
yield root # visit the root node
143-
queue.extend([(child, 1) for child in _children(root)]) # queue root's children
116+
# Visit the root node
117+
yield root
118+
119+
# Queue root's children
120+
queue.extend([(child, depth) for child in _nondeterministic_children(root)])
144121

145122
while queue:
146-
_node, depth = queue.popleft()
123+
node, depth = queue.popleft()
124+
yield node
147125

148126
if depth >= self.env.max_recursion_depth:
149127
raise JSONPathRecursionError(
150128
"recursion limit exceeded", token=self.token
151129
)
152130

153-
yield _node
154-
155-
# Visit child nodes now or queue them for later?
131+
# Randomly choose to visit child nodes now or queue them for later?
156132
visit_children = random.choice([True, False]) # noqa: S311
157133

158-
for child in _children(_node):
134+
for child in _nondeterministic_children(node):
159135
if visit_children:
160136
yield child
161-
queue.extend([(child, depth + 2) for child in _children(child)])
137+
138+
# Queue grandchildren by randomly interleaving them into the
139+
# queue while maintaining queue and grandchild order.
140+
grandchildren = [
141+
(child, depth + 2)
142+
for child in _nondeterministic_children(child)
143+
]
144+
145+
queue = deque(
146+
[
147+
next(n)
148+
for n in random.sample(
149+
[iter(queue)] * len(queue)
150+
+ [iter(grandchildren)] * len(grandchildren),
151+
len(queue) + len(grandchildren),
152+
)
153+
]
154+
)
162155
else:
163156
queue.append((child, depth + 1))
164157

@@ -174,3 +167,23 @@ def __eq__(self, __value: object) -> bool:
174167

175168
def __hash__(self) -> int:
    """Hash the ".." marker together with this segment's selectors and token."""
    identity = ("..", self.selectors, self.token)
    return hash(identity)
170+
171+
172+
def _nondeterministic_children(node: JSONPathNode) -> Iterable[JSONPathNode]:
    """Yield a child node for each member of a dict or element of a list.

    Dict members are visited in a freshly shuffled order; list elements are
    visited in index order. Any other kind of value yields nothing.
    """
    value = node.value

    if isinstance(value, dict):
        # Shuffle a copy of the items so the dict itself is left untouched.
        members = list(value.items())
        random.shuffle(members)
    elif isinstance(value, list):
        # Arrays keep their natural order; the key is the element's index.
        members = enumerate(value)
    else:
        return

    for key, child_value in members:
        yield JSONPathNode(
            value=child_value,
            location=node.location + (key,),
            root=node.root,
        )

jsonpath_rfc9535/utils/nondeterministic_descent.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -121,30 +121,38 @@ def breadth_first_visit(node: AuxNode) -> Iterable[AuxNode]:
121121

122122

123123
def nondeterministic_visit(root: AuxNode) -> Iterable[AuxNode]:
124-
"""Generate nodes rooted at _node_ from a nondeterministic traversal.
125-
126-
This tree visitor will never produce nodes in depth-first pre-order, so
127-
use `pre_order_visit` in addition to `nondeterministic_visit` to get all
128-
permutations. Or use `all_perms()`.
129-
"""
130-
queue: Deque[AuxNode] = deque(root.children)
124+
"""Generate nodes rooted at _node_ from a nondeterministic traversal."""
131125
yield root
126+
queue: Deque[AuxNode] = deque(root.children)
132127

133128
while queue:
134129
_node = queue.popleft()
135130
yield _node
136-
# Visit child nodes now or queue them for later?
131+
# Randomly choose to visit child nodes now or queue them for later?
137132
visit_children = random.choice([True, False])
138133
for child in _node.children:
139134
if visit_children:
140135
yield child
141-
queue.extend(child.children)
136+
137+
# Queue grandchildren by randomly interleaving them into the
138+
# queue while maintaining queue and grandchild order.
139+
grandchildren = child.children
140+
141+
queue = deque(
142+
[
143+
next(n)
144+
for n in random.sample(
145+
[iter(queue)] * len(queue)
146+
+ [iter(grandchildren)] * len(grandchildren),
147+
len(queue) + len(grandchildren),
148+
)
149+
]
150+
)
142151
else:
143152
queue.append(child)
144153

145154

146155
def all_perms(root: AuxNode) -> List[Tuple[AuxNode, ...]]:
147156
"""Return a list of valid permutations for the auxiliary tree _root_."""
148157
perms = {tuple(nondeterministic_visit(root)) for _ in range(1000)}
149-
perms.add(tuple(pre_order_visit(root)))
150158
return sorted(perms, key=lambda t: str(t))

tests/test_cts_nondeterminism.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""Test against the JSONPath Compliance Test Suite with nondeterminism enabled.
2+
3+
The CTS is a submodule located in /tests/cts. After a git clone, run
4+
`git submodule update --init` from the root of the repository.
5+
"""
6+
7+
import json
8+
import operator
9+
from dataclasses import dataclass
10+
from typing import Any
11+
from typing import List
12+
from typing import Optional
13+
from typing import Tuple
14+
15+
import pytest
16+
17+
from jsonpath_rfc9535 import JSONPathEnvironment
18+
from jsonpath_rfc9535 import JSONValue
19+
20+
21+
@dataclass
class Case:
    """One entry from the "tests" array of the compliance test suite file."""

    # Test name; used as the pytest ID for parametrized tests.
    name: str
    # The JSONPath query under test.
    selector: str
    # The JSON value the query is applied to.
    document: JSONValue = None
    # Expected values when the case has a single expected result.
    result: Any = None
    # Alternative expected results; a query result matching any one is accepted.
    results: Optional[List[Any]] = None
    # Truthy for cases that `valid_cases` filters out.
    invalid_selector: Optional[bool] = None
29+
30+
31+
def cases() -> List[Case]:
    """Read the CTS JSON file and build a `Case` for each entry under "tests"."""
    with open("tests/cts/cts.json", encoding="utf8") as cts_file:
        suite = json.load(cts_file)

    loaded: List[Case] = []
    for entry in suite["tests"]:
        # Each entry's keys are passed straight through as `Case` fields.
        loaded.append(Case(**entry))
    return loaded
35+
36+
37+
def valid_cases() -> List[Case]:
    """Return the loaded cases whose `invalid_selector` flag is falsy."""
    selected: List[Case] = []
    for candidate in cases():
        if candidate.invalid_selector:
            continue
        selected.append(candidate)
    return selected
39+
40+
41+
def nondeterministic_cases() -> List[Case]:
    """Return the valid cases that carry a list of alternative `results`."""

    def _has_alternatives(candidate: Case) -> bool:
        return isinstance(candidate.results, list)

    return list(filter(_has_alternatives, valid_cases()))
43+
44+
45+
class MockEnv(JSONPathEnvironment):
    """Environment subclass with the `nondeterministic` class attribute set."""

    nondeterministic = True
47+
48+
49+
@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name"))
def test_nondeterminism_valid_cases(case: Case) -> None:
    """Check each valid case's query values with nondeterminism enabled."""
    assert case.document is not None
    values = MockEnv().find(case.selector, case.document).values()

    if case.results is None:
        # Only one expected result is given: the values must equal it.
        assert values == case.result
    else:
        # Several results are acceptable: the values must be one of them.
        assert values in case.results
59+
60+
61+
@pytest.mark.parametrize(
    "case", nondeterministic_cases(), ids=operator.attrgetter("name")
)
def test_nondeterminism(case: Case) -> None:
    """Test that repeated queries produce the same set of results as the CTS."""
    assert case.document is not None
    assert case.results is not None

    def _result_repr(rv: List[object]) -> Tuple[str, ...]:
        """Return a hashable representation of a result list."""
        return tuple(str(value) for value in rv)

    env = MockEnv()

    # Repeat enough times to have a high probability that we've covered all
    # valid permutations.
    observed = set()
    for _ in range(1000):
        observed.add(_result_repr(env.find(case.selector, case.document).values()))

    # Compare counts first, then the result sets themselves.
    assert len(observed) == len(case.results)
    expected = {_result_repr(result) for result in case.results}
    assert observed == expected

0 commit comments

Comments
 (0)