Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Python JSONPath RFC 9535 Change Log

## Version 0.1.5 (unreleased)

**Fixes**

- Fixed "unbalanced parentheses" errors for queries that do have balanced brackets. See [#13](https://github.com/jg-rp/python-jsonpath-rfc9535/issues/13).

## Version 0.1.4

**Fixes**
Expand Down
2 changes: 1 addition & 1 deletion jsonpath_rfc9535/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.4"
__version__ = "0.1.5"
70 changes: 54 additions & 16 deletions jsonpath_rfc9535/lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,30 @@
class Lexer:
"""JSONPath expression lexical scanner."""

__slots__ = ("filter_depth", "paren_stack", "tokens", "start", "pos", "query")
__slots__ = (
"filter_depth",
"func_call_stack",
"bracket_stack",
"tokens",
"start",
"pos",
"query",
)

def __init__(self, query: str) -> None:
self.filter_depth = 0
"""Filter nesting level."""

self.paren_stack: List[int] = []
self.func_call_stack: List[int] = []
"""A running count of parentheses for each, possibly nested, function call.

If the stack is empty, we are not in a function call. Remember that
function arguments can be arbitrarily nested in parentheses.
"""

self.bracket_stack: list[tuple[str, int]] = []
"""A stack of opening (parentheses/bracket, index) pairs."""

self.tokens: List[Token] = []
"""Tokens resulting from scanning a JSONPath expression."""

Expand Down Expand Up @@ -133,7 +144,7 @@ def ignore_whitespace(self) -> bool:

def error(self, msg: str) -> None:
"""Emit an error token."""
# better error messages.
# TODO: better error messages.
self.tokens.append(
Token(
TokenType.ERROR,
Expand Down Expand Up @@ -179,6 +190,7 @@ def lex_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0911

if c == "[":
l.emit(TokenType.LBRACKET)
l.bracket_stack.append((c, l.pos - 1))
return lex_inside_bracketed_segment

if l.filter_depth:
Expand All @@ -202,6 +214,7 @@ def lex_descendant_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103

if c == "[":
l.emit(TokenType.LBRACKET)
l.bracket_stack.append((c, l.pos - 1))
return lex_inside_bracketed_segment

l.backup()
Expand Down Expand Up @@ -244,11 +257,17 @@ def lex_inside_bracketed_segment(l: Lexer) -> Optional[StateFn]: # noqa: PLR091
c = l.next()

if c == "]":
if not l.bracket_stack or l.bracket_stack[-1][0] != "[":
l.backup()
l.error("unbalanced brackets")
return None

l.bracket_stack.pop()
l.emit(TokenType.RBRACKET)
return lex_segment

if c == "":
l.error("unclosed bracketed selection")
l.error("unbalanced brackets")
return None

if c == "*":
Expand Down Expand Up @@ -299,18 +318,14 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL

if c == "]":
l.filter_depth -= 1
if len(l.paren_stack) == 1:
l.error("unbalanced parentheses")
return None

l.backup()
return lex_inside_bracketed_segment

if c == ",":
l.emit(TokenType.COMMA)
# If we have unbalanced parens, we are inside a function call and a
# comma separates arguments. Otherwise a comma separates selectors.
if l.paren_stack:
if l.func_call_stack:
continue
l.filter_depth -= 1
return lex_inside_bracketed_segment
Expand All @@ -323,19 +338,26 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL

if c == "(":
l.emit(TokenType.LPAREN)
l.bracket_stack.append((c, l.pos - 1))
# Are we in a function call? If so, a function argument contains parens.
if l.paren_stack:
l.paren_stack[-1] += 1
if l.func_call_stack:
l.func_call_stack[-1] += 1
continue

if c == ")":
if not l.bracket_stack or l.bracket_stack[-1][0] != "(":
l.backup()
l.error("unbalanced parentheses")
return None

l.bracket_stack.pop()
l.emit(TokenType.RPAREN)
# Are we closing a function call or a parenthesized expression?
if l.paren_stack:
if l.paren_stack[-1] == 1:
l.paren_stack.pop()
if l.func_call_stack:
if l.func_call_stack[-1] == 1:
l.func_call_stack.pop()
else:
l.paren_stack[-1] -= 1
l.func_call_stack[-1] -= 1
continue

if c == "$":
Expand Down Expand Up @@ -402,8 +424,9 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
l.emit(TokenType.INT)
elif l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
# Keep track of parentheses for this function call.
l.paren_stack.append(1)
l.func_call_stack.append(1)
l.emit(TokenType.FUNCTION)
l.bracket_stack.append(("(", l.pos))
l.next()
l.ignore() # ignore LPAREN
else:
Expand Down Expand Up @@ -486,6 +509,21 @@ def tokenize(query: str) -> List[Token]:
lexer, tokens = lex(query)
lexer.run()

# Check for remaining opening brackets that have not been closed.
if lexer.bracket_stack:
ch, index = lexer.bracket_stack[0]
msg = f"unbalanced {'brackets' if ch == '[' else 'parentheses'}"
raise JSONPathSyntaxError(
msg,
token=Token(
TokenType.ERROR,
lexer.query[index],
index,
lexer.query,
msg,
),
)

if tokens and tokens[-1].type_ == TokenType.ERROR:
raise JSONPathSyntaxError(tokens[-1].message, token=tokens[-1])

Expand Down
9 changes: 7 additions & 2 deletions tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ def env() -> JSONPathEnvironment:

def test_unclosed_selection_list(env: JSONPathEnvironment) -> None:
with pytest.raises(
JSONPathSyntaxError, match=r"unclosed bracketed selection, line 1, column 5"
JSONPathSyntaxError, match=r"unbalanced brackets, line 1, column 1"
):
env.compile("$[1,2")


def test_unclosed_selection_list_inside_filter(env: JSONPathEnvironment) -> None:
with pytest.raises(
JSONPathSyntaxError, match=r"unclosed bracketed selection, line 1, column 10"
JSONPathSyntaxError, match=r"unbalanced brackets, line 1, column 1"
):
env.compile("$[?@.a < 1")

Expand Down Expand Up @@ -85,6 +85,11 @@ class MockEnv(JSONPathEnvironment):
env.find(query, data)


def test_nested_functions_unbalanced_parens(env: JSONPathEnvironment) -> None:
with pytest.raises(JSONPathSyntaxError, match="unbalanced brackets"):
env.compile("$.values[?match(@.a, value($..['regex'])]")


class FilterLiteralTestCase(NamedTuple):
description: str
query: str
Expand Down
6 changes: 6 additions & 0 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import jsonpath_rfc9535 as jsonpath


def test_issue_13() -> None:
# This was failing with "unbalanced parentheses".
_q = jsonpath.compile("$[? count(@.likes[? @.location]) > 3]")
Loading