|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | | -import json |
6 | 5 | from typing import TYPE_CHECKING |
7 | 6 | from typing import Callable |
8 | 7 | from typing import Dict |
@@ -312,6 +311,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto |
312 | 311 | if stream.peek.type_ != TokenType.RBRACKET: |
313 | 312 | stream.expect_peek(TokenType.COMMA) |
314 | 313 | stream.next_token() |
| 314 | + stream.expect_peek_not(TokenType.RBRACKET, "unexpected trailing comma") |
315 | 315 |
|
316 | 316 | stream.next_token() |
317 | 317 |
|
@@ -362,11 +362,29 @@ def parse_string_literal(self, stream: TokenStream) -> Expression: |
362 | 362 | ) |
363 | 363 |
|
def parse_integer_literal(self, stream: TokenStream) -> Expression:
    """Parse the stream's current token as an integer literal.

    The token text is rejected if it starts with "0" and is longer than
    one character. Otherwise it is converted with ``float`` and then
    ``int`` so that scientific notation is accepted.

    Args:
        stream: Token stream whose current token holds the literal text.

    Returns:
        An ``IntegerLiteral`` built from the current token.

    Raises:
        JSONPathSyntaxError: If the text has a leading zero, is not a
            number, or is too large to convert to an integer.
    """
    token = stream.current
    value = token.value
    if value.startswith("0") and len(value) > 1:
        raise JSONPathSyntaxError("invalid integer literal", token=token)

    # Convert to float first to handle scientific notation.
    try:
        number = int(float(value))
    except (ValueError, OverflowError) as err:
        # float() returns inf for an out-of-range exponent (e.g. "1e400")
        # and int(inf) raises OverflowError rather than ValueError.
        raise JSONPathSyntaxError("invalid integer literal", token=token) from err

    return IntegerLiteral(token, value=number)
367 | 376 |
|
def parse_float_literal(self, stream: TokenStream) -> Expression:
    """Parse the stream's current token as a float literal.

    The text is rejected when it starts with "0" and the part before the
    first "." is longer than one character.

    Raises:
        JSONPathSyntaxError: If the text fails the leading-zero check or
            ``float`` cannot convert it.
    """
    token = stream.current
    text = token.value
    integer_part = text.partition(".")[0]
    if text.startswith("0") and len(integer_part) > 1:
        raise JSONPathSyntaxError("invalid float literal", token=token)

    try:
        return FloatLiteral(token, value=float(text))
    except ValueError as err:
        raise JSONPathSyntaxError("invalid float literal", token=token) from err
370 | 388 |
|
371 | 389 | def parse_prefix_expression(self, stream: TokenStream) -> Expression: |
372 | 390 | tok = stream.next_token() |
@@ -514,12 +532,127 @@ def _decode_string_literal(self, token: Token) -> str: |
514 | 532 | value = token.value.replace('"', '\\"').replace("\\'", "'") |
515 | 533 | else: |
516 | 534 | value = token.value |
517 | | - try: |
518 | | - rv = json.loads(f'"{value}"') |
519 | | - assert isinstance(rv, str) |
520 | | - return rv |
521 | | - except json.JSONDecodeError as err: |
522 | | - raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None |
| 535 | + |
| 536 | + return self._unescape_string(value, token) |
| 537 | + |
def _unescape_string(self, value: str, token: Token) -> str:
    """Return `value` with each backslash escape replaced by its character.

    Characters that are not part of an escape sequence are copied through
    after being checked by `_string_from_codepoint`. `token` is passed
    along to the helpers for error reporting.
    """
    pieces: List[str] = []
    end = len(value)
    pos = 0

    while pos < end:
        char = value[pos]
        if char != "\\":
            # Called for validation only; the returned string is discarded.
            self._string_from_codepoint(ord(char), token)
            decoded = char
        else:
            # The helper returns the index of the last character it used.
            decoded, pos = self._decode_escape_sequence(value, pos + 1, token)
        pieces.append(decoded)
        pos += 1

    return "".join(pieces)
| 553 | + |
| 554 | + def _decode_escape_sequence( # noqa: PLR0911 |
| 555 | + self, value: str, index: int, token: Token |
| 556 | + ) -> Tuple[str, int]: |
| 557 | + ch = value[index] |
| 558 | + if ch == '"': |
| 559 | + return '"', index |
| 560 | + if ch == "\\": |
| 561 | + return "\\", index |
| 562 | + if ch == "/": |
| 563 | + return "/", index |
| 564 | + if ch == "b": |
| 565 | + return "\x08", index |
| 566 | + if ch == "f": |
| 567 | + return "\x0c", index |
| 568 | + if ch == "n": |
| 569 | + return "\n", index |
| 570 | + if ch == "r": |
| 571 | + return "\r", index |
| 572 | + if ch == "t": |
| 573 | + return "\t", index |
| 574 | + if ch == "u": |
| 575 | + codepoint, index = self._decode_hex_char(value, index, token) |
| 576 | + return self._string_from_codepoint(codepoint, token), index |
| 577 | + |
| 578 | + raise JSONPathSyntaxError( |
| 579 | + f"unknown escape sequence at index {token.index + index - 1}", |
| 580 | + token=token, |
| 581 | + ) |
| 582 | + |
def _decode_hex_char(self, value: str, index: int, token: Token) -> Tuple[int, int]:
    """Decode a ``uXXXX`` escape, joining a surrogate pair when present.

    `index` is the position of the ``u``. Returns ``(codepoint, index)``
    where ``index`` is the position of the last hex digit consumed.

    Raises:
        JSONPathSyntaxError: If fewer than four characters follow the
            ``u``, the first code unit is a low surrogate, or a high
            surrogate is not followed by a ``\\uXXXX`` low surrogate.
    """
    size = len(value)

    if index + 4 >= size:
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + index - 1}",
            token=token,
        )

    first = index + 1  # position of the first hex digit, just past 'u'
    codepoint = self._parse_hex_digits(value[first : first + 4], token)

    if self._is_low_surrogate(codepoint):
        raise JSONPathSyntaxError(
            f"unexpected low surrogate at index {token.index + first - 1}",
            token=token,
        )

    if not self._is_high_surrogate(codepoint):
        return (codepoint, first + 3)

    # A high surrogate must be followed immediately by "\uXXXX".
    has_second_escape = (
        first + 9 < size and value[first + 4] == "\\" and value[first + 5] == "u"
    )
    if not has_second_escape:
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + first - 2}",
            token=token,
        )

    low_surrogate = self._parse_hex_digits(value[first + 6 : first + 10], token)

    if not self._is_low_surrogate(low_surrogate):
        raise JSONPathSyntaxError(
            f"unexpected codepoint at index {token.index + first + 4}",
            token=token,
        )

    # Combine the low ten bits of each half into one code point.
    combined = 0x10000 + (((codepoint & 0x03FF) << 10) | (low_surrogate & 0x03FF))
    return (combined, first + 9)
| 628 | + |
| 629 | + def _parse_hex_digits(self, digits: str, token: Token) -> int: |
| 630 | + codepoint = 0 |
| 631 | + for digit in digits.encode(): |
| 632 | + codepoint <<= 4 |
| 633 | + if digit >= 48 and digit <= 57: |
| 634 | + codepoint |= digit - 48 |
| 635 | + elif digit >= 65 and digit <= 70: |
| 636 | + codepoint |= digit - 65 + 10 |
| 637 | + elif digit >= 97 and digit <= 102: |
| 638 | + codepoint |= digit - 97 + 10 |
| 639 | + else: |
| 640 | + raise JSONPathSyntaxError( |
| 641 | + "invalid \\uXXXX escape sequence", |
| 642 | + token=token, |
| 643 | + ) |
| 644 | + return codepoint |
| 645 | + |
| 646 | + def _string_from_codepoint(self, codepoint: int, token: Token) -> str: |
| 647 | + if codepoint <= 0x1F: |
| 648 | + raise JSONPathSyntaxError("invalid character", token=token) |
| 649 | + return chr(codepoint) |
| 650 | + |
| 651 | + def _is_high_surrogate(self, codepoint: int) -> bool: |
| 652 | + return codepoint >= 0xD800 and codepoint <= 0xDBFF |
| 653 | + |
| 654 | + def _is_low_surrogate(self, codepoint: int) -> bool: |
| 655 | + return codepoint >= 0xDC00 and codepoint <= 0xDFFF |
523 | 656 |
|
524 | 657 | def _raise_for_non_comparable_function( |
525 | 658 | self, expr: Expression, token: Token |
|
0 commit comments