Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions deepnote_toolkit/ocelots/pandas/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import numpy as np
import pandas as pd
from packaging.requirements import Requirement
Expand All @@ -11,8 +13,23 @@ def safe_convert_to_string(value):

Note: For bytes, this returns Python's standard string representation (e.g., b'hello')
rather than base64 encoding, which is more human-readable.

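For dicts, lists, and tuples, this returns valid JSON using json.dumps() rather than str().
If the collection contains anything json.dumps() cannot serialize (e.g. Decimal or datetime
values), the resulting TypeError is caught and "<unconvertible>" is returned instead.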
This is critical for databases like Trino that return structured types (STRUCT/ROW/ARRAY)
as Python objects (NamedRowTuple, dict, list) instead of strings. Using str() on these
would produce invalid JSON with single quotes like "{'a': 'x'}" instead of valid JSON
like '{"a": "x"}', causing frontend rendering to fail.

Note: PostgreSQL returns ROW types as plain strings, so this conversion isn't needed for
them, but it doesn't hurt since str(string) returns the same string.
"""
try:
# Convert collection types to valid JSON strings for proper frontend rendering.
# Databases like Trino return structured types as Python objects (e.g. NamedRowTuple),
# while PostgreSQL returns them as strings. Using json.dumps() ensures valid JSON
# with double quotes, which the frontend can parse correctly.
if isinstance(value, (dict, list, tuple)):
return json.dumps(value)
return str(value)
except Exception:
return "<unconvertible>"
Expand Down
102 changes: 102 additions & 0 deletions tests/integration/test_trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,3 +225,105 @@ def test_execute_sql_with_autodetection(self, trino_credentials):
assert len(result) == 1
assert "detected" in result.columns
assert result["detected"].iloc[0] == test_value

def test_execute_sql_with_struct_types(self, trino_toolkit_connection):
    """
    Run a query that yields a Trino ROW(a VARCHAR, b VARCHAR) column and
    check the shape of the returned DataFrame (regression from BLU-5140).

    The test asserts that the first struct value is a tuple instance that
    supports both positional access and attribute access by field name.
    """
    struct_sql = """
        SELECT id, simple_struct FROM (
            SELECT
                t.id,
                CAST(
                    ROW(
                        'item_' || CAST(t.id AS VARCHAR),
                        'value_' || CAST(t.id * 10 AS VARCHAR)
                    )
                    AS ROW(a VARCHAR, b VARCHAR)
                ) AS simple_struct
            FROM
                UNNEST(SEQUENCE(1, 100)) AS t (id)
        )
    """

    frame = execute_sql(
        template=struct_sql,
        sql_alchemy_json_env_var=trino_toolkit_connection,
    )

    # Overall result shape: one row per generated id, both columns present.
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 100
    for expected_column in ("id", "simple_struct"):
        assert expected_column in frame.columns

    # The struct cell is expected to be a tuple (sub)class instance.
    first_struct = frame["simple_struct"].iloc[0]
    assert isinstance(
        first_struct, tuple
    ), f"Expected named struct to be tuple, got {type(first_struct)}"

    # Positional access.
    assert len(first_struct) == 2
    assert first_struct[0] == "item_1"
    assert first_struct[1] == "value_10"

    # Access by field name.
    assert first_struct.a == "item_1"
    assert first_struct.b == "value_10"

def test_execute_sql_with_array_types(self, trino_toolkit_connection):
    """
    Run a query that yields flat and nested Trino ARRAY columns and check
    that the values arrive as (nested) Python lists (related to BLU-5140).

    The assertions cover the DataFrame shape, the type of the first cell of
    each array column, and the concrete element values for id == 1.
    """
    array_sql = """
        SELECT
            id,
            tags,
            nested_array
        FROM (
            SELECT
                t.id,
                ARRAY['tag_' || CAST(t.id AS VARCHAR), 'item', 'test'] AS tags,
                ARRAY[ARRAY[t.id, t.id * 2], ARRAY[t.id * 3, t.id * 4]] AS nested_array
            FROM
                UNNEST(SEQUENCE(1, 50)) AS t (id)
        )
    """

    frame = execute_sql(
        template=array_sql,
        sql_alchemy_json_env_var=trino_toolkit_connection,
    )

    # Overall result shape: one row per generated id, all columns present.
    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 50
    for expected_column in ("id", "tags", "nested_array"):
        assert expected_column in frame.columns

    # Flat array column: a three-element list of strings.
    first_tags = frame["tags"].iloc[0]
    assert isinstance(
        first_tags, list
    ), f"Expected array to be list, got {type(first_tags)}"

    assert len(first_tags) == 3
    assert first_tags[0] == "tag_1"
    assert first_tags[1] == "item"
    assert first_tags[2] == "test"

    # Nested array column: a list of two inner lists.
    first_nested = frame["nested_array"].iloc[0]
    assert isinstance(
        first_nested, list
    ), f"Expected nested array to be list, got {type(first_nested)}"
    assert len(first_nested) == 2
    assert isinstance(first_nested[0], list)
    assert first_nested[0] == [1, 2]
    assert first_nested[1] == [3, 4]
114 changes: 114 additions & 0 deletions tests/unit/test_ocelots_pandas_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import json

import pytest

from deepnote_toolkit.ocelots.pandas.utils import safe_convert_to_string


def test_safe_convert_to_string_dict():
    """A dict converts to a str that json.loads parses back to the same dict."""
    source = {"a": "x", "b": "y"}

    rendered = safe_convert_to_string(source)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == source


def test_safe_convert_to_string_tuple():
    """A tuple converts to a str that json.loads parses as the equivalent list."""
    rendered = safe_convert_to_string((1, "x", True))

    assert isinstance(rendered, str)
    # JSON has no tuple type, so the round-trip yields a list.
    decoded = json.loads(rendered)
    assert decoded == [1, "x", True]


def test_safe_convert_to_string_list():
    """A list converts to a str that json.loads parses back to the same list."""
    source = ["a", "b", "c"]

    rendered = safe_convert_to_string(source)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == source


def test_safe_convert_to_string_nested_structures():
    """A dict containing a nested dict and list survives a JSON round-trip."""
    source = {"key": "value", "nested": {"inner": [1, 2, 3]}}

    decoded = json.loads(safe_convert_to_string(source))

    assert decoded == source


def test_safe_convert_to_string_regular_values():
    """Non-collection values convert to the strings listed in the table below."""
    # (input, expected output) pairs, checked in the same order as before.
    expectations = (
        ("hello", "hello"),
        (42, "42"),
        (3.14, "3.14"),
        (True, "True"),
        (None, "None"),
    )

    for raw, expected in expectations:
        assert safe_convert_to_string(raw) == expected


def test_safe_convert_to_string_unconvertible():
    """An object whose __str__ and __repr__ both raise yields "<unconvertible>"."""

    class UnconvertibleObject:
        # Both string hooks fail, so no textual form can be produced.
        def __str__(self):
            raise ValueError("Cannot convert")

        def __repr__(self):
            raise ValueError("Cannot represent")

    hostile = UnconvertibleObject()

    assert safe_convert_to_string(hostile) == "<unconvertible>"


# Tests for Trino-specific types
def test_safe_convert_to_string_trino_namedrowtuple():
    """A trino.types.NamedRowTuple converts to a JSON array of its values.

    Skipped when the trino package is not installed.
    """
    # importorskip returns the imported module, so no second import is needed.
    trino_types = pytest.importorskip("trino.types")

    # Build a row with two named fields; the types are not relevant here.
    row = trino_types.NamedRowTuple(
        values=["item_1", "value_10"], names=["a", "b"], types=[None, None]
    )

    rendered = safe_convert_to_string(row)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == ["item_1", "value_10"]
    # The row still exposes its fields by name after conversion.
    assert row.a == "item_1"
    assert row.b == "value_10"


def test_safe_convert_to_string_trino_array():
    """A list of strings converts to JSON that uses double quotes, not single."""
    # Same values as the `tags` array in the Trino integration query.
    source = ["tag_1", "item", "test"]

    rendered = safe_convert_to_string(source)

    assert isinstance(rendered, str)
    assert json.loads(rendered) == source

    # Double-quoted JSON strings, never Python repr-style single quotes.
    assert '"tag_1"' in rendered
    assert "'tag_1'" not in rendered


def test_safe_convert_to_string_trino_nested_array():
    """A list of lists converts to JSON that parses back to the same nesting."""
    # Same values as the `nested_array` column for id == 1 in the integration query.
    source = [[1, 2], [3, 4]]

    decoded = json.loads(safe_convert_to_string(source))

    assert decoded == source
    first_inner, second_inner = decoded[0], decoded[1]
    assert first_inner == [1, 2]
    assert second_inner == [3, 4]