From d011e7b05d419441d06f0fa00c885a0f765a0933 Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Fri, 7 Nov 2025 14:07:14 +0100 Subject: [PATCH 1/3] fix(trino): convert STRUCT/ROW/ARRAY types to valid JSON strings --- deepnote_toolkit/ocelots/pandas/utils.py | 17 ++++ tests/integration/test_trino.py | 102 ++++++++++++++++++++ tests/unit/test_ocelots_pandas_utils.py | 114 +++++++++++++++++++++++ 3 files changed, 233 insertions(+) create mode 100644 tests/unit/test_ocelots_pandas_utils.py diff --git a/deepnote_toolkit/ocelots/pandas/utils.py b/deepnote_toolkit/ocelots/pandas/utils.py index 2514628..28305ca 100644 --- a/deepnote_toolkit/ocelots/pandas/utils.py +++ b/deepnote_toolkit/ocelots/pandas/utils.py @@ -1,3 +1,5 @@ +import json + import numpy as np import pandas as pd from packaging.requirements import Requirement @@ -11,8 +13,23 @@ def safe_convert_to_string(value): Note: For bytes, this returns Python's standard string representation (e.g., b'hello') rather than base64 encoding, which is more human-readable. + + For dicts, lists, and tuples, this returns valid JSON using json.dumps() rather than str(). + This is critical for databases like Trino that return structured types (STRUCT/ROW/ARRAY) + as Python objects (NamedRowTuple, dict, list) instead of strings. Using str() on these + would produce invalid JSON with single quotes like "{'a': 'x'}" instead of valid JSON + like '{"a": "x"}', causing frontend rendering to fail. + + Note: PostgreSQL returns ROW types as plain strings, so this conversion isn't needed for + them, but it doesn't hurt since str(string) returns the same string. """ try: + # Convert collection types to valid JSON strings for proper frontend rendering. + # Databases like Trino return structured types as Python objects (e.g. NamedRowTuple), + # while PostgreSQL returns them as strings. Using json.dumps() ensures valid JSON + # with double quotes, which the frontend can parse correctly. + if isinstance(value, (dict, list, tuple)): + return json.dumps(value) return str(value) except Exception: return "" diff --git a/tests/integration/test_trino.py b/tests/integration/test_trino.py index 695d6d7..8e81b5a 100644 --- a/tests/integration/test_trino.py +++ b/tests/integration/test_trino.py @@ -225,3 +225,105 @@ def test_execute_sql_with_autodetection(self, trino_credentials): assert len(result) == 1 assert "detected" in result.columns assert result["detected"].iloc[0] == test_value + + def test_execute_sql_with_struct_types(self, trino_toolkit_connection): + """ + Test execute_sql with Trino STRUCT/ROW types + (regression reported in BLU-5140) + + Named structs from Trino come through as NamedRowTuple (tuple subclass). + The rendering layer will convert them to JSON via safe_convert_to_string. + """ + query = """ + SELECT id, simple_struct FROM ( + SELECT + t.id, + CAST( + ROW( + 'item_' || CAST(t.id AS VARCHAR), + 'value_' || CAST(t.id * 10 AS VARCHAR) + ) + AS ROW(a VARCHAR, b VARCHAR) + ) AS simple_struct + FROM + UNNEST(SEQUENCE(1, 100)) AS t (id) + ) + """ + + result = execute_sql( + template=query, + sql_alchemy_json_env_var=trino_toolkit_connection, + ) + + assert isinstance(result, pd.DataFrame) + assert len(result) == 100 + assert "id" in result.columns + assert "simple_struct" in result.columns + + # Named structs from Trino come through as NamedRowTuple (tuple subclass) + first_struct = result["simple_struct"].iloc[0] + assert isinstance( + first_struct, tuple + ), f"Expected named struct to be tuple, got {type(first_struct)}" + + assert len(first_struct) == 2 + assert first_struct[0] == "item_1" + assert first_struct[1] == "value_10" + + assert first_struct.a == "item_1" + assert first_struct.b == "value_10" + + def test_execute_sql_with_array_types(self, trino_toolkit_connection): + """ + Test execute_sql with Trino ARRAY types + (related to BLU-5140) + + Arrays from Trino come through as Python lists. + The rendering layer will convert them to JSON via safe_convert_to_string. + Without proper handling, str(list) produces invalid JSON with single quotes. + """ + query = """ + SELECT + id, + tags, + nested_array + FROM ( + SELECT + t.id, + ARRAY['tag_' || CAST(t.id AS VARCHAR), 'item', 'test'] AS tags, + ARRAY[ARRAY[t.id, t.id * 2], ARRAY[t.id * 3, t.id * 4]] AS nested_array + FROM + UNNEST(SEQUENCE(1, 50)) AS t (id) + ) + """ + + result = execute_sql( + template=query, + sql_alchemy_json_env_var=trino_toolkit_connection, + ) + + assert isinstance(result, pd.DataFrame) + assert len(result) == 50 + assert "id" in result.columns + assert "tags" in result.columns + assert "nested_array" in result.columns + + # Arrays from Trino come through as Python lists + first_tags = result["tags"].iloc[0] + assert isinstance( + first_tags, list + ), f"Expected array to be list, got {type(first_tags)}" + + assert len(first_tags) == 3 + assert first_tags[0] == "tag_1" + assert first_tags[1] == "item" + assert first_tags[2] == "test" + + first_nested = result["nested_array"].iloc[0] + assert isinstance( + first_nested, list + ), f"Expected nested array to be list, got {type(first_nested)}" + assert len(first_nested) == 2 + assert isinstance(first_nested[0], list) + assert first_nested[0] == [1, 2] + assert first_nested[1] == [3, 4] diff --git a/tests/unit/test_ocelots_pandas_utils.py b/tests/unit/test_ocelots_pandas_utils.py new file mode 100644 index 0000000..700336e --- /dev/null +++ b/tests/unit/test_ocelots_pandas_utils.py @@ -0,0 +1,114 @@ +import json + +import pytest + +from deepnote_toolkit.ocelots.pandas.utils import safe_convert_to_string + + +def test_safe_convert_to_string_dict(): + dict_value = {"a": "x", "b": "y"} + result = safe_convert_to_string(dict_value) + + assert isinstance(result, str) + parsed = json.loads(result) + assert parsed == dict_value + + +def test_safe_convert_to_string_tuple(): + tuple_value = (1, "x", True) + result = safe_convert_to_string(tuple_value) + + assert isinstance(result, str) + parsed = json.loads(result) + assert parsed == [1, "x", True] + + +def test_safe_convert_to_string_list(): + list_value = ["a", "b", "c"] + result = safe_convert_to_string(list_value) + + assert isinstance(result, str) + parsed = json.loads(result) + assert parsed == list_value + + +def test_safe_convert_to_string_nested_structures(): + nested_value = {"key": "value", "nested": {"inner": [1, 2, 3]}} + result = safe_convert_to_string(nested_value) + + parsed = json.loads(result) + assert parsed == nested_value + + +def test_safe_convert_to_string_regular_values(): + assert safe_convert_to_string("hello") == "hello" + + assert safe_convert_to_string(42) == "42" + assert safe_convert_to_string(3.14) == "3.14" + + assert safe_convert_to_string(True) == "True" + + assert safe_convert_to_string(None) == "None" + + +def test_safe_convert_to_string_unconvertible(): + + class UnconvertibleObject: + def __str__(self): + raise ValueError("Cannot convert") + + def __repr__(self): + raise ValueError("Cannot represent") + + result = safe_convert_to_string(UnconvertibleObject()) + assert result == "" + + +# Tests for Trino-specific types +def test_safe_convert_to_string_trino_namedrowtuple(): + """Test that Trino's NamedRowTuple is converted to valid JSON strings.""" + pytest.importorskip("trino") + from trino.client import NamedRowTuple + + # Create a NamedRowTuple with field names and values (as returned by Trino) + row = NamedRowTuple( + values=["item_1", "value_10"], names=["a", "b"], types=[None, None] + ) + + result = safe_convert_to_string(row) + + assert isinstance(result, str) + parsed = json.loads(result) + assert parsed == ["item_1", "value_10"] + assert row.a == "item_1" + assert row.b == "value_10" + + +def test_safe_convert_to_string_trino_array(): + """Test that Trino arrays (returned as Python lists) are converted to valid JSON.""" + + # Trino returns ARRAY types as Python lists + trino_array = ["tag_1", "item", "test"] + + result = safe_convert_to_string(trino_array) + + assert isinstance(result, str) + + parsed = json.loads(result) + assert parsed == trino_array + assert '"tag_1"' in result + assert "'tag_1'" not in result + + +def test_safe_convert_to_string_trino_nested_array(): + """Test that nested Trino arrays are converted to valid JSON.""" + + # Trino returns nested ARRAY types as nested Python lists + nested_array = [[1, 2], [3, 4]] + + result = safe_convert_to_string(nested_array) + + parsed = json.loads(result) + assert parsed == nested_array + assert parsed[0] == [1, 2] + assert parsed[1] == [3, 4] From 6f4b085cc8484a46e57ad81f9b98329ec82e12ab Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Fri, 7 Nov 2025 14:21:08 +0100 Subject: [PATCH 2/3] fix: imports due to Trino update (0.327.0 -> 0.330.0) --- tests/unit/test_ocelots_pandas_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_ocelots_pandas_utils.py b/tests/unit/test_ocelots_pandas_utils.py index 700336e..decf5a1 100644 --- a/tests/unit/test_ocelots_pandas_utils.py +++ b/tests/unit/test_ocelots_pandas_utils.py @@ -67,8 +67,8 @@ def __repr__(self): # Tests for Trino-specific types def test_safe_convert_to_string_trino_namedrowtuple(): """Test that Trino's NamedRowTuple is converted to valid JSON strings.""" - pytest.importorskip("trino") - from trino.client import NamedRowTuple + pytest.importorskip("trino.types") + from trino.types import NamedRowTuple # Create a NamedRowTuple with field names and values (as returned by Trino) row = NamedRowTuple( From 4b40a57d1e39cc81ddfea4bd2c8df7c6c3acf5da Mon Sep 17 00:00:00 2001 From: Michal Baumgartner Date: Fri, 7 Nov 2025 16:02:44 +0100 Subject: [PATCH 3/3] chore: Add regression tests for DataFrame rendering and analysis --- deepnote_toolkit/ocelots/pandas/utils.py | 17 -- tests/integration/test_trino.py | 87 ++++++---- tests/unit/test_dataframe_rendering.py | 210 +++++++++++++++++++++++ tests/unit/test_ocelots_pandas_utils.py | 114 ------------ 4 files changed, 268 insertions(+), 160 deletions(-) create mode 100644 tests/unit/test_dataframe_rendering.py delete mode 100644 tests/unit/test_ocelots_pandas_utils.py diff --git a/deepnote_toolkit/ocelots/pandas/utils.py b/deepnote_toolkit/ocelots/pandas/utils.py index 28305ca..2514628 100644 --- a/deepnote_toolkit/ocelots/pandas/utils.py +++ b/deepnote_toolkit/ocelots/pandas/utils.py @@ -1,5 +1,3 @@ -import json - import numpy as np import pandas as pd from packaging.requirements import Requirement @@ -13,23 +11,8 @@ def safe_convert_to_string(value): Note: For bytes, this returns Python's standard string representation (e.g., b'hello') rather than base64 encoding, which is more human-readable. - - For dicts, lists, and tuples, this returns valid JSON using json.dumps() rather than str(). - This is critical for databases like Trino that return structured types (STRUCT/ROW/ARRAY) - as Python objects (NamedRowTuple, dict, list) instead of strings. Using str() on these - would produce invalid JSON with single quotes like "{'a': 'x'}" instead of valid JSON - like '{"a": "x"}', causing frontend rendering to fail. - - Note: PostgreSQL returns ROW types as plain strings, so this conversion isn't needed for - them, but it doesn't hurt since str(string) returns the same string. """ try: - # Convert collection types to valid JSON strings for proper frontend rendering. - # Databases like Trino return structured types as Python objects (e.g. NamedRowTuple), - # while PostgreSQL returns them as strings. Using json.dumps() ensures valid JSON - # with double quotes, which the frontend can parse correctly. - if isinstance(value, (dict, list, tuple)): - return json.dumps(value) return str(value) except Exception: return "" diff --git a/tests/integration/test_trino.py b/tests/integration/test_trino.py index 8e81b5a..11b2778 100644 --- a/tests/integration/test_trino.py +++ b/tests/integration/test_trino.py @@ -228,12 +228,13 @@ def test_execute_sql_with_autodetection(self, trino_credentials): def test_execute_sql_with_struct_types(self, trino_toolkit_connection): """ - Test execute_sql with Trino STRUCT/ROW types - (regression reported in BLU-5140) + Test Trino STRUCT/ROW types don't break rendering (BLU-5140 regression). - Named structs from Trino come through as NamedRowTuple (tuple subclass). - The rendering layer will convert them to JSON via safe_convert_to_string. + Verifies both analyze_columns() for stats and to_records() for cell values. """ + from deepnote_toolkit.ocelots import DataFrame + from deepnote_toolkit.ocelots.pandas.analyze import analyze_columns + query = """ SELECT id, simple_struct FROM ( SELECT @@ -260,28 +261,43 @@ def test_execute_sql_with_struct_types(self, trino_toolkit_connection): assert "id" in result.columns assert "simple_struct" in result.columns - # Named structs from Trino come through as NamedRowTuple (tuple subclass) + # Verify NamedRowTuple structure first_struct = result["simple_struct"].iloc[0] - assert isinstance( - first_struct, tuple - ), f"Expected named struct to be tuple, got {type(first_struct)}" - + assert isinstance(first_struct, tuple) assert len(first_struct) == 2 assert first_struct[0] == "item_1" assert first_struct[1] == "value_10" - assert first_struct.a == "item_1" assert first_struct.b == "value_10" + # Verify analyze_columns() works without crashing + analysis_result = analyze_columns(result) + assert len(analysis_result) == 2 + + struct_col = next(col for col in analysis_result if col.name == "simple_struct") + assert struct_col.stats is not None + assert struct_col.stats.categories is not None + assert len(struct_col.stats.categories) > 0 + + # Verify to_records() produces stringified values + oc_df = DataFrame.from_native(result) + records = oc_df.to_records(mode="json") + + assert len(records) == 100 + cell_value = records[0]["simple_struct"] + assert isinstance(cell_value, str) + assert "item_1" in cell_value + assert "value_10" in cell_value + def test_execute_sql_with_array_types(self, trino_toolkit_connection): """ - Test execute_sql with Trino ARRAY types - (related to BLU-5140) + Test Trino ARRAY types don't break rendering (BLU-5140 regression). - Arrays from Trino come through as Python lists. - The rendering layer will convert them to JSON via safe_convert_to_string. - Without proper handling, str(list) produces invalid JSON with single quotes. + Verifies both analyze_columns() for stats and to_records() for cell values. """ + from deepnote_toolkit.ocelots import DataFrame + from deepnote_toolkit.ocelots.pandas.analyze import analyze_columns + query = """ SELECT id, @@ -308,22 +324,35 @@ def test_execute_sql_with_array_types(self, trino_toolkit_connection): assert "tags" in result.columns assert "nested_array" in result.columns - # Arrays from Trino come through as Python lists + # Verify array data first_tags = result["tags"].iloc[0] - assert isinstance( - first_tags, list - ), f"Expected array to be list, got {type(first_tags)}" - + assert isinstance(first_tags, list) assert len(first_tags) == 3 - assert first_tags[0] == "tag_1" - assert first_tags[1] == "item" - assert first_tags[2] == "test" + assert first_tags == ["tag_1", "item", "test"] first_nested = result["nested_array"].iloc[0] - assert isinstance( - first_nested, list - ), f"Expected nested array to be list, got {type(first_nested)}" + assert isinstance(first_nested, list) assert len(first_nested) == 2 - assert isinstance(first_nested[0], list) - assert first_nested[0] == [1, 2] - assert first_nested[1] == [3, 4] + assert first_nested == [[1, 2], [3, 4]] + + # Verify analyze_columns() works without crashing + analysis_result = analyze_columns(result) + assert len(analysis_result) == 3 + + for col_name in ["tags", "nested_array"]: + col = next(c for c in analysis_result if c.name == col_name) + assert col.stats is not None + assert col.stats.categories is not None + + # Verify to_records() produces stringified values + oc_df = DataFrame.from_native(result) + records = oc_df.to_records(mode="json") + + assert len(records) == 50 + tags_value = records[0]["tags"] + nested_value = records[0]["nested_array"] + + assert isinstance(tags_value, str) + assert isinstance(nested_value, str) + assert "tag_1" in tags_value + assert "item" in tags_value diff --git a/tests/unit/test_dataframe_rendering.py b/tests/unit/test_dataframe_rendering.py new file mode 100644 index 0000000..75c29f2 --- /dev/null +++ b/tests/unit/test_dataframe_rendering.py @@ -0,0 +1,210 @@ +""" +Unit tests for DataFrame rendering with structured types. + +These tests simulate the complete rendering flow that happens when the frontend +displays a DataFrame, ensuring both column analysis and data serialization work correctly. + +This is a regression test suite for BLU-5140 where Trino STRUCT/ROW types caused +analyze_columns() to crash, resulting in fallback to plain DataFrame view instead of +the Deepnote native DataFrame view. +""" + +import numpy as np +import pandas as pd +from trino.types import NamedRowTuple + +from deepnote_toolkit.ocelots import DataFrame +from deepnote_toolkit.ocelots.pandas.analyze import analyze_columns + + +def _test_rendering_flow(df, expected_columns): + """ + Simulate the complete rendering flow: + 1. analyze_columns() - for native view with stats + 2. to_records(mode="json") - for cell values + + Both paths must work for the Deepnote native DataFrame view to display correctly. + """ + # 1. column stats (native view) + analysis_result = analyze_columns(df) + + assert len(analysis_result) == len(expected_columns) + + for col_name in expected_columns: + col = next(c for c in analysis_result if c.name == col_name) + assert col.stats is not None, f"analyze_columns() failed for {col_name}" + # Object columns should have categories for display + if df[col_name].dtype == object: + assert ( + col.stats.categories is not None + ), f"No categories for object column {col_name}" + + # 2. cell values + oc_df = DataFrame.from_native(df) + records = oc_df.to_records(mode="json") + + assert len(records) == len(df) + # all values are JSON-serializable (strings, numbers, None) + for record in records: + for col_name in expected_columns: + value = record[col_name] + assert isinstance( + value, (str, int, float, type(None)) + ), f"Value for {col_name} is not JSON-serializable: {type(value)}" + + +def test_rendering_with_dict_objects(): + """Test rendering DataFrame with dict objects (simulates database ROW types).""" + df = pd.DataFrame( + { + "id": [1, 2, 3], + "struct_col": [ + {"a": "item_1", "b": "value_10"}, + {"a": "item_2", "b": "value_20"}, + {"a": "item_3", "b": "value_30"}, + ], + } + ) + + _test_rendering_flow(df, ["id", "struct_col"]) + + +def test_rendering_with_list_objects(): + """Test rendering DataFrame with list objects (simulates database ARRAY types).""" + df = pd.DataFrame( + { + "id": [1, 2, 3], + "array_col": [ + ["tag_1", "item", "test"], + ["tag_2", "item", "test"], + ["tag_3", "item", "test"], + ], + } + ) + + _test_rendering_flow(df, ["id", "array_col"]) + + +def test_rendering_with_tuple_objects(): + """Test rendering DataFrame with tuple objects.""" + df = pd.DataFrame( + { + "id": [1, 2, 3], + "tuple_col": [ + ("item_1", "value_10"), + ("item_2", "value_20"), + ("item_3", "value_30"), + ], + } + ) + + _test_rendering_flow(df, ["id", "tuple_col"]) + + +def test_rendering_with_trino_namedrowtuple(): + """ + Test rendering DataFrame with Trino NamedRowTuple objects. + + This is the exact scenario from BLU-5140 that caused the crash. + Before the fix, pd.Series(np_array.tolist()) would fail because + NamedRowTuple has a broken __array_struct__ attribute. + """ + # Create NamedRowTuple array using np.empty + assignment pattern. + # This avoids pandas conversion issues during DataFrame creation. + # Using [NamedRowTuple(...), ...] would trigger __array_struct__ bug. + np_array = np.empty(3, dtype=object) + np_array[0] = NamedRowTuple(["item_1", "value_10"], ["a", "b"], [None, None]) + np_array[1] = NamedRowTuple(["item_2", "value_20"], ["a", "b"], [None, None]) + np_array[2] = NamedRowTuple(["item_3", "value_30"], ["a", "b"], [None, None]) + + df = pd.DataFrame({"id": [1, 2, 3], "struct_col": np_array}) + + _test_rendering_flow(df, ["id", "struct_col"]) + + # stringified values should preserve structure + oc_df = DataFrame.from_native(df) + records = oc_df.to_records(mode="json") + + struct_value = records[0]["struct_col"] + assert isinstance(struct_value, str) + assert "item_1" in struct_value + assert "value_10" in struct_value + + +def test_rendering_with_nested_structures(): + """Test rendering DataFrame with nested dicts/lists.""" + df = pd.DataFrame( + { + "id": [1, 2, 3], + "nested_col": [ + {"outer": ["inner_1", "inner_2"]}, + {"outer": ["inner_3", "inner_4"]}, + {"outer": ["inner_5", "inner_6"]}, + ], + } + ) + + _test_rendering_flow(df, ["id", "nested_col"]) + + +def test_rendering_with_mixed_types(): + """Test rendering DataFrame with multiple structured type columns.""" + df = pd.DataFrame( + { + "id": [1, 2, 3], + "dict_col": [{"a": 1}, {"b": 2}, {"c": 3}], + "list_col": [[1, 2], [3, 4], [5, 6]], + "tuple_col": [(1, 2), (3, 4), (5, 6)], + } + ) + + _test_rendering_flow(df, ["id", "dict_col", "list_col", "tuple_col"]) + + +def test_rendering_with_namedrowtuple_and_missing_values(): + """Test rendering with NamedRowTuple including None values.""" + # Create NamedRowTuple array using np.empty + assignment pattern. + # Using [NamedRowTuple(...), ...] would trigger __array_struct__ bug. + np_array = np.empty(4, dtype=object) + np_array[0] = NamedRowTuple(["item_1", "value_10"], ["a", "b"], [None, None]) + np_array[1] = None + np_array[2] = NamedRowTuple(["item_2", "value_20"], ["a", "b"], [None, None]) + np_array[3] = NamedRowTuple(["item_1", "value_10"], ["a", "b"], [None, None]) + + df = pd.DataFrame({"id": [1, 2, 3, 4], "struct_col": np_array}) + + # Should not crash with None values + analysis_result = analyze_columns(df) + + struct_col = next(col for col in analysis_result if col.name == "struct_col") + assert struct_col.stats is not None + assert struct_col.stats.categories is not None + + # Should have "Missing" category + category_names = [cat["name"] for cat in struct_col.stats.categories] + assert "Missing" in category_names + + +def test_rendering_preserves_field_names_in_str_representation(): + """ + Test that NamedRowTuple field names are preserved in stringification. + """ + # Create NamedRowTuple array using np.empty + assignment pattern. + # Using [NamedRowTuple(...), ...] would trigger __array_struct__ bug. + np_array = np.empty(1, dtype=object) + np_array[0] = NamedRowTuple( + ["value_a", "value_b"], ["field_a", "field_b"], [None, None] + ) + + df = pd.DataFrame({"struct_col": np_array}) + + # Get the stringified representation + oc_df = DataFrame.from_native(df) + records = oc_df.to_records(mode="json") + + stringified = records[0]["struct_col"] + + # str(NamedRowTuple) produces something like: (field_a: 'value_a', field_b: 'value_b') + # This preserves field name information for better display + assert "field_a: 'value_a'" in stringified + assert "field_b: 'value_b'" in stringified diff --git a/tests/unit/test_ocelots_pandas_utils.py b/tests/unit/test_ocelots_pandas_utils.py deleted file mode 100644 index decf5a1..0000000 --- a/tests/unit/test_ocelots_pandas_utils.py +++ /dev/null @@ -1,114 +0,0 @@ -import json - -import pytest - -from deepnote_toolkit.ocelots.pandas.utils import safe_convert_to_string - - -def test_safe_convert_to_string_dict(): - dict_value = {"a": "x", "b": "y"} - result = safe_convert_to_string(dict_value) - - assert isinstance(result, str) - parsed = json.loads(result) - assert parsed == dict_value - - -def test_safe_convert_to_string_tuple(): - tuple_value = (1, "x", True) - result = safe_convert_to_string(tuple_value) - - assert isinstance(result, str) - parsed = json.loads(result) - assert parsed == [1, "x", True] - - -def test_safe_convert_to_string_list(): - list_value = ["a", "b", "c"] - result = safe_convert_to_string(list_value) - - assert isinstance(result, str) - parsed = json.loads(result) - assert parsed == list_value - - -def test_safe_convert_to_string_nested_structures(): - nested_value = {"key": "value", "nested": {"inner": [1, 2, 3]}} - result = safe_convert_to_string(nested_value) - - parsed = json.loads(result) - assert parsed == nested_value - - -def test_safe_convert_to_string_regular_values(): - assert safe_convert_to_string("hello") == "hello" - - assert safe_convert_to_string(42) == "42" - assert safe_convert_to_string(3.14) == "3.14" - - assert safe_convert_to_string(True) == "True" - - assert safe_convert_to_string(None) == "None" - - -def test_safe_convert_to_string_unconvertible(): - - class UnconvertibleObject: - def __str__(self): - raise ValueError("Cannot convert") - - def __repr__(self): - raise ValueError("Cannot represent") - - result = safe_convert_to_string(UnconvertibleObject()) - assert result == "" - - -# Tests for Trino-specific types -def test_safe_convert_to_string_trino_namedrowtuple(): - """Test that Trino's NamedRowTuple is converted to valid JSON strings.""" - pytest.importorskip("trino.types") - from trino.types import NamedRowTuple - - # Create a NamedRowTuple with field names and values (as returned by Trino) - row = NamedRowTuple( - values=["item_1", "value_10"], names=["a", "b"], types=[None, None] - ) - - result = safe_convert_to_string(row) - - assert isinstance(result, str) - parsed = json.loads(result) - assert parsed == ["item_1", "value_10"] - assert row.a == "item_1" - assert row.b == "value_10" - - -def test_safe_convert_to_string_trino_array(): - """Test that Trino arrays (returned as Python lists) are converted to valid JSON.""" - - # Trino returns ARRAY types as Python lists - trino_array = ["tag_1", "item", "test"] - - result = safe_convert_to_string(trino_array) - - assert isinstance(result, str) - - parsed = json.loads(result) - assert parsed == trino_array - assert '"tag_1"' in result - assert "'tag_1'" not in result - - -def test_safe_convert_to_string_trino_nested_array(): - """Test that nested Trino arrays are converted to valid JSON.""" - - # Trino returns nested ARRAY types as nested Python lists - nested_array = [[1, 2], [3, 4]] - - result = safe_convert_to_string(nested_array) - - parsed = json.loads(result) - assert parsed == nested_array - assert parsed[0] == [1, 2] - assert parsed[1] == [3, 4]