11import json
22import os
33import pathlib
4+ import re
45from itertools import product
56from packaging .version import Version
67
78import numpy as np
8- from pandas import ArrowDtype , DataFrame
9+ from pandas import ArrowDtype , DataFrame , Index , Series
910from pandas import read_parquet as pd_read_parquet
1011
1112import shapely
@@ -763,11 +764,29 @@ def test_write_empty_bbox(tmpdir, geometry):
763764 assert "bbox" not in metadata ["columns" ]["geometry" ]
764765
765766
767+ @pytest .mark .skipif (
768+ Version (pyarrow .__version__ ) < Version ("19.0.0" ),
769+ reason = "This version of pyarrow does not support reading complex types" ,
770+ )
766771@pytest .mark .parametrize ("format" , ["feather" , "parquet" ])
767772def test_write_read_to_pandas_kwargs (tmpdir , format ):
768773 filename = os .path .join (str (tmpdir ), f"test.{ format } " )
769- g = box (0 , 0 , 10 , 10 )
770- gdf = geopandas .GeoDataFrame ({"geometry" : [g ], "i" : [1 ], "s" : ["a" ]})
774+
775+ # Use arrow types to ensure that we can assert the roundtrip was successful
776+ int_type = ArrowDtype (pyarrow .int64 ())
777+ str_type = ArrowDtype (pyarrow .string ())
778+ complex_type = ArrowDtype (pyarrow .struct ([pyarrow .field ("foo" , pyarrow .string ())]))
779+ index = Index ([0 ], dtype = ArrowDtype (pyarrow .int64 ()))
780+
781+ gdf = geopandas .GeoDataFrame (
782+ {
783+ "geometry" : [box (0 , 0 , 10 , 10 )],
784+ "i" : Series ([1 ], index = index , dtype = int_type ),
785+ "s" : Series (["a" ], index = index , dtype = str_type ),
786+ "c" : Series ([{"foo" : "bar" }], index = index , dtype = complex_type ),
787+ },
788+ index = index ,
789+ )
771790
772791 if format == "feather" :
773792 gdf .to_feather (filename )
@@ -779,8 +798,37 @@ def test_write_read_to_pandas_kwargs(tmpdir, format):
779798 # simulate the `dtype_backend="pyarrow"` option in `pandas.read_parquet`
780799 gdf_roundtrip = read_func (filename , to_pandas_kwargs = {"types_mapper" : ArrowDtype })
781800 assert isinstance (gdf_roundtrip , geopandas .GeoDataFrame )
782- assert isinstance (gdf_roundtrip .dtypes ["i" ], ArrowDtype )
783- assert isinstance (gdf_roundtrip .dtypes ["s" ], ArrowDtype )
801+ assert gdf_roundtrip .dtypes ["i" ] == int_type
802+ assert gdf_roundtrip .dtypes ["s" ] == str_type
803+ assert gdf_roundtrip .dtypes ["c" ] == complex_type
804+ assert_geodataframe_equal (gdf_roundtrip , gdf , check_dtype = True )
805+
806+
@pytest.mark.parametrize("format", ["feather", "parquet"])
def test_read_complex_type_with_numpy_backend_xfail(tmpdir, format):
    """Check that reading a struct column without a types mapper raises.

    A one-row GeoDataFrame holding a pyarrow struct column is written to
    ``format`` and then read back with the default reader arguments; the
    read is expected to fail with a ``TypeError``.
    """
    path = os.path.join(str(tmpdir), f"test.{format}")

    struct_dtype = ArrowDtype(
        pyarrow.struct([pyarrow.field("foo", pyarrow.string())])
    )
    idx = Index([0], dtype=ArrowDtype(pyarrow.int64()))
    struct_column = Series([{"foo": "bar"}], index=idx, dtype=struct_dtype)
    frame = geopandas.GeoDataFrame(
        {"geometry": [box(0, 0, 10, 10)], "c": struct_column},
        index=idx,
    )

    # Pick the writer/reader pair that belongs to the parametrized format.
    writer, reader = (
        (frame.to_feather, read_feather)
        if format == "feather"
        else (frame.to_parquet, read_parquet)
    )
    writer(path)

    # Note: due to bugs in pyarrow, complex types can't be read without using
    # the types mapper. This is a long-standing pandas issue, as noted here:
    # - https://github.com/pandas-dev/pandas/issues/53011
    # - https://github.com/apache/arrow/issues/39914
    expected_message = re.escape(
        "data type 'struct<foo: string>[pyarrow]' not understood"
    )
    with pytest.raises(TypeError, match=expected_message):
        reader(path)
784832
785833
786834@pytest .mark .parametrize ("format" , ["feather" , "parquet" ])
0 commit comments