@@ -383,3 +383,116 @@ def array_to_datetime(array: Sequence[Any]) -> np.ndarray:
383383 '2018-01-01T00:00:00.000000'], dtype='datetime64[us]')
384384 """
385385 return np .asarray (array , dtype = np .datetime64 )
386+
387+
def _array_dtypes(array: Any) -> tuple[str, str]:
    """
    Report the dtype of an array-like object before and after NumPy conversion.

    The "input" dtype is read from the object's ``dtype`` attribute (NumPy and
    pandas objects) or, when that is missing, from its ``type`` attribute (PyArrow
    objects). Objects that have neither attribute (e.g., a plain list) yield an
    empty string. The "output" dtype is the dtype of the array returned by
    ``np.ascontiguousarray`` for the same object.

    An output dtype of "object" means ``np.ascontiguousarray`` could not convert
    the input to a NumPy dtype that the GMT C API recognizes. For those inputs a
    mapping from the input dtype to the expected output dtype has to be maintained
    (e.g., a pandas.Series with ``dtype="string[python]"`` converts to "object",
    so a mapping from ``string`` to ``np.str_`` is needed).

    This function is not used anywhere in the project; similar code lives in the
    ``vectors_to_arrays`` function. It is kept to document the string
    representation of the dtypes of different array-like objects and what NumPy
    converts them to, and for testing purposes, since some of these dtypes may
    change in the future (e.g., pandas.StringDtype is still an experimental
    feature).

    Parameters
    ----------
    array
        The array-like object to be checked.

    Returns
    -------
    dtype_in
        String form of the input object's ``dtype``/``type`` attribute, or an
        empty string if the object has neither.
    dtype_out
        String form of the dtype of ``np.ascontiguousarray(array)``.

    Examples
    --------
    >>> import datetime
    >>> import numpy as np
    >>> import pandas as pd
    >>> datetimes = [datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)]

    For Python built-in types:

    >>> _array_dtypes([1, 2, 3])
    ('', 'int64')
    >>> _array_dtypes([1.0, 2.0, 3.0])
    ('', 'float64')
    >>> _array_dtypes(["a", "b", "c"])
    ('', '<U1')

    For NumPy arrays:

    >>> _array_dtypes(np.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(np.array([1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(np.datetime64("2021-01-01"))
    ('datetime64[D]', 'datetime64[D]')

    For Pandas objects:

    >>> _array_dtypes(pd.Series(data=[1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0]))
    ('float64', 'float64')
    >>> _array_dtypes(pd.Series(data=[1, 2, 3], dtype=pd.Int32Dtype()))
    ('Int32', 'int32')
    >>> _array_dtypes(pd.Series(data=[1.0, 2.0, 3.0], dtype=pd.Float32Dtype()))
    ('Float32', 'float32')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"]))
    ('object', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[python]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=["a", "b", "c"], dtype="string[pyarrow]"))
    ('string', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="datetime64[ns]"))
    ('datetime64[ns]', 'datetime64[ns]')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date32[day][pyarrow]"))
    ('date32[day][pyarrow]', 'object')
    >>> _array_dtypes(pd.Series(data=datetimes, dtype="date64[ms][pyarrow]"))
    ('date64[ms][pyarrow]', 'object')

    For PyArrow objects:

    >>> import pytest
    >>> pa = pytest.importorskip("pyarrow")
    >>> _array_dtypes(pa.array([1, 2, 3]))
    ('int64', 'int64')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0]))
    ('double', 'float64')
    >>> _array_dtypes(pa.array([1, 2, 3], type=pa.int32()))
    ('int32', 'int32')
    >>> _array_dtypes(pa.array([1.0, 2.0, 3.0], type=pa.float32()))
    ('float', 'float32')
    >>> _array_dtypes(pa.array(["a", "b", "c"]))
    ('string', 'object')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date32()))
    ('date32[day]', 'datetime64[D]')
    >>> _array_dtypes(pa.array(datetimes, type=pa.date64()))
    ('date64[ms]', 'datetime64[ms]')
    """
    # Look up the "type" fallback first, then prefer "dtype" when it is present.
    fallback = getattr(array, "type", "")
    source_dtype = str(getattr(array, "dtype", fallback))

    # Let NumPy perform the conversion and report what it ended up with.
    converted = np.ascontiguousarray(array)
    return source_dtype, str(converted.dtype)
0 commit comments