|
17 | 17 | from pandas.compat import ( |
18 | 18 | pa_version_under10p1, |
19 | 19 | pa_version_under13p0, |
| 20 | + pa_version_under16p0, |
20 | 21 | ) |
21 | 22 | from pandas.util._exceptions import find_stack_level |
22 | 23 |
|
@@ -71,6 +72,10 @@ def _chk_pyarrow_available() -> None: |
71 | 72 | raise ImportError(msg) |
72 | 73 |
|
73 | 74 |
|
def _is_string_view(typ):
    """
    Report whether ``typ`` is a pyarrow ``string_view`` type.

    Always returns ``False`` when ``pa_version_under16p0`` is set; in that
    case ``pa.types.is_string_view`` is never called.
    """
    if pa_version_under16p0:
        # Guard first so the pyarrow type check below is only reached
        # when the version flag is not set.
        return False
    return pa.types.is_string_view(typ)
| 77 | + |
| 78 | + |
74 | 79 | # TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from |
75 | 80 | # ObjectStringArrayMixin because we want to have the object-dtype based methods as |
76 | 81 | # fallback for the ones that pyarrow doesn't yet support |
@@ -128,11 +133,13 @@ def __init__(self, values) -> None: |
128 | 133 | _chk_pyarrow_available() |
129 | 134 | if isinstance(values, (pa.Array, pa.ChunkedArray)) and ( |
130 | 135 | pa.types.is_string(values.type) |
| 136 | + or _is_string_view(values.type) |
131 | 137 | or ( |
132 | 138 | pa.types.is_dictionary(values.type) |
133 | 139 | and ( |
134 | 140 | pa.types.is_string(values.type.value_type) |
135 | 141 | or pa.types.is_large_string(values.type.value_type) |
| 142 | + or _is_string_view(values.type.value_type) |
136 | 143 | ) |
137 | 144 | ) |
138 | 145 | ): |
@@ -216,7 +223,10 @@ def insert(self, loc: int, item) -> ArrowStringArray: |
216 | 223 | if self.dtype.na_value is np.nan and item is np.nan: |
217 | 224 | item = libmissing.NA |
218 | 225 | if not isinstance(item, str) and item is not libmissing.NA: |
219 | | - raise TypeError("Scalar must be NA or str") |
| 226 | + raise TypeError( |
| 227 | + f"Invalid value '{item}' for dtype 'str'. Value should be a " |
| 228 | + f"string or missing value, got '{type(item).__name__}' instead." |
| 229 | + ) |
220 | 230 | return super().insert(loc, item) |
221 | 231 |
|
222 | 232 | def _convert_bool_result(self, values, na=lib.no_default, method_name=None): |
@@ -248,13 +258,19 @@ def _maybe_convert_setitem_value(self, value): |
248 | 258 | if isna(value): |
249 | 259 | value = None |
250 | 260 | elif not isinstance(value, str): |
251 | | - raise TypeError("Scalar must be NA or str") |
| 261 | + raise TypeError( |
| 262 | + f"Invalid value '{value}' for dtype 'str'. Value should be a " |
| 263 | + f"string or missing value, got '{type(value).__name__}' instead." |
| 264 | + ) |
252 | 265 | else: |
253 | 266 | value = np.array(value, dtype=object, copy=True) |
254 | 267 | value[isna(value)] = None |
255 | 268 | for v in value: |
256 | 269 | if not (v is None or isinstance(v, str)): |
257 | | - raise TypeError("Must provide strings") |
| 270 | + raise TypeError( |
| 271 | + "Invalid value for dtype 'str'. Value should be a " |
| 272 | + "string or missing value (or array of those)." |
| 273 | + ) |
258 | 274 | return super()._maybe_convert_setitem_value(value) |
259 | 275 |
|
260 | 276 | def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: |
|
0 commit comments