Skip to content

Commit ef8ac3e

Browse files
committed
update ArrowDtype.itemsize
1 parent fe3d799 commit ef8ac3e

File tree

3 files changed

+89
-93
lines changed

3 files changed

+89
-93
lines changed

pandas/core/dtypes/dtypes.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2328,13 +2328,14 @@ def itemsize(self) -> int:
23282328
>>> dtype.itemsize # falls back to numpy dtype
23292329
1
23302330
"""
2331+
if pa.types.is_boolean(self.pyarrow_dtype):
2332+
return self.numpy_dtype.itemsize
2333+
23312334
# Use pyarrow itemsize for fixed-width data types
23322335
# e.g. int32 -> 32 bits // 8 = 4 bytes
23332336
try:
2334-
if pa.types.is_boolean(self.pyarrow_dtype):
2335-
return self.numpy_dtype.itemsize
23362337
return self.pyarrow_dtype.bit_width // 8
2337-
except (ValueError, AttributeError):
2338+
except (ValueError, AttributeError, NotImplementedError):
23382339
return self.numpy_dtype.itemsize
23392340

23402341
def construct_array_type(self) -> type_t[ArrowExtensionArray]:

pandas/tests/dtypes/test_dtypes.py

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,93 +1259,3 @@ def test_categorical_nan_no_dtype_conversion():
12591259
expected = pd.DataFrame({"a": Categorical([1], [1]), "b": [1]})
12601260
df.loc[0, "a"] = np.array([1])
12611261
tm.assert_frame_equal(df, expected)
1262-
1263-
1264-
@pytest.fixture
1265-
def pa():
1266-
return pytest.importorskip("pyarrow")
1267-
1268-
1269-
@pytest.mark.parametrize(
1270-
"type_name, expected_size",
1271-
[
1272-
# Integer types
1273-
("int8", 1),
1274-
("int16", 2),
1275-
("int32", 4),
1276-
("int64", 8),
1277-
("uint8", 1),
1278-
("uint16", 2),
1279-
("uint32", 4),
1280-
("uint64", 8),
1281-
# Floating point types
1282-
("float16", 2),
1283-
("float32", 4),
1284-
("float64", 8),
1285-
# Boolean
1286-
("bool_", 1),
1287-
# Date and timestamp types
1288-
("date32", 4),
1289-
("date64", 8),
1290-
("timestamp", 8),
1291-
# Time types
1292-
("time32", 4),
1293-
("time64", 8),
1294-
# Decimal types
1295-
("decimal128", 16),
1296-
("decimal256", 32),
1297-
],
1298-
)
1299-
def test_arrow_dtype_itemsize_fixed_width(pa, type_name, expected_size):
1300-
# GH 57948
1301-
1302-
parametric_type_map = {
1303-
"timestamp": pa.timestamp("ns"),
1304-
"time32": pa.time32("s"),
1305-
"time64": pa.time64("ns"),
1306-
"decimal128": pa.decimal128(38, 10),
1307-
"decimal256": pa.decimal256(76, 10),
1308-
}
1309-
1310-
if type_name in parametric_type_map:
1311-
arrow_type = parametric_type_map.get(type_name)
1312-
else:
1313-
arrow_type = getattr(pa, type_name)()
1314-
dtype = pd.ArrowDtype(arrow_type)
1315-
1316-
if type_name == "bool_":
1317-
expected_size = dtype.numpy_dtype.itemsize
1318-
1319-
assert dtype.itemsize == expected_size, (
1320-
f"{type_name} expected {expected_size}, got {dtype.itemsize} "
1321-
f"(bit_width={getattr(dtype.pyarrow_dtype, 'bit_width', 'N/A')})"
1322-
)
1323-
1324-
1325-
@pytest.mark.parametrize("type_name", ["string", "binary", "large_string"])
1326-
def test_arrow_dtype_itemsize_variable_width(pa, type_name):
1327-
# GH 57948
1328-
1329-
arrow_type = getattr(pa, type_name)()
1330-
dtype = pd.ArrowDtype(arrow_type)
1331-
1332-
assert dtype.itemsize == dtype.numpy_dtype.itemsize
1333-
1334-
1335-
def test_arrow_dtype_error_fallback(pa, monkeypatch):
1336-
# GH 57948
1337-
1338-
dtype = pd.ArrowDtype(pa.int32())
1339-
1340-
class ErrorType:
1341-
id = None
1342-
1343-
@property
1344-
def bit_width(self):
1345-
raise ValueError("Simulated Error")
1346-
1347-
def to_pandas_dtype(self):
1348-
return Series([0]).dtype
1349-
1350-
monkeypatch.setattr(dtype, "pyarrow_dtype", ErrorType())
1351-
assert dtype.itemsize == dtype.numpy_dtype.itemsize

pandas/tests/extension/test_arrow.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3700,3 +3700,88 @@ def test_pow_with_all_na_float():
37003700
result = s.pow(2)
37013701
expected = pd.Series([pd.NA, pd.NA], dtype="float64[pyarrow]")
37023702
tm.assert_series_equal(result, expected)
3703+
3704+
3705+
@pytest.mark.parametrize(
3706+
"type_name, expected_size",
3707+
[
3708+
# Integer types
3709+
("int8", 1),
3710+
("int16", 2),
3711+
("int32", 4),
3712+
("int64", 8),
3713+
("uint8", 1),
3714+
("uint16", 2),
3715+
("uint32", 4),
3716+
("uint64", 8),
3717+
# Floating point types
3718+
("float16", 2),
3719+
("float32", 4),
3720+
("float64", 8),
3721+
# Boolean
3722+
("bool_", 1),
3723+
# Date and timestamp types
3724+
("date32", 4),
3725+
("date64", 8),
3726+
("timestamp", 8),
3727+
# Time types
3728+
("time32", 4),
3729+
("time64", 8),
3730+
# Decimal types
3731+
("decimal128", 16),
3732+
("decimal256", 32),
3733+
],
3734+
)
3735+
def test_arrow_dtype_itemsize_fixed_width(type_name, expected_size):
3736+
# GH 57948
3737+
3738+
parametric_type_map = {
3739+
"timestamp": pa.timestamp("ns"),
3740+
"time32": pa.time32("s"),
3741+
"time64": pa.time64("ns"),
3742+
"decimal128": pa.decimal128(38, 10),
3743+
"decimal256": pa.decimal256(76, 10),
3744+
}
3745+
3746+
if type_name in parametric_type_map:
3747+
arrow_type = parametric_type_map.get(type_name)
3748+
else:
3749+
arrow_type = getattr(pa, type_name)()
3750+
dtype = ArrowDtype(arrow_type)
3751+
3752+
if type_name == "bool_":
3753+
expected_size = dtype.numpy_dtype.itemsize
3754+
3755+
assert dtype.itemsize == expected_size, (
3756+
f"{type_name} expected {expected_size}, got {dtype.itemsize} "
3757+
f"(bit_width={getattr(dtype.pyarrow_dtype, 'bit_width', 'N/A')})"
3758+
)
3759+
3760+
3761+
@pytest.mark.parametrize("type_name", ["string", "binary", "large_string"])
3762+
def test_arrow_dtype_itemsize_variable_width(type_name):
3763+
# GH 57948
3764+
3765+
arrow_type = getattr(pa, type_name)()
3766+
dtype = ArrowDtype(arrow_type)
3767+
3768+
assert dtype.itemsize == dtype.numpy_dtype.itemsize
3769+
3770+
3771+
def test_arrow_dtype_error_fallback(monkeypatch):
3772+
# GH 57948
3773+
3774+
dtype = ArrowDtype(pa.int32())
3775+
3776+
class ErrorType:
3777+
id = None
3778+
3779+
@property
3780+
def bit_width(self):
3781+
raise ValueError("Simulated Error")
3782+
3783+
def to_pandas_dtype(self):
3784+
return pd.Series([0]).dtype
3785+
3786+
monkeypatch.setattr(dtype, "pyarrow_dtype", ErrorType())
3787+
assert dtype.itemsize == dtype.numpy_dtype.itemsize

0 commit comments

Comments
 (0)