Skip to content

Commit 7ef94d2

Browse files
ENH: Add safe_divide method to DataFrame and Series
- Add a safe_divide method to the DataFrame class that handles division by zero gracefully.
- Add a safe_divide method to the Series class with equivalent functionality.
- Support three zero_division modes: 'warn' (default), 'raise', and 'ignore'.
- Add comprehensive test coverage for both the DataFrame and Series methods.
- Maintain compatibility with existing pandas arithmetic operations.
- Address the common pain point of division by zero in data analysis workflows.

The safe_divide method provides a user-friendly alternative to the standard division operations: by default it handles division by zero without raising an exception, while still letting users control the behavior through the zero_division parameter.
1 parent 5cc3240 commit 7ef94d2

File tree

5 files changed

+639
-0
lines changed

5 files changed

+639
-0
lines changed

pandas/core/frame.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8707,6 +8707,126 @@ def rpow(
87078707
other, roperator.rpow, level=level, fill_value=fill_value, axis=axis
87088708
)
87098709

8710+
def safe_divide(
8711+
self,
8712+
other,
8713+
axis: Axis = "columns",
8714+
level=None,
8715+
fill_value=None,
8716+
zero_division="warn"
8717+
) -> DataFrame:
8718+
"""
8719+
Perform safe division that handles division by zero gracefully.
8720+
8721+
This method performs division while handling division by zero cases
8722+
without raising exceptions. It's particularly useful for data analysis
8723+
where division by zero is a common occurrence.
8724+
8725+
Parameters
8726+
----------
8727+
other : scalar, sequence, Series, or DataFrame
8728+
Object to divide with.
8729+
axis : {0 or 'index', 1 or 'columns'}, default 'columns'
8730+
Whether to compare by the index (0 or 'index') or columns (1 or 'columns').
8731+
level : int or label, default None
8732+
Broadcast across a level, matching Index values on the passed MultiIndex level.
8733+
fill_value : float or None, default None
8734+
Value to use for missing values. If specified, this value will be used
8735+
to fill missing values before performing the operation.
8736+
zero_division : {'warn', 'raise', 'ignore'}, default 'warn'
8737+
How to handle division by zero:
8738+
- 'warn': Issue a warning and return inf for division by zero
8739+
- 'raise': Raise an exception for division by zero
8740+
- 'ignore': Return inf for division by zero without warning
8741+
8742+
Returns
8743+
-------
8744+
DataFrame
8745+
Result of the safe division operation.
8746+
8747+
See Also
8748+
--------
8749+
DataFrame.truediv : Standard division operation.
8750+
DataFrame.div : Alias for truediv.
8751+
8752+
Examples
8753+
--------
8754+
>>> df = pd.DataFrame({'A': [1, 2, 0], 'B': [4, 5, 6]})
8755+
>>> other = pd.DataFrame({'A': [2, 0, 4], 'B': [1, 2, 3]})
8756+
>>> df.safe_divide(other)
8757+
A B
8758+
0 0.5 4.0
8759+
1 inf 2.5
8760+
2 0.0 2.0
8761+
8762+
>>> df.safe_divide(other, zero_division='ignore')
8763+
A B
8764+
0 0.5 4.0
8765+
1 inf 2.5
8766+
2 0.0 2.0
8767+
8768+
>>> df.safe_divide(2)
8769+
A B
8770+
0 0.5 2.0
8771+
1 1.0 2.5
8772+
2 0.0 3.0
8773+
"""
8774+
import warnings
8775+
8776+
# Handle zero_division parameter
8777+
if zero_division not in ['warn', 'raise', 'ignore']:
8778+
raise ValueError("zero_division must be one of 'warn', 'raise', or 'ignore'")
8779+
8780+
# Perform the division with error handling
8781+
with np.errstate(divide='ignore', invalid='ignore'):
8782+
result = self._flex_arith_method(
8783+
other, operator.truediv, level=level, fill_value=fill_value, axis=axis
8784+
)
8785+
8786+
# Handle division by zero cases
8787+
if zero_division == 'raise':
8788+
# Check for division by zero and raise if found
8789+
if isinstance(other, (DataFrame, Series)):
8790+
# For DataFrame/Series operations, check if any denominator is zero
8791+
if isinstance(other, DataFrame):
8792+
zero_mask = (other == 0) & (self != 0)
8793+
else: # Series
8794+
zero_mask = (other == 0) & (self != 0)
8795+
8796+
if zero_mask.any().any():
8797+
raise ZeroDivisionError("Division by zero encountered")
8798+
else:
8799+
# For scalar operations
8800+
if other == 0 and (self != 0).any().any():
8801+
raise ZeroDivisionError("Division by zero encountered")
8802+
8803+
elif zero_division == 'warn':
8804+
# Check for division by zero and warn if found
8805+
if isinstance(other, (DataFrame, Series)):
8806+
if isinstance(other, DataFrame):
8807+
zero_mask = (other == 0) & (self != 0)
8808+
else: # Series
8809+
zero_mask = (other == 0) & (self != 0)
8810+
8811+
if zero_mask.any().any():
8812+
warnings.warn(
8813+
"Division by zero encountered. Results will contain inf values.",
8814+
RuntimeWarning,
8815+
stacklevel=2
8816+
)
8817+
else:
8818+
if other == 0 and (self != 0).any().any():
8819+
warnings.warn(
8820+
"Division by zero encountered. Results will contain inf values.",
8821+
RuntimeWarning,
8822+
stacklevel=2
8823+
)
8824+
8825+
# For 'ignore' case, we don't need to do anything special
8826+
# The result already contains inf values where appropriate
8827+
8828+
return result
8829+
87108830
# ----------------------------------------------------------------------
87118831
# Combination-Related
87128832

pandas/core/series.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6598,6 +6598,117 @@ def rpow(self, other, level=None, fill_value=None, axis: Axis = 0) -> Series:
65986598
other, roperator.rpow, level=level, fill_value=fill_value, axis=axis
65996599
)
66006600

6601+
def safe_divide(
6602+
self,
6603+
other,
6604+
level=None,
6605+
fill_value=None,
6606+
axis: Axis = 0,
6607+
zero_division="warn"
6608+
) -> Series:
6609+
"""
6610+
Perform safe division that handles division by zero gracefully.
6611+
6612+
This method performs division while handling division by zero cases
6613+
without raising exceptions. It's particularly useful for data analysis
6614+
where division by zero is a common occurrence.
6615+
6616+
Parameters
6617+
----------
6618+
other : scalar, sequence, Series, or DataFrame
6619+
Object to divide with.
6620+
level : int or label, default None
6621+
Broadcast across a level, matching Index values on the passed MultiIndex level.
6622+
fill_value : float or None, default None
6623+
Value to use for missing values. If specified, this value will be used
6624+
to fill missing values before performing the operation.
6625+
axis : {0 or 'index'}, default 0
6626+
Unused. Parameter needed for compatibility with DataFrame.
6627+
zero_division : {'warn', 'raise', 'ignore'}, default 'warn'
6628+
How to handle division by zero:
6629+
- 'warn': Issue a warning and return inf for division by zero
6630+
- 'raise': Raise an exception for division by zero
6631+
- 'ignore': Return inf for division by zero without warning
6632+
6633+
Returns
6634+
-------
6635+
Series
6636+
Result of the safe division operation.
6637+
6638+
See Also
6639+
--------
6640+
Series.truediv : Standard division operation.
6641+
Series.div : Alias for truediv.
6642+
6643+
Examples
6644+
--------
6645+
>>> s = pd.Series([1, 2, 0])
6646+
>>> other = pd.Series([2, 0, 4])
6647+
>>> s.safe_divide(other)
6648+
0 0.5
6649+
1 inf
6650+
2 0.0
6651+
dtype: float64
6652+
6653+
>>> s.safe_divide(other, zero_division='ignore')
6654+
0 0.5
6655+
1 inf
6656+
2 0.0
6657+
dtype: float64
6658+
6659+
>>> s.safe_divide(2)
6660+
0 0.5
6661+
1 1.0
6662+
2 0.0
6663+
dtype: float64
6664+
"""
6665+
import warnings
6666+
6667+
# Handle zero_division parameter
6668+
if zero_division not in ['warn', 'raise', 'ignore']:
6669+
raise ValueError("zero_division must be one of 'warn', 'raise', or 'ignore'")
6670+
6671+
# Perform the division with error handling
6672+
with np.errstate(divide='ignore', invalid='ignore'):
6673+
result = self._flex_method(
6674+
other, operator.truediv, level=level, fill_value=fill_value, axis=axis
6675+
)
6676+
6677+
# Handle division by zero cases
6678+
if zero_division == 'raise':
6679+
# Check for division by zero and raise if found
6680+
if isinstance(other, Series):
6681+
zero_mask = (other == 0) & (self != 0)
6682+
if zero_mask.any():
6683+
raise ZeroDivisionError("Division by zero encountered")
6684+
else:
6685+
# For scalar operations
6686+
if other == 0 and (self != 0).any():
6687+
raise ZeroDivisionError("Division by zero encountered")
6688+
6689+
elif zero_division == 'warn':
6690+
# Check for division by zero and warn if found
6691+
if isinstance(other, Series):
6692+
zero_mask = (other == 0) & (self != 0)
6693+
if zero_mask.any():
6694+
warnings.warn(
6695+
"Division by zero encountered. Results will contain inf values.",
6696+
RuntimeWarning,
6697+
stacklevel=2
6698+
)
6699+
else:
6700+
if other == 0 and (self != 0).any():
6701+
warnings.warn(
6702+
"Division by zero encountered. Results will contain inf values.",
6703+
RuntimeWarning,
6704+
stacklevel=2
6705+
)
6706+
6707+
# For 'ignore' case, we don't need to do anything special
6708+
# The result already contains inf values where appropriate
6709+
6710+
return result
6711+
66016712
@Appender(ops.make_flex_doc("divmod", "series"))
66026713
def divmod(self, other, level=None, fill_value=None, axis: Axis = 0) -> Series:
66036714
return self._flex_method(

pandas/tests/frame/test_arithmetic.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2192,3 +2192,146 @@ def test_mixed_col_index_dtype(string_dtype_no_object):
21922192
expected.columns = expected.columns.astype(string_dtype_no_object)
21932193

21942194
tm.assert_frame_equal(result, expected)
2195+
2196+
2197+
class TestDataFrameSafeDivide:
    """Test cases for DataFrame.safe_divide method."""

    def test_safe_divide_basic(self):
        """Test basic safe division functionality."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 1, 3], "B": [2, 2, 2]})

        result = df.safe_divide(other)
        expected = DataFrame({"A": [0.5, 2.0, 1.0], "B": [2.0, 2.5, 3.0]})

        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_zero_division_warn(self):
        """Test safe division with zero division warning."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 0, 3], "B": [2, 2, 2]})

        with pytest.warns(RuntimeWarning, match="Division by zero encountered"):
            result = df.safe_divide(other)

        expected = DataFrame({"A": [0.5, np.inf, 1.0], "B": [2.0, 2.5, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_zero_division_raise(self):
        """Test safe division with zero division raising exception."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 0, 3], "B": [2, 2, 2]})

        with pytest.raises(ZeroDivisionError, match="Division by zero encountered"):
            df.safe_divide(other, zero_division="raise")

    def test_safe_divide_with_zero_division_ignore(self):
        """Test safe division with zero division ignored."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 0, 3], "B": [2, 2, 2]})

        # 'ignore' promises silence, so assert that no warning escapes.
        with tm.assert_produces_warning(None):
            result = df.safe_divide(other, zero_division="ignore")

        expected = DataFrame({"A": [0.5, np.inf, 1.0], "B": [2.0, 2.5, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_scalar(self):
        """Test safe division with scalar values."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        result = df.safe_divide(2)
        expected = DataFrame({"A": [0.5, 1.0, 1.5], "B": [2.0, 2.5, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_scalar_zero(self):
        """Test safe division with scalar zero."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        with pytest.warns(RuntimeWarning, match="Division by zero encountered"):
            result = df.safe_divide(0)

        expected = DataFrame(
            {"A": [np.inf, np.inf, np.inf], "B": [np.inf, np.inf, np.inf]}
        )
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_series(self):
        """Test row-wise safe division with a Series."""
        df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
        # The Series shares the frame's default integer index, so with axis=0
        # every row is divided by the Series element carrying the same label.
        other = Series([2, 1, 0])

        result = df.safe_divide(other, axis=0, zero_division="ignore")

        expected = DataFrame({"A": [0.5, 2.0, np.inf], "B": [2.0, 5.0, np.inf]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_with_nan_values(self):
        """Test safe division with NaN values."""
        df = DataFrame({"A": [1, np.nan, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 2, 0], "B": [2, 2, 2]})

        with pytest.warns(RuntimeWarning, match="Division by zero encountered"):
            result = df.safe_divide(other)

        expected = DataFrame({"A": [0.5, np.nan, np.inf], "B": [2.0, 2.5, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_invalid_zero_division(self):
        """Test safe division with invalid zero_division parameter."""
        df = DataFrame({"A": [1, 2, 3]})

        with pytest.raises(ValueError, match="zero_division must be one of"):
            df.safe_divide(2, zero_division="invalid")

    def test_safe_divide_preserves_index_and_columns(self):
        """Test that safe_divide preserves index and column names."""
        df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["x", "y"])
        other = DataFrame({"A": [2, 1], "B": [2, 2]}, index=["x", "y"])

        result = df.safe_divide(other)

        tm.assert_index_equal(result.index, df.index)
        tm.assert_index_equal(result.columns, df.columns)

    def test_safe_divide_with_fill_value(self):
        """Test safe division with fill_value parameter."""
        df = DataFrame({"A": [1, np.nan, 3], "B": [4, 5, 6]})
        other = DataFrame({"A": [2, 2, 2], "B": [2, 2, 2]})

        result = df.safe_divide(other, fill_value=1)
        expected = DataFrame({"A": [0.5, 0.5, 1.5], "B": [2.0, 2.5, 3.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_axis_parameter(self):
        """Test safe division with different axis parameter."""
        df = DataFrame({"A": [1, 2], "B": [3, 4]})
        other = Series([2, 1], index=["A", "B"])

        result = df.safe_divide(other, axis=1)
        expected = DataFrame({"A": [0.5, 1.0], "B": [3.0, 4.0]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_empty_dataframe(self):
        """Test safe division with empty DataFrame."""
        df = DataFrame()
        other = DataFrame()

        result = df.safe_divide(other)
        tm.assert_frame_equal(result, df)

    def test_safe_divide_single_element(self):
        """Test safe division with single element DataFrame."""
        df = DataFrame({"A": [1]})
        other = DataFrame({"A": [2]})

        result = df.safe_divide(other)
        expected = DataFrame({"A": [0.5]})
        tm.assert_frame_equal(result, expected)

    def test_safe_divide_mixed_dtypes(self):
        """Test safe division with mixed data types."""
        df = DataFrame({"A": [1, 2], "B": [3.0, 4.0]})
        other = DataFrame({"A": [2, 1], "B": [2.0, 2.0]})

        result = df.safe_divide(other)
        expected = DataFrame({"A": [0.5, 2.0], "B": [1.5, 2.0]})
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)