@@ -62,116 +62,7 @@ def to_numeric(
6262):
6363 """
6464 Convert argument to a numeric type.
65-
66- The default return dtype is `float64` or `int64`
67- depending on the data supplied. Use the `downcast` parameter
68- to obtain other dtypes.
69-
70- Please note that precision loss may occur if really large numbers
71- are passed in. Due to the internal limitations of `ndarray`, if
72- numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
73- or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
74- passed in, it is very likely they will be converted to float so that
75- they can be stored in an `ndarray`. These warnings apply similarly to
76- `Series` since it internally leverages `ndarray`.
77-
78- Parameters
79- ----------
80- arg : scalar, list, tuple, 1-d array, or Series
81- Argument to be converted.
82-
83- errors : {'raise', 'coerce'}, default 'raise'
84- - If 'raise', then invalid parsing will raise an exception.
85- - If 'coerce', then invalid parsing will be set as NaN.
86-
87- downcast : str, default None
88- Can be 'integer', 'signed', 'unsigned', or 'float'.
89- If not None, and if the data has been successfully cast to a
90- numerical dtype (or if the data was numeric to begin with),
91- downcast that resulting data to the smallest numerical dtype
92- possible according to the following rules:
93-
94- - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
95- - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
96- - 'float': smallest float dtype (min.: np.float32)
97-
98- As this behaviour is separate from the core conversion to
99- numeric values, any errors raised during the downcasting
100- will be surfaced regardless of the value of the 'errors' input.
101-
102- In addition, downcasting will only occur if the size
103- of the resulting data's dtype is strictly larger than
104- the dtype it is to be cast to, so if none of the dtypes
105- checked satisfy that specification, no downcasting will be
106- performed on the data.
107-
108- dtype_backend : {'numpy_nullable', 'pyarrow'}
109- Back-end data type applied to the resultant :class:`DataFrame`
110- (still experimental). If not specified, the default behavior
111- is to not use nullable data types. If specified, the behavior
112- is as follows:
113-
114- * ``"numpy_nullable"``: returns nullable-dtype-backed object
115- * ``"pyarrow"``: returns with pyarrow-backed nullable object
116-
117- .. versionadded:: 2.0
118-
119- Returns
120- -------
121- ret
122- Numeric if parsing succeeded.
123- Return type depends on input. Series if Series, otherwise ndarray.
124-
125- See Also
126- --------
127- DataFrame.astype : Cast argument to a specified dtype.
128- to_datetime : Convert argument to datetime.
129- to_timedelta : Convert argument to timedelta.
130- numpy.ndarray.astype : Cast a numpy array to a specified type.
131- DataFrame.convert_dtypes : Convert dtypes.
132-
133- Examples
134- --------
135- Take separate series and convert to numeric, coercing when told to
136-
137- >>> s = pd.Series(["1.0", "2", -3])
138- >>> pd.to_numeric(s)
139- 0 1.0
140- 1 2.0
141- 2 -3.0
142- dtype: float64
143- >>> pd.to_numeric(s, downcast="float")
144- 0 1.0
145- 1 2.0
146- 2 -3.0
147- dtype: float32
148- >>> pd.to_numeric(s, downcast="signed")
149- 0 1
150- 1 2
151- 2 -3
152- dtype: int8
153- >>> s = pd.Series(["apple", "1.0", "2", -3])
154- >>> pd.to_numeric(s, errors="coerce")
155- 0 NaN
156- 1 1.0
157- 2 2.0
158- 3 -3.0
159- dtype: float64
160-
161- Downcasting of nullable integer and floating dtypes is supported:
162-
163- >>> s = pd.Series([1, 2, 3], dtype="Int64")
164- >>> pd.to_numeric(s, downcast="integer")
165- 0 1
166- 1 2
167- 2 3
168- dtype: Int8
169- >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
170- >>> pd.to_numeric(s, downcast="float")
171- 0 1.0
172- 1 2.1
173- 2 3.0
174- dtype: Float32
65+ ...
17566 """
17667 if downcast not in (None , "integer" , "signed" , "unsigned" , "float" ):
17768 raise ValueError ("invalid downcasting method provided" )
@@ -208,8 +99,6 @@ def to_numeric(
20899 else :
209100 values = arg
210101
211- # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
212- # save mask to reconstruct the full array after casting
213102 mask : npt .NDArray [np .bool_ ] | None = None
214103 if isinstance (values , BaseMaskedArray ):
215104 mask = values ._mask
@@ -220,6 +109,7 @@ def to_numeric(
220109 mask = values .isna ()
221110 values = values .dropna ().to_numpy ()
222111 new_mask : np .ndarray | None = None
112+
223113 if is_numeric_dtype (values_dtype ):
224114 pass
225115 elif lib .is_np_dtype (values_dtype , "mM" ):
@@ -231,9 +121,9 @@ def to_numeric(
231121 for idx , x in enumerate (values ):
232122 parsed_value = parse_numeric (x )
233123 if libmissing .checknull (parsed_values ):
234- if errors == ' raise' :
235- raise ValueError (f"Unable to parse string '{ x } ' at position{ idx } " )
236- elif errors == ' coerce' :
124+ if errors == " raise" :
125+ raise ValueError (f"Unable to parse string '{ x } ' at position { idx } " )
126+ elif errors == " coerce" :
237127 parsed_values .append (libmissing .NA )
238128 new_mask .append (True )
239129 continue
@@ -245,8 +135,6 @@ def to_numeric(
245135 new_mask = np .array (new_mask , dtype = bool )
246136
247137 if new_mask is not None :
248- # Remove unnecessary values, is expected later anyway and enables
249- # downcasting
250138 values = values [~ new_mask ]
251139 elif (
252140 dtype_backend is not lib .no_default
@@ -256,8 +144,6 @@ def to_numeric(
256144 ):
257145 new_mask = np .zeros (values .shape , dtype = np .bool_ )
258146
259- # attempt downcast only if the data has been successfully converted
260- # to a numerical dtype and if a downcast method has been specified
261147 if downcast is not None and is_numeric_dtype (values .dtype ):
262148 typecodes : str | None = None
263149
@@ -267,30 +153,23 @@ def to_numeric(
267153 typecodes = np .typecodes ["UnsignedInteger" ]
268154 elif downcast == "float" :
269155 typecodes = np .typecodes ["Float" ]
270-
271- # pandas support goes only to np.float32,
272- # as float dtypes smaller than that are
273- # extremely rare and not well supported
274156 float_32_char = np .dtype (np .float32 ).char
275157 float_32_ind = typecodes .index (float_32_char )
276158 typecodes = typecodes [float_32_ind :]
277159
278160 if typecodes is not None :
279- # from smallest to largest
280161 for typecode in typecodes :
281162 dtype = np .dtype (typecode )
282163 if dtype .itemsize <= values .dtype .itemsize :
164+ # Only downcast if values are all integers
165+ if downcast in ("integer" , "signed" , "unsigned" ) and not np .isin (np .mod (values , 1 ), 0 ).all ():
166+ continue # Skip downcasting if there are any float values
283167 values = maybe_downcast_numeric (values , dtype )
284-
285- # successful conversion
286168 if values .dtype == dtype :
287169 break
288170
289- # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
290- # masked array
291171 if (mask is not None or new_mask is not None ) and not is_string_dtype (values .dtype ):
292172 if mask is None or (new_mask is not None and new_mask .shape == mask .shape ):
293- # GH 52588
294173 mask = new_mask
295174 else :
296175 mask = mask .copy ()
@@ -320,10 +199,7 @@ def to_numeric(
320199 if is_series :
321200 return arg ._constructor (values , index = arg .index , name = arg .name )
322201 elif is_index :
323- # because we want to coerce to numeric if possible,
324- # do not use _shallow_copy
325202 from pandas import Index
326-
327203 return Index (values , name = arg .name )
328204 elif is_scalars :
329205 return values [0 ]
@@ -334,7 +210,7 @@ def to_numeric(
334210if __name__ == "__main__" :
335211 import numpy as np
336212
337- test_data = [' 0x1A' , ' 0b1010' , ' 0o17' , '25' , ' 3.14' , ' invalid' ]
338- result = to_numeric (test_data , errors = ' coerce' )
213+ test_data = [" 0x1A" , " 0b1010" , " 0o17" , "25" , " 3.14" , " invalid" ]
214+ result = to_numeric (test_data , errors = " coerce" )
339215 print ("Inputs:" , test_data )
340216 print ("ParseResult:" , result )
0 commit comments