From 7c3aa3b9c44a3643090fe39c6c39857b081c3eda Mon Sep 17 00:00:00 2001
From: Lzforevr <lz@2693611448@qq.com>
Date: Wed, 2 Oct 2024 21:53:48 +0800
Subject: [PATCH 1/3] Enhanced numeric.py to process hexadecimal, octal, binary
 formats like 0x, 0o, 0b

---
 pandas/core/tools/numeric.py | 63 ++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 17 deletions(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 982851d0557c3..391bf28d62bbf 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -42,11 +42,23 @@
     )
 
 
+def parse_numeric(value):
+    """Parse a str as int (0x/0o/0b prefixes honoured) or float, else NA."""
+    if not isinstance(value, str):
+        return value  # non-string input is passed through unchanged
+    for convert in (lambda s: int(s, 0), int, float):  # plain int rescues "010"
+        try:
+            return convert(value)
+        except ValueError:
+            continue
+    return libmissing.NA
+
+
 def to_numeric(
-    arg,
-    errors: DateTimeErrorChoices = "raise",
-    downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
-    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
+        arg,
+        errors: DateTimeErrorChoices = "raise",
+        downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
+        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 ):
     """
     Convert argument to a numeric type.
@@ -214,25 +226,33 @@ def to_numeric(
         values = values.view(np.int64)
     else:
         values = ensure_object(values)
-        coerce_numeric = errors != "raise"
-        values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload]
-            values,
-            set(),
-            coerce_numeric=coerce_numeric,
-            convert_to_masked_nullable=dtype_backend is not lib.no_default
-            or isinstance(values_dtype, StringDtype)
-            and values_dtype.na_value is libmissing.NA,
-        )
+        parsed_values = []
+        new_mask = []
+        for idx, x in enumerate(values):
+            parsed_value = parse_numeric(x)
+            if libmissing.checknull(parsed_values):
+                if errors == 'raise':
+                    raise ValueError(f"Unable to parse string '{x}' at position{idx}")
+                elif errors == 'coerce':
+                    parsed_values.append(libmissing.NA)
+                    new_mask.append(True)
+                    continue
+            else:
+                parsed_values.append(parsed_value)
+                new_mask.append(False)
+
+        values = np.array(parsed_values, dtype=object)
+        new_mask = np.array(new_mask, dtype=bool)
 
     if new_mask is not None:
         # Remove unnecessary values, is expected later anyway and enables
         # downcasting
         values = values[~new_mask]
     elif (
-        dtype_backend is not lib.no_default
-        and new_mask is None
-        or isinstance(values_dtype, StringDtype)
-        and values_dtype.na_value is libmissing.NA
+            dtype_backend is not lib.no_default
+            and new_mask is None
+            or isinstance(values_dtype, StringDtype)
+            and values_dtype.na_value is libmissing.NA
     ):
         new_mask = np.zeros(values.shape, dtype=np.bool_)
 
@@ -309,3 +329,12 @@ def to_numeric(
         return values[0]
     else:
         return values
+
+
+if __name__ == "__main__":
+    import numpy as np
+
+    test_data = ['0x1A', '0b1010', '0o17', '25', '3.14', 'invalid']
+    result = to_numeric(test_data, errors='coerce')
+    print("Inputs:", test_data)
+    print("ParseResult:", result)

From ec07f931a1034826b4ab7f2f60e2c01d2b880609 Mon Sep 17 00:00:00 2001
From: Lzforevr <lz@2693611448@qq.com>
Date: Wed, 2 Oct 2024 23:23:07 +0800
Subject: [PATCH 2/3] Enhanced numeric.py to process hexadecimal, octal, binary
 formats like 0x, 0o, 0b

---
 pandas/core/tools/numeric.py | 144 +++--------------------------------
 1 file changed, 10 insertions(+), 134 deletions(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 391bf28d62bbf..b744225a0e33a 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -62,116 +62,7 @@ def to_numeric(
 ):
     """
     Convert argument to a numeric type.
-
-    The default return dtype is `float64` or `int64`
-    depending on the data supplied. Use the `downcast` parameter
-    to obtain other dtypes.
-
-    Please note that precision loss may occur if really large numbers
-    are passed in. Due to the internal limitations of `ndarray`, if
-    numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
-    or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
-    passed in, it is very likely they will be converted to float so that
-    they can be stored in an `ndarray`. These warnings apply similarly to
-    `Series` since it internally leverages `ndarray`.
-
-    Parameters
-    ----------
-    arg : scalar, list, tuple, 1-d array, or Series
-        Argument to be converted.
-
-    errors : {'raise', 'coerce'}, default 'raise'
-        - If 'raise', then invalid parsing will raise an exception.
-        - If 'coerce', then invalid parsing will be set as NaN.
-
-    downcast : str, default None
-        Can be 'integer', 'signed', 'unsigned', or 'float'.
-        If not None, and if the data has been successfully cast to a
-        numerical dtype (or if the data was numeric to begin with),
-        downcast that resulting data to the smallest numerical dtype
-        possible according to the following rules:
-
-        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
-        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
-        - 'float': smallest float dtype (min.: np.float32)
-
-        As this behaviour is separate from the core conversion to
-        numeric values, any errors raised during the downcasting
-        will be surfaced regardless of the value of the 'errors' input.
-
-        In addition, downcasting will only occur if the size
-        of the resulting data's dtype is strictly larger than
-        the dtype it is to be cast to, so if none of the dtypes
-        checked satisfy that specification, no downcasting will be
-        performed on the data.
-
-    dtype_backend : {'numpy_nullable', 'pyarrow'}
-        Back-end data type applied to the resultant :class:`DataFrame`
-        (still experimental). If not specified, the default behavior
-        is to not use nullable data types. If specified, the behavior
-        is as follows:
-
-        * ``"numpy_nullable"``: returns nullable-dtype-backed object
-        * ``"pyarrow"``: returns with pyarrow-backed nullable object
-
-        .. versionadded:: 2.0
-
-    Returns
-    -------
-    ret
-        Numeric if parsing succeeded.
-        Return type depends on input.  Series if Series, otherwise ndarray.
-
-    See Also
-    --------
-    DataFrame.astype : Cast argument to a specified dtype.
-    to_datetime : Convert argument to datetime.
-    to_timedelta : Convert argument to timedelta.
-    numpy.ndarray.astype : Cast a numpy array to a specified type.
-    DataFrame.convert_dtypes : Convert dtypes.
-
-    Examples
-    --------
-    Take separate series and convert to numeric, coercing when told to
-
-    >>> s = pd.Series(["1.0", "2", -3])
-    >>> pd.to_numeric(s)
-    0    1.0
-    1    2.0
-    2   -3.0
-    dtype: float64
-    >>> pd.to_numeric(s, downcast="float")
-    0    1.0
-    1    2.0
-    2   -3.0
-    dtype: float32
-    >>> pd.to_numeric(s, downcast="signed")
-    0    1
-    1    2
-    2   -3
-    dtype: int8
-    >>> s = pd.Series(["apple", "1.0", "2", -3])
-    >>> pd.to_numeric(s, errors="coerce")
-    0    NaN
-    1    1.0
-    2    2.0
-    3   -3.0
-    dtype: float64
-
-    Downcasting of nullable integer and floating dtypes is supported:
-
-    >>> s = pd.Series([1, 2, 3], dtype="Int64")
-    >>> pd.to_numeric(s, downcast="integer")
-    0    1
-    1    2
-    2    3
-    dtype: Int8
-    >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
-    >>> pd.to_numeric(s, downcast="float")
-    0    1.0
-    1    2.1
-    2    3.0
-    dtype: Float32
+    ...
     """
     if downcast not in (None, "integer", "signed", "unsigned", "float"):
         raise ValueError("invalid downcasting method provided")
@@ -208,8 +99,6 @@ def to_numeric(
     else:
         values = arg
 
-    # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
-    # save mask to reconstruct the full array after casting
     mask: npt.NDArray[np.bool_] | None = None
     if isinstance(values, BaseMaskedArray):
         mask = values._mask
@@ -220,6 +109,7 @@ def to_numeric(
         mask = values.isna()
         values = values.dropna().to_numpy()
     new_mask: np.ndarray | None = None
+
     if is_numeric_dtype(values_dtype):
         pass
     elif lib.is_np_dtype(values_dtype, "mM"):
@@ -231,9 +121,9 @@ def to_numeric(
         for idx, x in enumerate(values):
             parsed_value = parse_numeric(x)
             if libmissing.checknull(parsed_values):
-                if errors == 'raise':
-                    raise ValueError(f"Unable to parse string '{x}' at position{idx}")
-                elif errors == 'coerce':
+                if errors == "raise":
+                    raise ValueError(f"Unable to parse string '{x}' at position {idx}")
+                elif errors == "coerce":
                     parsed_values.append(libmissing.NA)
                     new_mask.append(True)
                     continue
@@ -245,8 +135,6 @@ def to_numeric(
         new_mask = np.array(new_mask, dtype=bool)
 
     if new_mask is not None:
-        # Remove unnecessary values, is expected later anyway and enables
-        # downcasting
         values = values[~new_mask]
     elif (
             dtype_backend is not lib.no_default
@@ -256,8 +144,6 @@ def to_numeric(
     ):
         new_mask = np.zeros(values.shape, dtype=np.bool_)
 
-    # attempt downcast only if the data has been successfully converted
-    # to a numerical dtype and if a downcast method has been specified
     if downcast is not None and is_numeric_dtype(values.dtype):
         typecodes: str | None = None
 
@@ -267,30 +153,23 @@ def to_numeric(
             typecodes = np.typecodes["UnsignedInteger"]
         elif downcast == "float":
             typecodes = np.typecodes["Float"]
-
-            # pandas support goes only to np.float32,
-            # as float dtypes smaller than that are
-            # extremely rare and not well supported
             float_32_char = np.dtype(np.float32).char
             float_32_ind = typecodes.index(float_32_char)
             typecodes = typecodes[float_32_ind:]
 
         if typecodes is not None:
-            # from smallest to largest
             for typecode in typecodes:
                 dtype = np.dtype(typecode)
                 if dtype.itemsize <= values.dtype.itemsize:
+                    # Only downcast if values are all integers
+                    if downcast in ("integer", "signed", "unsigned") and not np.isin(np.mod(values, 1), 0).all():
+                        continue  # Skip downcasting if there are any float values
                     values = maybe_downcast_numeric(values, dtype)
-
-                    # successful conversion
                     if values.dtype == dtype:
                         break
 
-    # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
-    # masked array
     if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype):
         if mask is None or (new_mask is not None and new_mask.shape == mask.shape):
-            # GH 52588
             mask = new_mask
         else:
             mask = mask.copy()
@@ -320,10 +199,7 @@ def to_numeric(
     if is_series:
         return arg._constructor(values, index=arg.index, name=arg.name)
     elif is_index:
-        # because we want to coerce to numeric if possible,
-        # do not use _shallow_copy
         from pandas import Index
-
         return Index(values, name=arg.name)
     elif is_scalars:
         return values[0]
@@ -334,7 +210,7 @@ def to_numeric(
 if __name__ == "__main__":
     import numpy as np
 
-    test_data = ['0x1A', '0b1010', '0o17', '25', '3.14', 'invalid']
-    result = to_numeric(test_data, errors='coerce')
+    test_data = ["0x1A", "0b1010", "0o17", "25", "3.14", "invalid"]
+    result = to_numeric(test_data, errors="coerce")
     print("Inputs:", test_data)
     print("ParseResult:", result)

From e618573c9891965f1f4b1633050e71fce61c4a67 Mon Sep 17 00:00:00 2001
From: Lzforevr <lz@2693611448@qq.com>
Date: Wed, 2 Oct 2024 23:28:40 +0800
Subject: [PATCH 3/3] Enhanced numeric.py to process hexadecimal, octal, binary
 formats like 0x, 0o, 0b

---
 pandas/core/tools/numeric.py | 145 +++++++++++++++++++++++++++++++----
 1 file changed, 130 insertions(+), 15 deletions(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index b744225a0e33a..b8fa0c6381638 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -62,7 +62,116 @@ def to_numeric(
 ):
     """
     Convert argument to a numeric type.
-    ...
+
+    The default return dtype is `float64` or `int64`
+    depending on the data supplied. Use the `downcast` parameter
+    to obtain other dtypes.
+
+    Please note that precision loss may occur if really large numbers
+    are passed in. Due to the internal limitations of `ndarray`, if
+    numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
+    or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
+    passed in, it is very likely they will be converted to float so that
+    they can be stored in an `ndarray`. These warnings apply similarly to
+    `Series` since it internally leverages `ndarray`.
+
+    Parameters
+    ----------
+    arg : scalar, list, tuple, 1-d array, or Series
+        Argument to be converted.
+
+    errors : {'raise', 'coerce'}, default 'raise'
+        - If 'raise', then invalid parsing will raise an exception.
+        - If 'coerce', then invalid parsing will be set as NaN.
+
+    downcast : str, default None
+        Can be 'integer', 'signed', 'unsigned', or 'float'.
+        If not None, and if the data has been successfully cast to a
+        numerical dtype (or if the data was numeric to begin with),
+        downcast that resulting data to the smallest numerical dtype
+        possible according to the following rules:
+
+        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
+        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
+        - 'float': smallest float dtype (min.: np.float32)
+
+        As this behaviour is separate from the core conversion to
+        numeric values, any errors raised during the downcasting
+        will be surfaced regardless of the value of the 'errors' input.
+
+        In addition, downcasting will only occur if the size
+        of the resulting data's dtype is strictly larger than
+        the dtype it is to be cast to, so if none of the dtypes
+        checked satisfy that specification, no downcasting will be
+        performed on the data.
+
+    dtype_backend : {'numpy_nullable', 'pyarrow'}
+        Back-end data type applied to the resultant :class:`DataFrame`
+        (still experimental). If not specified, the default behavior
+        is to not use nullable data types. If specified, the behavior
+        is as follows:
+
+        * ``"numpy_nullable"``: returns nullable-dtype-backed object
+        * ``"pyarrow"``: returns with pyarrow-backed nullable object
+
+        .. versionadded:: 2.0
+
+    Returns
+    -------
+    ret
+        Numeric if parsing succeeded.
+        Return type depends on input.  Series if Series, otherwise ndarray.
+
+    See Also
+    --------
+    DataFrame.astype : Cast argument to a specified dtype.
+    to_datetime : Convert argument to datetime.
+    to_timedelta : Convert argument to timedelta.
+    numpy.ndarray.astype : Cast a numpy array to a specified type.
+    DataFrame.convert_dtypes : Convert dtypes.
+
+    Examples
+    --------
+    Take separate series and convert to numeric, coercing when told to
+
+    >>> s = pd.Series(["1.0", "2", -3])
+    >>> pd.to_numeric(s)
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float64
+    >>> pd.to_numeric(s, downcast="float")
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float32
+    >>> pd.to_numeric(s, downcast="signed")
+    0    1
+    1    2
+    2   -3
+    dtype: int8
+    >>> s = pd.Series(["apple", "1.0", "2", -3])
+    >>> pd.to_numeric(s, errors="coerce")
+    0    NaN
+    1    1.0
+    2    2.0
+    3   -3.0
+    dtype: float64
+
+    Downcasting of nullable integer and floating dtypes is supported:
+
+    >>> s = pd.Series([1, 2, 3], dtype="Int64")
+    >>> pd.to_numeric(s, downcast="integer")
+    0    1
+    1    2
+    2    3
+    dtype: Int8
+    >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
+    >>> pd.to_numeric(s, downcast="float")
+    0    1.0
+    1    2.1
+    2    3.0
+    dtype: Float32
     """
     if downcast not in (None, "integer", "signed", "unsigned", "float"):
         raise ValueError("invalid downcasting method provided")
@@ -99,6 +208,8 @@ def to_numeric(
     else:
         values = arg
 
+    # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
+    # save mask to reconstruct the full array after casting
     mask: npt.NDArray[np.bool_] | None = None
     if isinstance(values, BaseMaskedArray):
         mask = values._mask
@@ -109,7 +220,6 @@ def to_numeric(
         mask = values.isna()
         values = values.dropna().to_numpy()
     new_mask: np.ndarray | None = None
-
     if is_numeric_dtype(values_dtype):
         pass
     elif lib.is_np_dtype(values_dtype, "mM"):
@@ -122,7 +232,7 @@ def to_numeric(
             parsed_value = parse_numeric(x)
-            if libmissing.checknull(parsed_values):
+            if libmissing.checknull(parsed_value):
                 if errors == "raise":
                     raise ValueError(f"Unable to parse string '{x}' at position {idx}")
                 elif errors == "coerce":
                     parsed_values.append(libmissing.NA)
                     new_mask.append(True)
@@ -135,6 +245,8 @@ def to_numeric(
         new_mask = np.array(new_mask, dtype=bool)
 
     if new_mask is not None:
+        # Remove unnecessary values, is expected later anyway and enables
+        # downcasting
         values = values[~new_mask]
     elif (
             dtype_backend is not lib.no_default
@@ -144,6 +256,8 @@ def to_numeric(
     ):
         new_mask = np.zeros(values.shape, dtype=np.bool_)
 
+    # attempt downcast only if the data has been successfully converted
+    # to a numerical dtype and if a downcast method has been specified
     if downcast is not None and is_numeric_dtype(values.dtype):
         typecodes: str | None = None
 
@@ -153,23 +267,30 @@ def to_numeric(
             typecodes = np.typecodes["UnsignedInteger"]
         elif downcast == "float":
             typecodes = np.typecodes["Float"]
+
+            # pandas support goes only to np.float32,
+            # as float dtypes smaller than that are
+            # extremely rare and not well supported
             float_32_char = np.dtype(np.float32).char
             float_32_ind = typecodes.index(float_32_char)
             typecodes = typecodes[float_32_ind:]
 
         if typecodes is not None:
+            # from smallest to largest
             for typecode in typecodes:
                 dtype = np.dtype(typecode)
                 if dtype.itemsize <= values.dtype.itemsize:
-                    # Only downcast if values are all integers
-                    if downcast in ("integer", "signed", "unsigned") and not np.isin(np.mod(values, 1), 0).all():
-                        continue  # Skip downcasting if there are any float values
                     values = maybe_downcast_numeric(values, dtype)
+
+                    # successful conversion
                     if values.dtype == dtype:
                         break
 
+    # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
+    # masked array
     if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype):
         if mask is None or (new_mask is not None and new_mask.shape == mask.shape):
+            # GH 52588
             mask = new_mask
         else:
             mask = mask.copy()
@@ -199,18 +320,12 @@ def to_numeric(
     if is_series:
         return arg._constructor(values, index=arg.index, name=arg.name)
     elif is_index:
+        # because we want to coerce to numeric if possible,
+        # do not use _shallow_copy
         from pandas import Index
+
         return Index(values, name=arg.name)
     elif is_scalars:
         return values[0]
     else:
         return values
-
-
-if __name__ == "__main__":
-    import numpy as np
-
-    test_data = ["0x1A", "0b1010", "0o17", "25", "3.14", "invalid"]
-    result = to_numeric(test_data, errors="coerce")
-    print("Inputs:", test_data)
-    print("ParseResult:", result)