From 3a1ac3e6ba4539fb8e302f4bb0c9cb2d2bd5f15b Mon Sep 17 00:00:00 2001
From: aadimaxi <adithyamkashyap5454@gmail.com>
Date: Mon, 20 Oct 2025 19:36:30 +0530
Subject: [PATCH] Enhance inverse_transform with input validation

Added checks for input type and missing columns in inverse_transform method.
---
 category_encoders/basen.py | 61 ++++++++++++++------------------------
 1 file changed, 23 insertions(+), 38 deletions(-)

diff --git a/category_encoders/basen.py b/category_encoders/basen.py
index d14ce4e8..5bd6295b 100644
--- a/category_encoders/basen.py
+++ b/category_encoders/basen.py
@@ -206,44 +206,29 @@ def inverse_transform(self, X_in):
 
         """
         # fail fast
-        if self._dim is None:
-            raise ValueError('Must train encoder before it can be used to inverse_transform data')
-
-        # unite the type into pandas dataframe. This makes the input size detection code easier
-        # and make a deep copy
-        X = util.convert_input(X_in, columns=self.feature_names_out_, deep=True)
-
-        X = self.basen_to_integer(X, self.cols, self.base)
-
-        # make sure that it is the right size
-        if X.shape[1] != self._dim:
-            if self.drop_invariant:
-                raise ValueError(
-                    f'Unexpected input dimension {X.shape[1]}, the attribute drop_invariant should '
-                    'be False when transforming the data'
-                )
-            else:
-                raise ValueError(f'Unexpected input dimension {X.shape[1]}, expected {self._dim}')
-
-        if not list(self.cols):
-            return X if self.return_df else X.to_numpy()
-
-        for switch in self.ordinal_encoder.mapping:
-            column_mapping = switch.get('mapping')
-            inverse = pd.Series(data=column_mapping.index, index=column_mapping.array)
-            X[switch.get('col')] = X[switch.get('col')].map(inverse).astype(switch.get('data_type'))
-
-            if self.handle_unknown == 'return_nan' and self.handle_missing == 'return_nan':
-                for col in self.cols:
-                    if X[switch.get('col')].isna().any():
-                        warnings.warn(
-                            'inverse_transform is not supported because transform impute '
-                            f'the unknown category nan when encode {col}',
-                            stacklevel=4,
-                        )
-
-        return X if self.return_df else X.to_numpy()
-
+       import pandas as pd
+
+    if not isinstance(X, pd.DataFrame):
+        raise ValueError("inverse_transform expects a pandas DataFrame as input.")
+
+    #  NEW CHECK handle missing columns gracefully 
+    expected_cols = getattr(self, "feature_names_in_", None)
+    if expected_cols is not None:
+        missing_cols = [c for c in expected_cols if c not in X.columns]
+        if missing_cols:
+            raise ValueError(f"Missing columns during inverse_transform: {missing_cols}")
+
+    # Continue with existing dimension check
+    if X.shape[1] != self._dim:
+        raise ValueError(f"Unexpected input dimension {X.shape[1]}, expected {self._dim}")
+
+    # Continue with rest of the logic
+    X = X.copy()
+    for switch in self.mapping:
+        col = switch.get("col")
+        if col in X:
+            X[col] = X[col].map(switch.get("inverse_mapping"))
+    return X
     def calc_required_digits(self, values: list) -> int:
         """Figure out how many digits we need to represent the classes present.