automl
diff --git a/‎autoPyTorch/api/base_task.py‎
Lines changed: 3 additions & 2 deletions b/‎autoPyTorch/api/base_task.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎autoPyTorch/data/tabular_feature_validator.py‎
Lines changed: 36 additions & 14 deletions b/‎autoPyTorch/data/tabular_feature_validator.py‎
Lines changed: 36 additions & 14 deletions
diff --git a/‎autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py‎
Lines changed: 1 addition & 1 deletion b/‎autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py‎
Lines changed: 1 addition & 1 deletion b/‎autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py‎
Lines changed: 2 additions & 5 deletions b/‎autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py‎
Lines changed: 1 addition & 5 deletions b/‎autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎autoPyTorch/pipeline/components/training/trainer/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/pipeline/components/training/trainer/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/40_advanced/40_advanced/example_custom_configuration_space.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/40_advanced/40_advanced/example_custom_configuration_space.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎test/test_data/test_feature_validator.py‎
Lines changed: 21 additions & 129 deletions b/‎test/test_data/test_feature_validator.py‎
Lines changed: 21 additions & 129 deletions
@@ -1773,7 +1773,7 @@ def fit_ensemble(
         Args:
             optimize_metric (str): name of the metric that is used to
                 evaluate a pipeline. if not specified, value passed to search will be used
-            precision (int), (default=32): Numeric precision used when loading
+            precision (Optional[int]): Numeric precision used when loading
                 ensemble data. Can be either 16, 32 or 64.
             ensemble_nbest (Optional[int]):
                 only consider the ensemble_nbest models to build the ensemble.
@@ -1816,6 +1816,7 @@ def fit_ensemble(
                              "Please call the `search()` method of {} prior to "
                              "fit_ensemble().".format(self.__class__.__name__))
 
+        precision = precision if precision is not None else self.precision
         if precision not in [16, 32, 64]:
             raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision))
 
@@ -1866,7 +1867,7 @@ def fit_ensemble(
         manager = self._init_ensemble_builder(
             time_left_for_ensembles=time_left_for_ensemble,
             optimize_metric=self.opt_metric if optimize_metric is None else optimize_metric,
-            precision=self.precision if precision is None else precision,
+            precision=precision,
             ensemble_size=ensemble_size,
             ensemble_nbest=ensemble_nbest,
         )
 
@@ -139,6 +139,7 @@ def _comparator(cmp1: str, cmp2: str) -> int:
         if cmp1 not in choices or cmp2 not in choices:
             raise ValueError('The comparator for the column order only accepts {}, '
                              'but got {} and {}'.format(choices, cmp1, cmp2))
+
         idx1, idx2 = choices.index(cmp1), choices.index(cmp2)
         return idx1 - idx2
 
@@ -284,13 +285,12 @@ def transform(
         # having a value for a categorical column.
         # We need to convert the column in test data to
         # object otherwise the test column is interpreted as float
-        if len(self.categorical_columns) > 0:
-            categorical_columns = self.column_transformer.transformers_[0][-1]
-            for column in categorical_columns:
-                if X[column].isna().all():
-                    X[column] = X[column].astype('object')
-
         if self.column_transformer is not None:
+            if len(self.categorical_columns) > 0:
+                categorical_columns = self.column_transformer.transformers_[0][-1]
+                for column in categorical_columns:
+                    if X[column].isna().all():
+                        X[column] = X[column].astype('object')
             X = self.column_transformer.transform(X)
 
         # Sparse related transformations
@@ -379,16 +379,11 @@ def _check_data(
                 self.column_order = column_order
 
             dtypes = [dtype.name for dtype in X.dtypes]
-            dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)]
+
+            diff_cols = X.columns[[s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)]]
             if len(self.dtypes) == 0:
                 self.dtypes = dtypes
-            elif (
-                any(dtypes_diff)  # the dtypes of some columns are different in train and test dataset
-                and self.all_nan_columns is not None  # Ignore all_nan_columns is None
-                and len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0
-            ):
-                # The dtypes can be different if and only if the column belongs
-                # to all_nan_columns as these columns would be imputed.
+            elif not self._is_datasets_consistent(diff_cols, X):
                 raise ValueError("The dtype of the features must not be changed after fit(), but"
                                  " the dtypes of some columns are different between training ({}) and"
                                  " test ({}) datasets.".format(self.dtypes, dtypes))
@@ -619,6 +614,33 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
 
         return X
 
+    def _is_datasets_consistent(self, diff_cols: List[Union[int, str]], X: pd.DataFrame) -> bool:
+        """
+        Check the consistency of dtypes between training and test datasets.
+        The dtypes can differ if the column belongs to `self.all_nan_columns`
+        (the columns that are all NaN in the training data) or if the column is
+        all NaN in `X`, as these columns would be imputed.
+
+        Args:
+            diff_cols (List[Union[int, str]]):
+                The column labels that have different dtypes.
+            X (pd.DataFrame):
+                A validation or test dataset to be compared with the training dataset
+        Returns:
+            _ (bool): Whether the training and test datasets are consistent.
+        """
+        if self.all_nan_columns is None:
+            if len(diff_cols) == 0:
+                return True
+            else:
+                return all(X[diff_cols].isna().all())
+
+        # A dtype mismatch is acceptable only if the column is all NaN in the train or the test dataset
+        # inconsistent <==> the dtype differs and the column is all NaN in neither the train nor the test dataset
+        inconsistent_cols = list(set(diff_cols) - self.all_nan_columns)
+
+        return len(inconsistent_cols) == 0 or all(X[inconsistent_cols].isna().all())
+
 
 def has_object_columns(
     feature_types: pd.Series,
 
@@ -40,7 +40,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             (Dict[str, Any]): the updated 'X' dictionary
         """
-        X.update({'encoder': self.preprocessor})
+        # X.update({'encoder': self.preprocessor})
         return X
 
     @staticmethod
 
@@ -43,7 +43,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             np.ndarray: Transformed features
         """
-        X.update({'scaler': self.preprocessor})
+        # X.update({'scaler': self.preprocessor})
         return X
 
     @staticmethod
 
@@ -1,10 +1,6 @@
-<<<<<<< HEAD
 import copy
 from typing import Any, Dict, List, Optional, Tuple, Union
-=======
-# import copy
-from typing import Any, Dict, Optional, Tuple
->>>>>>> Bug fixes (#249)
+
 
 import numpy as np
 
@@ -40,6 +36,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
                 self.feature_shapes = feature_shapes
             else:
                 self.feature_shapes = X['dataset_properties']['feature_shapes']
+
         return self
 
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
 
@@ -109,11 +109,7 @@ def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torc
         loss = loss_func(self.criterion, original_outputs, adversarial_outputs)
         loss.backward()
         self.optimizer.step()
-        if self.scheduler:
-            if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__:
-                self.scheduler.step(loss)
-            else:
-                self.scheduler.step()
+
         # only passing the original outputs since we do not care about
         # the adversarial performance.
         return loss.item(), original_outputs
 
@@ -282,6 +282,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom
             y=y,
             **kwargs
         )
+
         # Add snapshots to base network to enable
         # predicting with snapshot ensemble
         self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice)
 
@@ -59,7 +59,7 @@ def get_search_space_updates():
                    value_range=['shake-shake'],
                    default_value='shake-shake')
     updates.append(node_name='network_backbone',
-                   hyperparameter='ResNetBackbone:shake_shake_method',
+                   hyperparameter='ResNetBackbone:shake_shake_update_func',
                    value_range=['M3'],
                    default_value='M3'
                    )
 
@@ -1,4 +1,4 @@
- import copy
+import copy
 import functools
 
 import numpy as np
@@ -139,9 +139,9 @@ def test_featurevalidator_fitontypeA_transformtypeB(input_data_featuretest):
     if isinstance(input_data_featuretest, pd.DataFrame):
         pytest.skip("Column order change in pandas is not supported")
     elif isinstance(input_data_featuretest, np.ndarray):
-        complementary_type = validator.numpy_to_pandas(input_data_featuretest)
+        complementary_type = validator.numpy_array_to_pandas(input_data_featuretest)
     elif isinstance(input_data_featuretest, list):
-        complementary_type, _ = validator.list_to_pandas(input_data_featuretest)
+        complementary_type, _ = validator.list_to_dataframe(input_data_featuretest)
     elif sparse.issparse(input_data_featuretest):
         complementary_type = sparse.csr_matrix(input_data_featuretest.todense())
     else:
@@ -167,128 +167,10 @@ def test_featurevalidator_get_columns_to_encode():
     for col in df.columns:
         df[col] = df[col].astype(col)
 
-<<<<<<< HEAD
     transformed_columns, feature_types = validator._get_columns_to_encode(df)
 
     assert transformed_columns == ['category', 'bool']
     assert feature_types == ['numerical', 'numerical', 'categorical', 'categorical']
-=======
-    validator.fit(df)
-
-    categorical_columns, numerical_columns, feat_type = validator._get_columns_info(df)
-
-    assert numerical_columns == ['int', 'float']
-    assert categorical_columns == ['category', 'bool']
-    assert feat_type == ['numerical', 'numerical', 'categorical', 'categorical']
-
-
-def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd.DataFrame,
-                                            ans_train: np.ndarray, ans_test: np.ndarray) -> None:
-    validator = TabularFeatureValidator()
-    validator.fit(df_train)
-    transformed_df_train = validator.transform(df_train)
-    transformed_df_test = validator.transform(df_test)
-
-    assert np.array_equal(transformed_df_train, ans_train)
-    assert np.array_equal(transformed_df_test, ans_test)
-
-
-def test_feature_validator_remove_nan_catcolumns():
-    """
-    Make sure categorical columns that have only nan values are removed.
-    Transform performs the folloing:
-        * simple imputation for both
-        * scaling for numerical
-        * one-hot encoding for categorical
-    For example,
-        data = [
-            {'A': 1, 'B': np.nan, 'C': np.nan},
-            {'A': np.nan, 'B': 3, 'C': np.nan},
-            {'A': 2, 'B': np.nan, 'C': np.nan}
-        ]
-    and suppose all the columns are categorical,
-    then
-        * `A` in {np.nan, 1, 2}
-        * `B` in {np.nan, 3}
-        * `C` in {np.nan} <=== it will be dropped.
-
-    So in the column A,
-        * np.nan ==> [1, 0, 0]
-        * 1      ==> [0, 1, 0]
-        * 2      ==> [0, 0, 1]
-    in the column B,
-        * np.nan ==> [1, 0]
-        * 3      ==> [0, 1]
-    Therefore, by concatenating,
-        * {'A': 1, 'B': np.nan, 'C': np.nan} ==> [0, 1, 0, 1, 0]
-        * {'A': np.nan, 'B': 3, 'C': np.nan} ==> [1, 0, 0, 0, 1]
-        * {'A': 2, 'B': np.nan, 'C': np.nan} ==> [0, 0, 1, 1, 0]
-    """
-    # First case, there exist null columns (B and C) in the train set
-    # and a same column (C) are not all null for the test set.
-
-    df_train = pd.DataFrame(
-        [
-            {'A': 1, 'B': np.nan, 'C': np.nan},
-            {'A': np.nan, 'C': np.nan},
-            {'A': 1}
-        ],
-        dtype='category',
-    )
-    ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64)
-    df_test = pd.DataFrame(
-        [
-            {'A': np.nan, 'B': np.nan, 'C': 5},
-            {'A': np.nan, 'C': np.nan},
-            {'A': 1}
-        ],
-        dtype='category',
-    )
-    ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64)
-    feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
-
-    # Second case, there exist null columns (B and C) in the training set and
-    # the same columns (B and C) are null in the test set.
-    df_train = pd.DataFrame(
-        [
-            {'A': 1, 'B': np.nan, 'C': np.nan},
-            {'A': np.nan, 'C': np.nan},
-            {'A': 1}
-        ],
-        dtype='category',
-    )
-    ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64)
-    df_test = pd.DataFrame(
-        [
-            {'A': np.nan, 'B': np.nan, 'C': np.nan},
-            {'A': np.nan, 'C': np.nan},
-            {'A': 1}
-        ],
-        dtype='category',
-    )
-    ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64)
-    feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
-
-    # Third case, there exist no null columns in the training set and
-    # null columns exist in the test set.
-    df_train = pd.DataFrame(
-        [
-            {'A': 1, 'B': 1},
-            {'A': 2, 'B': 2}
-        ],
-        dtype='category',
-    )
-    ans_train = np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=np.float64)
-    df_test = pd.DataFrame(
-        [
-            {'A': np.nan, 'B': np.nan},
-            {'A': np.nan, 'B': np.nan}
-        ],
-        dtype='category',
-    )
-    ans_test = np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.float64)
-    feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
->>>>>>> Bug fixes (#249)
 
 
 def test_features_unsupported_calls_are_raised():
@@ -529,6 +411,7 @@ def test_comparator():
     assert ans == feat_type
 
 
+<<<<<<< HEAD
 @pytest.fixture
 def input_data_feature_feat_types(request):
     if request.param == 'pandas_categoricalonly':
@@ -648,6 +531,8 @@ def test_feature_validator_get_columns_to_encode_error_feat_type(input_data_feat
     with pytest.raises(ValueError, match=r"Expected type of features to be in .*"):
         validator._validate_feat_types(X)
 
+=======
+>>>>>>> [FIX] Passing checks (#298)
 def test_feature_validator_imbalanced_data():
 
     # Null columns in the train split but not necessarily in the test split
@@ -670,16 +555,15 @@ def test_feature_validator_imbalanced_data():
     validator.fit(X_train)
 
     train_feature_types = copy.deepcopy(validator.feat_type)
-    assert train_feature_types == ['numerical']
+    assert train_feature_types == ['numerical', 'numerical', 'numerical', 'numerical']
     # validator will throw an error if the column types are not the same
     transformed_X_test = validator.transform(X_test)
     transformed_X_test = pd.DataFrame(transformed_X_test)
-    assert sorted(validator.all_nan_columns) == sorted(['A', 'C', 'D'])
-    # as there are no categorical columns, we can make such an
-    # assertion. We only expect to drop the all nan columns
-    total_all_nan_columns = len(validator.all_nan_columns)
-    total_columns = len(validator.column_order)
-    assert total_columns - total_all_nan_columns == len(transformed_X_test.columns)
+    null_columns = []
+    for column in transformed_X_test.columns:
+        if transformed_X_test[column].isna().all():
+            null_columns.append(column)
+    assert null_columns == [0, 2, 3]
 
     # Columns with not all null values in the train split and
     # completely null on the test split.
@@ -698,12 +582,12 @@ def test_feature_validator_imbalanced_data():
     X_test = pd.DataFrame.from_dict(test_features)
     validator = TabularFeatureValidator()
     validator.fit(X_train)
-
     train_feature_types = copy.deepcopy(validator.feat_type)
     assert train_feature_types == ['categorical', 'numerical', 'numerical']
 
     transformed_X_test = validator.transform(X_test)
     transformed_X_test = pd.DataFrame(transformed_X_test)
+<<<<<<< HEAD
     assert not len(validator.all_nan_columns)
 
 
@@ -733,3 +617,11 @@ def test_comparator():
     )
     assert ans == feat_type
 >>>>>>> Bug fixes (#249)
+=======
+    null_columns = []
+    for column in transformed_X_test.columns:
+        if transformed_X_test[column].isna().all():
+            null_columns.append(column)
+
+    assert null_columns == [1]
+>>>>>>> [FIX] Passing checks (#298)
Original file line number	Diff line number	Diff line change
`@@ -282,6 +282,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom`
`282`	`282`	`y=y,`
`283`	`283`	`**kwargs`
`284`	`284`	`)`
	`285`	`+`
`285`	`286`	`# Add snapshots to base network to enable`
`286`	`287`	`# predicting with snapshot ensemble`
`287`	`288`	`self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice)`
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,7 @@ def get_search_space_updates():`
`59`	`59`	`value_range=['shake-shake'],`
`60`	`60`	`default_value='shake-shake')`
`61`	`61`	`updates.append(node_name='network_backbone',`
`62`		`- hyperparameter='ResNetBackbone:shake_shake_method',`
	`62`	`+ hyperparameter='ResNetBackbone:shake_shake_update_func',`
`63`	`63`	`value_range=['M3'],`
`64`	`64`	`default_value='M3'`
`65`	`65`	`)`