Commit 3e50b27

[FIX] Passing checks (#298)
* Initial fix for all tests passing locally py=3.8
* fix bug in tests
* fix bug in test for data
* debugging error in dummy forward pass
* debug try -2
* catch runtime error in ci
* catch runtime error in ci
* add better debug test setup
* debug some more
* run this test only
* remove sum backward
* remove inplace in inception block
* undo silly change
* Enable all tests
* fix flake
* fix bug in test setup
* remove anomaly detection
* minor changes to comments
* Apply suggestions from code review
  Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com>
* Address comments from Shuhei
* revert change leading to bug
* fix flake
* change comment position in feature validator
* Add documentation for _is_datasets_consistent
* address comments from arlind
* case when all nans in test

Co-authored-by: nabenabe0928 <47781922+nabenabe0928@users.noreply.github.com>
1 parent 5167742 commit 3e50b27

File tree: 18 files changed (+100, -263 lines)

autoPyTorch/api/base_task.py

Lines changed: 3 additions & 2 deletions
@@ -1737,7 +1737,7 @@ def fit_ensemble(
         Args:
             optimize_metric (str): name of the metric that is used to
                 evaluate a pipeline. if not specified, value passed to search will be used
-            precision (int), (default=32): Numeric precision used when loading
+            precision (Optional[int]): Numeric precision used when loading
                 ensemble data. Can be either 16, 32 or 64.
             ensemble_nbest (Optional[int]):
                 only consider the ensemble_nbest models to build the ensemble.

@@ -1780,6 +1780,7 @@ def fit_ensemble(
                 "Please call the `search()` method of {} prior to "
                 "fit_ensemble().".format(self.__class__.__name__))

+        precision = precision if precision is not None else self.precision
         if precision not in [16, 32, 64]:
             raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision))

@@ -1830,7 +1831,7 @@ def fit_ensemble(
         manager = self._init_ensemble_builder(
             time_left_for_ensembles=time_left_for_ensemble,
             optimize_metric=self.opt_metric if optimize_metric is None else optimize_metric,
-            precision=self.precision if precision is None else precision,
+            precision=precision,
             ensemble_size=ensemble_size,
             ensemble_nbest=ensemble_nbest,
         )
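
For reference, a minimal sketch (simplified, not the repository's code) of the pattern these hunks introduce in fit_ensemble: the Optional precision argument is resolved against the instance default once, up front, so the validation and every later use see a concrete value:

    from typing import Optional

    class Task:
        def __init__(self, precision: int = 32) -> None:
            self.precision = precision

        def fit_ensemble(self, precision: Optional[int] = None) -> int:
            # Resolve the Optional argument against the instance default once...
            precision = precision if precision is not None else self.precision
            if precision not in (16, 32, 64):
                raise ValueError("precision must be one of 16, 32, 64 but got {}".format(precision))
            # ...so downstream consumers (e.g. the ensemble builder) receive a concrete value.
            return precision

    print(Task().fit_ensemble())              # 32 (instance default)
    print(Task(precision=16).fit_ensemble())  # 16
    print(Task().fit_ensemble(64))            # 64 (explicit override)

A side effect of resolving the default before the membership check is that an invalid self.precision would now also be caught here rather than deeper in the ensemble builder.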

autoPyTorch/data/tabular_feature_validator.py

Lines changed: 35 additions & 14 deletions
@@ -133,6 +133,7 @@ def _comparator(cmp1: str, cmp2: str) -> int:
         if cmp1 not in choices or cmp2 not in choices:
             raise ValueError('The comparator for the column order only accepts {}, '
                              'but got {} and {}'.format(choices, cmp1, cmp2))
+
         idx1, idx2 = choices.index(cmp1), choices.index(cmp2)
         return idx1 - idx2

@@ -279,13 +280,12 @@ def transform(
         # having a value for a categorical column.
         # We need to convert the column in test data to
         # object otherwise the test column is interpreted as float
-        if len(self.categorical_columns) > 0:
-            categorical_columns = self.column_transformer.transformers_[0][-1]
-            for column in categorical_columns:
-                if X[column].isna().all():
-                    X[column] = X[column].astype('object')
-
         if self.column_transformer is not None:
+            if len(self.categorical_columns) > 0:
+                categorical_columns = self.column_transformer.transformers_[0][-1]
+                for column in categorical_columns:
+                    if X[column].isna().all():
+                        X[column] = X[column].astype('object')
             X = self.column_transformer.transform(X)

         # Sparse related transformations

@@ -371,16 +371,10 @@ def _check_data(

         dtypes = [dtype.name for dtype in X.dtypes]

-        dtypes_diff = [s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)]
+        diff_cols = X.columns[[s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)]]
         if len(self.dtypes) == 0:
             self.dtypes = dtypes
-        elif (
-            any(dtypes_diff)  # the dtypes of some columns are different in train and test dataset
-            and self.all_nan_columns is not None  # Ignore all_nan_columns is None
-            and len(set(X.columns[dtypes_diff]).difference(self.all_nan_columns)) != 0
-        ):
-            # The dtypes can be different if and only if the column belongs
-            # to all_nan_columns as these columns would be imputed.
+        elif not self._is_datasets_consistent(diff_cols, X):
             raise ValueError("The dtype of the features must not be changed after fit(), but"
                              " the dtypes of some columns are different between training ({}) and"
                              " test ({}) datasets.".format(self.dtypes, dtypes))

@@ -548,6 +542,33 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:

         return X

+    def _is_datasets_consistent(self, diff_cols: List[Union[int, str]], X: pd.DataFrame) -> bool:
+        """
+        Check the consistency of dtypes between training and test datasets.
+        The dtypes can be different if the column belongs to `self.all_nan_columns`
+        (list of column names with all nans in training data) or if the column is
+        all nan as these columns would be imputed.
+
+        Args:
+            diff_cols (List[Union[int, str]]):
+                The column labels that have different dtypes.
+            X (pd.DataFrame):
+                A validation or test dataset to be compared with the training dataset.
+        Returns:
+            _ (bool): Whether the training and test datasets are consistent.
+        """
+        if self.all_nan_columns is None:
+            if len(diff_cols) == 0:
+                return True
+            else:
+                return all(X[diff_cols].isna().all())
+
+        # dtype is different ==> the column in at least either of train or test datasets must be all NaN
+        # inconsistent <==> dtype is different and the col in both train and test is not all NaN
+        inconsistent_cols = list(set(diff_cols) - self.all_nan_columns)
+
+        return len(inconsistent_cols) == 0 or all(X[inconsistent_cols].isna().all())
+

 def has_object_columns(
         feature_types: pd.Series,
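
A toy illustration (assumed behaviour, not the repository's code) of the rule that _is_datasets_consistent encodes: a train/test dtype mismatch is tolerated only for columns that were all-NaN during training (tracked in all_nan_columns) or are all-NaN in the incoming data, because those columns are imputed anyway:

    import numpy as np
    import pandas as pd

    train = pd.DataFrame({'a': [1, 2], 'b': [np.nan, np.nan]})  # 'b' is float64 (all NaN)
    test = pd.DataFrame({'a': [3, 4], 'b': ['x', 'y']})         # 'b' is object -> dtype differs

    all_nan_columns = {col for col in train.columns if train[col].isna().all()}
    diff_cols = [col for col in train.columns if train[col].dtype != test[col].dtype]

    # A differing column is inconsistent only if it is neither all-NaN in
    # training nor all-NaN in the incoming data.
    inconsistent_cols = list(set(diff_cols) - all_nan_columns)
    is_consistent = len(inconsistent_cols) == 0 or bool(test[inconsistent_cols].isna().all().all())
    print(is_consistent)  # True: the only differing column ('b') was all-NaN in training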

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             (Dict[str, Any]): the updated 'X' dictionary
         """
-        X.update({'encoder': self.preprocessor})
+        # X.update({'encoder': self.preprocessor})
         return X

     @staticmethod

autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             np.ndarray: Transformed features
         """
-        X.update({'scaler': self.preprocessor})
+        # X.update({'scaler': self.preprocessor})
         return X

     @staticmethod
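
Together with the NoEncoder change above, this makes both no-op components pure pass-throughs. A hedged sketch (illustrative names only, not the repository's classes) of the resulting behaviour, where the 'X' dictionary is returned untouched instead of gaining an 'encoder'/'scaler' key set to None:

    from typing import Any, Dict

    class NoOpComponent:
        """Illustrative stand-in for NoEncoder/NoScaler after this commit."""
        preprocessor = None

        def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
            # Previously: X.update({'scaler': self.preprocessor})
            return X

    X: Dict[str, Any] = {'dataset_properties': {}}
    assert 'scaler' not in NoOpComponent().transform(X)  # the dictionary stays untouched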

autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:

         self.embedding = self.build_embedding(
             num_input_features=num_input_features,
-            num_numerical_features=num_numerical_columns)
+            num_numerical_features=num_numerical_columns)  # type: ignore[arg-type]
         return self

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
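
The added comment suppresses a mypy arg-type error at this call site. A hypothetical minimal reproduction, assuming the mismatch is an Optional value passed where an int is expected (the names below are illustrative, not the repository's signatures):

    from typing import Optional

    def build_embedding(num_numerical_features: int) -> None:
        ...

    num_numerical_columns: Optional[int] = 3
    # mypy: Argument "num_numerical_features" has incompatible type "Optional[int]"; expected "int"
    build_embedding(num_numerical_features=num_numerical_columns)  # type: ignore[arg-type]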

autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py

Lines changed: 1 addition & 5 deletions
@@ -109,11 +109,7 @@ def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torc
         loss = loss_func(self.criterion, original_outputs, adversarial_outputs)
         loss.backward()
         self.optimizer.step()
-        if self.scheduler:
-            if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__:
-                self.scheduler.step(loss)
-            else:
-                self.scheduler.step()
+
         # only passing the original outputs since we do not care about
         # the adversarial performance.
         return loss.item(), original_outputs
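
A hedged sketch of the scheduler-stepping pattern removed above; presumably the base trainer already performs this once per training step, so repeating it in AdversarialTrainer would advance the schedule twice. ReduceLROnPlateau is the odd one out because it consumes a metric:

    import torch

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min')

    loss = torch.tensor(0.5)
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(loss.item())  # plateau scheduler needs the monitored metric
    else:
        scheduler.step()             # other schedulers step unconditionally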

autoPyTorch/pipeline/components/training/trainer/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -283,6 +283,7 @@ def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchCom
             y=y,
             **kwargs
         )
+
         # Add snapshots to base network to enable
         # predicting with snapshot ensemble
         self.choice: autoPyTorchComponent = cast(autoPyTorchComponent, self.choice)

examples/40_advanced/example_custom_configuration_space.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ def get_search_space_updates():
                    value_range=['shake-shake'],
                    default_value='shake-shake')
     updates.append(node_name='network_backbone',
-                   hyperparameter='ResNetBackbone:shake_shake_method',
+                   hyperparameter='ResNetBackbone:shake_shake_update_func',
                    value_range=['M3'],
                    default_value='M3'
                    )
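
A hedged usage sketch, assuming autoPyTorch's HyperparameterSearchSpaceUpdates API as used elsewhere in this example file, showing the corrected hyperparameter name being registered:

    from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates

    updates = HyperparameterSearchSpaceUpdates()
    # The update only takes effect if the hyperparameter name matches what the
    # backbone actually declares, hence the rename in this commit.
    updates.append(node_name='network_backbone',
                   hyperparameter='ResNetBackbone:shake_shake_update_func',
                   value_range=['M3'],
                   default_value='M3')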
