
Commit 5af8aab

nabenabe0928 authored and ravinkohli committed
[refactor] Address Shuhei's comments
[fix] Fix Flake8 issues
[refactor] Address Shuhei's comment
[refactor] Address Shuhei's comments
[refactor] Address Shuhei's comments
[refactor] Address Shuhei's comments
1 parent 09ad0d7 commit 5af8aab

File tree: 11 files changed (+130 −138 lines)


autoPyTorch/evaluation/tae.py

Lines changed: 0 additions & 15 deletions
@@ -201,27 +201,12 @@ def __init__(
 
         self.search_space_updates = search_space_updates
 
-<<<<<<< HEAD
     def _check_and_get_default_budget(self) -> float:
         budget_type_choices = ('epochs', 'runtime')
         budget_choices = {
             budget_type: float(self.pipeline_config.get(budget_type, np.inf))
             for budget_type in budget_type_choices
         }
-=======
-        if isinstance(self.resampling_strategy, (HoldoutValTypes, CrossValTypes)):
-            eval_function = autoPyTorch.evaluation.train_evaluator.eval_function
-        elif isinstance(self.resampling_strategy, NoResamplingStrategyTypes):
-            eval_function = autoPyTorch.evaluation.fit_evaluator.eval_function
-        else:
-            raise ValueError("resampling strategy must be in "
-                             "(HoldoutValTypes, CrossValTypes, NoResamplingStrategyTypes), "
-                             "but got {}.".format(self.resampling_strategy)
-                             )
-
-        self.worst_possible_result = cost_for_crash
->>>>>>> Cocktail hotfixes (#245)
-
         # budget is defined by epochs by default
         budget_type = str(self.pipeline_config.get('budget_type', 'epochs'))
         if self.budget_type is not None:
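With the conflict block removed, _check_and_get_default_budget simply resolves a default budget from pipeline_config, treating unset budget types as infinite. A standalone sketch of that resolution (the final lookup is an assumption for illustration; the real method goes on to reconcile it with self.budget_type):

import numpy as np

def check_and_get_default_budget(pipeline_config: dict) -> float:
    budget_type_choices = ('epochs', 'runtime')
    # Unset budget types fall back to infinity, i.e. "no limit"
    budget_choices = {
        budget_type: float(pipeline_config.get(budget_type, np.inf))
        for budget_type in budget_type_choices
    }
    # budget is defined by epochs by default
    budget_type = str(pipeline_config.get('budget_type', 'epochs'))
    return budget_choices[budget_type]

print(check_and_get_default_budget({'budget_type': 'epochs', 'epochs': 50}))  # 50.0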

autoPyTorch/evaluation/train_evaluator.py

Lines changed: 0 additions & 4 deletions
@@ -419,11 +419,7 @@ def _predict(self, pipeline: BaseEstimator,
 
 
 # create closure for evaluating an algorithm
-<<<<<<< HEAD
 def eval_train_function(
-=======
-def eval_function(
->>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
     backend: Backend,
     queue: Queue,
     metric: autoPyTorchMetric,

autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py

Lines changed: 13 additions & 10 deletions
@@ -37,7 +37,11 @@ def __init__(
 
         Args:
             epsilon (float): The perturbation magnitude.
-
+
+        References:
+            Explaining and Harnessing Adversarial Examples
+            Ian J. Goodfellow et. al.
+            https://arxiv.org/pdf/1412.6572.pdf
         """
         super().__init__(random_state=random_state,
                          weighted_loss=weighted_loss,
@@ -96,10 +100,10 @@ def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torc
         # training
         self.optimizer.zero_grad()
         original_outputs = self.model(original_data)
-        adversarial_output = self.model(adversarial_data)
+        adversarial_outputs = self.model(adversarial_data)
 
         loss_func = self.criterion_preparation(**criterion_kwargs)
-        loss = loss_func(self.criterion, original_outputs, adversarial_output)
+        loss = loss_func(self.criterion, original_outputs, adversarial_outputs)
         loss.backward()
         self.optimizer.step()
         if self.scheduler:
@@ -125,6 +129,9 @@ def fgsm_attack(
 
         Returns:
             adv_data (np.ndarray): the adversarial examples.
+
+        References:
+            https://pytorch.org/tutorials/beginner/fgsm_tutorial.html#fgsm-attack
         """
         data_copy = deepcopy(data)
         data_copy = data_copy.float().to(self.device)
@@ -159,7 +166,7 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
+            value_range=(True, False),
             default_value=True),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
@@ -196,9 +203,7 @@ def get_hyperparameter_search_space(
 
         add_hyperparameter(cs, epsilon, UniformFloatHyperparameter)
         add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter)
-        snapshot_ensemble_flag = False
-        if any(use_snapshot_ensemble.value_range):
-            snapshot_ensemble_flag = True
+        snapshot_ensemble_flag = any(use_snapshot_ensemble.value_range)
 
         use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter)
         cs.add_hyperparameter(use_snapshot_ensemble)
@@ -209,9 +214,7 @@ def get_hyperparameter_search_space(
         cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
         cs.add_condition(cond)
 
-        lookahead_flag = False
-        if any(use_lookahead_optimizer.value_range):
-            lookahead_flag = True
+        lookahead_flag = any(use_lookahead_optimizer.value_range)
 
         use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter)
         cs.add_hyperparameter(use_lookahead_optimizer)
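For context on the References added above: FGSM creates an adversarial example by stepping each input along the sign of the loss gradient with magnitude epsilon. A minimal sketch of the attack, independent of the trainer API in this file (function name and signature are illustrative):

import torch

def fgsm_attack(data: torch.Tensor,
                targets: torch.Tensor,
                model: torch.nn.Module,
                criterion: torch.nn.Module,
                epsilon: float) -> torch.Tensor:
    # Differentiate the loss with respect to the inputs, not the weights
    data = data.clone().detach().requires_grad_(True)
    loss = criterion(model(data), targets)
    loss.backward()
    # Step in the direction that locally increases the loss the most
    return (data + epsilon * data.grad.sign()).detach()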

autoPyTorch/pipeline/components/training/trainer/GridCutMixTrainer.py

Lines changed: 10 additions & 8 deletions
@@ -26,14 +26,15 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
             np.ndarray: that processes data
             typing.Dict[str, np.ndarray]: arguments to the criterion function
         """
-        beta = 1.0
-        lam = self.random_state.beta(beta, beta)
-        batch_size, channel, W, H = X.size()
-        index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size)
+        alpha, beta = 1.0, 1.0
+        lam = self.random_state.beta(alpha, beta)
+        batch_size, _, W, H = X.shape
+        device = torch.device('cuda' if X.is_cuda else 'cpu')
+        batch_indices = torch.randperm(batch_size).to(device)
 
         r = self.random_state.rand(1)
         if beta <= 0 or r > self.alpha:
-            return X, {'y_a': y, 'y_b': y[index], 'lam': 1}
+            return X, {'y_a': y, 'y_b': y[batch_indices], 'lam': 1}
 
         # Draw parameters of a random bounding box
         # Where to cut basically
@@ -47,12 +48,13 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
         bbx2 = np.clip(cx + cut_w // 2, 0, W)
         bby2 = np.clip(cy + cut_h // 2, 0, H)
 
-        X[:, :, bbx1:bbx2, bby1:bby2] = X[index, :, bbx1:bbx2, bby1:bby2]
+        X[:, :, bbx1:bbx2, bby1:bby2] = X[batch_indices, :, bbx1:bbx2, bby1:bby2]
 
         # Adjust lam
-        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (X.size()[-1] * X.size()[-2]))
+        pixel_size = W * H
+        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / pixel_size)
 
-        y_a, y_b = y, y[index]
+        y_a, y_b = y, y[batch_indices]
 
         return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam}
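After clipping, the adjusted lam is exactly the fraction of pixels kept from the original sample, so the criterion can weight the two targets accordingly. A self-contained sketch of the same bounding-box mixing for NCHW tensors (grid_cutmix is an illustrative helper, not autoPyTorch API):

import numpy as np
import torch

def grid_cutmix(X: torch.Tensor, y: torch.Tensor, alpha: float = 1.0, beta: float = 1.0):
    lam = np.random.beta(alpha, beta)
    batch_size, _, W, H = X.shape
    perm = torch.randperm(batch_size, device=X.device)

    # Box whose area is roughly (1 - lam) of the image
    cut_ratio = np.sqrt(1.0 - lam)
    cut_w, cut_h = int(W * cut_ratio), int(H * cut_ratio)
    cx, cy = np.random.randint(W), np.random.randint(H)
    bbx1, bbx2 = np.clip(cx - cut_w // 2, 0, W), np.clip(cx + cut_w // 2, 0, W)
    bby1, bby2 = np.clip(cy - cut_h // 2, 0, H), np.clip(cy + cut_h // 2, 0, H)

    # Paste the patch from shuffled samples into the batch
    X[:, :, bbx1:bbx2, bby1:bby2] = X[perm, :, bbx1:bbx2, bby1:bby2]
    # Recompute lam from the clipped box: fraction of pixels left untouched
    lam = 1 - (bbx2 - bbx1) * (bby2 - bby1) / (W * H)
    return X, {'y_a': y, 'y_b': y[perm], 'lam': lam}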

autoPyTorch/pipeline/components/training/trainer/RowCutMixTrainer.py

Lines changed: 17 additions & 11 deletions
@@ -26,25 +26,31 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
             np.ndarray: that processes data
             typing.Dict[str, np.ndarray]: arguments to the criterion function
         """
-        beta = 1.0
-        lam = self.random_state.beta(beta, beta)
-        batch_size = X.size()[0]
-        index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size)
+        alpha, beta = 1.0, 1.0
+        lam = self.random_state.beta(alpha, beta)
+        batch_size = X.shape[0]
+        device = torch.device('cuda' if X.is_cuda else 'cpu')
+        batch_indices = torch.randperm(batch_size).to(device)
 
         r = self.random_state.rand(1)
         if beta <= 0 or r > self.alpha:
-            return X, {'y_a': y, 'y_b': y[index], 'lam': 1}
+            return X, {'y_a': y, 'y_b': y[batch_indices], 'lam': 1}
 
-        size = X.shape[1]
-        indices = torch.tensor(self.random_state.choice(range(1, size), max(1, np.int32(size * lam)),
-                                                        replace=False))
+        row_size = X.shape[1]
+        row_indices = torch.tensor(
+            self.random_state.choice(
+                range(1, row_size),
+                max(1, int(row_size * lam)),
+                replace=False
+            )
+        )
 
-        X[:, indices] = X[index, :][:, indices]
+        X[:, row_indices] = X[batch_indices, :][:, row_indices]
 
         # Adjust lam
-        lam = 1 - ((len(indices)) / (X.size()[1]))
+        lam = 1 - len(row_indices) / X.shape[1]
 
-        y_a, y_b = y, y[index]
+        y_a, y_b = y, y[batch_indices]
 
         return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam}
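The {'y_a', 'y_b', 'lam'} dictionary returned by both CutMix trainers feeds a mixed loss of the form lam * loss(out, y_a) + (1 - lam) * loss(out, y_b). A hedged sketch of such a closure (it mirrors the pattern, not the exact criterion_preparation signature):

import torch

def mixup_criterion(criterion: torch.nn.Module,
                    outputs: torch.Tensor,
                    y_a: torch.Tensor,
                    y_b: torch.Tensor,
                    lam: float) -> torch.Tensor:
    # Weighted sum of losses against both label sets;
    # lam == 1 reduces to the plain, unmixed loss.
    return lam * criterion(outputs, y_a) + (1 - lam) * criterion(outputs, y_b)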

autoPyTorch/pipeline/components/training/trainer/RowCutOutTrainer.py

Lines changed: 10 additions & 13 deletions
@@ -9,7 +9,9 @@
 
 
 class RowCutOutTrainer(CutOut, BaseTrainerComponent):
+    # 0 is non-informative in image data
     NUMERICAL_VALUE = 0
+    # -1 is the conceptually equivalent to 0 in a image, i.e. 0-pad
     CATEGORICAL_VALUE = -1
 
     def data_preparation(self, X: np.ndarray, y: np.ndarray,
@@ -36,23 +38,18 @@ def data_preparation(self, X: np.ndarray, y: np.ndarray,
             lam = 1
             return X, {'y_a': y_a, 'y_b': y_b, 'lam': lam}
 
-        size = X.shape[1]
-        indices = self.random_state.choice(range(1, size), max(1, np.int32(size * self.patch_ratio)),
-                                           replace=False)
+        row_size = X.shape[1]
+        row_indices = self.random_state.choice(range(1, row_size), max(1, int(row_size * self.patch_ratio)),
+                                               replace=False)
 
         if not isinstance(self.numerical_columns, typing.Iterable):
-            raise ValueError("{} requires numerical columns information of {}"
-                             "to prepare data got {}.".format(self.__class__.__name__,
-                                                              typing.Iterable,
-                                                              self.numerical_columns))
+            raise ValueError("numerical_columns in {} must be iterable, "
+                             "but got {}.".format(self.__class__.__name__,
+                                                  self.numerical_columns))
+
         numerical_indices = torch.tensor(self.numerical_columns)
-        categorical_indices = torch.tensor([index for index in indices if index not in self.numerical_columns])
+        categorical_indices = torch.tensor([idx for idx in row_indices if idx not in self.numerical_columns])
 
-        # We use an ordinal encoder on the categorical columns of tabular data
-        # -1 is the conceptual equivalent to 0 in a image, that does not
-        # have color as a feature and hence the network has to learn to deal
-        # without this data. For numerical columns we use 0 to cutout the features
-        # similar to the effect that setting 0 as a pixel value in an image.
         X[:, categorical_indices.long()] = self.CATEGORICAL_VALUE
         X[:, numerical_indices.long()] = self.NUMERICAL_VALUE
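In effect, row cutout masks a random subset of features in every row of the batch, with a sentinel matched to the column type: 0 for numerical columns and -1 for ordinally encoded categoricals. A minimal sketch of that idea (row_cutout and its arguments are illustrative, not the trainer's API):

import numpy as np
import torch

def row_cutout(X: torch.Tensor, numerical_columns: list, patch_ratio: float = 0.3) -> torch.Tensor:
    n_features = X.shape[1]
    n_masked = max(1, int(n_features * patch_ratio))
    masked = np.random.choice(n_features, n_masked, replace=False)

    # Match the sentinel to the column type, as the trainer above does
    num_cols = [int(c) for c in masked if c in numerical_columns]
    cat_cols = [int(c) for c in masked if c not in numerical_columns]
    X[:, num_cols] = 0    # neutral value for numerical features
    X[:, cat_cols] = -1   # sentinel for ordinally encoded categoricals
    return X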

autoPyTorch/pipeline/components/training/trainer/__init__.py

Lines changed: 7 additions & 16 deletions
@@ -384,11 +384,7 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
 
             val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {}
             if self.eval_valid_each_epoch(X):
-<<<<<<< HEAD
                 if X['val_data_loader']:
-=======
-                if 'val_data_loader' in X and X['val_data_loader']:
->>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
                     val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
                 if 'test_data_loader' in X and X['test_data_loader']:
                     test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)
@@ -433,26 +429,20 @@ def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> 'TrainerChoic
 
         if self.choice.use_stochastic_weight_averaging and self.choice.swa_updated:
             # update batch norm statistics
-            swa_utils.update_bn(X['train_data_loader'], self.choice.swa_model.double())
+            swa_utils.update_bn(loader=X['train_data_loader'], model=self.choice.swa_model.double())
+
             # change model
             update_model_state_dict_from_swa(X['network'], self.choice.swa_model.state_dict())
         if self.choice.use_snapshot_ensemble:
             for model in self.choice.model_snapshots:
-                swa_utils.update_bn(X['train_data_loader'], model.double())
+                swa_utils.update_bn(loader=X['train_data_loader'], model=model.double())
 
         # wrap up -- add score if not evaluating every epoch
         if not self.eval_valid_each_epoch(X):
-<<<<<<< HEAD
             if X['val_data_loader']:
                 val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
             if 'test_data_loader' in X and X['val_data_loader']:
                 test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer)
-=======
-            if 'val_data_loader' in X and X['val_data_loader']:
-                val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer)
-            if 'test_data_loader' in X and X['test_data_loader']:
-                test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'])
->>>>>>> Create fit evaluator, no resampling strategy and fix bug for test statistics
             self.run_summary.add_performance(
                 epoch=epoch,
                 start_time=start_time,
@@ -653,11 +643,12 @@ def __str__(self) -> str:
     def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, HyperparameterSearchSpace]:
         """Get the search space updates with the given prefix
 
-        Keyword Arguments:
-            prefix {str} -- Only return search space updates with given prefix (default: {None})
+        Args:
+            prefix (Optional[str]): Only return search space updates with given prefix
 
         Returns:
-            dict -- Mapping of search space updates. Keys don't contain the prefix.
+            Dict[str, HyperparameterSearchSpace]:
+                Mapping of search space updates. Keys don't contain the prefix.
         """
         updates = super()._get_search_space_updates(prefix=prefix)
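In both call sites above, swa_utils.update_bn re-estimates BatchNorm running statistics with one forward pass over the training loader; this is needed because the averaged SWA/snapshot weights invalidate the statistics accumulated during training. A standalone usage sketch with placeholder model and data (torch.optim.swa_utils is the real PyTorch module; everything else is illustrative):

import torch
from torch.optim import swa_utils

model = torch.nn.Sequential(torch.nn.Linear(8, 16), torch.nn.BatchNorm1d(16), torch.nn.ReLU())
swa_model = swa_utils.AveragedModel(model)

loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(torch.randn(64, 8)), batch_size=16)

# ... a training loop would call swa_model.update_parameters(model) periodically ...

# Recompute BatchNorm running mean/var under the averaged weights
swa_utils.update_bn(loader, swa_model)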

autoPyTorch/pipeline/components/training/trainer/base_trainer.py

Lines changed: 27 additions & 18 deletions
@@ -28,7 +28,7 @@
 from autoPyTorch.pipeline.components.training.metrics.metrics import CLASSIFICATION_METRICS, REGRESSION_METRICS
 from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead
 from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score
-from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead, swa_average_function
+from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead, swa_update
 from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace, add_hyperparameter, get_hyperparameter
 from autoPyTorch.utils.implementations import get_loss_weight_strategy
 
@@ -226,7 +226,7 @@ def __init__(self, weighted_loss: bool = False,
                  use_snapshot_ensemble: bool = True,
                  se_lastk: int = 3,
                  use_lookahead_optimizer: bool = True,
-                 random_state: Optional[Union[np.random.RandomState, int]] = None,
+                 random_state: Optional[np.random.RandomState] = None,
                  swa_model: Optional[torch.nn.Module] = None,
                  model_snapshots: Optional[List[torch.nn.Module]] = None,
                  **lookahead_config: Any) -> None:
@@ -287,13 +287,14 @@ def prepare(
 
         # in case we are using swa, maintain an averaged model,
         if self.use_stochastic_weight_averaging:
-            self.swa_model = swa_utils.AveragedModel(self.model, avg_fn=swa_average_function)
+            self.swa_model = swa_utils.AveragedModel(self.model, avg_fn=swa_update)
 
         # in case we are using se or swa, initialise budget_threshold to know when to start swa or se
         self._budget_threshold = 0
         if self.use_stochastic_weight_averaging or self.use_snapshot_ensemble:
-            assert budget_tracker.max_epochs is not None, "Can only use stochastic weight averaging or snapshot " \
-                                                          "ensemble when budget is epochs"
+            if budget_tracker.max_epochs is None:
+                raise ValueError("Budget for stochastic weight averaging or snapshot ensemble must be `epoch`.")
+
             self._budget_threshold = int(0.75 * budget_tracker.max_epochs)
 
         # in case we are using se, initialise list to store model snapshots
@@ -591,7 +592,7 @@ def get_hyperparameter_search_space(
         dataset_properties: Optional[Dict] = None,
         weighted_loss: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="weighted_loss",
-            value_range=[True, False],
+            value_range=(True, False),
             default_value=True),
         la_steps: HyperparameterSearchSpace = HyperparameterSearchSpace(
             hyperparameter="la_steps",
@@ -623,22 +624,30 @@ def get_hyperparameter_search_space(
         cs = ConfigurationSpace()
 
         add_hyperparameter(cs, use_stochastic_weight_averaging, CategoricalHyperparameter)
+        snapshot_ensemble_flag = any(use_snapshot_ensemble.value_range)
+
         use_snapshot_ensemble = get_hyperparameter(use_snapshot_ensemble, CategoricalHyperparameter)
-        se_lastk = get_hyperparameter(se_lastk, Constant)
-        cs.add_hyperparameters([use_snapshot_ensemble, se_lastk])
-        cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
-        cs.add_condition(cond)
+        cs.add_hyperparameter(use_snapshot_ensemble)
 
+        if snapshot_ensemble_flag:
+            se_lastk = get_hyperparameter(se_lastk, Constant)
+            cs.add_hyperparameter(se_lastk)
+            cond = EqualsCondition(se_lastk, use_snapshot_ensemble, True)
+            cs.add_condition(cond)
+
+        lookahead_flag = any(use_lookahead_optimizer.value_range)
         use_lookahead_optimizer = get_hyperparameter(use_lookahead_optimizer, CategoricalHyperparameter)
         cs.add_hyperparameter(use_lookahead_optimizer)
-        la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
-                                                                    la_alpha=la_alpha)
-        parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
-        cs.add_configuration_space(
-            Lookahead.__name__,
-            la_config_space,
-            parent_hyperparameter=parent_hyperparameter
-        )
+
+        if lookahead_flag:
+            la_config_space = Lookahead.get_hyperparameter_search_space(la_steps=la_steps,
+                                                                        la_alpha=la_alpha)
+            parent_hyperparameter = {'parent': use_lookahead_optimizer, 'value': True}
+            cs.add_configuration_space(
+                Lookahead.__name__,
+                la_config_space,
+                parent_hyperparameter=parent_hyperparameter
+            )
 
         # TODO, decouple the weighted loss from the trainer
         if dataset_properties is not None:
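The refactor above registers child hyperparameters (se_lastk, the Lookahead sub-space) only when the parent's value range can actually activate them. A small ConfigSpace sketch of the underlying parent-child conditioning pattern, outside autoPyTorch:

from ConfigSpace import ConfigurationSpace
from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import CategoricalHyperparameter, Constant

cs = ConfigurationSpace()
use_snapshot_ensemble = CategoricalHyperparameter('use_snapshot_ensemble', [True, False])
cs.add_hyperparameter(use_snapshot_ensemble)

# The child is only active (and only sampled) when the parent equals True
se_lastk = Constant('se_lastk', 3)
cs.add_hyperparameter(se_lastk)
cs.add_condition(EqualsCondition(se_lastk, use_snapshot_ensemble, True))

print(cs.sample_configuration())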
