Commit 2d2ebb8

[refactor] Change files so that we can see the difference easier
1 parent bef4323 commit 2d2ebb8

File tree

2 files changed: +73 −73 lines


autoPyTorch/datasets/base_dataset.py

Lines changed: 2 additions & 2 deletions

@@ -200,7 +200,7 @@ def __len__(self) -> int:
     def _get_indices(self) -> np.ndarray:
         return self.random_state.permutation(len(self)) if self.shuffle else np.arange(len(self))

-    def _process_resampling_strategy_args(self) -> None:
+    def _check_resampling_strategy_args(self) -> None:
         if not any(isinstance(self.resampling_strategy, val_type)
                    for val_type in [HoldoutValTypes, CrossValTypes]):
             raise ValueError(f"resampling_strategy {self.resampling_strategy} is not supported.")
@@ -231,7 +231,7 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]]]:
             (List[Tuple[List[int], List[int]]]): splits in the [train_indices, val_indices] format
         """
         # check if the requirements are met and if we can get splits
-        self._process_resampling_strategy_args()
+        self._check_resampling_strategy_args()

         labels_to_stratify = self.train_tensors[-1] if self.is_stratify else None

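For orientation, here is a minimal standalone sketch of what the renamed method does: it only validates the type of the resampling strategy and raises on anything unsupported, which is what the new name _check_resampling_strategy_args conveys. The enum members below are placeholders for the real autoPyTorch definitions, not copies of them.

from enum import IntEnum


# Stand-ins for autoPyTorch's HoldoutValTypes / CrossValTypes enums
# (member names and values here are illustrative only).
class HoldoutValTypes(IntEnum):
    holdout_validation = 1


class CrossValTypes(IntEnum):
    k_fold_cross_validation = 2


def check_resampling_strategy(resampling_strategy) -> None:
    # Same pattern as the diff above: accept only members of the two enums.
    if not any(isinstance(resampling_strategy, val_type)
               for val_type in [HoldoutValTypes, CrossValTypes]):
        raise ValueError(f"resampling_strategy {resampling_strategy} is not supported.")


check_resampling_strategy(CrossValTypes.k_fold_cross_validation)  # passes, returns None
check_resampling_strategy("holdout")  # raises ValueError: a plain string is not supported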

autoPyTorch/datasets/resampling_strategy.py

Lines changed: 71 additions & 71 deletions

@@ -22,77 +22,6 @@ class _ResamplingStrategyArgs(NamedTuple):
     stratify: bool = False


-class HoldoutFuncs():
-    @staticmethod
-    def holdout_validation(
-        indices: np.ndarray,
-        random_state: Optional[np.random.RandomState] = None,
-        val_share: Optional[float] = None,
-        shuffle: bool = False,
-        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
-    ) -> List[Tuple[np.ndarray, np.ndarray]]:
-
-        """ SKLearn requires shuffle=True for stratify """
-        train, val = train_test_split(
-            indices, test_size=val_share,
-            shuffle=shuffle if labels_to_stratify is None else True,
-            random_state=random_state,
-            stratify=labels_to_stratify
-        )
-        return [(train, val)]
-
-
-class CrossValFuncs():
-    # (shuffle, is_stratify) -> split_fn
-    _args2split_fn = {
-        (True, True): StratifiedShuffleSplit,
-        (True, False): ShuffleSplit,
-        (False, True): StratifiedKFold,
-        (False, False): KFold,
-    }
-
-    @staticmethod
-    def k_fold_cross_validation(
-        indices: np.ndarray,
-        random_state: Optional[np.random.RandomState] = None,
-        num_splits: Optional[int] = None,
-        shuffle: bool = False,
-        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
-    ) -> List[Tuple[np.ndarray, np.ndarray]]:
-        """
-        Returns:
-            splits (List[Tuple[List, List]]): list of tuples of training and validation indices
-        """
-
-        split_fn = CrossValFuncs._args2split_fn[(shuffle, labels_to_stratify is not None)]
-        cv = split_fn(n_splits=num_splits, random_state=random_state)
-        splits = list(cv.split(indices))
-        return splits
-
-    @staticmethod
-    def time_series(
-        indices: np.ndarray,
-        random_state: Optional[np.random.RandomState] = None,
-        num_splits: Optional[int] = None,
-        shuffle: bool = False,
-        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
-    ) -> List[Tuple[np.ndarray, np.ndarray]]:
-        """
-        Returns train and validation indices respecting the temporal ordering of the data.
-
-        Examples:
-            >>> indices = np.array([0, 1, 2, 3])
-            >>> CrossValFuncs.time_series_cross_validation(3, indices)
-                [([0], [1]),
-                 ([0, 1], [2]),
-                 ([0, 1, 2], [3])]
-
-        """
-        cv = TimeSeriesSplit(n_splits=num_splits)
-        splits = list(cv.split(indices))
-        return splits
-
-
 class CrossValTypes(IntEnum):
     """The type of cross validation

@@ -214,3 +143,74 @@ def __call__(
             shuffle=shuffle,
             labels_to_stratify=labels_to_stratify
         )
+
+
+class HoldoutFuncs():
+    @staticmethod
+    def holdout_validation(
+        indices: np.ndarray,
+        random_state: Optional[np.random.RandomState] = None,
+        val_share: Optional[float] = None,
+        shuffle: bool = False,
+        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
+    ) -> List[Tuple[np.ndarray, np.ndarray]]:
+
+        """ SKLearn requires shuffle=True for stratify """
+        train, val = train_test_split(
+            indices, test_size=val_share,
+            shuffle=shuffle if labels_to_stratify is None else True,
+            random_state=random_state,
+            stratify=labels_to_stratify
+        )
+        return [(train, val)]
+
+
+class CrossValFuncs():
+    # (shuffle, is_stratify) -> split_fn
+    _args2split_fn = {
+        (True, True): StratifiedShuffleSplit,
+        (True, False): ShuffleSplit,
+        (False, True): StratifiedKFold,
+        (False, False): KFold,
+    }
+
+    @staticmethod
+    def k_fold_cross_validation(
+        indices: np.ndarray,
+        random_state: Optional[np.random.RandomState] = None,
+        num_splits: Optional[int] = None,
+        shuffle: bool = False,
+        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
+    ) -> List[Tuple[np.ndarray, np.ndarray]]:
+        """
+        Returns:
+            splits (List[Tuple[List, List]]): list of tuples of training and validation indices
+        """
+
+        split_fn = CrossValFuncs._args2split_fn[(shuffle, labels_to_stratify is not None)]
+        cv = split_fn(n_splits=num_splits, random_state=random_state)
+        splits = list(cv.split(indices))
+        return splits
+
+    @staticmethod
+    def time_series(
+        indices: np.ndarray,
+        random_state: Optional[np.random.RandomState] = None,
+        num_splits: Optional[int] = None,
+        shuffle: bool = False,
+        labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
+    ) -> List[Tuple[np.ndarray, np.ndarray]]:
+        """
+        Returns train and validation indices respecting the temporal ordering of the data.
+
+        Examples:
+            >>> indices = np.array([0, 1, 2, 3])
+            >>> CrossValFuncs.time_series_cross_validation(3, indices)
+                [([0], [1]),
+                 ([0, 1], [2]),
+                 ([0, 1, 2], [3])]
+
+        """
+        cv = TimeSeriesSplit(n_splits=num_splits)
+        splits = list(cv.split(indices))
+        return splits
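
For reference, a minimal usage sketch of the logic inside the two relocated helper classes, written directly against scikit-learn (numpy and scikit-learn assumed installed; the dispatch table mirrors CrossValFuncs._args2split_fn above, and the concrete numbers are illustrative):

import numpy as np
from sklearn.model_selection import (
    KFold, ShuffleSplit, StratifiedKFold, StratifiedShuffleSplit, train_test_split
)

indices = np.arange(10)

# Holdout, as in HoldoutFuncs.holdout_validation: a single (train, val) pair,
# here with 20% of the indices held out and no shuffling.
train, val = train_test_split(indices, test_size=0.2, shuffle=False)
print(train, val)  # [0 1 2 3 4 5 6 7] [8 9]

# k-fold, as in CrossValFuncs.k_fold_cross_validation: the (shuffle, stratify)
# pair selects which scikit-learn splitter to use.
args2split_fn = {
    (True, True): StratifiedShuffleSplit,
    (True, False): ShuffleSplit,
    (False, True): StratifiedKFold,
    (False, False): KFold,
}
cv = args2split_fn[(False, False)](n_splits=5)
splits = list(cv.split(indices))  # five (train_indices, val_indices) tuples
print(len(splits))  # 5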
