automl
diff --git a/‎autoPyTorch/components/ensembles/abstract_ensemble.py‎
Lines changed: 2 additions & 0 deletions b/‎autoPyTorch/components/ensembles/abstract_ensemble.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎autoPyTorch/components/ensembles/ensemble_selection.py‎
Lines changed: 2 additions & 0 deletions b/‎autoPyTorch/components/ensembles/ensemble_selection.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎autoPyTorch/components/lr_scheduler/lr_schedulers.py‎
Lines changed: 12 additions & 2 deletions b/‎autoPyTorch/components/lr_scheduler/lr_schedulers.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎autoPyTorch/components/metrics/additional_logs.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/components/metrics/additional_logs.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎autoPyTorch/components/metrics/balanced_accuracy.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/components/metrics/balanced_accuracy.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎autoPyTorch/core/autonet_classes/autonet_feature_classification.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/core/autonet_classes/autonet_feature_classification.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎autoPyTorch/core/autonet_classes/autonet_feature_data.py‎
Lines changed: 16 additions & 1 deletion b/‎autoPyTorch/core/autonet_classes/autonet_feature_data.py‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎autoPyTorch/core/autonet_classes/autonet_feature_regression.py‎
Lines changed: 1 addition & 0 deletions b/‎autoPyTorch/core/autonet_classes/autonet_feature_regression.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎autoPyTorch/data_management/data_manager.py‎
Lines changed: 68 additions & 1 deletion b/‎autoPyTorch/data_management/data_manager.py‎
Lines changed: 68 additions & 1 deletion
@@ -2,6 +2,8 @@
 
 
 class AbstractEnsemble(object):
+    """Ensemble interface extracted from auto-sklearn"""
+
     __metaclass__ = ABCMeta
 
     @abstractmethod
 
@@ -7,6 +7,8 @@
 
 
 class EnsembleSelection(AbstractEnsemble):
+    """Ensemble Selection algorithm extracted from auto-sklearn"""
+    
     def __init__(self, ensemble_size, metric,
                  sorted_initialization_n_best=0, only_consider_n_best=0,
                  bagging=False, mode='fast'):
 
@@ -17,8 +17,18 @@
 __license__ = "BSD"
 
 class AutoNetLearningRateSchedulerBase(object):
-    def __new__(cls, params, config):
-        scheduler = cls._get_scheduler(cls, params, config)
+    def __new__(cls, optimizer, config):
+        """Get a new instance of the scheduler
+        
+        Arguments:
+            cls {class} -- Type of scheduler
+            optimizer {Optimizer} -- A PyTorch Optimizer
+            config {dict} -- Sampled lr_scheduler config
+        
+        Returns:
+            AutoNetLearningRateSchedulerBase -- The learning rate scheduler object
+        """
+        scheduler = cls._get_scheduler(cls, optimizer, config)
         if not hasattr(scheduler, "allows_early_stopping"):
             scheduler.allows_early_stopping = True
         if not hasattr(scheduler, "snapshot_before_restart"):
 
@@ -1,5 +1,6 @@
 
 class test_result():
+    """Log the performance on the test set"""
     def __init__(self, autonet, X_test, Y_test):
         self.autonet = autonet
         self.X_test = X_test
 
@@ -5,6 +5,7 @@
 
 
 def balanced_accuracy(solution, prediction):
+    """balanced accuracy implementation of auto-sklearn"""
 
     y_type, solution, prediction = _check_targets(solution, prediction)
 
 
@@ -3,6 +3,7 @@
 class AutoNetClassification(AutoNetFeatureData):
     preset_folder_name = "feature_classification"
 
+    # OVERRIDE
     @staticmethod
     def _apply_default_pipeline_settings(pipeline):
         from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
 
@@ -9,6 +9,11 @@ class AutoNetFeatureData(AutoNet):
 
     @classmethod
     def get_default_ensemble_pipeline(cls):
+        """Construct a default pipeline, including the nodes for Ensemble.
+        
+        Returns:
+            Pipeline -- The constructed default pipeline
+        """
         from autoPyTorch.pipeline.base.pipeline import Pipeline
         from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
             CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -50,6 +55,11 @@ def get_default_ensemble_pipeline(cls):
 
     @classmethod
     def get_default_pipeline(cls):
+        """Construct a default pipeline, not including the nodes for Ensemble.
+        
+        Returns:
+            Pipeline -- The constructed default pipeline
+        """
         from autoPyTorch.pipeline.base.pipeline import Pipeline
         from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
             CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -87,6 +97,11 @@ def get_default_pipeline(cls):
 
     @staticmethod
     def _apply_default_pipeline_settings(pipeline):
+        """Add the components to the pipeline
+        
+        Arguments:
+            pipeline {Pipeline} -- The pipeline to add the components to
+        """
         from autoPyTorch.pipeline.nodes import NormalizationStrategySelector, PreprocessorSelector, EmbeddingSelector, NetworkSelector, \
             OptimizerSelector, LearningrateSchedulerSelector, TrainNode, CrossValidation, InitializationSelector
 
@@ -150,4 +165,4 @@ def _apply_default_pipeline_settings(pipeline):
         train_node.add_batch_loss_computation_technique("mixup", Mixup)
 
         cv = pipeline[CrossValidation.get_name()]
-        cv.add_cross_validator("k_fold", KFold)
+        cv.add_cross_validator("k_fold", KFold)
@@ -3,6 +3,7 @@
 class AutoNetMultilabel(AutoNetFeatureData):
     preset_folder_name = "feature_multilabel"
 
+    # OVERRIDE
     @staticmethod
     def _apply_default_pipeline_settings(pipeline):
         from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
 
@@ -8,6 +8,7 @@
 class AutoNetRegression(AutoNetFeatureData):
     preset_folder_name = "feature_regression"
 
+    # OVERRIDE
     @staticmethod
     def _apply_default_pipeline_settings(pipeline):
         from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
 
@@ -21,7 +21,14 @@ class ProblemType(Enum):
     FeatureMultilabel = 4
 
 class DataManager(object):
+    """Load data from multiple sources and formats"""
+
     def __init__(self, verbose=0):
+        """Construct the DataManager
+        
+        Keyword Arguments:
+            verbose {bool} -- Whether to print stuff. (default: {0})
+        """
         self.verbose = verbose
         self.X_train, self.Y_train = None, None
         self.X_test, self.Y_test = None, None
@@ -33,6 +40,16 @@ def __init__(self, verbose=0):
         self.categorical_features = None
 
     def read_data(self, file_name, test_split=0.0, is_classification=None, random_seed=0, **kwargs):
+        """Read the data.
+        
+        Arguments:
+            file_name {str} -- The name of the file to load. Different Readers are associated with different filenames.
+        
+        Keyword Arguments:
+            test_split {float} -- Amount of data to use as test split (default: {0.0})
+            is_classification {bool} -- Whether the data is a classification task (default: {None})
+            random_seed {int} -- a random seed (default: {0})
+        """
         print("Read:" + file_name)
         reader = self._get_reader(file_name, is_classification)
         reader.read()
@@ -53,6 +70,18 @@ def read_data(self, file_name, test_split=0.0, is_classification=None, random_se
         self._split_data(test_split, random_seed)
 
     def _get_reader(self, file_name, is_classification):
+        """Get the reader associated with the filename.
+        
+        Arguments:
+            file_name {str} -- The file to load
+            is_classification {bool} -- Whether the data is a classification task or not
+        
+        Raises:
+            ValueError: The given file type is not supported
+        
+        Returns:
+            DataReader -- A reader that is able to read the data type
+        """
         if file_name.endswith(".csv"):
             reader = CSVReader(file_name, is_classification=is_classification)
         elif file_name.startswith("openml:"):
@@ -65,6 +94,17 @@ def _get_reader(self, file_name, is_classification):
         return reader
 
     def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0):
+        """Generate a classification task
+        
+        Arguments:
+            num_classes {int} -- Number of classes
+            num_features {int} -- Number of features
+            num_samples {int} -- Number of samples
+        
+        Keyword Arguments:
+            test_split {float} -- Size of test split (default: {0.1})
+            seed {int} -- A random seed (default: {0})
+        """
         #X, Y = make_classification(n_samples=800, n_features=num_feats, n_classes=num_classes, n_informative=4)
         X, y = make_multilabel_classification(
             n_samples=num_samples, n_features=num_features, n_classes=num_classes, n_labels=0.01,
@@ -78,13 +118,29 @@ def generate_classification(self, num_classes, num_features, num_samples, test_s
         self._split_data(test_split, seed)
 
     def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0):
+        """Generate a regression task
+        
+        Arguments:
+            num_features {int} -- Number of features
+            num_samples {int} -- Number of samples
+        
+        Keyword Arguments:
+            test_split {float} -- Size of test split (default: {0.1})
+            seed {int} -- a random seed (default: {0})
+        """
         X, Y = make_regression(n_samples=num_samples, n_features=num_features, random_state=seed)
         self.categorical_features = [False] * num_features
         self.problem_type = ProblemType.FeatureRegression
         self.X, self.Y = X, Y
         self._split_data(test_split, seed)
 
     def _split_data(self, test_split, seed):
+        """Split the data into test (, valid) and training sets.
+        
+        Arguments:
+            test_split {float} -- Size of test split
+            seed {int} -- A random seed
+        """
         valid_specified = self.X_valid is not None and self.Y_valid is not None
         test_specified = self.X_test is not None and self.Y_test is not None
 
@@ -101,6 +157,17 @@ def _split_data(self, test_split, seed):
         self.Y_train = self.Y
 
 def deterministic_shuffle_and_split(X, Y, split, seed):
+    """Split the data deterministically given the seed
+    
+    Arguments:
+        X {array} -- The feature data
+        Y {array} -- The targets
+        split {float} -- The size of the split
+        seed {int} -- A random seed
+    
+    Returns:
+        tuple -- Tuple of full data and the two splits
+    """
     rng = np.random.RandomState(seed)
     p = rng.permutation(X.shape[0])
 
@@ -110,4 +177,4 @@ def deterministic_shuffle_and_split(X, Y, split, seed):
         split = int(split * X.shape[0])
         return X, Y, X[0:-split], Y[0:-split], X[-split:], Y[-split:]
     else:
-        return X, Y, X, Y, None, None
+        return X, Y, X, Y, None, None
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`
`7`	`7`	`def balanced_accuracy(solution, prediction):`
	`8`	`+ """balanced accuracy implementation of auto-sklearn"""`
`8`	`9`
`9`	`10`	`y_type, solution, prediction = _check_targets(solution, prediction)`
`10`	`11`