Skip to content

Commit f827467

Browse files
committed
Merge branch 'refactor_development' of github.com:automl/Auto-PyTorch into refactoring-base-dataset_splitting-functions
[fix] Update before merging
2 parents d2fdd90 + 68fc77f commit f827467

File tree

63 files changed

+1660
-989
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1660
-989
lines changed

autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,23 @@
5454

5555

5656
class MyTraditionalTabularClassificationPipeline(BaseEstimator):
57+
"""
58+
A wrapper class that holds a pipeline for traditional classification.
59+
Estimators like CatBoost and Random Forest are considered traditional machine
60+
learning models and are fitted before neural architecture search.
61+
62+
This class is an interface to fit a pipeline containing a traditional machine
63+
learning model, and is the final object that is stored for inference.
64+
65+
Attributes:
66+
dataset_properties (Dict[str, Any]):
67+
A dictionary containing dataset specific information
68+
random_state (Optional[Union[int, np.random.RandomState]]):
69+
Object that contains a seed and allows for reproducible results
70+
init_params (Optional[Dict]):
71+
An optional dictionary that is passed to the pipeline's steps. It serves
72+
a similar function to the kwargs
73+
"""
5774
def __init__(self, config: str,
5875
dataset_properties: Dict[str, Any],
5976
random_state: Optional[Union[int, np.random.RandomState]] = None,
@@ -98,6 +115,21 @@ def get_default_pipeline_options() -> Dict[str, Any]:
98115

99116

100117
class DummyClassificationPipeline(DummyClassifier):
118+
"""
119+
A wrapper class that holds a pipeline for dummy classification.
120+
121+
A wrapper over DummyClassifier of scikit-learn. This estimator is considered the
122+
worst performing model. In case of failure, at least this model will be fitted.
123+
124+
Attributes:
125+
dataset_properties (Dict[str, Any]):
126+
A dictionary containing dataset specific information
127+
random_state (Optional[Union[int, np.random.RandomState]]):
128+
Object that contains a seed and allows for reproducible results
129+
init_params (Optional[Dict]):
130+
An optional dictionary that is passed to the pipeline's steps. It serves
131+
a similar function to the kwargs
132+
"""
101133
def __init__(self, config: Configuration,
102134
random_state: Optional[Union[int, np.random.RandomState]] = None,
103135
init_params: Optional[Dict] = None
@@ -148,6 +180,21 @@ def get_default_pipeline_options() -> Dict[str, Any]:
148180

149181

150182
class DummyRegressionPipeline(DummyRegressor):
183+
"""
184+
A wrapper class that holds a pipeline for dummy regression.
185+
186+
A wrapper over DummyRegressor of scikit-learn. This estimator is considered the
187+
worst performing model. In case of failure, at least this model will be fitted.
188+
189+
Attributes:
190+
dataset_properties (Dict[str, Any]):
191+
A dictionary containing dataset specific information
192+
random_state (Optional[Union[int, np.random.RandomState]]):
193+
Object that contains a seed and allows for reproducible results
194+
init_params (Optional[Dict]):
195+
An optional dictionary that is passed to the pipeline's steps. It serves
196+
a similar function to the kwargs
197+
"""
151198
def __init__(self, config: Configuration,
152199
random_state: Optional[Union[int, np.random.RandomState]] = None,
153200
init_params: Optional[Dict] = None) -> None:
@@ -351,7 +398,7 @@ def _get_pipeline(self) -> BaseEstimator:
351398
if isinstance(self.configuration, int):
352399
pipeline = self.pipeline_class(config=self.configuration,
353400
random_state=np.random.RandomState(self.seed),
354-
init_params=self.fit_dictionary)
401+
init_params=self._init_params)
355402
elif isinstance(self.configuration, Configuration):
356403
pipeline = self.pipeline_class(config=self.configuration,
357404
dataset_properties=self.dataset_properties,
@@ -364,7 +411,7 @@ def _get_pipeline(self) -> BaseEstimator:
364411
pipeline = self.pipeline_class(config=self.configuration,
365412
dataset_properties=self.dataset_properties,
366413
random_state=np.random.RandomState(self.seed),
367-
init_params=self.fit_dictionary)
414+
init_params=self._init_params)
368415
else:
369416
raise ValueError("Invalid configuration entered")
370417
return pipeline

autoPyTorch/pipeline/base_pipeline.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,24 +398,47 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
398398
raise ValueError("Unknown node name. Expected update node name to be in {} "
399399
"got {}".format(self.named_steps.keys(), update.node_name))
400400
node = self.named_steps[update.node_name]
401+
# if node is a choice module
401402
if hasattr(node, 'get_components'):
402403
split_hyperparameter = update.hyperparameter.split(':')
403404

405+
# check if component is not present in include
404406
if include is not None and update.node_name in include.keys():
405407
if split_hyperparameter[0] not in include[update.node_name]:
406408
raise ValueError("Not found {} in include".format(split_hyperparameter[0]))
407409

410+
# check if component is present in exclude
408411
if exclude is not None and update.node_name in exclude.keys():
409412
if split_hyperparameter[0] in exclude[update.node_name]:
410413
raise ValueError("Found {} in exclude".format(split_hyperparameter[0]))
411414

412415
components = node.get_components()
413-
if split_hyperparameter[0] not in components.keys():
416+
# if hyperparameter is __choice__, check if
417+
# the components in the value range of search space update
418+
# are in components of the choice module
419+
if split_hyperparameter[0] == '__choice__':
420+
for choice in update.value_range:
421+
if include is not None and update.node_name in include.keys():
422+
if choice not in include[update.node_name]:
423+
raise ValueError("Not found {} in include".format(choice))
424+
if exclude is not None and update.node_name in exclude.keys():
425+
if choice in exclude[update.node_name]:
426+
raise ValueError("Found {} in exclude".format(choice))
427+
if choice not in components.keys():
428+
raise ValueError("Unknown hyperparameter for choice {}. "
429+
"Expected update hyperparameter "
430+
"to be in {} got {}".format(node.__class__.__name__,
431+
components.keys(), choice))
432+
# check if the component whose hyperparameter
433+
# needs to be updated is in components of the
434+
# choice module
435+
elif split_hyperparameter[0] not in components.keys():
414436
raise ValueError("Unknown hyperparameter for choice {}. "
415437
"Expected update hyperparameter "
416438
"to be in {} got {}".format(node.__class__.__name__,
417439
components.keys(), split_hyperparameter[0]))
418440
else:
441+
# check if hyperparameter is in the search space of the component
419442
component = components[split_hyperparameter[0]]
420443
if split_hyperparameter[1] not in component. \
421444
get_hyperparameter_search_space(dataset_properties=self.dataset_properties):

autoPyTorch/pipeline/components/base_choice.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import re
12
import warnings
23
from collections import OrderedDict
3-
from typing import Any, Dict, List, Optional, Tuple, Union
4+
from typing import Any, Dict, List, Optional
45

56
from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
67

@@ -9,7 +10,8 @@
910
from sklearn.utils import check_random_state
1011

1112
from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
12-
from autoPyTorch.utils.common import FitRequirement
13+
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
14+
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate
1315

1416

1517
class autoPyTorchChoice(object):
@@ -49,7 +51,7 @@ def __init__(self,
4951
# self.set_hyperparameters(self.configuration)
5052
self.choice: Optional[autoPyTorchComponent] = None
5153

52-
self._cs_updates: Dict[str, Tuple] = dict()
54+
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()
5355

5456
def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
5557
if self.choice is not None:
@@ -247,35 +249,35 @@ def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None:
247249
"""
248250
assert isinstance(dataset_properties, dict), "dataset_properties must be a dictionary"
249251

250-
def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
251-
default_value: Union[int, float, str], log: bool = False) -> None:
252-
"""Allows the user to update a hyperparameter
252+
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
253+
"""
254+
Applies search space update to the class
253255
254-
Arguments:
255-
name {string} -- name of hyperparameter
256-
new_value_range {List[?] -- value range can be either lower, upper or a list of possible conditionals
257-
log {bool} -- is hyperparameter logscale
256+
Args:
257+
hyperparameter_search_space_update (HyperparameterSearchSpaceUpdate):
258+
Search Space update for the current autoPyTorchChoice module
258259
"""
259260

260-
if len(new_value_range) == 0:
261-
raise ValueError("The new value range needs at least one value")
262-
self._cs_updates[name] = tuple([new_value_range, default_value, log])
261+
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update
263262

264-
def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
263+
def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, HyperparameterSearchSpace]:
265264
"""Get the search space updates with the given prefix
266265
267-
Keyword Arguments:
268-
prefix {str} -- Only return search space updates with given prefix (default: {None})
266+
Args:
267+
prefix (str):
268+
Only return search space updates with given prefix (default: {None})
269269
270270
Returns:
271-
dict -- Mapping of search space updates. Keys don't contain the prefix.
271+
Dict[str, HyperparameterSearchSpace]:
272+
Mapping of search space updates. Keys don't contain the prefix.
272273
"""
273-
if prefix is None:
274-
return self._cs_updates
275-
result: Dict[str, Tuple] = dict()
276274

277-
# iterate over all search space updates of this node and filter the ones out, that have the given prefix
275+
result: Dict[str, HyperparameterSearchSpace] = dict()
276+
277+
# iterate over all search space updates of this node and keep the ones that have the given prefix
278278
for key in self._cs_updates.keys():
279-
if key.startswith(prefix):
280-
result[key[len(prefix) + 1:]] = self._cs_updates[key]
279+
if prefix is None:
280+
result[key] = self._cs_updates[key].get_search_space()
281+
elif re.search(f'^{prefix}', key) is not None:
282+
result[key[len(prefix) + 1:]] = self._cs_updates[key].get_search_space(remove_prefix=prefix)
281283
return result

autoPyTorch/pipeline/components/base_component.py

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,20 @@
44
import sys
55
import warnings
66
from collections import OrderedDict
7-
from typing import Any, Dict, List, Optional, Tuple, Union
7+
from typing import Any, Dict, List, Optional
88

99
from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
1010

1111
from sklearn.base import BaseEstimator
1212

13-
from autoPyTorch.utils.common import FitRequirement
13+
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
14+
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate
1415

1516

1617
def find_components(
17-
package: str,
18-
directory: str,
19-
base_class: BaseEstimator
18+
package: str,
19+
directory: str,
20+
base_class: BaseEstimator
2021
) -> Dict[str, BaseEstimator]:
2122
"""Utility to find component on a given directory,
2223
that inherit from base_class
@@ -34,8 +35,7 @@ def find_components(
3435
module = importlib.import_module(full_module_name)
3536

3637
for member_name, obj in inspect.getmembers(module):
37-
if inspect.isclass(obj) and issubclass(obj, base_class) and \
38-
obj != base_class:
38+
if inspect.isclass(obj) and issubclass(obj, base_class) and obj != base_class:
3939
# TODO test if the obj implements the interface
4040
# Keep in mind that this only instantiates the ensemble_wrapper,
4141
# but not the real target classifier
@@ -96,7 +96,7 @@ class autoPyTorchComponent(BaseEstimator):
9696
def __init__(self) -> None:
9797
super().__init__()
9898
self._fit_requirements: List[FitRequirement] = list()
99-
self._cs_updates: Dict[str, Tuple] = dict()
99+
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()
100100

101101
@classmethod
102102
def get_required_properties(cls) -> Optional[List[str]]:
@@ -140,7 +140,7 @@ def get_properties(dataset_properties: Optional[Dict[str, str]] = None
140140

141141
@staticmethod
142142
def get_hyperparameter_search_space(
143-
dataset_properties: Optional[Dict[str, str]] = None
143+
dataset_properties: Optional[Dict[str, str]] = None
144144
) -> ConfigurationSpace:
145145
"""Return the configuration space of this classification algorithm.
146146
@@ -253,8 +253,7 @@ def __str__(self) -> str:
253253
name = self.get_properties()['name']
254254
return "autoPyTorch.pipeline %s" % name
255255

256-
def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
257-
default_value: Union[int, float, str], log: bool = False) -> None:
256+
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
258257
"""Allows the user to update a hyperparameter
259258
260259
Arguments:
@@ -263,26 +262,18 @@ def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tup
263262
log {bool} -- is hyperparameter logscale
264263
"""
265264

266-
if len(new_value_range) == 0:
267-
raise ValueError("The new value range needs at least one value")
268-
self._cs_updates[name] = tuple([new_value_range, default_value, log])
265+
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update
269266

270-
def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
271-
"""Get the search space updates with the given prefix
272-
273-
Keyword Arguments:
274-
prefix {str} -- Only return search space updates with given prefix (default: {None})
267+
def _get_search_space_updates(self) -> Dict[str, HyperparameterSearchSpace]:
268+
"""Get the search space updates
275269
276270
Returns:
277271
dict -- Mapping of search space updates. Keys don't contain the prefix.
278272
"""
279-
if prefix is None:
280-
return self._cs_updates
281-
result: Dict[str, Tuple] = dict()
273+
274+
result: Dict[str, HyperparameterSearchSpace] = dict()
282275

283276
# iterate over all search space updates of this node and keep the ones that have the given prefix
284277
for key in self._cs_updates.keys():
285-
if key.startswith(prefix):
286-
# different for autopytorch component as the hyperparameter
287-
result[key[len(prefix):]] = self._cs_updates[key]
278+
result[key] = self._cs_updates[key].get_search_space()
288279
return result

autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,21 @@ def get_hyperparameter_search_space(self,
7676
default = default_
7777
break
7878

79-
preprocessor = CSH.CategoricalHyperparameter('__choice__',
80-
list(available_preprocessors.keys()),
81-
default_value=default)
82-
79+
updates = self._get_search_space_updates()
80+
if '__choice__' in updates.keys():
81+
choice_hyperparameter = updates['__choice__']
82+
if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
83+
raise ValueError("Expected given update for {} to have "
84+
"choices in {} got {}".format(self.__class__.__name__,
85+
available_preprocessors,
86+
choice_hyperparameter.value_range))
87+
preprocessor = CSH.CategoricalHyperparameter('__choice__',
88+
choice_hyperparameter.value_range,
89+
default_value=choice_hyperparameter.default_value)
90+
else:
91+
preprocessor = CSH.CategoricalHyperparameter('__choice__',
92+
list(available_preprocessors.keys()),
93+
default_value=default)
8394
cs.add_hyperparameter(preprocessor)
8495

8596
# add only child hyperparameters of early_preprocessor choices

0 commit comments

Comments
 (0)