|
1 | | -import logging |
2 | 1 | import math |
3 | 2 | from collections import OrderedDict |
4 | | -from typing import Any, Dict, List, Optional, Tuple |
| 3 | +from typing import Any, Dict, Optional, Tuple |
5 | 4 |
|
6 | 5 | import ConfigSpace as CS |
7 | 6 | from ConfigSpace.configuration_space import ConfigurationSpace |
|
16 | 15 | from torch.nn import functional as F |
17 | 16 |
|
18 | 17 | from autoPyTorch.pipeline.components.setup.network_backbone.base_network_backbone import NetworkBackboneComponent |
19 | | - |
# Registry of supported activation functions. The string key is the value the
# "activation" hyperparameter takes in the search space; the value is the
# (uninstantiated) torch module class, instantiated when a layer is built.
_activations: Dict[str, nn.Module] = {
    "relu": nn.ReLU,
    "tanh": nn.Tanh,
    "sigmoid": nn.Sigmoid
}
25 | | - |
26 | | - |
class ConvNetImageBackbone(NetworkBackboneComponent):
    """
    Plain convolutional backbone for image tasks.

    Stacks Conv2d -> BatchNorm2d -> activation -> MaxPool2d blocks, doubling
    the filter count with every block, until either the configured number of
    layers is reached or the spatial resolution collapses to zero.
    """
    supported_tasks = {"image_classification", "image_regression"}

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)
        # Keyword arguments shared by every BatchNorm2d layer in the stack.
        self.bn_args = {"eps": 1e-5, "momentum": 0.1}

    def _get_layer_size(self, w: int, h: int) -> Tuple[int, int]:
        """Return the (width, height) a single conv+pool block produces from (w, h)."""
        kernel = self.config["conv_kernel_size"]
        stride = self.config["conv_kernel_stride"]
        padding = self.config["conv_kernel_padding"]
        pool = self.config["pool_size"]
        out_w = ((w - kernel + 2 * padding) // stride) + 1
        out_h = ((h - kernel + 2 * padding) // stride) + 1
        return out_w // pool, out_h // pool

    def _add_layer(self, layers: List[nn.Module], in_filters: int, out_filters: int) -> None:
        """Append one Conv2d/BatchNorm2d/activation/MaxPool2d block to ``layers`` in place."""
        layers += [
            nn.Conv2d(in_filters, out_filters,
                      kernel_size=self.config["conv_kernel_size"],
                      stride=self.config["conv_kernel_stride"],
                      padding=self.config["conv_kernel_padding"]),
            nn.BatchNorm2d(out_filters, **self.bn_args),
            _activations[self.config["activation"]](),
            nn.MaxPool2d(kernel_size=self.config["pool_size"],
                         stride=self.config["pool_size"]),
        ]

    def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module:
        """
        Build the convolutional stack for ``input_shape`` = (channels, width, height),
        store it on ``self.backbone`` and return it.
        """
        channels, iw, ih = input_shape
        layers: List[nn.Module] = []
        n_filters = self.config["conv_init_filters"]
        self._add_layer(layers, channels, n_filters)

        cw, ch = self._get_layer_size(iw, ih)
        for _ in range(2, self.config["num_layers"] + 1):
            cw, ch = self._get_layer_size(cw, ch)
            # Stop stacking once pooling would shrink the feature map to nothing.
            if cw == 0 or ch == 0:
                logging.info("> reduce network size due to too small layers.")
                break
            self._add_layer(layers, n_filters, n_filters * 2)
            n_filters *= 2
        self.backbone = nn.Sequential(*layers)
        return self.backbone

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
        """Describe this component for AutoPyTorch's component registry."""
        return {
            'shortname': 'ConvNetImageBackbone',
            'name': 'ConvNetImageBackbone',
            'handles_tabular': False,
            'handles_image': True,
            'handles_time_series': False,
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None,
                                        min_num_layers: int = 2,
                                        max_num_layers: int = 5,
                                        min_init_filters: int = 16,
                                        max_init_filters: int = 64,
                                        min_kernel_size: int = 2,
                                        max_kernel_size: int = 5,
                                        min_stride: int = 1,
                                        max_stride: int = 3,
                                        min_padding: int = 2,
                                        max_padding: int = 3,
                                        min_pool_size: int = 2,
                                        max_pool_size: int = 3) -> ConfigurationSpace:
        """Return the ConfigSpace search space for this backbone's hyperparameters."""
        cs = CS.ConfigurationSpace()

        cs.add_hyperparameter(UniformIntegerHyperparameter('num_layers',
                                                           lower=min_num_layers,
                                                           upper=max_num_layers))
        cs.add_hyperparameter(CategoricalHyperparameter('activation',
                                                        choices=list(_activations.keys())))
        # Remaining hyperparameters are all integer ranges; register them in the
        # same order the original API exposed them.
        int_ranges = (
            ('conv_init_filters', min_init_filters, max_init_filters),
            ('conv_kernel_size', min_kernel_size, max_kernel_size),
            ('conv_kernel_stride', min_stride, max_stride),
            ('conv_kernel_padding', min_padding, max_padding),
            ('pool_size', min_pool_size, max_pool_size),
        )
        for name, low, high in int_ranges:
            cs.add_hyperparameter(UniformIntegerHyperparameter(name, lower=low, upper=high))
        return cs
| 18 | +from autoPyTorch.pipeline.components.setup.network_backbone.utils import _activations |
116 | 19 |
|
117 | 20 |
|
118 | 21 | class _DenseLayer(nn.Sequential): |
@@ -177,7 +80,9 @@ def __init__(self, |
177 | 80 |
|
178 | 81 |
|
179 | 82 | class DenseNetBackbone(NetworkBackboneComponent): |
180 | | - supported_tasks = {"image_classification", "image_regression"} |
| 83 | + """ |
| 84 | + Dense Net Backbone for images (see https://arxiv.org/pdf/1608.06993.pdf) |
| 85 | + """ |
181 | 86 |
|
182 | 87 | def __init__(self, **kwargs: Any): |
183 | 88 | super().__init__(**kwargs) |
@@ -247,39 +152,55 @@ def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[ |
247 | 152 | } |
248 | 153 |
|
249 | 154 | @staticmethod |
250 | | - def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, |
251 | | - min_growth_rate: int = 12, |
252 | | - max_growth_rate: int = 40, |
253 | | - min_num_blocks: int = 3, |
254 | | - max_num_blocks: int = 4, |
255 | | - min_num_layers: int = 4, |
256 | | - max_num_layers: int = 64) -> ConfigurationSpace: |
| 155 | + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, |
| 156 | + num_blocks: Tuple[Tuple, int] = ((3, 4), 3), |
| 157 | + num_layers: Tuple[Tuple, int] = ((4, 64), 16), |
| 158 | + growth_rate: Tuple[Tuple, int] = ((12, 40), 20), |
| 159 | + activation: Tuple[Tuple, str] = (tuple(_activations.keys()), |
| 160 | + list(_activations.keys())[0]), |
| 161 | + use_dropout: Tuple[Tuple, bool] = ((True, False), False), |
| 162 | + dropout: Tuple[Tuple, float] = ((0, 0.5), 0.2) |
| 163 | + ) -> ConfigurationSpace: |
257 | 164 | cs = CS.ConfigurationSpace() |
| 165 | + |
| 166 | + min_growth_rate, max_growth_rate = growth_rate[0] |
258 | 167 | growth_rate_hp = UniformIntegerHyperparameter('growth_rate', |
259 | 168 | lower=min_growth_rate, |
260 | | - upper=max_growth_rate) |
| 169 | + upper=max_growth_rate, |
| 170 | + default_value=growth_rate[1]) |
261 | 171 | cs.add_hyperparameter(growth_rate_hp) |
262 | 172 |
|
| 173 | + min_num_blocks, max_num_blocks = num_blocks[0] |
263 | 174 | blocks_hp = UniformIntegerHyperparameter('blocks', |
264 | 175 | lower=min_num_blocks, |
265 | | - upper=max_num_blocks) |
| 176 | + upper=max_num_blocks, |
| 177 | + default_value=num_blocks[1]) |
266 | 178 | cs.add_hyperparameter(blocks_hp) |
267 | 179 |
|
268 | 180 | activation_hp = CategoricalHyperparameter('activation', |
269 | | - choices=list(_activations.keys())) |
| 181 | + choices=activation[0], |
| 182 | + default_value=activation[1]) |
270 | 183 | cs.add_hyperparameter(activation_hp) |
271 | 184 |
|
272 | | - use_dropout = CategoricalHyperparameter('use_dropout', choices=[True, False]) |
| 185 | + use_dropout = CategoricalHyperparameter('use_dropout', |
| 186 | + choices=use_dropout[0], |
| 187 | + default_value=use_dropout[1]) |
| 188 | + |
| 189 | + min_dropout, max_dropout = dropout[0] |
273 | 190 | dropout = UniformFloatHyperparameter('dropout', |
274 | | - lower=0.0, |
275 | | - upper=1.0) |
| 191 | + lower=min_dropout, |
| 192 | + upper=max_dropout, |
| 193 | + default_value=dropout[1]) |
| 194 | + |
276 | 195 | cs.add_hyperparameters([use_dropout, dropout]) |
277 | 196 | cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True)) |
278 | 197 |
|
279 | 198 | for i in range(1, max_num_blocks + 1): |
| 199 | + min_num_layers, max_num_layers = num_layers[0] |
280 | 200 | layer_hp = UniformIntegerHyperparameter('layer_in_block_%d' % i, |
281 | 201 | lower=min_num_layers, |
282 | | - upper=max_num_layers) |
| 202 | + upper=max_num_layers, |
| 203 | + default_value=num_layers[1]) |
283 | 204 | cs.add_hyperparameter(layer_hp) |
284 | 205 |
|
285 | 206 | if i > min_num_blocks: |
|
0 commit comments