Skip to content

Commit 3497199

Browse files
author
zimmerl
committed
Ensemble regularization
1 parent 25944ce commit 3497199

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

autoPyTorch/pipeline/nodes/baseline_trainer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,23 @@ class BaselineTrainer(PipelineNode):
3131
"extra_trees" : baselines.ExtraTreesBaseline,
3232
"lgb" : baselines.LGBBaseline,
3333
"catboost" : baselines.CatboostBaseline,
34-
"rotation_forest" : baselines.RotationForestBaseline,
34+
#"rotation_forest" : baselines.RotationForestBaseline,
3535
"knn" : baselines.KNNBaseline})
3636

3737
identifiers = {
3838
"random_forest": (-6, 0, 0, 0.0),
3939
"extra_trees": (-5, 0, 0, 0.0),
4040
"lgb": (-4, 0, 0, 0.0),
4141
"catboost": (-3, 0, 0, 0.0),
42-
"rotation_forest": (-2, 0, 0, 0.0),
42+
#"rotation_forest": (-2, 0, 0, 0.0),
4343
"knn": (-1, 0, 0, 0.0)}
4444

4545
identifiers_ens = {
4646
-6: baselines.RFBaseline,
4747
-5: baselines.ExtraTreesBaseline,
4848
-4: baselines.LGBBaseline,
4949
-3: baselines.CatboostBaseline,
50-
-2: baselines.RotationForestBaseline,
50+
#-2: baselines.RotationForestBaseline,
5151
-1: baselines.KNNBaseline}
5252

5353
def __init__(self):

create_trajectory.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def get_ensemble_performance(self, timestep):
156156
else:
157157
test_performance = 0
158158

159-
if len(self.ensemble_pred_dir_test)>0:
159+
if len(self.ensemble_pred_dir_test)>0 and len(self.ensemble_predictions_enstest)>0:
160160
enstest_preds = self.ensemble_selection.predict(self.ensemble_predictions_enstest[0:cutoff_ind])
161161
if len(enstest_preds.shape)==3:
162162
enstest_preds = enstest_preds[0]
@@ -171,21 +171,41 @@ def get_ensemble_performance(self, timestep):
171171

172172
return self.ensemble_selection.get_validation_performance(), test_performance, enstest_performance, model_identifiers, model_weights
173173

174-
def simulate_trajectory(self):
174+
def restart_trajectory_with_reg(self, timelimit):
    """Re-run the trajectory simulation with a stricter ensemble configuration.

    Sets ``only_consider_n_best`` to 2 in ``self.ensemble_config``, replaces
    ``self.ensemble_selection`` with a fresh ``EnsembleSelection`` built from
    that configuration, and calls ``simulate_trajectory`` again with restarts
    disabled so that this method cannot be re-entered recursively.

    Args:
        timelimit: Forwarded unchanged as the ``timelimit`` argument of
            ``simulate_trajectory``.
    """
    print("==> Restarting ensemble selection with timelimit %f" %timelimit)
    self.ensemble_config["only_consider_n_best"] = 2
    # Build from the instance configuration that was just modified. Relying on
    # a module-level ``ensemble_config`` name fails with NameError when this
    # module is imported, and silently ignores the new setting if the instance
    # holds its own copy of the configuration.
    self.ensemble_selection = EnsembleSelection(**self.ensemble_config)

    self.simulate_trajectory(timelimit=timelimit, allow_restart=False)
180+
181+
def simulate_trajectory(self, timelimit=np.inf, allow_restart=True):
    """Build an ensemble at every timestep and record the resulting metrics.

    Resets all trajectory lists, then iterates over ``self.timesteps`` in
    order. For each timestep ``t`` it calls ``self.get_ensemble_performance(t)``
    and appends the returned values to ``self.trajectory``,
    ``self.test_trajectory``, ``self.enstest_trajectory``,
    ``self.ensemble_loss``, ``self.model_identifiers`` and
    ``self.model_weights``.

    Args:
        timelimit: Iteration stops at the first timestep strictly greater
            than this value. Defaults to ``np.inf`` (no limit).
        allow_restart: When true, a validation performance whose absolute
            value is exactly 100 within the first 20 timesteps hands control
            to ``self.restart_trajectory_with_reg`` and stops this loop.
    """
    self.trajectory = []
    self.test_trajectory = []
    self.enstest_trajectory = []
    self.model_identifiers = []
    self.model_weights = []
    self.ensemble_loss = []

    for ind, t in enumerate(self.timesteps):
        # Single time-limit check per iteration: neither ``t`` nor
        # ``timelimit`` changes inside the loop body, so a second check at
        # the bottom could never fire.
        if t > timelimit:
            break

        print("==> Building ensemble at %i -th timestep %f" %(ind, t))
        (ensemble_performance,
         test_performance,
         enstest_performance,
         model_identifiers,
         model_weights) = self.get_ensemble_performance(t)
        print("==> Performance:", ensemble_performance, "/", test_performance, "/", enstest_performance)

        # NOTE(review): |performance| == 100 presumably marks a saturated
        # validation score early in the run -- confirm against the metric's
        # scale. The restart is always issued without a time limit.
        if abs(ensemble_performance) == 100 and ind < 20 and allow_restart:
            self.restart_trajectory_with_reg(timelimit=np.inf)
            break

        self.ensemble_loss.append(ensemble_performance)
        self.trajectory.append((t, ensemble_performance))
        self.test_trajectory.append((t, test_performance))
        self.enstest_trajectory.append((t, enstest_performance))
        self.model_identifiers.append(model_identifiers)
        self.model_weights.append(model_weights)
190210

191211
def predict_with_weights(self, identifiers, weights):
@@ -268,7 +288,7 @@ def minimize_trf(value):
268288
autonet_accuracy = AutoNetMetric(name="accuracy", metric=accuracy, loss_transform=minimize_trf, ohe_transform=undo_ohe)
269289

270290
ensemble_config = {"ensemble_size" : 35, #35
271-
"only_consider_n_best" : 10, #10
291+
"only_consider_n_best" : 10, #10 #2
272292
"sorted_initialization_n_best" : 1,
273293
#"only_consider_n_best_percent" : 0,
274294
"metric" : autonet_accuracy}
@@ -322,7 +342,7 @@ def minimize_trf(value):
322342
for ident, weight in zip(identifiers, weights):
323343
ident_weight_dict[ident] = weight
324344
identifier_weight_dicts.append(ident_weight_dict)
325-
345+
326346
all_identifiers = simulator.model_identifiers[-1]
327347
weight_dict = {ident:0 for ident in all_identifiers}
328348

@@ -336,11 +356,11 @@ def minimize_trf(value):
336356

337357
print(weight_dict)
338358

339-
print("Combined score / true score:", combined_score, "/", incumbent_score_val)
359+
print("Incumbent ind / score:", incumbent_ind_val, "/", incumbent_score_val)
340360

341-
results = {"all_time_incumbent":incumbent_score_all_time,
342-
"all_time_incumbent_val":incumbent_score_all_time_val,
343-
"3600_without_val": score_at_3600,
361+
results = {#"all_time_incumbent":incumbent_score_all_time,
362+
#"all_time_incumbent_val":incumbent_score_all_time_val,
363+
#"3600_without_val": score_at_3600,
344364
"3600_incumbent_val":incumbent_score_val}
345365
#"3600_incumbent_val":combined_score}
346366

0 commit comments

Comments
 (0)