Skip to content

Commit 292fc60

Browse files
Add HPO for xgboost
1 parent 59900f0 commit 292fc60

File tree

4 files changed

+45
-8
lines changed

4 files changed

+45
-8
lines changed

python/src/lazylearn/pipeline/pipeline.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,4 @@ def __init__(self):
7474
self.holdout_features_df: DataFrame = None
7575
self.holdout_targets: Series = None
7676
self.holdout_score: float = None
77+
self.regressor = None

python/src/lazylearn/regression/models/xgboost/xgb.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from regression.models.xgboost.xgb_regressor_steps.regressor_step import (
55
XGBRegressorStep,
66
)
7+
from regression.models.xgboost.xgb_regressor_steps.hpo_step import HyperParameterOptimizationStep # noqa
78

89

910
class XGBRegressionRunner:
@@ -32,7 +33,9 @@ def fit(self):
3233

3334
self.pipeline.add(OrdinalConverter(cat_vars=cat_vars))
3435

35-
self.pipeline.add(XGBRegressorStep(random_state=self.random_state))
36+
self.pipeline.add(HyperParameterOptimizationStep())
37+
38+
self.pipeline.add(XGBRegressorStep())
3639

3740
self.pipeline.fit()
3841

python/src/lazylearn/regression/models/xgboost/xgb_regressor_steps/hpo_step.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from xgboost import XGBRegressor
2+
3+
from pipeline.pipeline import PipelineStep, RegressionPipeline
4+
from sklearn.model_selection import RandomizedSearchCV, KFold
5+
6+
7+
class HyperParameterOptimizationStep(PipelineStep):
    """Pipeline step that configures a randomized hyper-parameter search.

    ``fit`` does not train anything: it attaches an *unfitted*
    ``RandomizedSearchCV`` (wrapping an ``XGBRegressor``) to
    ``pipeline.regressor``.  A later step (``XGBRegressorStep``) calls
    ``.fit`` on that object, which runs the search and keeps the best model.
    """

    def __init__(self, n_splits=5, random_state=None, n_iter=20):
        # Number of K-fold cross-validation splits used by the search.
        self.n_splits = n_splits
        # Seed for reproducible parameter sampling and model training.
        # BUG FIX: previously stored but never forwarded to the estimator
        # or the search, so results were not reproducible.
        self.random_state = random_state
        # Number of parameter settings sampled by RandomizedSearchCV
        # (generalized from the previously hard-coded 20; default unchanged).
        self.n_iter = n_iter
        # Search space for the XGBoost regressor.
        self.param_grid = {
            'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
            'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
            'subsample': [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            'colsample_bytree': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            'colsample_bylevel': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
            'min_child_weight': [0.5, 1.0, 3.0, 5.0, 7.0, 10.0],
            'gamma': [0, 0.25, 0.5, 1.0],
            'n_estimators': [100, 200, 300, 500, 1000],
        }

    def fit(self, pipeline: RegressionPipeline):
        """Attach an unfitted randomized search to ``pipeline.regressor``."""
        # Thread the seed through so the base estimator is reproducible.
        xgbtuned = XGBRegressor(random_state=self.random_state)

        # Plain (non-shuffled) K-fold CV.  NOTE: the original comment called
        # this a "time series" split, but KFold is not TimeSeriesSplit.
        cv = KFold(n_splits=self.n_splits)
        xgbtunedreg = RandomizedSearchCV(
            xgbtuned,
            param_distributions=self.param_grid,
            scoring='neg_mean_squared_error',
            n_iter=self.n_iter,
            n_jobs=-1,
            cv=cv,
            verbose=1,
            # Seed the parameter sampling as well, for reproducible searches.
            random_state=self.random_state,
        )
        # Not fitted here — a downstream step calls pipeline.regressor.fit().
        pipeline.regressor = xgbtunedreg

    def predict(self, pipeline: RegressionPipeline):
        # Prediction is delegated to the fitted regressor in a later step.
        pass
python/src/lazylearn/regression/models/xgboost/xgb_regressor_steps/regressor_step.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,20 @@
11
from pipeline.pipeline import PipelineStep, RegressionPipeline
2-
from xgboost import XGBRegressor
32

43

54
class XGBRegressorStep(PipelineStep):
    """Fits the pipeline's pre-configured regressor and produces predictions.

    The regressor itself is expected to already be attached to
    ``pipeline.regressor`` by an earlier step.
    """

    def fit(self, pipeline: RegressionPipeline):
        """Train ``pipeline.regressor`` on the training features/targets."""
        # The target column must not appear among the predictors.
        features = [
            col for col in pipeline.feature_list if col != pipeline.target
        ]
        pipeline.feature_list = features
        print("Fitting XGBRegressor")
        train_matrix = pipeline.train_features_df[features]
        pipeline.regressor.fit(X=train_matrix, y=pipeline.train_targets)  # noqa
        print("XGBRegressor fitted!")

    def predict(self, pipeline: RegressionPipeline):
        """Score the current temporary test frame with the fitted regressor."""
        test_matrix = pipeline.tmp_test[pipeline.feature_list]
        pipeline.tmp_pred = pipeline.regressor.predict(X=test_matrix)

0 commit comments

Comments
 (0)