
Commit 7407ee7

DOC Add example: Effect of resampling on probability calibration
1 parent 5a5f6d7 commit 7407ee7

File tree: 1 file changed, +158 −0 lines changed

@@ -0,0 +1,158 @@
"""
================================================================
Effect of Resampling on Probability Calibration for Classifiers
================================================================

This example illustrates how resampling a dataset (for instance with
under-sampling) can affect the calibration of a classifier's predicted
probabilities, and how we can fix this issue using
:class:`~sklearn.calibration.CalibratedClassifierCV`.

When we resample a dataset to balance it, we change the prior probabilities
of the classes contained in the dataset. The model learns that some classes
are more frequent, and others less frequent, than they actually are.

This example shows:

1. The calibration curve of a model trained on the original dataset.
2. The calibration curve of a model trained on resampled data, which is distorted.
3. How to recover well-calibrated probabilities using calibration.
"""

# Authors: The imbalanced-learn developers
# License: MIT

# %%
# Create an imbalanced dataset with two classes using scikit-learn's
# :func:`~sklearn.datasets.make_classification` function with a 95:5 class ratio,
# and split the data into training and testing sets (80:20) using
# :func:`~sklearn.model_selection.train_test_split` with ``stratify=y``
# so that the split preserves the 95:5 class ratio.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_classes=2,
    weights=[0.95, 0.05],
    random_state=42,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
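
# %%
# As a quick check, we can look at the class counts: thanks to ``stratify=y``,
# the training and test sets should both keep roughly the 95:5 ratio.
# (This check uses :func:`numpy.bincount`; NumPy is already available as a
# scikit-learn dependency.)
import numpy as np  # NumPy is assumed available (scikit-learn dependency)

print("Class counts in y_train:", np.bincount(y_train))
print("Class counts in y_test: ", np.bincount(y_test))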

# %%
# The Problem: Resampling distorts probabilities
# -----------------------------------------------
# First, we train a :class:`~sklearn.linear_model.LogisticRegression` classifier
# on the original data. Then, we train a second
# :class:`~sklearn.linear_model.LogisticRegression` classifier on data that has
# been undersampled to a 50:50 ratio using
# :class:`~imblearn.under_sampling.RandomUnderSampler`.

from sklearn.linear_model import LogisticRegression
from imblearn.under_sampling import RandomUnderSampler

# Train a logistic regression model on the original data (vanilla model)
lr_original = LogisticRegression(random_state=42)
lr_original.fit(X_train, y_train)

# Train a second model on undersampled data
under_sampler = RandomUnderSampler(random_state=42)
X_undersampled, y_undersampled = under_sampler.fit_resample(X_train, y_train)

lr_undersampled = LogisticRegression(random_state=42)
lr_undersampled.fit(X_undersampled, y_undersampled)
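
# %%
# To make the prior shift concrete, we can compare the class counts seen by the
# two models: the original training set keeps roughly the 95:5 ratio, while the
# undersampled training set has been balanced to 50:50 by
# :class:`~imblearn.under_sampling.RandomUnderSampler`.
print("Original training class counts:    ", np.bincount(y_train))
print("Undersampled training class counts: ", np.bincount(y_undersampled))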

# %%
# We plot the calibration curves to compare the two models using
# :class:`~sklearn.calibration.CalibrationDisplay`.
# The diagonal line represents a perfectly calibrated model.

from sklearn.calibration import CalibrationDisplay

fig, ax = plt.subplots(figsize=(8, 6))

CalibrationDisplay.from_estimator(
    lr_original, X_test, y_test, n_bins=10, name="Original model", ax=ax
)

CalibrationDisplay.from_estimator(
    lr_undersampled, X_test, y_test, n_bins=10, name="Undersampled model", ax=ax
)

plt.title("Calibration: Original vs Resampled")
plt.show()

# %%
# **Observation:**
# The resampled model's curve sits well below the diagonal: the model is
# over-confident. It predicts high probabilities for the positive class, but
# the actual fraction of positives is much lower.
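
# %%
# A short side note on why this happens: if undersampling keeps only a fraction
# ``beta`` of the majority (negative) class, Bayes' rule implies that the odds
# learned on the resampled data are inflated by a factor ``1 / beta``. A
# probability ``p_s`` predicted by the resampled model therefore corresponds to
# ``p = beta * p_s / (beta * p_s + 1 - p_s)`` under the original class
# distribution. The snippet below is only an illustrative sketch of this
# closed-form correction; the rest of the example relies on
# :class:`~sklearn.calibration.CalibratedClassifierCV` instead.

# fraction of majority-class samples kept by the undersampler (illustrative)
beta = np.bincount(y_undersampled)[0] / np.bincount(y_train)[0]
p_s = lr_undersampled.predict_proba(X_test)[:, 1]
p_corrected = beta * p_s / (beta * p_s + 1 - p_s)

print(f"Mean predicted probability (undersampled model): {p_s.mean():.3f}")
print(f"Mean prior-corrected probability:                {p_corrected.mean():.3f}")
print(f"Actual positive rate in the test set:            {y_test.mean():.3f}")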

# %%
# The Solution: Probability Calibration
# --------------------------------------
# We use :class:`~sklearn.calibration.CalibratedClassifierCV` to calibrate the
# model. It is important to note that the calibrator needs to be trained on data
# with the real class distribution. Therefore, we split the training set into
# two parts:
#
# - ``X_model_train``: used to train the model, after resampling;
# - ``X_calib``: used to train the calibrator (original distribution).

from sklearn.calibration import CalibratedClassifierCV

# Split the training set
X_model_train, X_calib, y_model_train, y_calib = train_test_split(
    X_train, y_train, test_size=0.15, random_state=42, stratify=y_train
)

# Resample and train the model
X_undersampled2, y_undersampled2 = under_sampler.fit_resample(
    X_model_train, y_model_train
)
lr_resampled = LogisticRegression(random_state=42)
lr_resampled.fit(X_undersampled2, y_undersampled2)

# Calibrate using the untouched set (X_calib, y_calib).
# We use method="sigmoid", a good choice for logistic regression and when the
# calibration set contains few positive samples.
# We use cv="prefit" because the base model is already trained.
calibrated_model = CalibratedClassifierCV(
    lr_resampled, method="sigmoid", cv="prefit"
)
calibrated_model.fit(X_calib, y_calib)

# %%
# Comparing the Results
# ---------------------
# We plot the calibration curve of the calibrated model next to that of the
# uncalibrated, undersampled model.

fig, ax = plt.subplots(figsize=(8, 6))

# Plot the undersampled, uncalibrated model
CalibrationDisplay.from_estimator(
    lr_resampled, X_test, y_test, n_bins=10, name="Uncalibrated (Undersampled)", ax=ax
)

# Plot the new calibrated model
CalibrationDisplay.from_estimator(
    calibrated_model, X_test, y_test, n_bins=10, name="Calibrated Model", ax=ax
)

plt.title("Effect of Calibration on Resampled Model")
plt.show()

# %%
# We can also check that calibration did not affect the model's ranking quality
# by verifying that the ROC AUC (discrimination power) is unchanged. In
# addition, we can use the Brier score to measure the improvement in probability
# accuracy. We use the :func:`~sklearn.metrics.roc_auc_score` and
# :func:`~sklearn.metrics.brier_score_loss` functions.

from sklearn.metrics import roc_auc_score, brier_score_loss

# Probability estimates on the test set for class 1 (the minority class)
uncalibrated_prob = lr_resampled.predict_proba(X_test)[:, 1]
prob_calibrated = calibrated_model.predict_proba(X_test)[:, 1]

print(f"ROC AUC (Uncalibrated): {roc_auc_score(y_test, uncalibrated_prob):.4f}")
print(f"ROC AUC (Calibrated): {roc_auc_score(y_test, prob_calibrated):.4f}")
print("-" * 30)
print("For the Brier score, lower is better")
print(f"Brier Score (Uncalibrated): {brier_score_loss(y_test, uncalibrated_prob):.4f}")
print(f"Brier Score (Calibrated): {brier_score_loss(y_test, prob_calibrated):.4f}")
