"""
===============================================================
Effect of Resampling on Probability Calibration for Classifiers
===============================================================

This example illustrates how resampling a dataset (for instance with
under-sampling) can affect the calibration of a classifier's predicted
probabilities, and how to fix this issue using
:class:`~sklearn.calibration.CalibratedClassifierCV`.

When we resample a dataset to balance it, we change the prior probabilities
of the classes it contains. The model therefore learns that some classes are
more frequent, and others less frequent, than they actually are.

This example shows:

1. The calibration curve of a model trained on the original dataset.
2. The calibration curve of a model trained on resampled data, which is
   distorted.
3. How to recover well-calibrated probabilities using calibration.
"""

# Authors: The imbalanced-learn developers
# License: MIT

# %%
# Create an imbalanced two-class dataset with a 95-5 class ratio using
# scikit-learn's :func:`sklearn.datasets.make_classification`, then split it
# into training and testing sets (80-20) with
# :func:`sklearn.model_selection.train_test_split`. We pass ``stratify=y`` so
# that both subsets keep the 95-5 class ratio.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Seed the generator like every other random step in this example, so that the
# dataset -- and therefore every figure and score below -- is reproducible from
# one run to the next.
X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_classes=2,
    weights=[0.95, 0.05],
    random_state=42,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# %%
# The Problem: Resampling distorts probabilities
# ----------------------------------------------
# First, we train a :class:`~sklearn.linear_model.LogisticRegression`
# classifier on the original data. Then, we train a second
# :class:`~sklearn.linear_model.LogisticRegression` classifier on data that has
# been under-sampled to a 50-50 ratio using
# :class:`imblearn.under_sampling.RandomUnderSampler`.

from imblearn.under_sampling import RandomUnderSampler
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression fitted on the imbalanced training set as is.
lr_original = LogisticRegression(random_state=42).fit(X_train, y_train)

# Balance the training set with random under-sampling, then fit a second
# logistic regression on the balanced data.
under_sampler = RandomUnderSampler(random_state=42)
X_undersampled, y_undersampled = under_sampler.fit_resample(X_train, y_train)

lr_undersampled = LogisticRegression(random_state=42)
lr_undersampled.fit(X_undersampled, y_undersampled)

# %%
# We plot the calibration curves of the two models using
# :class:`~sklearn.calibration.CalibrationDisplay`. The diagonal line
# represents a perfectly calibrated model.

from sklearn.calibration import CalibrationDisplay

fig, ax = plt.subplots(figsize=(8, 6))

# Draw both curves on the same axes so they can be compared directly.
for label, model in [
    ("Original model", lr_original),
    ("Undersampled model", lr_undersampled),
]:
    CalibrationDisplay.from_estimator(
        model, X_test, y_test, n_bins=10, name=label, ax=ax
    )

ax.set_title("Calibration: Original vs Resampled")
plt.show()

# %%
# **Observation:**
# The curve of the model trained on resampled data lies well below the
# diagonal. The model is over-confident: it predicts high probabilities for
# the positive class, whereas the actual fraction of positives is much lower.

# %%
# The Solution: Probability Calibration
# -------------------------------------
# We use :class:`~sklearn.calibration.CalibratedClassifierCV` to calibrate the
# model. It is important to note that the calibrator needs to be trained on
# data with the real class distribution. Therefore, we split the training set
# into two parts:
#
# - ``X_model_train``: resampled and used to train the model;
# - ``X_calib``: used to train the calibrator (original distribution).

from sklearn.calibration import CalibratedClassifierCV

# Hold out a stratified 15% of the training set for the calibrator.
X_model_train, X_calib, y_model_train, y_calib = train_test_split(
    X_train, y_train, test_size=0.15, random_state=42, stratify=y_train
)

# Under-sample the remaining part and fit the classifier on the balanced data.
X_balanced, y_balanced = under_sampler.fit_resample(
    X_model_train, y_model_train
)
lr_resampled = LogisticRegression(random_state=42).fit(X_balanced, y_balanced)

# Calibrate on the untouched hold-out set (X_calib, y_calib).
# - method="sigmoid" suits logistic regression and a calibration set that
#   contains only a few positive samples.
# - cv="prefit" tells the calibrator that the base model is already trained.
# NOTE(review): cv="prefit" is reported as deprecated since scikit-learn 1.6
# in favour of wrapping the model in sklearn.frozen.FrozenEstimator -- confirm
# against the minimum supported scikit-learn version before switching.
calibrated_model = CalibratedClassifierCV(
    lr_resampled, method="sigmoid", cv="prefit"
)
calibrated_model.fit(X_calib, y_calib)

# %%
# Comparing the Results
# ---------------------
# We plot the calibration curve of the under-sampled model before and after
# calibration.

fig, ax = plt.subplots(figsize=(8, 6))

# Same classifier in both curves: first its raw probabilities, then the
# probabilities returned by the calibrated wrapper.
for label, model in [
    ("Uncalibrated (Undersampled)", lr_resampled),
    ("Calibrated Model", calibrated_model),
]:
    CalibrationDisplay.from_estimator(
        model, X_test, y_test, n_bins=10, name=label, ax=ax
    )

ax.set_title("Effect of Calibration on Resampled Model")
plt.show()

# %%
# We can also check that calibration did not affect the model's ranking
# quality by verifying that the ROC AUC (discrimination power) is unchanged.
# In addition, we can use the Brier score to measure the improvement in the
# accuracy of the probabilities. We use
# :func:`sklearn.metrics.roc_auc_score` and
# :func:`sklearn.metrics.brier_score_loss`.

from sklearn.metrics import brier_score_loss, roc_auc_score

# Predicted probability of class 1 (the minority class) on the test set.
prob_uncalibrated = lr_resampled.predict_proba(X_test)[:, 1]
prob_calibrated = calibrated_model.predict_proba(X_test)[:, 1]

# Ranking quality before and after calibration.
auc_uncalibrated = roc_auc_score(y_test, prob_uncalibrated)
auc_calibrated = roc_auc_score(y_test, prob_calibrated)

# Accuracy of the probabilities themselves before and after calibration.
brier_uncalibrated = brier_score_loss(y_test, prob_uncalibrated)
brier_calibrated = brier_score_loss(y_test, prob_calibrated)

print(f"ROC AUC (Uncalibrated): {auc_uncalibrated:.4f} ")
print(f"ROC AUC (Calibrated): {auc_calibrated:.4f} ")
print("-" * 30)
print("For the Brier Score the smaller is the better")
print(f"Brier Score (Uncalibrated): {brier_uncalibrated:.4f} ")
print(f"Brier Score (Calibrated): {brier_calibrated:.4f} ")