22from __future__ import print_function
33from __future__ import division
44
5- from sklearn .utils import check_X_y
6-
75from ..over_sampling import SMOTE
86from ..under_sampling import EditedNearestNeighbours
97from ..base import SamplerMixin
@@ -22,11 +20,11 @@ class SMOTEENN(SamplerMixin):
2220 number of samples in the minority class over the number of
2321 samples in the majority class.
2422
25- random_state : int or None, optional (default=None)
26- Seed for random number generation.
27-
28- verbose : bool, optional (default=True)
29- Whether or not to print information about the processing .
23+ random_state : int, RandomState instance or None, optional (default=None)
24+ If int, random_state is the seed used by the random number generator;
25+ If RandomState instance, random_state is the random number generator;
26+ If None, the random number generator is the RandomState instance used
27+ by np.random.
3028
3129 k : int, optional (default=5)
3230 Number of nearest neighbours to use to construct synthetic
@@ -60,15 +58,6 @@ class SMOTEENN(SamplerMixin):
6058
6159 Attributes
6260 ----------
63- ratio : str or float
64- If 'auto', the ratio will be defined automatically to balance
65- the dataset. Otherwise, the ratio is defined as the
66- number of samples in the minority class over the the number of
67- samples in the majority class.
68-
69- random_state : int or None
70- Seed for random number generation.
71-
7261 min_c_ : str or int
7362 The identifier of the minority class.
7463
@@ -96,81 +85,25 @@ class SMOTEENN(SamplerMixin):
9685
9786 """
9887
99- def __init__ (self , ratio = 'auto' , random_state = None , verbose = True ,
88+ def __init__ (self , ratio = 'auto' , random_state = None ,
10089 k = 5 , m = 10 , out_step = 0.5 , kind_smote = 'regular' ,
10190 size_ngh = 3 , kind_enn = 'all' , n_jobs = - 1 , ** kwargs ):
10291
103- """Initialise the SMOTE ENN object.
104-
105- Parameters
106- ----------
107- ratio : str or float, optional (default='auto')
108- If 'auto', the ratio will be defined automatically to balance
109- the dataset. Otherwise, the ratio is defined as the
110- number of samples in the minority class over the the number of
111- samples in the majority class.
112-
113- random_state : int or None, optional (default=None)
114- Seed for random number generation.
115-
116- verbose : bool, optional (default=True)
117- Whether or not to print information about the processing.
118-
119- k : int, optional (default=5)
120- Number of nearest neighbours to used to construct synthetic
121- samples.
122-
123- m : int, optional (default=10)
124- Number of nearest neighbours to use to determine if a minority
125- sample is in danger.
126-
127- out_step : float, optional (default=0.5)
128- Step size when extrapolating.
129-
130- kind_smote : str, optional (default='regular')
131- The type of SMOTE algorithm to use one of the following
132- options: 'regular', 'borderline1', 'borderline2', 'svm'.
133-
134- size_ngh : int, optional (default=3)
135- Size of the neighbourhood to consider to compute the average
136- distance to the minority point samples.
137-
138- kind_sel : str, optional (default='all')
139- Strategy to use in order to exclude samples.
140-
141- - If 'all', all neighbours will have to agree with the samples of
142- interest to not be excluded.
143- - If 'mode', the majority vote of the neighbours will be used in
144- order to exclude a sample.
145-
146- n_jobs : int, optional (default=-1)
147- The number of threads to open if possible.
148-
149- Returns
150- -------
151- None
152-
153- """
154- super (SMOTEENN , self ).__init__ (ratio = ratio , random_state = random_state ,
155- verbose = verbose )
156-
92+ super (SMOTEENN , self ).__init__ (ratio = ratio )
93+ self .random_state = random_state
15794 self .k = k
15895 self .m = m
15996 self .out_step = out_step
16097 self .kind_smote = kind_smote
98+ self .size_ngh = size_ngh
99+ self .kind_enn = kind_enn
161100 self .n_jobs = n_jobs
162101 self .kwargs = kwargs
163-
164102 self .sm = SMOTE (ratio = self .ratio , random_state = self .random_state ,
165- verbose = self .verbose , k = self .k , m = self .m ,
166- out_step = self .out_step , kind = self .kind_smote ,
167- n_jobs = self .n_jobs , ** self .kwargs )
168-
169- self .size_ngh = size_ngh
170- self .kind_enn = kind_enn
171-
103+ k = self .k , m = self .m , out_step = self .out_step ,
104+ kind = self .kind_smote , n_jobs = self .n_jobs ,
105+ ** self .kwargs )
172106 self .enn = EditedNearestNeighbours (random_state = self .random_state ,
173- verbose = self .verbose ,
174107 size_ngh = self .size_ngh ,
175108 kind_sel = self .kind_enn ,
176109 n_jobs = self .n_jobs )
@@ -192,8 +125,6 @@ def fit(self, X, y):
192125 Return self.
193126
194127 """
195- # Check the consistency of X and y
196- X , y = check_X_y (X , y )
197128
198129 super (SMOTEENN , self ).fit (X , y )
199130
@@ -202,7 +133,7 @@ def fit(self, X, y):
202133
203134 return self
204135
205- def sample (self , X , y ):
136+ def _sample (self , X , y ):
206137 """Resample the dataset.
207138
208139 Parameters
@@ -222,10 +153,6 @@ def sample(self, X, y):
222153 The corresponding label of `X_resampled`
223154
224155 """
225- # Check the consistency of X and y
226- X , y = check_X_y (X , y )
227-
228- super (SMOTEENN , self ).sample (X , y )
229156
230157 # Transform using SMOTE
231158 X , y = self .sm .sample (X , y )
0 commit comments