Skip to content

Commit 1dad889

Browse files
sklearn tests and tags rework (#7)
* actually change python version
* dummy classifiers and sklearn lower bound change
* test fix
* test fix
* dev
* early sklearn version fixes
* all interval classifiers
* dummy and conversion bugfix
* version
* test
* testing update
* version
* pandas
* stop all workflows failing
* copy check
1 parent 3542cc7 commit 1dad889

File tree

17 files changed

+681
-1509
lines changed

17 files changed

+681
-1509
lines changed

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ jobs:
3030
runs-on: ${{ matrix.os }}
3131

3232
strategy:
33+
fail-fast: false
3334
matrix:
3435
os: [ ubuntu-latest, macOS-latest, windows-latest ]
3536
python-version: [ '3.8', '3.9', '3.10' ]

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "tsml"
7-
version = "0.0.6"
7+
version = "0.0.7"
88
description = "A toolkit for time series machine learning algorithms."
99
authors = [
1010
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
@@ -38,6 +38,7 @@ dependencies = [
3838
"numba>=0.55",
3939
"numpy>=1.21.0",
4040
"scikit-learn>=1.0.2",
41+
"pandas",
4142
]
4243

4344
[project.optional-dependencies]

tsml/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# -*- coding: utf-8 -*-
22
"""tsml."""
33

4-
__version__ = "0.0.6"
4+
__version__ = "0.0.7"

tsml/dummy/_dummy.py

Lines changed: 59 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from sklearn.dummy import DummyRegressor as SklearnDummyRegressor
1111
from sklearn.utils import check_random_state
1212
from sklearn.utils.multiclass import check_classification_targets
13-
from sklearn.utils.validation import check_is_fitted
13+
from sklearn.utils.validation import _num_samples, check_is_fitted
1414

1515
from tsml.base import BaseTimeSeriesEstimator
1616

@@ -85,65 +85,77 @@ class prior probabilities.
8585
0.5
8686
"""
8787

88-
def __init__(self, strategy="prior", random_state=None, constant=None):
88+
def __init__(
89+
self, strategy="prior", validate=False, random_state=None, constant=None
90+
):
8991
self.strategy = strategy
92+
self.validate = validate
9093
self.random_state = random_state
9194
self.constant = constant
9295

9396
super(DummyClassifier, self).__init__()
9497

9598
def fit(self, X, y):
9699
""""""
97-
X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
100+
if self.validate:
101+
X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
98102

99-
check_classification_targets(y)
103+
check_classification_targets(y)
100104

101-
self.classes_ = np.unique(y)
102-
self.n_classes_ = self.classes_.shape[0]
103-
self.class_dictionary_ = {}
104-
for index, classVal in enumerate(self.classes_):
105-
self.class_dictionary_[classVal] = index
105+
self.classes_ = np.unique(np.asarray(y))
106106

107-
if self.n_classes_ == 1:
108-
return self
107+
if self.validate:
108+
self.n_classes_ = self.classes_.shape[0]
109+
self.class_dictionary_ = {}
110+
for index, classVal in enumerate(self.classes_):
111+
self.class_dictionary_[classVal] = index
109112

110-
self._clf = SklearnDummyClassifier(
113+
if self.n_classes_ == 1:
114+
return self
115+
116+
self.clf_ = SklearnDummyClassifier(
111117
strategy=self.strategy,
112118
random_state=self.random_state,
113119
constant=self.constant,
114120
)
115-
self._clf.fit(None, y)
121+
self.clf_.fit(None, y)
116122

117123
return self
118124

119125
def predict(self, X) -> np.ndarray:
120126
""""""
121127
check_is_fitted(self)
122128

123-
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
129+
if self.validate:
130+
# treat case of single class seen in fit
131+
if self.n_classes_ == 1:
132+
return np.repeat(
133+
list(self.class_dictionary_.keys()), X.shape[0], axis=0
134+
)
124135

125-
# treat case of single class seen in fit
126-
if self.n_classes_ == 1:
127-
return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0)
136+
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
128137

129-
return self._clf.predict(np.zeros(X.shape))
138+
return self.clf_.predict(np.zeros((_num_samples(X), 2)))
130139

131140
def predict_proba(self, X) -> np.ndarray:
132141
""""""
133142
check_is_fitted(self)
134143

135-
# treat case of single class seen in fit
136-
if self.n_classes_ == 1:
137-
return np.repeat([[1]], X.shape[0], axis=0)
144+
if self.validate:
145+
# treat case of single class seen in fit
146+
if self.n_classes_ == 1:
147+
return np.repeat([[1]], X.shape[0], axis=0)
138148

139-
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
149+
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
140150

141-
return self._clf.predict_proba(np.zeros(X.shape))
151+
return self.clf_.predict_proba(np.zeros((_num_samples(X), 2)))
142152

143153
def _more_tags(self):
144154
return {
145155
"X_types": ["3darray", "2darray", "np_list"],
146156
"equal_length_only": False,
157+
"no_validation": not self.validate,
158+
"allow_nan": True,
147159
}
148160

149161

@@ -199,36 +211,41 @@ class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator):
199211
-0.07184048625633688
200212
"""
201213

202-
def __init__(self, strategy="mean", constant=None, quantile=None):
214+
def __init__(self, strategy="mean", validate=False, constant=None, quantile=None):
203215
self.strategy = strategy
216+
self.validate = validate
204217
self.constant = constant
205218
self.quantile = quantile
206219

207220
super(DummyRegressor, self).__init__()
208221

209222
def fit(self, X, y):
210223
""""""
211-
_, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
224+
if self.validate:
225+
_, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
212226

213-
self._reg = SklearnDummyRegressor(
227+
self.reg_ = SklearnDummyRegressor(
214228
strategy=self.strategy, constant=self.constant, quantile=self.quantile
215229
)
216-
self._reg.fit(None, y)
230+
self.reg_.fit(None, y)
217231

218232
return self
219233

220234
def predict(self, X):
221235
""""""
222236
check_is_fitted(self)
223237

224-
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
238+
if self.validate:
239+
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
225240

226-
return self._reg.predict(np.zeros(X.shape))
241+
return self.reg_.predict(np.zeros((_num_samples(X), 2)))
227242

228243
def _more_tags(self):
229244
return {
230245
"X_types": ["3darray", "2darray", "np_list"],
231246
"equal_length_only": False,
247+
"no_validation": not self.validate,
248+
"allow_nan": True,
232249
}
233250

234251

@@ -257,16 +274,20 @@ class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator):
257274
0.2087729039422543
258275
"""
259276

260-
def __init__(self, strategy="single", n_clusters=2, random_state=None):
277+
def __init__(
278+
self, strategy="single", validate=False, n_clusters=2, random_state=None
279+
):
261280
self.strategy = strategy
281+
self.validate = validate
262282
self.n_clusters = n_clusters
263283
self.random_state = random_state
264284

265285
super(DummyClusterer, self).__init__()
266286

267287
def fit(self, X, y=None):
268288
""""""
269-
X = self._validate_data(X=X, ensure_min_series_length=1)
289+
if self.validate:
290+
X = self._validate_data(X=X, ensure_min_series_length=1)
270291

271292
if self.strategy == "single":
272293
self.labels_ = np.zeros(len(X), dtype=np.int32)
@@ -284,20 +305,23 @@ def predict(self, X):
284305
""""""
285306
check_is_fitted(self)
286307

287-
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
308+
if self.validate:
309+
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
288310

289311
if self.strategy == "single":
290-
return np.zeros(len(X), dtype=np.int32)
312+
return np.zeros(_num_samples(X), dtype=np.int32)
291313
elif self.strategy == "unique":
292-
return np.arange(len(X), dtype=np.int32)
314+
return np.arange(_num_samples(X), dtype=np.int32)
293315
elif self.strategy == "random":
294316
rng = check_random_state(self.random_state)
295-
return rng.randint(self.n_clusters, size=len(X), dtype=np.int32)
317+
return rng.randint(self.n_clusters, size=_num_samples(X), dtype=np.int32)
296318
else:
297319
raise ValueError(f"Unknown strategy {self.strategy}")
298320

299321
def _more_tags(self):
300322
return {
301323
"X_types": ["3darray", "2darray", "np_list"],
302324
"equal_length_only": False,
325+
"no_validation": not self.validate,
326+
"allow_nan": True,
303327
}

tsml/interval_based/_base.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ def fit(self, X, y):
214214
X, y = self._validate_data(X=X, y=y, ensure_min_samples=2)
215215
X = self._convert_X(X)
216216

217+
rng = check_random_state(self.random_state)
218+
217219
self.n_instances_, self.n_dims_, self.series_length_ = X.shape
218220
if is_classifier(self):
219221
check_classification_targets(y)
@@ -260,9 +262,7 @@ def fit(self, X, y):
260262
self._series_transformers = [None]
261263
# clone series_transformers if it is a transformer and transform the input data
262264
elif is_transformer(self.series_transformers):
263-
t = _clone_estimator(
264-
self.series_transformers, random_state=self.random_state
265-
)
265+
t = _clone_estimator(self.series_transformers, random_state=rng)
266266
Xt = [t.fit_transform(X, y)]
267267
self._series_transformers = [t]
268268
# clone each series_transformers transformer and include the base series if None
@@ -276,7 +276,7 @@ def fit(self, X, y):
276276
Xt.append(X)
277277
self._series_transformers.append(None)
278278
elif is_transformer(transformer):
279-
t = _clone_estimator(transformer, random_state=self.random_state)
279+
t = _clone_estimator(transformer, random_state=rng)
280280
Xt.append(t.fit_transform(X, y))
281281
self._series_transformers.append(t)
282282
else:
@@ -458,7 +458,8 @@ def fit(self, X, y):
458458
# single transformer or function for all series_transformers
459459
if is_transformer(self.interval_features):
460460
self._interval_transformer = [True] * len(Xt)
461-
self._interval_features = [[self.interval_features]] * len(Xt)
461+
transformer = _clone_estimator(self.interval_features, random_state=rng)
462+
self._interval_features = [[transformer]] * len(Xt)
462463
elif callable(self.interval_features):
463464
self._interval_function = [True] * len(Xt)
464465
self._interval_features = [[self.interval_features]] * len(Xt)
@@ -491,6 +492,7 @@ def fit(self, X, y):
491492
for method in feature:
492493
if is_transformer(method):
493494
self._interval_transformer[i] = True
495+
feature = _clone_estimator(feature, random_state=rng)
494496
elif callable(method):
495497
self._interval_function[i] = True
496498
else:
@@ -503,6 +505,7 @@ def fit(self, X, y):
503505
self._interval_features.append(feature)
504506
elif is_transformer(feature):
505507
self._interval_transformer[i] = True
508+
feature = _clone_estimator(feature, random_state=rng)
506509
self._interval_features.append([feature])
507510
elif callable(feature):
508511
self._interval_function[i] = True
@@ -1030,4 +1033,3 @@ def _predict_for_estimator(self, Xt, estimator, intervals, predict_proba=False):
10301033
return estimator.predict_proba(interval_features)
10311034
else:
10321035
return estimator.predict(interval_features)
1033-

tsml/interval_based/_interval_pipelines.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ def fit(self, X, y):
9999
X, y = self._validate_data(
100100
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3
101101
)
102+
X = self._convert_X(X)
102103

103104
self.n_instances_, self.n_dims_, self.series_length_ = X.shape
104105
self.classes_ = np.unique(y)
@@ -152,6 +153,7 @@ def predict(self, X) -> np.ndarray:
152153
check_is_fitted(self)
153154

154155
X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
156+
X = self._convert_X(X)
155157

156158
return self._estimator.predict(self._transformer.transform(X))
157159

@@ -171,6 +173,7 @@ def predict_proba(self, X) -> np.ndarray:
171173
check_is_fitted(self)
172174

173175
X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
176+
X = self._convert_X(X)
174177

175178
m = getattr(self._estimator, "predict_proba", None)
176179
if callable(m):
@@ -311,6 +314,7 @@ def fit(self, X, y):
311314
X, y = self._validate_data(
312315
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3
313316
)
317+
X = self._convert_X(X)
314318

315319
self.n_instances_, self.n_dims_, self.series_length_ = X.shape
316320

@@ -359,6 +363,7 @@ def predict(self, X) -> np.ndarray:
359363
check_is_fitted(self)
360364

361365
X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
366+
X = self._convert_X(X)
362367

363368
return self._estimator.predict(self._transformer.transform(X))
364369

@@ -489,6 +494,7 @@ def fit(self, X, y):
489494
X, y = self._validate_data(
490495
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=7
491496
)
497+
X = self._convert_X(X)
492498

493499
self.n_instances_, self.n_dims_, self.series_length_ = X.shape
494500
self.classes_ = np.unique(y)
@@ -542,6 +548,7 @@ def predict(self, X) -> np.ndarray:
542548
check_is_fitted(self)
543549

544550
X = self._validate_data(X=X, reset=False, ensure_min_series_length=7)
551+
X = self._convert_X(X)
545552

546553
return self._estimator.predict(self._transformer.transform(X))
547554

@@ -561,6 +568,7 @@ def predict_proba(self, X) -> np.ndarray:
561568
check_is_fitted(self)
562569

563570
X = self._validate_data(X=X, reset=False, ensure_min_series_length=7)
571+
X = self._convert_X(X)
564572

565573
m = getattr(self._estimator, "predict_proba", None)
566574
if callable(m):

0 commit comments

Comments
 (0)