
Commit a530733

Merge branch 'main' into add_requirements
2 parents: e867294 + 1afacdd

File tree: 6 files changed, +59 -11 lines


.gitignore

Lines changed: 5 additions & 1 deletion
@@ -3,7 +3,7 @@ __pycache__*
 
 # Visual Studio related files, e.g., ".vscode"
 .vs*
-
+*.code-workspace
 
 # Misc. files at repository root:
 # - default data cache directory
@@ -18,3 +18,7 @@ __pycache__*
 /*.sh
 # - archives with results or data
 /*.tgz
+
+# Configurations from file explorers
+.directory
+.DS_Store

configs/regular/logreg.json

Lines changed: 46 additions & 6 deletions
@@ -20,7 +20,18 @@
                         "n_classes": [2, 5],
                         "n_informative": "[SPECIAL_VALUE]0.6",
                         "class_sep": 1.0
-                    },
+                    }
+                ],
+                "split_kwargs": {
+                    "train_size": 0.05,
+                    "test_size": 0.95
+                }
+            }
+        },
+        {
+            "data": {
+                "source": "make_classification",
+                "generation_kwargs": [
                     {
                         "n_samples": 1000000,
                         "n_features": 500,
@@ -33,12 +44,41 @@
                     "train_size": 0.05,
                     "test_size": 0.95
                 }
-            }
+            },
+            "algorithm": {"estimator_params": {"C": 1e-6}}
         },
-        { "data": { "dataset": "mnist", "split_kwargs": { "train_size": 10000, "test_size": null } } },
-        { "data": { "dataset": ["susy", "hepmass"], "split_kwargs": { "train_size": 0.1, "test_size": null } } },
-        { "data": { "dataset": "cifar", "split_kwargs": { "train_size": 0.1, "test_size": null } } },
-        { "data": { "dataset": "gisette", "split_kwargs": { "train_size": 2000, "test_size": null } } }
+        {
+            "data": { "dataset": "mnist", "split_kwargs": { "train_size": 10000, "test_size": null } },
+            "algorithm": {"estimator_params": {"C": 1e-8}}
+        },
+        {
+            "data": {
+                "dataset": "susy",
+                "split_kwargs": { "train_size": 0.1, "test_size": null }
+            },
+            "algorithm": { "estimator_params": {"C": 1e-2} }
+        },
+        {
+            "data": {
+                "dataset": "hepmass",
+                "split_kwargs": { "train_size": 0.1, "test_size": null }
+            },
+            "algorithm": { "estimator_params": {"C": 1e-5} }
+        },
+        {
+            "data": {
+                "dataset": "cifar",
+                "split_kwargs": { "train_size": 0.1, "test_size": null }
+            },
+            "algorithm": { "estimator_params": {"C": 1e-9} }
+        },
+        {
+            "data": {
+                "dataset": "gisette",
+                "split_kwargs": { "train_size": 2000, "test_size": null }
+            },
+            "algorithm": { "estimator_params": {"C": 1e1} }
+        }
     ]
 },
 "TEMPLATES": {

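A note on the new "algorithm" entries: in scikit-learn's LogisticRegression, C is the inverse of regularization strength, so the values above span very strong regularization (C=1e-9 for cifar) to very weak (C=1e1 for gisette). A minimal sketch of what one such entry amounts to; the toy data and the way the dict is unpacked are illustrative, not the benchmark's actual plumbing:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    # Illustrative stand-in for one "estimator_params" entry above.
    estimator_params = {"C": 1e-6}  # inverse of regularization strength

    x, y = make_classification(n_samples=1000, n_features=20, random_state=0)
    clf = LogisticRegression(**estimator_params).fit(x, y)

    # Small C means a heavy L2 penalty: coefficients shrink toward zero.
    print(abs(clf.coef_).max())
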
envs/conda-env-sklearn.yml

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ channels:
   - nodefaults
 dependencies:
   # additional frameworks
-  - xgboost
+  - xgboost ==3.0.5
   - catboost
   - lightgbm
   - faiss-cpu

envs/requirements-sklearn.txt

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # additional frameworks
-xgboost
+xgboost==3.0.5
 catboost
 lightgbm
 faiss-cpu
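
Both environment files now pin the same xgboost build. A quick, optional sanity check that an activated environment actually resolved the pin (assumes xgboost is importable):

    import xgboost

    # Fails loudly if the environment picked up a different xgboost build.
    assert xgboost.__version__ == "3.0.5", xgboost.__version__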

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
 
 [tool.black]
 line-length = 90
-target-version = ['py39', 'py310', 'py311', 'py312']
+target-version = ['py39', 'py310', 'py311', 'py312', 'py313']
 extend-ignore = 'E203'
 
 [tool.isort]
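
black reads the [tool.black] table automatically when invoked from the repository root, so Python 3.13 is now among the formatting targets without extra flags. A hedged sketch of the equivalent programmatic call; black.Mode and black.TargetVersion.PY313 are assumed available, which requires a black release recent enough to know Python 3.13:

    import black

    # Mirror the pyproject settings: 90-column lines, 3.13 among the targets.
    mode = black.Mode(target_versions={black.TargetVersion.PY313}, line_length=90)
    print(black.format_str("x = {  'a':1 }", mode=mode))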

sklbench/datasets/loaders.py

Lines changed: 5 additions & 1 deletion
@@ -30,6 +30,7 @@
     make_moons,
     make_regression,
 )
+from sklearn.preprocessing import StandardScaler
 
 from .common import cache, load_data_description, load_data_from_cache, preprocess
 from .downloaders import download_and_read_csv, load_openml, retrieve
@@ -198,7 +199,7 @@ def load_hepmass(
     data = pd.concat([train_data, test_data])
     label = data.columns[0]
     y = data[label]
-    x = data.drop(columns=[label])
+    x = data.drop(columns=[label, "mass"])
 
     data_desc = {
         "n_classes": 2,
@@ -418,6 +419,8 @@ def convert_y(y, n_samples):
     x = np.vstack([x_train, x_test])
     y = np.hstack([y_train, y_test])
 
+    x = StandardScaler(with_mean=True, with_std=True).fit_transform(x)
+
     data_desc = {
         "n_classes": 2,
         "default_split": {
@@ -555,6 +558,7 @@ def load_cifar(
     Classification task. n_classes = 10.
     """
     x, y = load_openml(40927, raw_data_cache)
+    x = StandardScaler(with_mean=True, with_std=False).fit_transform(x)
     binary = dataset_params.get("binary", False)
     if binary:
         y = (y > 0).astype(int)
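
The two StandardScaler calls added above are deliberately different: with_std=True centers each feature and scales it to unit variance, while with_std=False (the load_cifar change) only subtracts the per-feature mean and leaves the spread untouched. A minimal illustration on toy data:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    x = np.array([[1.0, 10.0], [3.0, 30.0], [5.0, 50.0]])

    # Center and scale to unit variance (the first new call above).
    x_scaled = StandardScaler(with_mean=True, with_std=True).fit_transform(x)

    # Center only; the original spread is preserved (the load_cifar call).
    x_centered = StandardScaler(with_mean=True, with_std=False).fit_transform(x)

    print(x_scaled.std(axis=0))     # -> [1. 1.]
    print(x_centered.mean(axis=0))  # -> [0. 0.]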
