|
1 | 1 | # TODO 1: a using MLJModelInterface or import MLJModelInterface statement |
| 2 | +# Expose all instances of user-specified structs and package artifacts. |
| 3 | +const ParallelKMeans_Desc = "Parallel & lightning fast implementation of all variants of the KMeans clustering algorithm in native Julia." |
| 4 | + |
| 5 | +# available variants for reference |
# Lookup table from an algorithm symbol to an instance of that algorithm variant.
# `clean!` validates the `algo` hyperparameter against the keys of this table.
const MLJDICT = Dict(
    :Lloyd => Lloyd(),
    :Hamerly => Hamerly(),
    :LightElkan => LightElkan()
)
2 | 9 |
|
3 | 10 | #### |
4 | 11 | #### MODEL DEFINITION |
5 | 12 | #### |
6 | 13 | # TODO 2: MLJ-compatible model types and constructors |
7 | | -@mlj_model mutable struct KMeans <: MLJModelInterface.Unsupervised |
8 | | - # Hyperparameters of the model |
9 | | - algo::Symbol = :Lloyd::(_ in (:Lloyd, :Hamerly, :LightElkan)) |
10 | | - k_init::String = "k-means++"::(_ in ("k-means++", String)) # allow user seeding? |
11 | | - k::Int = 3::(_ > 0) |
12 | | - tol::Float64 = 1e-6::(_ < 1) |
13 | | - max_iters::Int = 300::(_ > 0) |
14 | | - copy::Bool = true |
15 | | - threads::Int = Threads.nthreads()::(_ > 0) |
16 | | - verbosity::Int = 0::(_ in (0, 1)) # Temp fix. Do we need to follow mlj verbosity style? |
17 | | - init = nothing |
| 14 | + |
# MLJ unsupervised model type holding the hyperparameters of the KMeans clusterer.
# It is mutable because `MLJModelInterface.clean!` resets invalid fields in place.
mutable struct KMeans <: MLJModelInterface.Unsupervised
    algo::Symbol        # algorithm variant; must be one of the keys of `MLJDICT`
    k_init::String      # seeding method name; "k-means++" by default, `clean!` falls back to "random"
    k::Int              # number of clusters; must be greater than 0
    tol::Float64        # tolerance level; must be less than 1
    max_iters::Int      # number of permitted iterations; must be greater than 0
    copy::Bool          # when false, `fit` transposes the input table without copying it
    threads::Int        # number of threads; must be at least 1
    verbosity::Int      # 0 (no info) or 1 (info requested)
    init                # untyped; presumably user-supplied initial centroids (`nothing` by default) — TODO confirm
end
19 | 26 |
|
20 | 27 |
|
21 | | -# Expose all instances of user specified structs and package artifcats. |
22 | | -const ParallelKMeans_Desc = "Parallel & lightning fast implementation of all variants of the KMeans clustering algorithm in native Julia." |
"""
    KMeans(; algo=:Lloyd, k_init="k-means++", k=3, tol=1e-6, max_iters=300,
             copy=true, threads=Threads.nthreads(), verbosity=0, init=nothing)

Build a `KMeans` model from keyword hyperparameters.

The freshly built model is passed through `MLJModelInterface.clean!`; if that returns a
non-empty message it is emitted as a warning. The (possibly corrected) model is returned.
"""
function KMeans(;
    algo=:Lloyd,
    k_init="k-means++",
    k=3,
    tol=1e-6,
    max_iters=300,
    copy=true,
    threads=Threads.nthreads(),
    verbosity=0,
    init=nothing
)
    # Positional order must match the field order of the struct.
    kmeans = KMeans(algo, k_init, k, tol, max_iters, copy, threads, verbosity, init)

    notice = MLJModelInterface.clean!(kmeans)
    if !isempty(notice)
        @warn notice
    end

    return kmeans
end
| 37 | + |
| 38 | + |
"""
    MLJModelInterface.clean!(m::KMeans)

Validate the hyperparameters of `m`, resetting every invalid one in place to its default.

Each hyperparameter is checked independently, so several invalid values are all repaired
in a single call. Returns a `String` containing one message per repaired hyperparameter
(separated by spaces), or an empty string when nothing had to be changed.
"""
function MLJModelInterface.clean!(m::KMeans)
    warnings = String[]

    if !(m.algo ∈ keys(MLJDICT))
        push!(warnings, "Unsupported algorithm supplied. Defaulting to the Lloyd algorithm.")
        m.algo = :Lloyd
    end

    # "random" is itself a supported seeding choice, so it must not trigger a reset.
    if !(m.k_init ∈ ("k-means++", "random"))
        push!(warnings, "Only `k-means++` or random seeding algorithms are supported. Defaulting to random seeding.")
        m.k_init = "random"
    end

    if m.k < 1
        push!(warnings, "Number of clusters must be greater than 0. Defaulting to 3 clusters.")
        m.k = 3
    end

    # Written as a negation so that a NaN tolerance is also rejected.
    if !(m.tol < 1.0)
        push!(warnings, "Tolerance level must be less than 1. Defaulting to tol of 1e-6.")
        m.tol = 1e-6
    end

    if !(m.max_iters > 0)
        push!(warnings, "Number of permitted iterations must be greater than 0. Defaulting to 300 iterations.")
        m.max_iters = 300
    end

    if !(m.threads > 0)
        push!(warnings, "Number of threads must be at least 1. Defaulting to all threads available.")
        m.threads = Threads.nthreads()
    end

    if !(m.verbosity ∈ (0, 1))
        push!(warnings, "Verbosity must be either 0 (no info) or 1 (info requested). Defaulting to 0.")
        m.verbosity = 0
    end

    return join(warnings, " ")
end
23 | 72 |
|
24 | | -# availalbe variants for reference |
25 | | -const MLJDICT = Dict(:Lloyd => Lloyd(), |
26 | | - :Hamerly => Hamerly(), |
27 | | - :LightElkan => LightElkan()) |
28 | 73 |
|
29 | 74 | # TODO 3: implementation of fit, predict, and fitted_params of the model |
30 | 75 | #### |
31 | 76 | #### FIT FUNCTION |
32 | 77 | #### |
33 | 78 | """ |
34 | 79 | TODO 3.1: Docs |
| 80 | + # fit the specified struct as a ParaKMeans model |
35 | 81 |
|
36 | 82 | See also the [package documentation](https://pydatablog.github.io/ParallelKMeans.jl/stable). |
37 | 83 | """ |
38 | 84 | function MLJModelInterface.fit(m::KMeans, X) |
39 | | - # fit the specified struct as a ParaKMeans model |
40 | | - |
41 | 85 | # convert tabular input data into the matrix model expects. Column assumed as features so input data is permuted |
42 | 86 | if !m.copy |
43 | 87 | # transpose input table without copying and pass to model |
@@ -123,4 +167,4 @@ metadata_model(KMeans, |
123 | 167 | output = MLJModelInterface.Table(MLJModelInterface.Count), |
124 | 168 | weights = false, |
125 | 169 | descr = ParallelKMeans_Desc, |
126 | | - path = "ParallelKMeans.src.mlj_interface.KMeans") |
| 170 | + path = "ParallelKMeans.KMeans") |
0 commit comments