Skip to content

Commit 3645114

Browse files
committed
Updated benchmark plot
1 parent a1e8227 commit 3645114

File tree

4 files changed

+75
-1
lines changed

4 files changed

+75
-1
lines changed

docs/src/benchmark_image.png

-75.1 KB
Loading

docs/src/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ ________________________________________________________________________________
198198
|19.955 |2.758 |0.000166957 |0.000009206 |ParallelKMeans Lloyd |Julia |full scan |
199199
|11.234 |1.654 |0.000109074 |0.000012819 |ParallelKMeans Hamerly |Julia |full scan |
200200
|19.394 |1.436 |0.000109262 |0.000013726 |ParallelKMeans Elkan |Julia |full scan |
201-
|14.080 |0.000972914 |0.000095325 |0.000009802 |ParallelKMeans YingYang |Julia |stochastic|
201+
|14.080 |0.000972914 |0.000095325 |0.000009802 |ParallelKMeans YingYang |Julia |full scan |
202202

203203
_________________________________________________________________________________________________________
204204

extras/benchmark_plot.jl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
using Plots
2+
using CSV
3+
using DataFrames
4+
using Chain
5+
6+
7+
# Load the benchmark timings; the path is relative to the current working directory.
data = CSV.read("extras/updated_benchmarks_may_1.csv", DataFrame)

# Kept for reference: stack the table, then unstack it by package and write it out as CSV.
#data2 = stack(data, 1:4)
#unstack(data2, :variable, :package, :value) |> CSV.write("extras/wide.csv")

# Reshape to long format: the first four columns are melted into `variable` / `value`.
long_data = @chain data begin
    # Header clean-up in one pass: spaces become underscores, then the
    # "_sample_(secs)" suffix is dropped ("1 million sample (secs)" -> "1_million").
    rename(header -> replace(replace(header, " " => "_"), "_sample_(secs)" => ""), _)
    # Drop every row whose `process` equals "stochastic".
    filter(row -> row.process != "stochastic", _)
    stack(_, 1:4)
end
18+
19+
# Map each package name to an integer (1, 2, ...) in order of first appearance in
# `long_data.package`; the integer is later passed as that package's `color`.
color_map = Dict{String, Int}(
    pkg => slot for (slot, pkg) in enumerate(unique(long_data.package))
)
24+
25+
"""
    assign_linestyle(x)

Return `:solid` when `x` contains the substring "ParallelKMeans", otherwise `:dashdot`.
"""
function assign_linestyle(x)
    if occursin("ParallelKMeans", x)
        return :solid
    end
    return :dashdot
end
26+
27+
"""
    assign_rank(x)

Return the ordering position of the sample-size label `x`:
`"1k"` -> 1, `"10k"` -> 2, `"100k"` -> 3, `"1_million"` -> 4.

# Throws
- `ArgumentError` if `x` is not one of the four known labels. Failing here, with the
  offending label in the message, replaces a silent `nothing` result.
"""
function assign_rank(x)
    # Labels listed smallest to largest; the position in this tuple is the rank.
    ordered_labels = ("1k", "10k", "100k", "1_million")
    position = findfirst(==(x), ordered_labels)
    if position === nothing
        throw(ArgumentError("unknown sample-size label: $(repr(x))"))
    end
    return position
end
38+
39+
# Derived columns used when plotting: a line style computed from each package name and
# a numeric rank computed from each sample-size label (used to order rows per package).
long_data[:, :linestyle] = assign_linestyle.(long_data[!, :package]);
long_data[:, :rank] = assign_rank.(long_data[!, :variable]);
41+
42+
43+
# Empty canvas with a logarithmic y axis; the per-package series are added further down.
plt = plot(;
    title="Elbow Method Benchmark Results",
    xlabel="Sample Sizes",
    ylabel="Execution Time (in seconds - logged)",
    yaxis=:log,
    yrotation=30,
    palette=:seaborn_deep,
    legend=:topleft,
    size=(1000, 700),
)
51+
52+
# Add one line per package, plus scatter markers at each measured sample size.
for pkg in unique(long_data.package)
    # Rows for this package only, in ascending `rank` order.
    series = sort(filter(:package => ==(pkg), long_data), :rank)
    shade = color_map[pkg]

    plot!(
        plt,
        series.variable,
        series.value;
        label=pkg,
        color=shade,
        linestyle=series.linestyle,
        lw=3,
    )
    # Markers reuse the line's colour; the empty label adds no separate legend text.
    scatter!(plt, series.variable, series.value; color=shade, markersize=4, label="")
end

display(plt)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
1 million sample (secs),100k sample (secs),10k sample (secs),1k sample (secs),package,language,process
2+
282.7,15.27,0.7324,0.01682,Knor,R,full scan
3+
854,87,6.11,0.000719,Sklearn KMeans,Python,full scan
4+
11.2,1.41,0.000317,0.000141,Sklearn MiniBatch Kmeans,Python,stochastic
5+
254.481,18.517,0.000794956,0.000031211,Mlpack ,C++ Wrapper,full scan
6+
653.178,45.468,0.000824115,0.000017301,Clustering.jl,Julia,full scan
7+
19.955,2.758,0.000166957,0.000009206,ParallelKMeans Lloyd,Julia,full scan
8+
11.234,1.654,0.000109074,0.000012819,ParallelKMeans Hamerly,Julia,full scan
9+
19.394,1.436,0.000109262,0.000013726,ParallelKMeans Elkan,Julia,full scan
10+
14.08,0.000972914,0.000095325,0.000009802,ParallelKMeans YingYang,Julia,full scan

0 commit comments

Comments
 (0)