Skip to content

Commit aa229b8

Browse files
committed
add julia app support
1 parent b4014b4 commit aa229b8

File tree

4 files changed

+171
-3
lines changed

4 files changed

+171
-3
lines changed

AutoAI/Project.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
name = "AutoAI"
22
uuid = "78f36755-6713-4987-acdd-2a78ef0fc2e4"
3-
authors = ["Paulito Palmes, PhD <ppalmes@gmail.com>"]
43
version = "0.1.0"
4+
authors = ["Paulito Palmes, PhD <ppalmes@gmail.com>"]
55

66
[deps]
77
AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6"
8+
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
89
AutoMLPipeline = "08437348-eef5-4817-bc1b-d4e9459680d6"
910
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
1011
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
@@ -23,6 +24,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
2324

2425
[compat]
2526
AMLPipelineBase = "0.1.18"
27+
ArgParse = "1.2.0"
2628
AutoMLPipeline = "0.4.6"
2729
CSV = "0.10.15"
2830
CondaPkg = "0.2.29"
@@ -39,6 +41,8 @@ Statistics = "1.11.1"
3941
StatsBase = "0.34.5"
4042
Test = "1.11.0"
4143

44+
[apps.autoai]
45+
4246
[extras]
4347
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
4448
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"

AutoAI/src/AutoAI.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ using AMLPipelineBase: AbsTypes, Utils
1313

1414
export get_iris
1515

16+
include("main.jl")
1617

1718
function get_iris()
1819
iris = CSV.read(joinpath(Base.@__DIR__, "../../data", "iris.csv"), DataFrame)

AutoAI/src/main.jl

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
using Distributed
2+
using ArgParse
3+
using CSV
4+
using DataFrames: DataFrame
5+
using AutoAI
6+
using Statistics
7+
8+
9+
"""
    parse_commandline(args=ARGS)

Parse the command-line arguments of the `autoai` app and return them as a
`Dict` keyed by `Symbol` (`as_symbols=true`).

`args` is the vector of argument strings to parse; it defaults to the global
`ARGS`, which keeps the zero-argument call working as before.

# Recognized arguments
- `--url`, `-u`: mlflow server url (default `"http://localhost:8080"`).
- `--prediction_type`, `-t`: prediction type (default `"classification"`).
- `--complexity`, `-c`: pipeline complexity (default `"low"`).
- `--output_file`, `-o`: output location; `"NONE"` (default) is the sentinel
  the callers in this file treat as "no output file".
- `--nfolds`, `-f`: number of cross-validation folds (default `3`).
- `--nworkers`, `-w`: number of workers (default `5`).
- `--no_save`: boolean flag.
  NOTE(review): `:no_save` is parsed but not read by the code visible here.
- `--predict_only`, `-p`: boolean flag, skip training and only predict.
- `--runid`, `-r`: run id of a previously trained model (default `"NONE"`).
- `csvfile`: required positional argument, the input csv file.
"""
function parse_commandline(args=ARGS)
    s = ArgParseSettings()
    @add_arg_table! s begin
        "--url", "-u"
            help = "mlflow server url"
            arg_type = String
            default = "http://localhost:8080"
        "--prediction_type", "-t"
            help = "classification, regression, anomalydetection"
            arg_type = String
            default = "classification"
        "--complexity", "-c"
            help = "pipeline complexity"
            arg_type = String
            default = "low"
        "--output_file", "-o"
            help = "output location"
            arg_type = String
            default = "NONE"
        "--nfolds", "-f"
            help = "number of crossvalidation folds"
            arg_type = Int64
            default = 3
        "--nworkers", "-w"
            help = "number of workers"
            arg_type = Int64
            default = 5
        "--no_save"
            # The flag is named `no_save`; the help used to read "save model".
            help = "do not save model"
            action = :store_true
        "--predict_only", "-p"
            help = "no training, predict only"
            action = :store_true
        "--runid", "-r"
            help = "runid of experiment for trained model"
            arg_type = String
            default = "NONE"
        "csvfile"
            help = "input csv file"
            required = true
    end
    return parse_args(args, s; as_symbols=true)
end
52+
53+
54+
"""
    autoclassmode(args::Dict)

Build an `AutoMLFlowClassification` workflow from the parsed command-line
options in `args` and run `fit_transform!` on the csv file named by
`args[:csvfile]`.

Every column except the last is used as the feature table and the last column
as the target. The fraction of outputs equal to the target is printed to
standard output as `accuracy`, and the workflow object is returned.
"""
function autoclassmode(args::Dict)
    # Options forwarded to the workflow under :impl_args.
    settings = Dict(
        pairs((;
            complexity=args[:complexity],
            nfolds=args[:nfolds],
            nworkers=args[:nworkers],
            prediction_type=args[:prediction_type],
        )),
    )

    table = CSV.read(args[:csvfile], DataFrame)
    features = table[:, 1:end-1]
    target = collect(table[:, end])

    learner = AutoMLFlowClassification(
        Dict(:url => args[:url], :impl_args => settings),
    )
    predicted = fit_transform!(learner, features, target)
    println("accuracy = ", mean(target .== predicted))
    return learner
end
70+
71+
"""
    autoregmode(args::Dict)

Build an `AutoMLFlowRegression` workflow from the parsed command-line options
in `args` and run `fit_transform!` on the csv file named by `args[:csvfile]`.

Every column except the last is used as the feature table and the last column
as the target. The mean of the squared differences between the target and the
outputs is printed to standard output as `mse`, and the workflow object is
returned.
"""
function autoregmode(args::Dict)
    # Options forwarded to the workflow under :impl_args.
    settings = Dict(
        pairs((;
            complexity=args[:complexity],
            nfolds=args[:nfolds],
            nworkers=args[:nworkers],
            prediction_type=args[:prediction_type],
        )),
    )

    table = CSV.read(args[:csvfile], DataFrame)
    features = table[:, 1:end-1]
    target = collect(table[:, end])

    learner = AutoMLFlowRegression(
        Dict(:url => args[:url], :impl_args => settings),
    )
    predicted = fit_transform!(learner, features, target)
    squared_error = (target - predicted) .^ 2
    println("mse = ", mean(squared_error))
    return learner
end
87+
88+
"""
    doprediction_only(args::Dict)

Predict without training: read the csv file named by `args[:csvfile]`, build a
workflow object from `args[:runid]` and `args[:url]` for the requested
`args[:prediction_type]`, and call `transform!` on the whole table.

The result is printed to standard output. When `args[:output_file]` is not the
sentinel `"NONE"`, the same line is also written to that file (the file is
opened in `"w"` mode, so existing content is replaced). The prediction is
returned.

Throws an `ErrorException` when the prediction type is neither
`"classification"` nor `"regression"`.
"""
function doprediction_only(args::Dict)
    fname = args[:csvfile]
    X = CSV.read(fname, DataFrame)
    run_id = args[:runid]
    url = args[:url]
    predtype = args[:prediction_type]
    mlf = if predtype == "classification"
        AutoMLFlowClassification(Dict(:run_id => run_id, :url => url))
    elseif predtype == "regression"
        AutoMLFlowRegression(Dict(:run_id => run_id, :url => url))
    else
        error("unknown predtype option")
    end
    Yn = transform!(mlf, X)
    ofile = args[:output_file]
    # Write the file first so a failing `open` aborts before anything is
    # printed, exactly as before.
    if ofile != "NONE"
        open(ofile, "w") do stfile
            println(stfile, "prediction: $Yn")
        end
    end
    println(stdout, "prediction: $Yn")
    return Yn
end
114+
115+
"""
    printsummary(io::IO, automl::Workflow)

Write a three-line summary of the workflow `automl` to `io`:

- `pipelines`: the `Description` field of `model[:dfpipelines]`,
- `best_pipeline`: the `:description` entry of the best pipeline's model,
- `best_pipeline_performance`: the first `mean` and `sd` entries of
  `model[:performance]`, each rounded to two digits.

All values are read from `automl.model[:automodel].model`.
"""
function printsummary(io::IO, automl::Workflow)
    state = automl.model[:automodel].model
    twodigits = x -> round(x; digits=2)

    println(io, "pipelines: $(state[:dfpipelines].Description)")

    description = state[:bestpipeline].model[:description]
    println(io, "best_pipeline: $description")

    perf = state[:performance]
    avg = perf.mean[1]
    spread = perf.sd[1]
    println(io, "best_pipeline_performance: $(twodigits(avg)) ± $(twodigits(spread))")
end
125+
126+
"""
    dotrainandpredict(args::Dict)

Train a workflow for `args[:prediction_type]` (`"classification"` via
`autoclassmode`, `"regression"` via `autoregmode`) and print its summary with
`printsummary`.

The summary always goes to standard output. When `args[:output_file]` is not
the sentinel `"NONE"`, it is also written to that file (opened in `"w"` mode).
Returns `nothing`.

Throws an `ErrorException` for any other prediction type. Previously such a
value left the workflow as `nothing` and failed later inside `printsummary`
with an unrelated `MethodError`.
"""
function dotrainandpredict(args::Dict)
    # train model
    predtype = args[:prediction_type]
    automl = if predtype == "classification"
        autoclassmode(args)
    elseif predtype == "regression"
        autoregmode(args)
    else
        error("unknown predtype option")
    end
    ofile = args[:output_file]
    # File first, then stdout: same output order as before.
    if ofile != "NONE"
        open(ofile, "w") do stfile
            printsummary(stfile, automl)
        end
    end
    printsummary(stdout, automl)
    return nothing
end
144+
145+
# Entry point of the `autoai` app.
#
# Parses the command line and then either
#   * predicts only (`--predict_only`), using the model identified by `--runid`, or
#   * trains and predicts, after making sure worker processes exist and have
#     loaded AutoAI.
#
# Returns whatever the selected mode function returns.
#
# NOTE(review): `MyARGS` is not forwarded; `parse_commandline()` is called
# without arguments and therefore parses the global `ARGS`.
function (@main)(MyARGS)
    opts = parse_commandline()

    if opts[:predict_only]
        # predict only using run_id of model in the artifact
        return doprediction_only(opts)
    end

    # Add workers only when none exist yet; they use the active project so
    # they can load the same packages.
    if nprocs() == 1
        addprocs(opts[:nworkers]; exeflags=["--project=$(Base.active_project())"])
    end
    # `using` is only valid at top level, hence the `@eval`.
    @eval (@everywhere using AutoAI)

    # train and predict
    return dotrainandpredict(opts)
end

DockerizedAutoML/run.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ docker run -it --rm --platform=linux/amd64 automlai:v3.0
1111
julia --project -- ./main.jl -t regression -u http://mlflow.isiath.duckdns.org:8082 -p -r 064fb7a188d34a3da87f2271b8d8d9c2 -o /tmp/reg.txt ./iris_reg.csv
1212
julia --project -- ./main.jl -t classification -u http://mlflow.isiath.duckdns.org:8082 -p -r 8dbea59123ec469db3ee7b807b3ab6d9 -o /tmp/class.txt ./iris.csv
1313

14-
15-
docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression /data/iris_reg.csv
14+
docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression /data/iris_reg.csv
1615
docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t classification /data/iris.csv
1716

1817
docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression -p -r 064fb7a188d34a3da87f2271b8d8d9c2 /data/iris_reg.csv
1918
docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t classification -p -r 8dbea59123ec469db3ee7b807b3ab6d9 /data/iris.csv
19+
20+
julia -m AutoAI -u http://mlflow.isiath.duckdns.org:8082 -t regression ../DockerizedAutoML/iris_reg.csv
21+
# notes to use CondaPkg to update/install python modules during docker build

0 commit comments

Comments
 (0)