add julia app support

ppalmes · ppalmes · commit aa229b8bf338 · 2025-10-30T20:36:00.000Z
diff --git a/AutoAI/Project.toml b/AutoAI/Project.toml
@@ -1,10 +1,11 @@
 name = "AutoAI"
 uuid = "78f36755-6713-4987-acdd-2a78ef0fc2e4"
-authors = ["Paulito Palmes, PhD <ppalmes@gmail.com>"]
 version = "0.1.0"
+authors = ["Paulito Palmes, PhD <ppalmes@gmail.com>"]
 
 [deps]
 AMLPipelineBase = "e3c3008a-8869-4d53-9f34-c96f99c8a2b6"
+ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 AutoMLPipeline = "08437348-eef5-4817-bc1b-d4e9459680d6"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
@@ -23,6 +24,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 AMLPipelineBase = "0.1.18"
+ArgParse = "1.2.0"
 AutoMLPipeline = "0.4.6"
 CSV = "0.10.15"
 CondaPkg = "0.2.29"
@@ -39,6 +41,8 @@ Statistics = "1.11.1"
 StatsBase = "0.34.5"
 Test = "1.11.0"
 
+[apps.autoai]
+
 [extras]
 CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
diff --git a/AutoAI/src/AutoAI.jl b/AutoAI/src/AutoAI.jl
@@ -13,6 +13,7 @@ using AMLPipelineBase: AbsTypes, Utils
 
 export get_iris
 
+include("main.jl")
 
 function get_iris()
   iris = CSV.read(joinpath(Base.@__DIR__, "../../data", "iris.csv"), DataFrame)
diff --git a/AutoAI/src/main.jl b/AutoAI/src/main.jl
@@ -0,0 +1,161 @@
+using Distributed
+using ArgParse
+using CSV
+using DataFrames: DataFrame
+using AutoAI
+using Statistics
+
+
+"""
+    parse_commandline(args=ARGS)
+
+Parse the command-line options of the app and return them as a `Dict`
+keyed by `Symbol` (because of `as_symbols=true`).
+
+# Arguments
+- `args`: vector of raw argument strings. Defaults to the global `ARGS`, so
+  calling `parse_commandline()` behaves as before.
+
+# Options
+`--url`, `--prediction_type`, `--complexity`, `--output_file`, `--nfolds`,
+`--nworkers`, `--no_save`, `--predict_only`, `--runid`, plus the required
+positional `csvfile`. `--output_file` and `--runid` default to the string
+`"NONE"`.
+"""
+function parse_commandline(args=ARGS)
+    s = ArgParseSettings()
+    @add_arg_table! s begin
+        "--url", "-u"
+        help = "mlflow server url"
+        arg_type = String
+        default = "http://localhost:8080"
+        "--prediction_type", "-t"
+        help = "classification, regression, anomalydetection"
+        arg_type = String
+        default = "classification"
+        "--complexity", "-c"
+        help = "pipeline complexity"
+        arg_type = String
+        default = "low"
+        "--output_file", "-o"
+        help = "output location"
+        arg_type = String
+        default = "NONE"
+        "--nfolds", "-f"
+        help = "number of crossvalidation folds"
+        arg_type = Int64
+        default = 3
+        "--nworkers", "-w"
+        help = "number of workers"
+        arg_type = Int64
+        default = 5
+        "--no_save"
+        # The flag is a store_true switch named `no_save`; the help text must
+        # describe the negative, not "save model".
+        help = "do not save model"
+        action = :store_true
+        "--predict_only", "-p"
+        help = "no training, predict only"
+        action = :store_true
+        "--runid", "-r"
+        help = "runid of experiment for trained model"
+        arg_type = String
+        default = "NONE"
+        "csvfile"
+        help = "input csv file"
+        required = true
+    end
+    return parse_args(args, s; as_symbols=true)
+end
+
+
+"""
+    autoclassmode(args::Dict)
+
+Fit a classification workflow on the CSV file named by `args[:csvfile]`.
+
+The last column of the file is taken as the target and all preceding columns
+as the features. An `AutoMLFlowClassification` is built from `args[:url]` and
+the `:complexity`, `:nfolds`, `:nworkers` and `:prediction_type` entries,
+`fit_transform!` is called on it, and the fraction of outputs equal to the
+target is printed as `accuracy`. Returns the workflow object.
+"""
+function autoclassmode(args::Dict)
+    server = args[:url]
+    settings = (
+        complexity=args[:complexity],
+        nfolds=args[:nfolds],
+        nworkers=args[:nworkers],
+        prediction_type=args[:prediction_type],
+    )
+    impl_args = Dict(pairs(settings))
+    table = CSV.read(args[:csvfile], DataFrame)
+    features = table[:, 1:(end - 1)]
+    target = collect(table[:, end])
+    learner = AutoMLFlowClassification(Dict(:url => server, :impl_args => impl_args))
+    predicted = fit_transform!(learner, features, target)
+    println("accuracy = ", mean(target .== predicted))
+    return learner
+end
+
+"""
+    autoregmode(args::Dict)
+
+Fit a regression workflow on the CSV file named by `args[:csvfile]`.
+
+The last column of the file is taken as the target and all preceding columns
+as the features. An `AutoMLFlowRegression` is built from `args[:url]` and the
+`:complexity`, `:nfolds`, `:nworkers` and `:prediction_type` entries,
+`fit_transform!` is called on it, and the mean squared difference between
+target and output is printed as `mse`. Returns the workflow object.
+"""
+function autoregmode(args::Dict)
+    server = args[:url]
+    settings = (
+        complexity=args[:complexity],
+        nfolds=args[:nfolds],
+        nworkers=args[:nworkers],
+        prediction_type=args[:prediction_type],
+    )
+    impl_args = Dict(pairs(settings))
+    table = CSV.read(args[:csvfile], DataFrame)
+    features = table[:, 1:(end - 1)]
+    target = collect(table[:, end])
+    learner = AutoMLFlowRegression(Dict(:url => server, :impl_args => impl_args))
+    predicted = fit_transform!(learner, features, target)
+    println("mse = ", mean((target - predicted) .^ 2))
+    return learner
+end
+
+"""
+    doprediction_only(args::Dict)
+
+Run prediction only, with no training step.
+
+Reads the CSV file `args[:csvfile]` into a `DataFrame`, builds an
+`AutoMLFlowClassification` or `AutoMLFlowRegression` from `args[:runid]` and
+`args[:url]` depending on `args[:prediction_type]`, and calls `transform!` on
+the data. The result is printed to `stdout` and, unless `args[:output_file]`
+is `"NONE"`, also written to that file.
+
+Returns the value produced by `transform!`. Raises an error when
+`args[:prediction_type]` is neither `"classification"` nor `"regression"`.
+"""
+function doprediction_only(args::Dict)
+    fname = args[:csvfile]
+    X = CSV.read(fname, DataFrame)
+    # NOTE(review): `--runid` defaults to "NONE" and is forwarded unchecked;
+    # confirm how the workflow constructors treat that value.
+    run_id = args[:runid]
+    url = args[:url]
+    predtype = args[:prediction_type]
+    mlf = if predtype == "classification"
+        AutoMLFlowClassification(Dict(:run_id => run_id, :url => url))
+    elseif predtype == "regression"
+        AutoMLFlowRegression(Dict(:run_id => run_id, :url => url))
+    else
+        error("unknown predtype option")
+    end
+    Yn = transform!(mlf, X)
+    ofile = args[:output_file]
+    if ofile != "NONE"
+        open(ofile, "w") do stfile
+            println(stfile, "prediction: $Yn")
+            println(stdout, "prediction: $Yn")
+        end
+    else
+        println(stdout, "prediction: $Yn")
+    end
+    return Yn
+end
+
+"""
+    printsummary(io::IO, automl::Workflow)
+
+Write a three-line report about `automl` to `io`: the `Description` field of
+the `:dfpipelines` entry, the `:description` of the `:bestpipeline` entry, and
+the first `mean` and `sd` values of the `:performance` entry, each rounded to
+two digits.
+"""
+function printsummary(io::IO, automl::Workflow)
+    twodigits = x -> round(x, digits=2)
+    fitted = automl.model[:automodel]
+    # Looked up before anything is printed, as in the original statement order.
+    winner = fitted.model[:bestpipeline].model[:description]
+    println(io, "pipelines: $(fitted.model[:dfpipelines].Description)")
+    println(io, "best_pipeline: $winner")
+    avg = fitted.model[:performance].mean[1]
+    spread = fitted.model[:performance].sd[1]
+    println(io, "best_pipeline_performance: $(twodigits(avg)) ± $(twodigits(spread))")
+end
+
+"""
+    dotrainandpredict(args::Dict)
+
+Train a model according to `args[:prediction_type]` and print its summary.
+
+`"classification"` dispatches to `autoclassmode` and `"regression"` to
+`autoregmode`. The summary is printed to `stdout` and, unless
+`args[:output_file]` is `"NONE"`, also written to that file.
+
+Raises an error for any other prediction type, so that an unsupported value
+fails immediately instead of passing `nothing` on to `printsummary`.
+"""
+function dotrainandpredict(args::Dict)
+    # train model
+    predtype = args[:prediction_type]
+    automl = if predtype == "classification"
+        autoclassmode(args)
+    elseif predtype == "regression"
+        autoregmode(args)
+    else
+        # Without this branch `automl` would be `nothing` and the failure
+        # would only surface later as a MethodError in `printsummary`.
+        error("unknown predtype option")
+    end
+    ofile = args[:output_file]
+    if ofile != "NONE"
+        open(ofile, "w") do stfile
+            printsummary(stfile, automl)
+            printsummary(stdout, automl)
+        end
+    else
+        printsummary(stdout, automl)
+    end
+end
+
+# App entry point.
+#
+# Parses the command line with `parse_commandline()`, stores the requested
+# worker count in the module-level global `_workers`, then either predicts with
+# an existing model (`--predict_only`) or adds worker processes and trains.
+#
+# `MyARGS` is accepted to satisfy the entry-point signature but is not read
+# here; `parse_commandline()` is called without arguments.
+function (@main)(MyARGS)
+    opts = parse_commandline()
+    global _workers = opts[:nworkers]
+
+    if opts[:predict_only] == true
+        # predict only using run_id of model in the artifact
+        return doprediction_only(opts)
+    end
+
+    # Workers are only started on the training path. `@eval` is used so that
+    # `@everywhere` is evaluated as a top-level expression.
+    @eval (nprocs() == 1 && addprocs(_workers; exeflags=["--project=$(Base.active_project())"]))
+    @eval (@everywhere using AutoAI)
+
+    # train and predict
+    return dotrainandpredict(opts)
+end
diff --git a/DockerizedAutoML/run.sh b/DockerizedAutoML/run.sh
@@ -11,9 +11,11 @@ docker run -it --rm --platform=linux/amd64 automlai:v3.0
 julia --project -- ./main.jl -t regression -u http://mlflow.isiath.duckdns.org:8082 -p -r 064fb7a188d34a3da87f2271b8d8d9c2 -o /tmp/reg.txt ./iris_reg.csv
 julia --project -- ./main.jl -t classification -u http://mlflow.isiath.duckdns.org:8082 -p -r 8dbea59123ec469db3ee7b807b3ab6d9 -o /tmp/class.txt ./iris.csv
 
-
-docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression  /data/iris_reg.csv
+docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression /data/iris_reg.csv
 docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t classification /data/iris.csv
 
 docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t regression -p -r 064fb7a188d34a3da87f2271b8d8d9c2 /data/iris_reg.csv
 docker run -it --rm -v $(pwd):/data/ localhost/automlai:v3.0 -u http://mlflow.isiath.duckdns.org:8082 -t classification -p -r 8dbea59123ec469db3ee7b807b3ab6d9 /data/iris.csv
+
+julia -m AutoAI -u http://mlflow.isiath.duckdns.org:8082 -t regression ../DockerizedAutoML/iris_reg.csv
+# notes to use CondaPkg to update/install python modules during docker build