From bd5f46aa242bb5dd48aad87dcbc7cda8b3ff8a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Fri, 28 Nov 2025 11:56:12 +0100 Subject: [PATCH 1/6] only build Cholesky and LU cache if possible --- ext/LinearSolveCUDAExt.jl | 2 ++ ext/LinearSolveSparseArraysExt.jl | 12 +++++++++--- src/LinearSolve.jl | 2 ++ src/factorization.jl | 8 +++++++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 58e2c3444..7619c4e99 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -17,6 +17,8 @@ function LinearSolve.is_cusparse(A::Union{ CUDA.CUSPARSE.CuSparseMatrixCSR, CUDA.CUSPARSE.CuSparseMatrixCSC}) true end +LinearSolve.is_cusparse_csr(::CUDA.CUSPARSE.CuSparseMatrixCSR) = true +LinearSolve.is_cusparse_csc(::CUDA.CUSPARSE.CuSparseMatrixCSC) = true function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b, assump::OperatorAssumptions{Bool}) where {Tv, Ti} diff --git a/ext/LinearSolveSparseArraysExt.jl b/ext/LinearSolveSparseArraysExt.jl index f956539e3..17249618c 100644 --- a/ext/LinearSolveSparseArraysExt.jl +++ b/ext/LinearSolveSparseArraysExt.jl @@ -129,7 +129,7 @@ function LinearSolve.init_cacheval( maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES} if LinearSolve.is_cusparse(A) - ArrayInterface.lu_instance(A) + LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing else SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int64}( zero(Int64), zero(Int64), [Int64(1)], Int64[], T[])) @@ -141,7 +141,7 @@ function LinearSolve.init_cacheval( maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES} if LinearSolve.is_cusparse(A) - ArrayInterface.lu_instance(A) + LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing else SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int32}( zero(Int32), zero(Int32), [Int32(1)], Int32[], T[])) @@ -344,7 +344,13 @@ function LinearSolve.init_cacheval(alg::NormalCholeskyFactorization, Symmetric{T, <:AbstractSparseArray{T}}}, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES} - ArrayInterface.cholesky_instance(convert(AbstractMatrix, A)) + if LinearSolve.is_cusparse_csc(A) + nothing + elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A) + nothing + else + ArrayInterface.cholesky_instance(convert(AbstractMatrix, A)) + end end # Specialize QR for the non-square case diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index 37ed234f4..f21bc7331 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -478,6 +478,8 @@ ALREADY_WARNED_CUDSS = Ref{Bool}(false) error_no_cudss_lu(A) = nothing cudss_loaded(A) = false is_cusparse(A) = false +is_cusparse_csr(A) = false +is_cusparse_csc(A) = false export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization, GenericLUFactorization, SimpleLUFactorization, RFLUFactorization, ButterflyFactorization, diff --git a/src/factorization.jl b/src/factorization.jl index 144153123..ed7aaa723 100644 --- a/src/factorization.jl +++ b/src/factorization.jl @@ -395,7 +395,13 @@ end function init_cacheval( alg::CholeskyFactorization, A::AbstractArray{<:BLASELTYPES}, b, u, Pl, Pr, maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) - ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot) + if LinearSolve.is_cusparse_csc(A) + nothing + elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A) + nothing + else + ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot) + end end const PREALLOCATED_CHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot()) From b1a2c495945782815d88a9021457410d49588bdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Fri, 28 Nov 2025 11:57:26 +0100 Subject: [PATCH 2/6] always fall back to Krylov for CuSparseCSC --- ext/LinearSolveCUDAExt.jl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index 7619c4e99..9585efa15 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -33,6 +33,16 @@ function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b, end end +function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC, b, + assump::OperatorAssumptions{Bool}) + if LinearSolve.cudss_loaded(A) + @warn("CUDSS.jl does not support CuSparseMatrixCSC for LU Factorizations, consider using CuSparseMatrixCSR instead. Falling back to Krylov", maxlog=1) + else + @warn("CuSparseMatrixCSC does not support LU Factorization falling back to Krylov. Consider using CUDSS.jl together with CuSparseMatrixCSR", maxlog=1) + end + LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES) +end + function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR) if !LinearSolve.cudss_loaded(A) error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.") From 7769ca380d8b28b42596b993cf330d5588b5d5a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Thu, 27 Nov 2025 15:41:01 +0100 Subject: [PATCH 3/6] fix CUSOLVERF test --- ext/LinearSolveCUSOLVERRFExt.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/LinearSolveCUSOLVERRFExt.jl b/ext/LinearSolveCUSOLVERRFExt.jl index 55a7f5f35..2e4e2364f 100644 --- a/ext/LinearSolveCUSOLVERRFExt.jl +++ b/ext/LinearSolveCUSOLVERRFExt.jl @@ -1,6 +1,6 @@ module LinearSolveCUSOLVERRFExt -using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions +using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions, LinearVerbosity using CUSOLVERRF: CUSOLVERRF, RFLU, CUDA using SparseArrays: SparseArrays, SparseMatrixCSC, nnz using CUSOLVERRF.CUDA.CUSPARSE: CuSparseMatrixCSR From ded449e1114b6fa002971de61b37862ad5dbe843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Thu, 27 Nov 2025 15:41:24 +0100 Subject: [PATCH 4/6] add test for CuSparseMatrixCSR + Cholesky --- test/gpu/cuda.jl | 65 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl index 0d88bb741..1bb0fc71f 100644 --- a/test/gpu/cuda.jl +++ b/test/gpu/cuda.jl @@ -1,7 +1,66 @@ using LinearSolve, CUDA, LinearAlgebra, SparseArrays, StableRNGs -using CUDA.CUSPARSE, CUDSS +using CUDA.CUSPARSE using Test +@testset "Test default solver choice for CuSparse" begin + b = Float64[1, 2, 3, 4] + b_gpu = CUDA.adapt(CuArray, b) + + A = Float64[1 1 0 0 + 0 1 1 0 + 0 0 3 1 + 0 0 0 4] + A_gpu_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A)) + A_gpu_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A)) + prob_csr = LinearProblem(A_gpu_csr, b_gpu) + prob_csc = LinearProblem(A_gpu_csc, b_gpu) + + A_sym = Float64[1 1 0 0 + 1 0 0 2 + 0 0 3 0 + 0 2 0 0] + A_gpu_sym_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A_sym)) + A_gpu_sym_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A_sym)) + prob_sym_csr = LinearProblem(A_gpu_sym_csr, b_gpu) + prob_sym_csc = LinearProblem(A_gpu_sym_csc, b_gpu) + + @testset "Test without CUDSS loaded" begin + # assert CuDSS is not loaded yet + @test !LinearSolve.cudss_loaded(A_gpu_csr) + # csr fallback to krylov + alg = solve(prob_csr).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + # csc fallback to krylov + alg = solve(prob_csc).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + # csr symmetric fallback to krylov + alg = solve(prob_sym_csr).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + # csc symmetric fallback to krylov + alg = solve(prob_sym_csc).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + end + + # after loading CUDSS, it should fall back tu LU factorization + using CUDSS + + @testset "Test with CUDSS loaded" begin + @test LinearSolve.cudss_loaded(A_gpu_csr) + # csr uses LU + alg = solve(prob_csr).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization + # csc fallback to krylov + alg = solve(prob_csc).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + # csr symmetric uses LU/cholesky + alg = solve(prob_sym_csr).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization + # csc symetric fallback to krylov + alg = solve(prob_sym_csc).alg + @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES + end +end + CUDA.allowscalar(false) n = 8 @@ -96,9 +155,9 @@ end @testset "CUDSS" begin T = Float32 n = 100 - A_cpu = sprand(T, n, n, 0.05) + I + A_cpu = sprand(rng, T, n, n, 0.05) + I x_cpu = zeros(T, n) - b_cpu = rand(T, n) + b_cpu = rand(rng, T, n) A_gpu_csr = CuSparseMatrixCSR(A_cpu) b_gpu = CuVector(b_cpu) From a9b47c8b3b521359f699f5c5a656b3820c60853a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Fri, 28 Nov 2025 12:11:00 +0100 Subject: [PATCH 5/6] raise CUDSS ver since cholesky requires recent fix --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5f386a640..74f320730 100644 --- a/Project.toml +++ b/Project.toml @@ -86,7 +86,7 @@ ArrayInterface = "7.17" BandedMatrices = "1.8" BlockDiagonals = "0.2" CUDA = "5.5" -CUDSS = "0.4, 0.6.1" +CUDSS = "0.6.3" CUSOLVERRF = "0.2.6" ChainRulesCore = "1.25" CliqueTrees = "1.11.0" From 3d0f5570ca5c9a267c5e238b6c7693deea961d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hans=20W=C3=BCrfel?= Date: Fri, 28 Nov 2025 12:28:26 +0100 Subject: [PATCH 6/6] fix spelling --- test/gpu/cuda.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl index 1bb0fc71f..8a3edf8d1 100644 --- a/test/gpu/cuda.jl +++ b/test/gpu/cuda.jl @@ -41,7 +41,6 @@ using Test @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES end - # after loading CUDSS, it should fall back tu LU factorization using CUDSS @testset "Test with CUDSS loaded" begin @@ -55,7 +54,7 @@ using Test # csr symmetric uses LU/cholesky alg = solve(prob_sym_csr).alg @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization - # csc symetric fallback to krylov + # csc symmetric fallback to krylov alg = solve(prob_sym_csc).alg @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES end